@yasserkhanorg/e2e-agents 1.4.0 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent/feedback.d.ts +16 -0
- package/dist/agent/feedback.d.ts.map +1 -1
- package/dist/agent/feedback.js +62 -0
- package/dist/agent/process_runner.d.ts +1 -1
- package/dist/agent/process_runner.d.ts.map +1 -1
- package/dist/agent/process_runner.js +3 -3
- package/dist/api.d.ts.map +1 -1
- package/dist/api.js +5 -2
- package/dist/engine/plan_builder.d.ts +2 -1
- package/dist/engine/plan_builder.d.ts.map +1 -1
- package/dist/engine/plan_builder.js +22 -9
- package/dist/esm/agent/feedback.js +61 -0
- package/dist/esm/agent/process_runner.js +3 -3
- package/dist/esm/api.js +5 -2
- package/dist/esm/engine/plan_builder.js +22 -9
- package/dist/esm/index.js +1 -1
- package/dist/esm/pipeline/spec_verifier.js +75 -0
- package/dist/esm/pipeline/stage3_generation.js +122 -4
- package/dist/esm/pipeline/stage4_heal.js +146 -3
- package/dist/esm/prompts/heal.js +4 -0
- package/dist/esm/qa-agent/phase2/agent_loop.js +60 -24
- package/dist/esm/qa-agent/phase2/exploration_state.js +21 -0
- package/dist/esm/qa-agent/phase2/tools.js +99 -1
- package/dist/esm/qa-agent/phase3/reporter.js +31 -4
- package/dist/esm/validation/guardrails.js +1 -0
- package/dist/index.d.ts +2 -2
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +3 -2
- package/dist/pipeline/orchestrator.d.ts.map +1 -1
- package/dist/pipeline/spec_verifier.d.ts +20 -0
- package/dist/pipeline/spec_verifier.d.ts.map +1 -0
- package/dist/pipeline/spec_verifier.js +79 -0
- package/dist/pipeline/stage3_generation.d.ts +10 -0
- package/dist/pipeline/stage3_generation.d.ts.map +1 -1
- package/dist/pipeline/stage3_generation.js +120 -2
- package/dist/pipeline/stage4_heal.d.ts +4 -0
- package/dist/pipeline/stage4_heal.d.ts.map +1 -1
- package/dist/pipeline/stage4_heal.js +145 -2
- package/dist/prompts/heal.d.ts +2 -0
- package/dist/prompts/heal.d.ts.map +1 -1
- package/dist/prompts/heal.js +4 -0
- package/dist/qa-agent/phase2/agent_loop.d.ts.map +1 -1
- package/dist/qa-agent/phase2/agent_loop.js +60 -24
- package/dist/qa-agent/phase2/exploration_state.d.ts.map +1 -1
- package/dist/qa-agent/phase2/exploration_state.js +21 -0
- package/dist/qa-agent/phase2/tools.d.ts.map +1 -1
- package/dist/qa-agent/phase2/tools.js +99 -1
- package/dist/qa-agent/phase3/reporter.js +31 -4
- package/dist/qa-agent/types.d.ts +9 -1
- package/dist/qa-agent/types.d.ts.map +1 -1
- package/dist/validation/guardrails.d.ts +2 -0
- package/dist/validation/guardrails.d.ts.map +1 -1
- package/dist/validation/guardrails.js +4 -1
- package/package.json +1 -1
package/dist/agent/feedback.d.ts
CHANGED
|
@@ -71,5 +71,21 @@ export declare function appendFeedbackAndRecompute(appRoot: string, input: Recom
|
|
|
71
71
|
calibration: CalibrationSummary;
|
|
72
72
|
};
|
|
73
73
|
export declare function readCalibration(appRoot: string): CalibrationSummary | null;
|
|
74
|
+
/**
 * Confidence thresholds derived from stored calibration feedback.
 * Produced by getAdaptiveThresholds and consumed by buildPlanFromImpact.
 */
export interface AdaptiveThresholds {
    /**
     * Replacement for the policy's minConfidenceForTargeted when the caller's
     * policy override does not set that field. Defaults to 60 and is clamped
     * to the range 40..80.
     */
    minConfidenceForTargeted: number;
    /**
     * Replacement for the policy's safeMergeMinConfidence when the caller's
     * policy override does not set that field. Defaults to 85 and is clamped
     * to the range 70..95.
     */
    safeMergeMinConfidence: number;
    /** Subsystems that should always be included regardless of confidence */
    alwaysIncludeSubsystems: string[];
    /** Human-readable adjustment reasons for logging (never empty) */
    adjustmentReasons: string[];
}

/**
 * Compute adaptive thresholds from the calibration data stored under appRoot.
 *
 * Rules (only applied when the relevant window has at least 3 samples):
 * - 7-day recall < 0.8: lower both thresholds by 10 (catch more escapes).
 * - otherwise, 7-day precision > 0.9: raise both thresholds by 5
 *   (fewer unnecessary tests). The recall rule takes priority; the two
 *   adjustments are never combined.
 * - per subsystem: a 30-day falseNegativeRate > 0.3 adds the subsystem to
 *   alwaysIncludeSubsystems.
 *
 * @param appRoot directory that contains `.e2e-ai-agents/calibration.json`
 * @returns the defaults when no calibration data exists or it has zero samples
 */
export declare function getAdaptiveThresholds(appRoot: string): AdaptiveThresholds;
|
|
74
90
|
export declare function readFlakyTests(appRoot: string): FlakySummary | null;
|
|
75
91
|
//# sourceMappingURL=feedback.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"feedback.d.ts","sourceRoot":"","sources":["../../src/agent/feedback.ts"],"names":[],"mappings":"AAOA,MAAM,WAAW,2BAA2B;IACxC,SAAS,EAAE,MAAM,CAAC;IAClB,MAAM,EAAE,OAAO,GAAG,UAAU,GAAG,MAAM,CAAC;IACtC,gBAAgB,EAAE,MAAM,EAAE,CAAC;IAC3B,aAAa,EAAE,MAAM,EAAE,CAAC;IACxB,WAAW,EAAE,MAAM,EAAE,CAAC;IACtB,eAAe,CAAC,EAAE,MAAM,EAAE,CAAC;CAC9B;AAED,MAAM,WAAW,kBAAkB;IAC/B,aAAa,EAAE,OAAO,CAAC;IACvB,WAAW,EAAE,MAAM,CAAC;IACpB,OAAO,EAAE,MAAM,CAAC;IAChB,OAAO,EAAE;QACL,SAAS,EAAE,MAAM,CAAC;QAClB,MAAM,EAAE,MAAM,CAAC;QACf,iBAAiB,EAAE,MAAM,CAAC;KAC7B,CAAC;IACF,QAAQ,EAAE;QACN,SAAS,EAAE,MAAM,CAAC;QAClB,MAAM,EAAE,MAAM,CAAC;QACf,iBAAiB,EAAE,MAAM,CAAC;QAC1B,OAAO,EAAE,MAAM,CAAC;KACnB,CAAC;IACF,SAAS,EAAE;QACP,SAAS,EAAE,MAAM,CAAC;QAClB,MAAM,EAAE,MAAM,CAAC;QACf,iBAAiB,EAAE,MAAM,CAAC;QAC1B,OAAO,EAAE,MAAM,CAAC;KACnB,CAAC;IACF,WAAW,EAAE,MAAM,CACnB,MAAM,EACN;QACI,SAAS,EAAE,MAAM,CAAC;QAClB,MAAM,EAAE,MAAM,CAAC;QACf,iBAAiB,EAAE,MAAM,CAAC;QAC1B,OAAO,EAAE,MAAM,CAAC;QAChB,QAAQ,EAAE;YACN,SAAS,EAAE,MAAM,CAAC;YAClB,MAAM,EAAE,MAAM,CAAC;YACf,iBAAiB,EAAE,MAAM,CAAC;YAC1B,OAAO,EAAE,MAAM,CAAC;SACnB,CAAC;QACF,SAAS,EAAE;YACP,SAAS,EAAE,MAAM,CAAC;YAClB,MAAM,EAAE,MAAM,CAAC;YACf,iBAAiB,EAAE,MAAM,CAAC;YAC1B,OAAO,EAAE,MAAM,CAAC;SACnB,CAAC;KACL,CACA,CAAC;CACL;AAOD,MAAM,WAAW,YAAY;IACzB,aAAa,EAAE,OAAO,CAAC;IACvB,WAAW,EAAE,MAAM,CAAC;IACpB,KAAK,EAAE,KAAK,CAAC;QACT,IAAI,EAAE,MAAM,CAAC;QACb,SAAS,EAAE,MAAM,CAAC;QAClB,MAAM,EAAE,MAAM,EAAE,CAAC;QACjB,SAAS,EAAE,MAAM,CAAC;QAClB,WAAW,EAAE,MAAM,CAAC;QACpB,YAAY,EAAE,MAAM,CAAC;QACrB,KAAK,EAAE,IAAI,GAAG,MAAM,GAAG,QAAQ,CAAC;QAChC,UAAU,EAAE,OAAO,CAAC;QACpB,eAAe,EAAE,MAAM,GAAG,QAAQ,GAAG,kBAAkB,CAAC;QACxD,aAAa,CAAC,EAAE,MAAM,CAAC;QACvB,OAAO,EAAE,MAAM,CAAC;QAChB,SAAS,EAAE,MAAM,CAAC;QAClB,UAAU,EAAE,MAAM,CAAC;KACtB,CAAC,CAAC;CACN;AAyQD,wBAAgB,0BAA0B,CACtC,OAAO,EAAE,MAAM,EACf,KAAK,EAAE,2BAA2B,GACnC;IAAC,YAAY,EAAE,MAAM,CAAC;IAAC,eAAe,EAAE,MAAM,CAAC;IAAC,WAAW,EAAE,kBAAkB,CAAA;CAAC,CAwBlF;AAED,wBAAgB,eAAe,CAAC,OAAO,EAAE,MAAM,GAAG,kBAAkB,GAAG,IAAI,CAE1E;AAED,wBAAgB,cAAc,CAAC,OAAO,EAAE,MAAM
,GAAG,YAAY,GAAG,IAAI,CAEnE"}
|
|
1
|
+
{"version":3,"file":"feedback.d.ts","sourceRoot":"","sources":["../../src/agent/feedback.ts"],"names":[],"mappings":"AAOA,MAAM,WAAW,2BAA2B;IACxC,SAAS,EAAE,MAAM,CAAC;IAClB,MAAM,EAAE,OAAO,GAAG,UAAU,GAAG,MAAM,CAAC;IACtC,gBAAgB,EAAE,MAAM,EAAE,CAAC;IAC3B,aAAa,EAAE,MAAM,EAAE,CAAC;IACxB,WAAW,EAAE,MAAM,EAAE,CAAC;IACtB,eAAe,CAAC,EAAE,MAAM,EAAE,CAAC;CAC9B;AAED,MAAM,WAAW,kBAAkB;IAC/B,aAAa,EAAE,OAAO,CAAC;IACvB,WAAW,EAAE,MAAM,CAAC;IACpB,OAAO,EAAE,MAAM,CAAC;IAChB,OAAO,EAAE;QACL,SAAS,EAAE,MAAM,CAAC;QAClB,MAAM,EAAE,MAAM,CAAC;QACf,iBAAiB,EAAE,MAAM,CAAC;KAC7B,CAAC;IACF,QAAQ,EAAE;QACN,SAAS,EAAE,MAAM,CAAC;QAClB,MAAM,EAAE,MAAM,CAAC;QACf,iBAAiB,EAAE,MAAM,CAAC;QAC1B,OAAO,EAAE,MAAM,CAAC;KACnB,CAAC;IACF,SAAS,EAAE;QACP,SAAS,EAAE,MAAM,CAAC;QAClB,MAAM,EAAE,MAAM,CAAC;QACf,iBAAiB,EAAE,MAAM,CAAC;QAC1B,OAAO,EAAE,MAAM,CAAC;KACnB,CAAC;IACF,WAAW,EAAE,MAAM,CACnB,MAAM,EACN;QACI,SAAS,EAAE,MAAM,CAAC;QAClB,MAAM,EAAE,MAAM,CAAC;QACf,iBAAiB,EAAE,MAAM,CAAC;QAC1B,OAAO,EAAE,MAAM,CAAC;QAChB,QAAQ,EAAE;YACN,SAAS,EAAE,MAAM,CAAC;YAClB,MAAM,EAAE,MAAM,CAAC;YACf,iBAAiB,EAAE,MAAM,CAAC;YAC1B,OAAO,EAAE,MAAM,CAAC;SACnB,CAAC;QACF,SAAS,EAAE;YACP,SAAS,EAAE,MAAM,CAAC;YAClB,MAAM,EAAE,MAAM,CAAC;YACf,iBAAiB,EAAE,MAAM,CAAC;YAC1B,OAAO,EAAE,MAAM,CAAC;SACnB,CAAC;KACL,CACA,CAAC;CACL;AAOD,MAAM,WAAW,YAAY;IACzB,aAAa,EAAE,OAAO,CAAC;IACvB,WAAW,EAAE,MAAM,CAAC;IACpB,KAAK,EAAE,KAAK,CAAC;QACT,IAAI,EAAE,MAAM,CAAC;QACb,SAAS,EAAE,MAAM,CAAC;QAClB,MAAM,EAAE,MAAM,EAAE,CAAC;QACjB,SAAS,EAAE,MAAM,CAAC;QAClB,WAAW,EAAE,MAAM,CAAC;QACpB,YAAY,EAAE,MAAM,CAAC;QACrB,KAAK,EAAE,IAAI,GAAG,MAAM,GAAG,QAAQ,CAAC;QAChC,UAAU,EAAE,OAAO,CAAC;QACpB,eAAe,EAAE,MAAM,GAAG,QAAQ,GAAG,kBAAkB,CAAC;QACxD,aAAa,CAAC,EAAE,MAAM,CAAC;QACvB,OAAO,EAAE,MAAM,CAAC;QAChB,SAAS,EAAE,MAAM,CAAC;QAClB,UAAU,EAAE,MAAM,CAAC;KACtB,CAAC,CAAC;CACN;AAyQD,wBAAgB,0BAA0B,CACtC,OAAO,EAAE,MAAM,EACf,KAAK,EAAE,2BAA2B,GACnC;IAAC,YAAY,EAAE,MAAM,CAAC;IAAC,eAAe,EAAE,MAAM,CAAC;IAAC,WAAW,EAAE,kBAAkB,CAAA;CAAC,CAwBlF;AAED,wBAAgB,eAAe,CAAC,OAAO,EAAE,MAAM,GAAG,kBAAkB,GAAG,IAAI,CAE1E;AAED,MAAM,WAAW,kBAAkB;IAC/B,wBAAwB,E
AAE,MAAM,CAAC;IACjC,sBAAsB,EAAE,MAAM,CAAC;IAC/B,yEAAyE;IACzE,uBAAuB,EAAE,MAAM,EAAE,CAAC;IAClC,oDAAoD;IACpD,iBAAiB,EAAE,MAAM,EAAE,CAAC;CAC/B;AAOD;;;;;;GAMG;AACH,wBAAgB,qBAAqB,CAAC,OAAO,EAAE,MAAM,GAAG,kBAAkB,CA6DzE;AAED,wBAAgB,cAAc,CAAC,OAAO,EAAE,MAAM,GAAG,YAAY,GAAG,IAAI,CAEnE"}
|
package/dist/agent/feedback.js
CHANGED
|
@@ -4,6 +4,7 @@
|
|
|
4
4
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
5
5
|
exports.appendFeedbackAndRecompute = appendFeedbackAndRecompute;
|
|
6
6
|
exports.readCalibration = readCalibration;
|
|
7
|
+
exports.getAdaptiveThresholds = getAdaptiveThresholds;
|
|
7
8
|
exports.readFlakyTests = readFlakyTests;
|
|
8
9
|
const fs_1 = require("fs");
|
|
9
10
|
const path_1 = require("path");
|
|
@@ -256,6 +257,67 @@ function appendFeedbackAndRecompute(appRoot, input) {
|
|
|
256
257
|
function readCalibration(appRoot) {
|
|
257
258
|
return readJson((0, path_1.join)(appRoot, '.e2e-ai-agents', 'calibration.json'));
|
|
258
259
|
}
|
|
260
|
+
// Baseline thresholds used when calibration gives no reason to adjust.
const DEFAULT_MIN_CONFIDENCE = 60;
const DEFAULT_SAFE_MERGE = 85;
// Hard bounds for the adapted minConfidenceForTargeted value.
const MIN_CONFIDENCE_FLOOR = 40;
const MIN_CONFIDENCE_CEILING = 80;
/**
 * Compute adaptive thresholds based on calibration data.
 * - If recent (7d) recall < 0.8: lower both thresholds by 10 (catch more escapes)
 * - Otherwise, if recent (7d) precision > 0.9: raise both thresholds by 5
 *   (fewer unnecessary tests). Recall takes priority; the two never combine.
 * - Per-subsystem: if falseNegativeRate > 0.3 in 30d, always include tests
 * A window is only trusted once it has at least 3 samples.
 * Returns defaults if no calibration data exists.
 *
 * calibration.json is read from disk without schema validation, so every
 * nested field is treated as possibly missing: a partial or stale file yields
 * the defaults for the affected rule instead of throwing and breaking the
 * recommendation APIs that call this on every run.
 *
 * @param appRoot directory containing `.e2e-ai-agents/calibration.json`
 * @returns thresholds, forced subsystems and a non-empty list of reasons
 */
function getAdaptiveThresholds(appRoot) {
    const calibration = readCalibration(appRoot);
    if (!calibration || calibration.samples === 0) {
        return {
            minConfidenceForTargeted: DEFAULT_MIN_CONFIDENCE,
            safeMergeMinConfidence: DEFAULT_SAFE_MERGE,
            alwaysIncludeSubsystems: [],
            adjustmentReasons: ['No calibration data — using defaults'],
        };
    }
    const reasons = [];
    const alwaysInclude = [];
    let minConfidence = DEFAULT_MIN_CONFIDENCE;
    let safeMerge = DEFAULT_SAFE_MERGE;
    // Adjust based on the 7-day window (skipped when the window is absent).
    const recent7d = calibration.recent7d;
    if (recent7d && recent7d.samples >= 3) {
        // Require real numbers so a malformed metric cannot reach .toFixed().
        if (typeof recent7d.recall === 'number' && recent7d.recall < 0.8) {
            const adjustment = 10;
            minConfidence -= adjustment;
            safeMerge -= adjustment;
            reasons.push(`Lowering confidence threshold by ${adjustment} (7d recall: ${recent7d.recall.toFixed(2)})`);
        }
        else if (typeof recent7d.precision === 'number' && recent7d.precision > 0.9) {
            const adjustment = 5;
            minConfidence += adjustment;
            safeMerge += adjustment;
            reasons.push(`Raising confidence threshold by ${adjustment} (7d precision: ${recent7d.precision.toFixed(2)})`);
        }
    }
    // Clamp to safe ranges
    minConfidence = Math.max(MIN_CONFIDENCE_FLOOR, Math.min(MIN_CONFIDENCE_CEILING, minConfidence));
    safeMerge = Math.max(70, Math.min(95, safeMerge));
    // Per-subsystem blind spot detection (30-day window)
    for (const [subsystem, metrics] of Object.entries(calibration.bySubsystem ?? {})) {
        const recent = metrics?.recent30d;
        if (!recent) {
            continue;
        }
        const falseNegativeRate = recent.falseNegativeRate;
        if (recent.samples >= 3 && typeof falseNegativeRate === 'number' && falseNegativeRate > 0.3) {
            alwaysInclude.push(subsystem);
            reasons.push(`Always including ${subsystem} tests (30d false-negative rate: ${falseNegativeRate.toFixed(2)})`);
        }
    }
    // Callers log the reasons, so always report something.
    if (reasons.length === 0) {
        reasons.push('Calibration data within normal range — using defaults');
    }
    return {
        minConfidenceForTargeted: minConfidence,
        safeMergeMinConfidence: safeMerge,
        alwaysIncludeSubsystems: alwaysInclude,
        adjustmentReasons: reasons,
    };
}
|
|
259
321
|
function readFlakyTests(appRoot) {
|
|
260
322
|
return readJson((0, path_1.join)(appRoot, '.e2e-ai-agents', 'flaky-tests.json'));
|
|
261
323
|
}
|
|
@@ -2,7 +2,7 @@ import type { PipelineConfig } from './config.js';
|
|
|
2
2
|
import type { CommandResult } from './pipeline_types.js';
|
|
3
3
|
export declare function resolvePlaywrightBinary(testsRoot: string): string | null;
|
|
4
4
|
export declare function summarizeCommandOutput(stdout: string, stderr: string): string;
|
|
5
|
-
export declare function runCommand(command: string, args: string[], cwd: string, timeoutMs?: number): CommandResult;
|
|
5
|
+
export declare function runCommand(command: string, args: string[], cwd: string, timeoutMs?: number, envOverride?: NodeJS.ProcessEnv): CommandResult;
|
|
6
6
|
export declare function resolveMcpCommandTimeoutMs(pipeline: PipelineConfig): number;
|
|
7
7
|
export declare function resolveMcpRetries(pipeline: PipelineConfig): number;
|
|
8
8
|
export declare function isRetryableMcpFailure(result: CommandResult): boolean;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"process_runner.d.ts","sourceRoot":"","sources":["../../src/agent/process_runner.ts"],"names":[],"mappings":"AAMA,OAAO,KAAK,EAAC,cAAc,EAAC,MAAM,aAAa,CAAC;AAChD,OAAO,KAAK,EAAC,aAAa,EAAC,MAAM,qBAAqB,CAAC;AAEvD,wBAAgB,uBAAuB,CAAC,SAAS,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI,CAUxE;AAED,wBAAgB,sBAAsB,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,GAAG,MAAM,CAO7E;AAED,wBAAgB,UAAU,CAAC,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,EAAE,GAAG,EAAE,MAAM,EAAE,SAAS,SAAiB,GAAG,aAAa,
|
|
1
|
+
{"version":3,"file":"process_runner.d.ts","sourceRoot":"","sources":["../../src/agent/process_runner.ts"],"names":[],"mappings":"AAMA,OAAO,KAAK,EAAC,cAAc,EAAC,MAAM,aAAa,CAAC;AAChD,OAAO,KAAK,EAAC,aAAa,EAAC,MAAM,qBAAqB,CAAC;AAEvD,wBAAgB,uBAAuB,CAAC,SAAS,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI,CAUxE;AAED,wBAAgB,sBAAsB,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,GAAG,MAAM,CAO7E;AAED,wBAAgB,UAAU,CAAC,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,EAAE,GAAG,EAAE,MAAM,EAAE,SAAS,SAAiB,EAAE,WAAW,CAAC,EAAE,MAAM,CAAC,UAAU,GAAG,aAAa,CAsBnJ;AAED,wBAAgB,0BAA0B,CAAC,QAAQ,EAAE,cAAc,GAAG,MAAM,CAM3E;AAED,wBAAgB,iBAAiB,CAAC,QAAQ,EAAE,cAAc,GAAG,MAAM,CAMlE;AAED,wBAAgB,qBAAqB,CAAC,MAAM,EAAE,aAAa,GAAG,OAAO,CAQpE;AAED,wBAAgB,qBAAqB,CACjC,OAAO,EAAE,MAAM,EACf,IAAI,EAAE,MAAM,EAAE,EACd,GAAG,EAAE,MAAM,EACX,SAAS,EAAE,MAAM,EACjB,OAAO,EAAE,MAAM,GAChB,aAAa,CAYf"}
|
|
@@ -31,12 +31,12 @@ function summarizeCommandOutput(stdout, stderr) {
|
|
|
31
31
|
const lines = combined.split('\n').slice(-20);
|
|
32
32
|
return lines.join('\n').slice(0, 2000);
|
|
33
33
|
}
|
|
34
|
-
function runCommand(command, args, cwd, timeoutMs = 60 * 60 * 1000) {
|
|
34
|
+
function runCommand(command, args, cwd, timeoutMs = 60 * 60 * 1000, envOverride) {
|
|
35
35
|
// When spawning `claude`, unset CLAUDECODE so nested invocations are allowed.
|
|
36
36
|
// Claude Code sets this variable to block nested sessions; child processes
|
|
37
37
|
// that spawn their own claude instance must run without it.
|
|
38
|
-
let env;
|
|
39
|
-
if (command === 'claude') {
|
|
38
|
+
let env = envOverride;
|
|
39
|
+
if (!env && command === 'claude') {
|
|
40
40
|
const { CLAUDECODE: _, ...rest } = process.env;
|
|
41
41
|
env = rest;
|
|
42
42
|
}
|
package/dist/api.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"api.d.ts","sourceRoot":"","sources":["../src/api.ts"],"names":[],"mappings":"AAIA,OAAO,EAAgB,KAAK,eAAe,EAAC,MAAM,mBAAmB,CAAC;AACtE,OAAO,EAEH,KAAK,UAAU,EAClB,MAAM,iBAAiB,CAAC;AACzB,OAAO,EAAmC,KAAK,YAAY,EAAC,MAAM,2BAA2B,CAAC;
|
|
1
|
+
{"version":3,"file":"api.d.ts","sourceRoot":"","sources":["../src/api.ts"],"names":[],"mappings":"AAIA,OAAO,EAAgB,KAAK,eAAe,EAAC,MAAM,mBAAmB,CAAC;AACtE,OAAO,EAEH,KAAK,UAAU,EAClB,MAAM,iBAAiB,CAAC;AACzB,OAAO,EAAmC,KAAK,YAAY,EAAC,MAAM,2BAA2B,CAAC;AAU9F,OAAO,EAAqB,KAAK,kBAAkB,EAAC,MAAM,2BAA2B,CAAC;AAEtF,OAAO,EAAyB,KAAK,6BAA6B,EAAE,KAAK,4BAA4B,EAAC,MAAM,oBAAoB,CAAC;AACjI,OAAO,EAEH,KAAK,yBAAyB,EAC9B,KAAK,wBAAwB,EAChC,MAAM,gCAAgC,CAAC;AACxC,OAAO,EAGH,KAAK,yBAAyB,EACjC,MAAM,iCAAiC,CAAC;AAEzC,MAAM,WAAW,eAAgB,SAAQ,IAAI,CAAC,eAAe,EAAE,MAAM,CAAC;IAClE,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,KAAK,CAAC,EAAE,OAAO,CAAC;IAChB,aAAa,CAAC,EAAE,OAAO,CAAC;CAC3B;AAED,MAAM,WAAW,4BAA4B;IACzC,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,OAAO,EAAE,OAAO,CAAC;IACjB,OAAO,CAAC,EAAE,yBAAyB,CAAC;CACvC;AAED,MAAM,WAAW,6BAA6B;IAC1C,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,UAAU,EAAE,MAAM,CAAC;IACnB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,gBAAgB,CAAC,EAAE,MAAM,CAAC;CAC7B;AAcD,wBAAgB,qBAAqB,CAAC,OAAO,EAAE,6BAA6B,GAAG,4BAA4B,CAE1G;AAED,wBAAgB,kBAAkB,CAAC,OAAO,EAAE,4BAA4B,GAAG,wBAAwB,CASlG;AAED,MAAM,WAAW,sBAAsB;IACnC,MAAM,EAAE,YAAY,CAAC;IACrB,IAAI,EAAE,UAAU,CAAC;IACjB,QAAQ,EAAE,MAAM,CAAC;IACjB,iBAAiB,EAAE,MAAM,CAAC;IAC1B,aAAa,EAAE,MAAM,CAAC;CACzB;AAED,wBAAgB,0BAA0B,CAAC,OAAO,GAAE,eAAoB,GAAG,YAAY,CAQtF;AAED,wBAAgB,2BAA2B,CAAC,OAAO,GAAE,eAAoB,GAAG,sBAAsB,CAejG;AAED,wBAAsB,gBAAgB,CAAC,OAAO,GAAE,eAAoB,GAAG,OAAO,CAAC,sBAAsB,GAAG;IAAE,YAAY,CAAC,EAAE,kBAAkB,CAAA;CAAE,CAAC,CAiD7I;AAED,wBAAgB,mBAAmB,CAAC,OAAO,EAAE,6BAA6B,GAAG,yBAAyB,CAkBrG"}
|
package/dist/api.js
CHANGED
|
@@ -13,6 +13,7 @@ const plan_js_1 = require("./agent/plan.js");
|
|
|
13
13
|
const impact_engine_js_1 = require("./engine/impact_engine.js");
|
|
14
14
|
const plan_builder_js_1 = require("./engine/plan_builder.js");
|
|
15
15
|
const git_js_1 = require("./agent/git.js");
|
|
16
|
+
const feedback_js_1 = require("./agent/feedback.js");
|
|
16
17
|
const diff_loader_js_1 = require("./engine/diff_loader.js");
|
|
17
18
|
const ai_enrichment_js_1 = require("./engine/ai_enrichment.js");
|
|
18
19
|
const anthropic_provider_js_1 = require("./anthropic_provider.js");
|
|
@@ -60,7 +61,8 @@ function recommendTestsDeterministic(options = {}) {
|
|
|
60
61
|
testsRoot: reportRoot,
|
|
61
62
|
routeFamilies: config.routeFamilies,
|
|
62
63
|
});
|
|
63
|
-
const
|
|
64
|
+
const adaptive = (0, feedback_js_1.getAdaptiveThresholds)(reportRoot);
|
|
65
|
+
const plan = (0, plan_builder_js_1.buildPlanFromImpact)(impact, config.policy, undefined, adaptive);
|
|
64
66
|
const planPath = (0, plan_builder_js_1.writePlanReport)(reportRoot, plan);
|
|
65
67
|
const ciSummaryMarkdown = (0, plan_builder_js_1.renderCiSummaryMarkdown)(plan);
|
|
66
68
|
const ciSummaryPath = (0, plan_builder_js_1.writeCiSummary)(reportRoot, ciSummaryMarkdown);
|
|
@@ -106,7 +108,8 @@ async function recommendTestsAI(options = {}) {
|
|
|
106
108
|
specDetails: [...specDetailsMap.values()],
|
|
107
109
|
});
|
|
108
110
|
}
|
|
109
|
-
const
|
|
111
|
+
const adaptive = (0, feedback_js_1.getAdaptiveThresholds)(reportRoot);
|
|
112
|
+
const plan = (0, plan_builder_js_1.buildPlanFromImpact)(impact, config.policy, aiEnrichment, adaptive);
|
|
110
113
|
const planPath = (0, plan_builder_js_1.writePlanReport)(reportRoot, plan);
|
|
111
114
|
const ciSummaryMarkdown = (0, plan_builder_js_1.renderCiSummaryMarkdown)(plan);
|
|
112
115
|
const ciSummaryPath = (0, plan_builder_js_1.writeCiSummary)(reportRoot, ciSummaryMarkdown);
|
|
@@ -1,9 +1,10 @@
|
|
|
1
1
|
import type { PolicyConfig } from '../agent/config.js';
|
|
2
2
|
import type { ImpactResult } from './impact_engine.js';
|
|
3
3
|
import type { AIEnrichmentResult } from './ai_enrichment.js';
|
|
4
|
+
import type { AdaptiveThresholds } from '../agent/feedback.js';
|
|
4
5
|
import type { PlanReport, GapDetail, CoveredFlowSummary } from '../agent/plan.js';
|
|
5
6
|
export type { PlanReport, GapDetail, CoveredFlowSummary };
|
|
6
|
-
export declare function buildPlanFromImpact(impact: ImpactResult, policyOverride?: Partial<PolicyConfig>, aiEnrichment?: AIEnrichmentResult): PlanReport;
|
|
7
|
+
export declare function buildPlanFromImpact(impact: ImpactResult, policyOverride?: Partial<PolicyConfig>, aiEnrichment?: AIEnrichmentResult, adaptiveThresholds?: AdaptiveThresholds): PlanReport;
|
|
7
8
|
export declare function writePlanReport(appRoot: string, plan: PlanReport): string;
|
|
8
9
|
export declare function renderCiSummaryMarkdown(plan: PlanReport): string;
|
|
9
10
|
export declare function writeCiSummary(appRoot: string, markdown: string, relativePath?: string): string;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"plan_builder.d.ts","sourceRoot":"","sources":["../../src/engine/plan_builder.ts"],"names":[],"mappings":"AAOA,OAAO,KAAK,EAAC,YAAY,EAAC,MAAM,oBAAoB,CAAC;
|
|
1
|
+
{"version":3,"file":"plan_builder.d.ts","sourceRoot":"","sources":["../../src/engine/plan_builder.ts"],"names":[],"mappings":"AAOA,OAAO,KAAK,EAAC,YAAY,EAAC,MAAM,oBAAoB,CAAC;AAErD,OAAO,KAAK,EAAC,YAAY,EAAkB,MAAM,oBAAoB,CAAC;AAEtE,OAAO,KAAK,EAAC,kBAAkB,EAAC,MAAM,oBAAoB,CAAC;AAC3D,OAAO,KAAK,EAAC,kBAAkB,EAAC,MAAM,sBAAsB,CAAC;AAG7D,OAAO,KAAK,EACR,UAAU,EACV,SAAS,EACT,kBAAkB,EAIrB,MAAM,kBAAkB,CAAC;AAE1B,YAAY,EAAC,UAAU,EAAE,SAAS,EAAE,kBAAkB,EAAC,CAAC;AAoPxD,wBAAgB,mBAAmB,CAC/B,MAAM,EAAE,YAAY,EACpB,cAAc,CAAC,EAAE,OAAO,CAAC,YAAY,CAAC,EACtC,YAAY,CAAC,EAAE,kBAAkB,EACjC,kBAAkB,CAAC,EAAE,kBAAkB,GACxC,UAAU,CAsJZ;AAED,wBAAgB,eAAe,CAAC,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,UAAU,GAAG,MAAM,CAMzE;AAED,wBAAgB,uBAAuB,CAAC,IAAI,EAAE,UAAU,GAAG,MAAM,CAwHhE;AAED,wBAAgB,cAAc,CAAC,OAAO,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,EAAE,YAAY,SAAiC,GAAG,MAAM,CAMvH"}
|
|
@@ -9,6 +9,7 @@ exports.writeCiSummary = writeCiSummary;
|
|
|
9
9
|
const fs_1 = require("fs");
|
|
10
10
|
const path_1 = require("path");
|
|
11
11
|
const minimatch_1 = require("minimatch");
|
|
12
|
+
const test_path_js_1 = require("../agent/test_path.js");
|
|
12
13
|
const impact_engine_js_1 = require("./impact_engine.js");
|
|
13
14
|
const DEFAULT_POLICY = {
|
|
14
15
|
minConfidenceForTargeted: 60,
|
|
@@ -196,22 +197,34 @@ function evaluateEnforcement(decision, policy) {
|
|
|
196
197
|
}
|
|
197
198
|
/**
 * Collect the Playwright specs to recommend for an impact result.
 *
 * A feature contributes ALL of its specs when either:
 *   - its coverageStatus is anything other than 'uncovered', or
 *   - at least one of its specs maps (via inferSubsystemFromTestPath) to a
 *     subsystem listed in alwaysIncludeSubsystems (blind-spot protection).
 *
 * Specs are de-duplicated and returned in first-seen order.
 */
function buildRecommendedTests(impact, alwaysIncludeSubsystems = []) {
    const forcedSubsystems = new Set(alwaysIncludeSubsystems);
    const collected = new Set();
    for (const feature of impact.impactedFeatures) {
        if (feature.coverageStatus === 'uncovered') {
            // Uncovered features are skipped unless a spec belongs to a forced subsystem.
            const touchesForced = feature.playwrightSpecs.some((spec) => forcedSubsystems.has((0, test_path_js_1.inferSubsystemFromTestPath)(spec)));
            if (!touchesForced) {
                continue;
            }
        }
        feature.playwrightSpecs.forEach((spec) => {
            collected.add(spec);
        });
    }
    return Array.from(collected);
}
|
|
213
|
-
function buildPlanFromImpact(impact, policyOverride, aiEnrichment) {
|
|
217
|
+
function buildPlanFromImpact(impact, policyOverride, aiEnrichment, adaptiveThresholds) {
|
|
214
218
|
const policy = { ...DEFAULT_POLICY, ...(policyOverride || {}) };
|
|
219
|
+
// Apply adaptive calibration overrides (if available and not explicitly overridden)
|
|
220
|
+
if (adaptiveThresholds && policyOverride?.minConfidenceForTargeted === undefined) {
|
|
221
|
+
policy.minConfidenceForTargeted = adaptiveThresholds.minConfidenceForTargeted;
|
|
222
|
+
}
|
|
223
|
+
if (adaptiveThresholds && policyOverride?.safeMergeMinConfidence === undefined) {
|
|
224
|
+
policy.safeMergeMinConfidence = adaptiveThresholds.safeMergeMinConfidence;
|
|
225
|
+
}
|
|
226
|
+
// Apply alwaysIncludeSubsystems: force their tests into the recommended set
|
|
227
|
+
const alwaysIncludeSubsystems = adaptiveThresholds?.alwaysIncludeSubsystems ?? [];
|
|
215
228
|
const confidence = computeConfidence(impact);
|
|
216
229
|
const runSetResult = pickRunSet(impact, confidence, policy);
|
|
217
230
|
const decision = buildDecision(impact, runSetResult.runSet, confidence, policy);
|
|
@@ -294,7 +307,7 @@ function buildPlanFromImpact(impact, policyOverride, aiEnrichment) {
|
|
|
294
307
|
advisoryScenarios,
|
|
295
308
|
};
|
|
296
309
|
});
|
|
297
|
-
const recommendedTests = buildRecommendedTests(impact);
|
|
310
|
+
const recommendedTests = buildRecommendedTests(impact, alwaysIncludeSubsystems);
|
|
298
311
|
const requiredNewTests = gaps.map((f) => `${featureLabel(f)}: Add E2E tests`);
|
|
299
312
|
const p0 = impact.impactedFeatures.filter((f) => f.priority === 'P0').length;
|
|
300
313
|
const p1 = impact.impactedFeatures.filter((f) => f.priority === 'P1').length;
|
|
@@ -251,6 +251,67 @@ export function appendFeedbackAndRecompute(appRoot, input) {
|
|
|
251
251
|
export function readCalibration(appRoot) {
|
|
252
252
|
return readJson(join(appRoot, '.e2e-ai-agents', 'calibration.json'));
|
|
253
253
|
}
|
|
254
|
+
// Baseline thresholds used when calibration gives no reason to adjust.
const DEFAULT_MIN_CONFIDENCE = 60;
const DEFAULT_SAFE_MERGE = 85;
// Hard bounds for the adapted minConfidenceForTargeted value.
const MIN_CONFIDENCE_FLOOR = 40;
const MIN_CONFIDENCE_CEILING = 80;
/**
 * Compute adaptive thresholds based on calibration data.
 * - If recent (7d) recall < 0.8: lower both thresholds by 10 (catch more escapes)
 * - Otherwise, if recent (7d) precision > 0.9: raise both thresholds by 5
 *   (fewer unnecessary tests). Recall takes priority; the two never combine.
 * - Per-subsystem: if falseNegativeRate > 0.3 in 30d, always include tests
 * A window is only trusted once it has at least 3 samples.
 * Returns defaults if no calibration data exists.
 *
 * calibration.json is read from disk without schema validation, so every
 * nested field is treated as possibly missing: a partial or stale file yields
 * the defaults for the affected rule instead of throwing and breaking the
 * recommendation APIs that call this on every run.
 *
 * @param appRoot directory containing `.e2e-ai-agents/calibration.json`
 * @returns thresholds, forced subsystems and a non-empty list of reasons
 */
export function getAdaptiveThresholds(appRoot) {
    const calibration = readCalibration(appRoot);
    if (!calibration || calibration.samples === 0) {
        return {
            minConfidenceForTargeted: DEFAULT_MIN_CONFIDENCE,
            safeMergeMinConfidence: DEFAULT_SAFE_MERGE,
            alwaysIncludeSubsystems: [],
            adjustmentReasons: ['No calibration data — using defaults'],
        };
    }
    const reasons = [];
    const alwaysInclude = [];
    let minConfidence = DEFAULT_MIN_CONFIDENCE;
    let safeMerge = DEFAULT_SAFE_MERGE;
    // Adjust based on the 7-day window (skipped when the window is absent).
    const recent7d = calibration.recent7d;
    if (recent7d && recent7d.samples >= 3) {
        // Require real numbers so a malformed metric cannot reach .toFixed().
        if (typeof recent7d.recall === 'number' && recent7d.recall < 0.8) {
            const adjustment = 10;
            minConfidence -= adjustment;
            safeMerge -= adjustment;
            reasons.push(`Lowering confidence threshold by ${adjustment} (7d recall: ${recent7d.recall.toFixed(2)})`);
        }
        else if (typeof recent7d.precision === 'number' && recent7d.precision > 0.9) {
            const adjustment = 5;
            minConfidence += adjustment;
            safeMerge += adjustment;
            reasons.push(`Raising confidence threshold by ${adjustment} (7d precision: ${recent7d.precision.toFixed(2)})`);
        }
    }
    // Clamp to safe ranges
    minConfidence = Math.max(MIN_CONFIDENCE_FLOOR, Math.min(MIN_CONFIDENCE_CEILING, minConfidence));
    safeMerge = Math.max(70, Math.min(95, safeMerge));
    // Per-subsystem blind spot detection (30-day window)
    for (const [subsystem, metrics] of Object.entries(calibration.bySubsystem ?? {})) {
        const recent = metrics?.recent30d;
        if (!recent) {
            continue;
        }
        const falseNegativeRate = recent.falseNegativeRate;
        if (recent.samples >= 3 && typeof falseNegativeRate === 'number' && falseNegativeRate > 0.3) {
            alwaysInclude.push(subsystem);
            reasons.push(`Always including ${subsystem} tests (30d false-negative rate: ${falseNegativeRate.toFixed(2)})`);
        }
    }
    // Callers log the reasons, so always report something.
    if (reasons.length === 0) {
        reasons.push('Calibration data within normal range — using defaults');
    }
    return {
        minConfidenceForTargeted: minConfidence,
        safeMergeMinConfidence: safeMerge,
        alwaysIncludeSubsystems: alwaysInclude,
        adjustmentReasons: reasons,
    };
}
|
|
254
315
|
export function readFlakyTests(appRoot) {
|
|
255
316
|
return readJson(join(appRoot, '.e2e-ai-agents', 'flaky-tests.json'));
|
|
256
317
|
}
|
|
@@ -22,12 +22,12 @@ export function summarizeCommandOutput(stdout, stderr) {
|
|
|
22
22
|
const lines = combined.split('\n').slice(-20);
|
|
23
23
|
return lines.join('\n').slice(0, 2000);
|
|
24
24
|
}
|
|
25
|
-
export function runCommand(command, args, cwd, timeoutMs = 60 * 60 * 1000) {
|
|
25
|
+
export function runCommand(command, args, cwd, timeoutMs = 60 * 60 * 1000, envOverride) {
|
|
26
26
|
// When spawning `claude`, unset CLAUDECODE so nested invocations are allowed.
|
|
27
27
|
// Claude Code sets this variable to block nested sessions; child processes
|
|
28
28
|
// that spawn their own claude instance must run without it.
|
|
29
|
-
let env;
|
|
30
|
-
if (command === 'claude') {
|
|
29
|
+
let env = envOverride;
|
|
30
|
+
if (!env && command === 'claude') {
|
|
31
31
|
const { CLAUDECODE: _, ...rest } = process.env;
|
|
32
32
|
env = rest;
|
|
33
33
|
}
|
package/dist/esm/api.js
CHANGED
|
@@ -5,6 +5,7 @@ import { appendPlanMetrics, } from './agent/plan.js';
|
|
|
5
5
|
import { analyzeImpact as analyzeImpactV2 } from './engine/impact_engine.js';
|
|
6
6
|
import { buildPlanFromImpact, renderCiSummaryMarkdown, writeCiSummary, writePlanReport, } from './engine/plan_builder.js';
|
|
7
7
|
import { getChangedFiles } from './agent/git.js';
|
|
8
|
+
import { getAdaptiveThresholds } from './agent/feedback.js';
|
|
8
9
|
import { loadDiffs } from './engine/diff_loader.js';
|
|
9
10
|
import { enrichImpactWithAI } from './engine/ai_enrichment.js';
|
|
10
11
|
import { AnthropicProvider } from './anthropic_provider.js';
|
|
@@ -52,7 +53,8 @@ export function recommendTestsDeterministic(options = {}) {
|
|
|
52
53
|
testsRoot: reportRoot,
|
|
53
54
|
routeFamilies: config.routeFamilies,
|
|
54
55
|
});
|
|
55
|
-
const
|
|
56
|
+
const adaptive = getAdaptiveThresholds(reportRoot);
|
|
57
|
+
const plan = buildPlanFromImpact(impact, config.policy, undefined, adaptive);
|
|
56
58
|
const planPath = writePlanReport(reportRoot, plan);
|
|
57
59
|
const ciSummaryMarkdown = renderCiSummaryMarkdown(plan);
|
|
58
60
|
const ciSummaryPath = writeCiSummary(reportRoot, ciSummaryMarkdown);
|
|
@@ -98,7 +100,8 @@ export async function recommendTestsAI(options = {}) {
|
|
|
98
100
|
specDetails: [...specDetailsMap.values()],
|
|
99
101
|
});
|
|
100
102
|
}
|
|
101
|
-
const
|
|
103
|
+
const adaptive = getAdaptiveThresholds(reportRoot);
|
|
104
|
+
const plan = buildPlanFromImpact(impact, config.policy, aiEnrichment, adaptive);
|
|
102
105
|
const planPath = writePlanReport(reportRoot, plan);
|
|
103
106
|
const ciSummaryMarkdown = renderCiSummaryMarkdown(plan);
|
|
104
107
|
const ciSummaryPath = writeCiSummary(reportRoot, ciSummaryMarkdown);
|
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
import { mkdirSync, writeFileSync } from 'fs';
|
|
4
4
|
import { dirname, join } from 'path';
|
|
5
5
|
import { minimatch } from 'minimatch';
|
|
6
|
+
import { inferSubsystemFromTestPath } from '../agent/test_path.js';
|
|
6
7
|
import { getGaps, getPartialGaps } from './impact_engine.js';
|
|
7
8
|
const DEFAULT_POLICY = {
|
|
8
9
|
minConfidenceForTargeted: 60,
|
|
@@ -190,22 +191,34 @@ function evaluateEnforcement(decision, policy) {
|
|
|
190
191
|
}
|
|
191
192
|
/**
 * Collect the Playwright specs to recommend for an impact result.
 *
 * A feature contributes ALL of its specs when either:
 *   - its coverageStatus is anything other than 'uncovered', or
 *   - at least one of its specs maps (via inferSubsystemFromTestPath) to a
 *     subsystem listed in alwaysIncludeSubsystems (blind-spot protection).
 *
 * Specs are de-duplicated and returned in first-seen order.
 */
function buildRecommendedTests(impact, alwaysIncludeSubsystems = []) {
    const forcedSubsystems = new Set(alwaysIncludeSubsystems);
    const collected = new Set();
    for (const feature of impact.impactedFeatures) {
        if (feature.coverageStatus === 'uncovered') {
            // Uncovered features are skipped unless a spec belongs to a forced subsystem.
            const touchesForced = feature.playwrightSpecs.some((spec) => forcedSubsystems.has(inferSubsystemFromTestPath(spec)));
            if (!touchesForced) {
                continue;
            }
        }
        feature.playwrightSpecs.forEach((spec) => {
            collected.add(spec);
        });
    }
    return Array.from(collected);
}
|
|
207
|
-
export function buildPlanFromImpact(impact, policyOverride, aiEnrichment) {
|
|
211
|
+
export function buildPlanFromImpact(impact, policyOverride, aiEnrichment, adaptiveThresholds) {
|
|
208
212
|
const policy = { ...DEFAULT_POLICY, ...(policyOverride || {}) };
|
|
213
|
+
// Apply adaptive calibration overrides (if available and not explicitly overridden)
|
|
214
|
+
if (adaptiveThresholds && policyOverride?.minConfidenceForTargeted === undefined) {
|
|
215
|
+
policy.minConfidenceForTargeted = adaptiveThresholds.minConfidenceForTargeted;
|
|
216
|
+
}
|
|
217
|
+
if (adaptiveThresholds && policyOverride?.safeMergeMinConfidence === undefined) {
|
|
218
|
+
policy.safeMergeMinConfidence = adaptiveThresholds.safeMergeMinConfidence;
|
|
219
|
+
}
|
|
220
|
+
// Apply alwaysIncludeSubsystems: force their tests into the recommended set
|
|
221
|
+
const alwaysIncludeSubsystems = adaptiveThresholds?.alwaysIncludeSubsystems ?? [];
|
|
209
222
|
const confidence = computeConfidence(impact);
|
|
210
223
|
const runSetResult = pickRunSet(impact, confidence, policy);
|
|
211
224
|
const decision = buildDecision(impact, runSetResult.runSet, confidence, policy);
|
|
@@ -288,7 +301,7 @@ export function buildPlanFromImpact(impact, policyOverride, aiEnrichment) {
|
|
|
288
301
|
advisoryScenarios,
|
|
289
302
|
};
|
|
290
303
|
});
|
|
291
|
-
const recommendedTests = buildRecommendedTests(impact);
|
|
304
|
+
const recommendedTests = buildRecommendedTests(impact, alwaysIncludeSubsystems);
|
|
292
305
|
const requiredNewTests = gaps.map((f) => `${featureLabel(f)}: Add E2E tests`);
|
|
293
306
|
const p0 = impact.impactedFeatures.filter((f) => f.priority === 'P0').length;
|
|
294
307
|
const p1 = impact.impactedFeatures.filter((f) => f.priority === 'P1').length;
|
package/dist/esm/index.js
CHANGED
|
@@ -14,7 +14,7 @@ export { analyzeImpactDeterministic, recommendTestsDeterministic, handoffGenerat
|
|
|
14
14
|
export { analyzeImpact as analyzeImpactV2, getGaps, getPartialGaps } from './engine/impact_engine.js';
|
|
15
15
|
export { extractScenarios } from './engine/impact_engine.js';
|
|
16
16
|
export { buildPlanFromImpact } from './engine/plan_builder.js';
|
|
17
|
-
export { appendFeedbackAndRecompute, readCalibration, readFlakyTests } from './agent/feedback.js';
|
|
17
|
+
export { appendFeedbackAndRecompute, readCalibration, readFlakyTests, getAdaptiveThresholds } from './agent/feedback.js';
|
|
18
18
|
export { finalizeGeneratedTests } from './agent/handoff.js';
|
|
19
19
|
export { ingestTraceabilityInput } from './agent/traceability_ingest.js';
|
|
20
20
|
export { captureTraceabilityInput } from './agent/traceability_capture.js';
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
// Copyright (c) 2015-present Mattermost, Inc. All Rights Reserved.
|
|
2
|
+
// See LICENSE.txt for license information.
|
|
3
|
+
import { resolve } from 'path';
|
|
4
|
+
import { runCommand } from '../agent/process_runner.js';
|
|
5
|
+
/**
 * Env var name prefixes (and a few full names) stripped when running LLM-generated specs.
 * All entries are upper-case; variable names are upper-cased before comparison.
 */
const SENSITIVE_ENV_PREFIXES = [
    'AWS_', 'AZURE_', 'GCP_', 'GOOGLE_', 'ANTHROPIC_', 'OPENAI_',
    'GITHUB_TOKEN', 'NPM_TOKEN', 'SSH_', 'SECRET_', 'PRIVATE_',
    'DATABASE_URL', 'DB_', 'REDIS_', 'POSTGRES_', 'MYSQL_', 'MONGO_',
    'API_KEY', 'API_SECRET', 'AUTH_', 'JWT_', 'STRIPE_', 'TWILIO_',
    'SENDGRID_', 'SLACK_TOKEN', 'SLACK_BOT', 'MATTERMOST_',
];
/**
 * Build a restricted environment for running LLM-generated spec files.
 *
 * Copies process.env, dropping every variable whose name starts with one of
 * SENSITIVE_ENV_PREFIXES, to limit damage from malicious generated code.
 * Matching is case-insensitive: a lower- or mixed-case name such as
 * `aws_session_token` would otherwise slip through, and environment variable
 * names are case-insensitive on Windows (see the Node.js process.env docs).
 *
 * @returns A new plain object; process.env itself is not modified.
 */
function buildRestrictedEnv() {
    const allowedEntries = Object.entries(process.env).filter(([key]) => {
        // Upper-case once per variable rather than once per prefix.
        const upperKey = key.toUpperCase();
        return !SENSITIVE_ENV_PREFIXES.some((prefix) => upperKey.startsWith(prefix));
    });
    return Object.fromEntries(allowedEntries);
}
|
|
27
|
+
/**
 * Validate and normalize a spec path to prevent argument injection.
 *
 * Rejects raw input that starts with '-' (could be interpreted as flags by
 * tsc/playwright). Also rejects empty or non-string input: `resolve('')`
 * returns the current working directory, which would point the tools at a
 * whole directory instead of a single spec file.
 *
 * NOTE(review): the path is resolved against process.cwd(), not against the
 * testsRoot the commands are run with; confirm callers pass absolute paths
 * or run with cwd === testsRoot.
 *
 * @param specPath - Raw spec path as produced by the generation pipeline.
 * @returns The absolute form of specPath.
 * @throws Error when specPath is empty, not a string, or starts with a dash.
 */
function sanitizeSpecPath(specPath) {
    if (typeof specPath !== 'string' || specPath.length === 0) {
        throw new Error(`Invalid spec path: "${specPath}" — path must be a non-empty string`);
    }
    if (specPath.startsWith('-')) {
        throw new Error(`Invalid spec path: "${specPath}" — path must not start with a dash`);
    }
    return resolve(specPath);
}
|
|
37
|
+
/**
 * Type-check a generated spec file by running `npx tsc --noEmit` on it.
 *
 * The spec path is validated with sanitizeSpecPath (which throws on a leading
 * dash) and the command is given the environment from buildRestrictedEnv().
 *
 * @param specPath - Path of the generated spec to check.
 * @param testsRoot - Passed to runCommand as its third argument (presumably the working directory; confirm in process_runner).
 * @returns `{ success: true, errors: [] }` when the command exits with status 0;
 *   otherwise `{ success: false, errors }` holding up to 10 diagnostic lines, or,
 *   when none are recognized, the first 500 characters of the combined output
 *   (or 'Compilation failed' if there was no output).
 */
export function compileCheckSpec(specPath, testsRoot) {
    const tscArgs = [
        'tsc', '--noEmit', '--esModuleInterop', '--resolveJsonModule',
        '--moduleResolution', 'node', '--target', 'ES2020',
        sanitizeSpecPath(specPath),
    ];
    const run = runCommand('npx', tscArgs, testsRoot, 30000, buildRestrictedEnv());
    if (run.status === 0) {
        return { success: true, errors: [] };
    }
    const combinedOutput = [run.stdout, run.stderr].filter((chunk) => Boolean(chunk)).join('\n');
    // Keep only lines that look like compiler diagnostics, capped at ten.
    const diagnostics = [];
    for (const line of combinedOutput.split('\n')) {
        if (line.includes('error TS') || line.includes('Error:')) {
            diagnostics.push(line);
            if (diagnostics.length === 10) {
                break;
            }
        }
    }
    if (diagnostics.length > 0) {
        return { success: false, errors: diagnostics };
    }
    return { success: false, errors: [combinedOutput.slice(0, 500) || 'Compilation failed'] };
}
|
|
56
|
+
/**
 * Smoke-run a generated spec with Playwright.
 *
 * Invokes `<playwrightBinary> test <spec> --retries 2 --reporter list` through
 * runCommand, using the environment from buildRestrictedEnv() so sensitive
 * variables are not visible to the generated code. The spec path is validated
 * with sanitizeSpecPath, which throws on a leading dash.
 *
 * @param specPath - Path of the generated spec to run.
 * @param testsRoot - Passed to runCommand as its third argument (presumably the working directory; confirm in process_runner).
 * @param playwrightBinary - Executable used to launch Playwright.
 * @returns `{ success: true }` when the command exits with status 0; otherwise
 *   `{ success: false, error }` where error is up to 5 output lines mentioning
 *   'Error', 'FAILED' or 'Timeout', falling back to the runner's own error and
 *   finally to 'Smoke run failed'.
 */
export function smokeRunSpec(specPath, testsRoot, playwrightBinary) {
    const target = sanitizeSpecPath(specPath);
    const playwrightArgs = ['test', target, '--retries', '2', '--reporter', 'list'];
    const run = runCommand(playwrightBinary, playwrightArgs, testsRoot, 120000, buildRestrictedEnv());
    if (run.status === 0) {
        return { success: true };
    }
    const combinedOutput = [run.stdout, run.stderr].filter((chunk) => Boolean(chunk)).join('\n');
    const failureMarkers = ['Error', 'FAILED', 'Timeout'];
    const failureLines = combinedOutput
        .split('\n')
        .filter((line) => failureMarkers.some((marker) => line.includes(marker)))
        .slice(0, 5);
    const summary = failureLines.join('\n');
    return {
        success: false,
        error: summary || run.error || 'Smoke run failed',
    };
}
|