open-multi-agent-kit 0.78.2 → 0.78.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. package/CHANGELOG.md +27 -2
  2. package/MATURITY.md +2 -2
  3. package/README.md +4 -4
  4. package/dist/benchmark/contracts.d.ts +116 -0
  5. package/dist/benchmark/contracts.js +6 -0
  6. package/dist/benchmark/fixtures.d.ts +11 -0
  7. package/dist/benchmark/fixtures.js +124 -0
  8. package/dist/benchmark/harness.d.ts +13 -0
  9. package/dist/benchmark/harness.js +191 -0
  10. package/dist/benchmark/shadow-mode.d.ts +17 -0
  11. package/dist/benchmark/shadow-mode.js +95 -0
  12. package/dist/cli/release-promotion-gate.js +14 -4
  13. package/dist/commands/merge.js +102 -56
  14. package/dist/contracts/provider-health.d.ts +37 -0
  15. package/dist/contracts/provider-health.js +49 -1
  16. package/dist/evidence/evidence-trust-score.d.ts +101 -0
  17. package/dist/evidence/evidence-trust-score.js +408 -0
  18. package/dist/evidence/index.d.ts +2 -0
  19. package/dist/evidence/index.js +1 -0
  20. package/dist/native/linux-x64/omk-safety +0 -0
  21. package/dist/orchestration/merge-arbiter.d.ts +91 -0
  22. package/dist/orchestration/merge-arbiter.js +376 -0
  23. package/dist/providers/health.d.ts +3 -0
  24. package/dist/providers/health.js +46 -0
  25. package/dist/providers/index.d.ts +1 -0
  26. package/dist/providers/index.js +1 -0
  27. package/dist/providers/provider-health.d.ts +8 -1
  28. package/dist/providers/provider-health.js +39 -0
  29. package/dist/providers/provider-task-runner.js +31 -0
  30. package/dist/providers/provider.d.ts +2 -0
  31. package/dist/providers/router.js +80 -3
  32. package/dist/providers/types.d.ts +4 -0
  33. package/dist/runtime/contracts/weakness-remediation.d.ts +6 -0
  34. package/dist/runtime/provider-maturity-gate.d.ts +2 -0
  35. package/dist/runtime/provider-maturity-gate.js +26 -0
  36. package/dist/runtime/tool-dispatch-contracts.d.ts +24 -3
  37. package/dist/runtime/tool-dispatch-contracts.js +42 -2
  38. package/dist/runtime/weakness-remediation-index.d.ts +1 -1
  39. package/dist/runtime/weakness-remediation-index.js +1 -1
  40. package/dist/safety/enforcement-engine.d.ts +89 -0
  41. package/dist/safety/enforcement-engine.js +279 -0
  42. package/dist/safety/tool-authority-gate.d.ts +40 -0
  43. package/dist/safety/tool-authority-gate.js +92 -0
  44. package/dist/schema/evidence.schema.d.ts +2 -2
  45. package/dist/schema/proof-bundle.schema.d.ts +2 -2
  46. package/docs/benchmark-design.md +122 -0
  47. package/docs/getting-started.md +1 -1
  48. package/docs/provider-maturity.md +1 -1
  49. package/docs/versioning.md +3 -3
  50. package/package.json +7 -3
@@ -0,0 +1,408 @@
1
+ /**
2
+ * Evidence Trust Score (ETS) v2 — Algorithm 10
3
+ *
4
+ * Pipeline:
5
+ * ClaimExtractor(output) → RequiredEvidence(claim, taskType, risk)
6
+ * → EvidenceCollector(runArtifacts) → EvidenceVerifier(required, collected)
7
+ * → EvidenceTrustScore() → Pass | Warn | Fail
8
+ *
9
+ * Formula:
10
+ * ETS = 0.30*reproducibility + 0.25*independence + 0.20*coverage_relevance
11
+ * + 0.15*provenance_integrity + 0.10*freshness
12
+ * - gaming_penalty - stale_result_penalty - unverifiable_claim_penalty
13
+ */
14
+ import { readFile } from "node:fs/promises";
15
+ import { existsSync } from "node:fs";
16
+ import { join } from "node:path";
17
+ // ─── Constants ─────────────────────────────────────────────────────────────
18
/**
 * Default weight of each sub-score in the ETS formula.
 * The five weights add up to 1.0.
 */
const WEIGHTS = {
    reproducibility: 0.3,
    independence: 0.25,
    coverageRelevance: 0.2,
    provenanceIntegrity: 0.15,
    freshness: 0.1,
};
/** Staleness threshold in hours, keyed by risk tier. */
const STALE_HOURS_BY_RISK = { low: 72, medium: 48, high: 24, critical: 6 };
31
/**
 * Case-insensitive patterns used to spot claims in agent output, one per
 * claim category. Each pattern is a word boundary followed by a single
 * capturing group, so the full match and group 1 are always the same text.
 */
const CLAIM_PATTERNS = [
    { category: "test", regex: /\b(tests?\s+pass(?:ed|es|ing)|test\s+coverage|all\s+tests?\s+(?:ok|green)|\bnpm\s+test|\bnode\s+--test)/i },
    { category: "build", regex: /\b(build\s+(?:ok|success|succeeded|pass(?:ed|es|ing))|npm\s+run\s+build|tsc\s+.*(?:no\s+error|success)|esbuild|vite\s+build)/i },
    { category: "typecheck", regex: /\b(typecheck\s+(?:ok|pass(?:ed|es|ing)|clean)|tsc\s+--noEmit|no\s+type\s+errors?)/i },
    { category: "lint", regex: /\b(lint\s+(?:ok|pass(?:ed|es|ing)|clean)|eslint.*(?:no\s+error|0\s+(?:problem|warning))|prettier.*check)/i },
    { category: "security", regex: /\b(secur(?:ity|e)\s+(?:ok|pass(?:ed|es|ing)|scan\s+(?:clean|passed))|secret.*scan|audit.*pass|vulnerability.*0)/i },
    { category: "performance", regex: /\b(performance\s+(?:ok|pass(?:ed|es|ing)|improved)|latency.*\d+ms|throughput)/i },
    { category: "docs", regex: /\b(docs?\s+(?:ok|pass(?:ed|es|ing)|updated)|readme.*updated|changelog.*updated)/i },
    { category: "behavioral", regex: /\b(fix(?:ed|es)\s+(?:bug|issue)|feature\s+(?:works?|implemented)|behavior\s+(?:correct|as\s+expected))/i },
];
// ─── Claim Extractor ───────────────────────────────────────────────────────
/**
 * Scans free-form output for claim phrases and returns one claim per distinct
 * (category, lower-cased text) pair, in pattern order then match order.
 *
 * Every occurrence of every pattern is considered. The previous implementation
 * iterated the result of a non-global `String.prototype.match`, which holds the
 * first match followed by its capture groups — so only the first occurrence
 * per category could ever be reported.
 *
 * @param output Text to scan.
 * @returns A frozen array of `{ claimId, text, category, confidence }`;
 *          `confidence` is the fixed value 0.8.
 */
export function extractClaims(output) {
    const claims = [];
    const seen = new Set();
    let claimIndex = 0;
    for (const { category, regex } of CLAIM_PATTERNS) {
        // matchAll requires the global flag; build a global copy so the shared
        // pattern objects stay stateless (no lastIndex side effects).
        const flags = regex.flags.includes("g") ? regex.flags : `${regex.flags}g`;
        const globalRegex = new RegExp(regex.source, flags);
        for (const match of output.matchAll(globalRegex)) {
            const text = match[0];
            const key = `${category}:${text.toLowerCase()}`;
            if (seen.has(key))
                continue;
            seen.add(key);
            claims.push({
                claimId: `claim-${category}-${claimIndex++}`,
                text,
                category,
                confidence: 0.8,
            });
        }
    }
    return Object.freeze(claims);
}
65
+ // ─── Required Evidence ─────────────────────────────────────────────────────
66
/**
 * Lists the evidence a claim must be backed by.
 *
 * Every claim needs "command" and "trace" evidence plus the kinds mapped to
 * its category. High and critical risk append an audit-trail requirement;
 * critical risk additionally appends a review requirement.
 *
 * @param claim Claim with `claimId` and `category`.
 * @param taskType Accepted for interface compatibility; not read by this function.
 * @param risk Risk tier; selects the minimum confidence (critical 0.95,
 *             high 0.85, medium 0.75, anything else 0.6).
 * @returns A frozen array of requirement records.
 */
export function requiredEvidenceForClaim(claim, taskType, risk) {
    const extraKindsByCategory = {
        test: ["test", "metric"],
        build: ["metric"],
        typecheck: ["metric"],
        lint: ["metric", "audit"],
        security: ["audit", "screenshot"],
        performance: ["metric", "trace"],
        docs: ["diff", "screenshot"],
        behavioral: ["diff", "test"],
    };
    let minConfidence;
    switch (risk) {
        case "critical":
            minConfidence = 0.95;
            break;
        case "high":
            minConfidence = 0.85;
            break;
        case "medium":
            minConfidence = 0.75;
            break;
        default:
            minConfidence = 0.6;
    }
    const kinds = ["command", "trace", ...(extraKindsByCategory[claim.category] ?? [])];
    const required = kinds.map((kind, index) => ({
        evidenceId: `${claim.claimId}-req-${index}`,
        kind,
        description: `Required ${kind} evidence for ${claim.category} claim`,
        minConfidence,
    }));
    const isCritical = risk === "critical";
    // High and critical risk: extra audit trail.
    if (isCritical || risk === "high") {
        required.push({
            evidenceId: `${claim.claimId}-req-audit`,
            kind: "audit",
            description: `Audit trail for ${risk} risk task`,
            minConfidence: 0.9,
        });
    }
    // Critical risk only: review evidence.
    if (isCritical) {
        required.push({
            evidenceId: `${claim.claimId}-req-review`,
            kind: "review",
            description: `Review evidence for critical risk task`,
            minConfidence: 0.9,
        });
    }
    return Object.freeze(required);
}
108
+ // ─── Evidence Collector ────────────────────────────────────────────────────
109
/**
 * Loads evidence items from `<runDir>/evidence.jsonl`.
 *
 * Best-effort by design: a missing file, a read error, an unparseable line,
 * a non-object line, or a record that cannot be converted is skipped silently.
 *
 * @param runDir Directory expected to contain `evidence.jsonl`.
 * @param meta Run metadata, returned unchanged alongside the items.
 * @returns `{ items, meta }` where `items` is a frozen array.
 */
export async function collectEvidenceFromRunDir(runDir, meta) {
    const collected = [];
    const finish = () => ({ items: Object.freeze(collected), meta });
    const jsonlPath = join(runDir, "evidence.jsonl");
    if (!existsSync(jsonlPath)) {
        return finish();
    }
    let raw;
    try {
        raw = await readFile(jsonlPath, "utf8");
    }
    catch {
        // Unreadable file: treated the same as no evidence.
        return finish();
    }
    for (const rawLine of raw.split(/\r?\n/)) {
        if (rawLine.trim().length === 0) {
            continue;
        }
        try {
            const record = JSON.parse(rawLine);
            if (!isObject(record)) {
                continue;
            }
            const item = evidenceItemFromRecord(record);
            if (item) {
                collected.push(item);
            }
        }
        catch {
            // Malformed line: skip it and keep going.
        }
    }
    return finish();
}
132
/**
 * Converts one parsed evidence record into an evidence item.
 *
 * Returns null when the record's `kind` or `status` is not recognised.
 * Missing fields fall back to defaults: empty id/description, source
 * "unknown", the current time as timestamp, and confidence 0.8.
 *
 * @param record Parsed JSON object from the evidence log.
 * @returns The evidence item, or null if the record is unusable.
 */
function evidenceItemFromRecord(record) {
    const kind = parseEvidenceKind(record.kind);
    const verdict = parseEvidenceVerdict(record.status);
    if (!(kind && verdict)) {
        return null;
    }
    // Prefer an explicit path list; otherwise fall back to a single `path`.
    let linkedFilePaths;
    if (Array.isArray(record.linkedFilePaths)) {
        linkedFilePaths = record.linkedFilePaths;
    }
    else if (record.path) {
        linkedFilePaths = [String(record.path)];
    }
    else {
        linkedFilePaths = [];
    }
    const hasNumericConfidence = typeof record.confidence === "number";
    const metadata = record.metadata && isObject(record.metadata) ? record.metadata : undefined;
    return {
        id: String(record.evidenceId ?? record.id ?? ""),
        kind,
        source: String(record.source ?? record.nodeId ?? "unknown"),
        description: String(record.message ?? record.description ?? ""),
        verdict,
        timestamp: String(record.observedAt ?? record.timestamp ?? new Date().toISOString()),
        confidence: hasNumericConfidence ? record.confidence : 0.8,
        linkedTraceId: record.linkedTraceId ? String(record.linkedTraceId) : undefined,
        linkedFilePaths,
        metadata,
    };
}
154
/**
 * Validates an evidence kind.
 *
 * @param value Candidate kind taken from a record.
 * @returns The kind when it is one of the known kinds, otherwise null.
 */
function parseEvidenceKind(value) {
    const knownKinds = ["test", "diff", "command", "screenshot", "trace", "metric", "audit", "review"];
    return knownKinds.includes(value) ? value : null;
}
158
/**
 * Maps a record status to a verdict.
 *
 * Evidence-schema statuses are translated ("passed" → "pass", "failed" →
 * "fail", "missing"/"skipped"/"blocked" → "pending"); values that are already
 * verdicts pass through unchanged.
 *
 * @param value Status taken from a record.
 * @returns "pass", "fail", "partial", "pending", or null when unrecognised.
 */
function parseEvidenceVerdict(value) {
    switch (value) {
        case "passed":
        case "pass":
            return "pass";
        case "failed":
        case "fail":
            return "fail";
        case "missing":
        case "skipped":
        case "blocked":
        case "pending":
            return "pending";
        case "partial":
            return "partial";
        default:
            return null;
    }
}
169
+ // ─── Evidence Verifier ─────────────────────────────────────────────────────
170
/**
 * Classifies each requirement against the collected evidence.
 *
 * Only items of the same kind whose confidence reaches the requirement's
 * minimum are considered. A requirement is "satisfied" if any such item has
 * verdict "pass", "partial" if the best it has is "partial", and "missing"
 * otherwise (items with other verdicts do not count).
 *
 * @param required Requirement records (`evidenceId`, `kind`, `minConfidence`).
 * @param collected Object whose `items` array holds the evidence items.
 * @returns Frozen arrays of requirement ids: `satisfied`, `missing`, `partial`.
 */
export function verifyEvidence(required, collected) {
    const satisfied = [];
    const missing = [];
    const partial = [];
    for (const requirement of required) {
        let sawPass = false;
        let sawPartial = false;
        for (const item of collected.items) {
            if (item.kind !== requirement.kind || !(item.confidence >= requirement.minConfidence)) {
                continue;
            }
            if (item.verdict === "pass") {
                sawPass = true;
                break; // a pass settles the requirement
            }
            if (item.verdict === "partial") {
                sawPartial = true;
            }
        }
        const bucket = sawPass ? satisfied : sawPartial ? partial : missing;
        bucket.push(requirement.evidenceId);
    }
    return {
        satisfied: Object.freeze(satisfied),
        missing: Object.freeze(missing),
        partial: Object.freeze(partial),
    };
}
190
+ // ─── Sub-score Computers ───────────────────────────────────────────────────
191
/**
 * Scores how reproducible a run is from its metadata.
 *
 * A non-empty `commandHash` contributes 0.4, and non-empty `treeHashBefore`
 * and `treeHashAfter` contribute 0.3 each; the result is the earned share of
 * the total.
 *
 * @param meta Run metadata.
 * @returns A value between 0 and 1.
 */
function computeReproducibility(meta) {
    const weightedFields = [
        { field: "commandHash", weight: 0.4 },
        { field: "treeHashBefore", weight: 0.3 },
        { field: "treeHashAfter", weight: 0.3 },
    ];
    let earned = 0;
    let possible = 0;
    for (const { field, weight } of weightedFields) {
        const value = meta[field];
        if (value && value.length > 0) {
            earned += weight;
        }
        possible += weight;
    }
    return possible > 0 ? earned / possible : 0;
}
211
/**
 * Fraction of evidence items that count as independently produced.
 *
 * An item counts when its lower-cased source is one of runner, command,
 * shell, test, ci, or when its kind is test, command, or metric.
 *
 * @param collected Object whose `items` array holds the evidence items.
 * @returns A value between 0 and 1; 0 when there are no items.
 */
function computeIndependence(collected) {
    const total = collected.items.length;
    if (total === 0) {
        return 0;
    }
    const trustedSources = new Set(["runner", "command", "shell", "test", "ci"]);
    const machineKinds = ["test", "command", "metric"];
    const independent = collected.items.filter((item) => {
        const normalizedSource = item.source.toLowerCase();
        return trustedSources.has(normalizedSource) || machineKinds.includes(item.kind);
    });
    return independent.length / total;
}
227
/**
 * Fraction of evidence items that are tied to files.
 *
 * An item counts when it lists at least one linked file path, or when its
 * description contains any of the supplied dependency-graph file names.
 *
 * @param collected Object whose `items` array holds the evidence items.
 * @param dependencyGraphFiles Optional list of file names to look for in descriptions.
 * @returns A value between 0 and 1; 0 when there are no items.
 */
function computeCoverageRelevance(collected, dependencyGraphFiles) {
    const total = collected.items.length;
    if (total === 0) {
        return 0;
    }
    const graphFiles = dependencyGraphFiles && dependencyGraphFiles.length > 0 ? dependencyGraphFiles : null;
    let linked = 0;
    for (const item of collected.items) {
        if (item.linkedFilePaths.length > 0) {
            linked += 1;
            continue;
        }
        // No explicit links: fall back to a file-name mention in the description.
        if (graphFiles !== null && graphFiles.some((file) => item.description.includes(file))) {
            linked += 1;
        }
    }
    return linked / total;
}
241
/**
 * Fraction of provenance fields that are populated in the run metadata.
 *
 * Eight fields are checked (runId, provider, model, cwd, treeHashBefore,
 * treeHashAfter, commandHash, nodeId); a field counts only when it is a
 * non-empty string. Note that `nodeId` is part of the denominator too.
 *
 * @param meta Run metadata.
 * @returns A value between 0 and 1.
 */
function computeProvenanceIntegrity(meta) {
    const trackedFields = [
        "runId",
        "provider",
        "model",
        "cwd",
        "treeHashBefore",
        "treeHashAfter",
        "commandHash",
        "nodeId",
    ];
    const populated = trackedFields.filter((field) => {
        const value = meta[field];
        return typeof value === "string" && value.length > 0;
    });
    return populated.length / trackedFields.length;
}
262
/**
 * Average freshness of the collected evidence.
 *
 * An item scores 1 while its age is within the staleness threshold for the
 * risk tier, then decays linearly to 0 over a further two thresholds. Items
 * whose age is negative (timestamp after `nowIso`) or cannot be computed
 * (unparseable timestamp) also score 1.
 *
 * @param collected Object whose `items` array holds the evidence items.
 * @param risk Risk tier used to look up the staleness threshold.
 * @param nowIso Reference time as an ISO string.
 * @returns A value between 0 and 1; 0 when there are no items.
 */
function computeFreshness(collected, risk, nowIso) {
    const itemCount = collected.items.length;
    if (itemCount === 0) {
        return 0;
    }
    const nowMs = new Date(nowIso).getTime();
    const thresholdMs = STALE_HOURS_BY_RISK[risk] * 60 * 60 * 1000;
    const freshnessOf = (timestamp) => {
        const ageMs = nowMs - new Date(timestamp).getTime();
        if (ageMs < 0 || Number.isNaN(ageMs) || ageMs <= thresholdMs) {
            return 1.0;
        }
        // Past the threshold: linear decay across the next 2x threshold.
        const decayWindow = thresholdMs * 2;
        return Math.max(0, 1 - (ageMs - thresholdMs) / decayWindow);
    };
    const total = collected.items.reduce((sum, item) => sum + freshnessOf(item.timestamp), 0);
    return total / itemCount;
}
287
+ // ─── Penalty Computers ─────────────────────────────────────────────────────
288
/**
 * Penalty for output that looks like it is gaming the score.
 *
 * Three additive components, capped at 0.3 in total:
 *  - 0.15 when there are claims but no evidence item from an independent
 *    source (an empty evidence list counts as "none");
 *  - 0.10 when fewer satisfied requirements than half the number of claims;
 *  - 0.10 when there is evidence and all of it is self-reported.
 *
 * A source is self-reported when it is "agent", "self", or "unknown".
 * The comparison is case-insensitive, matching how `computeIndependence`
 * normalises sources; previously "Agent" or "SELF" escaped both
 * self-report penalties.
 *
 * @param claims Extracted claims.
 * @param collected Object whose `items` array holds the evidence items.
 * @param verification Verification result; only `satisfied.length` is read.
 * @returns A penalty between 0 and 0.3.
 */
function computeGamingPenalty(claims, collected, verification) {
    const selfReportedSources = new Set(["agent", "self", "unknown"]);
    const isSelfReported = (item) => {
        const source = typeof item.source === "string" ? item.source.toLowerCase() : item.source;
        return selfReportedSources.has(source);
    };
    const totalItems = collected.items.length;
    const independentCount = collected.items.filter((item) => !isSelfReported(item)).length;
    let penalty = 0;
    // Claims made, but nothing independently sourced backs them.
    if (claims.length > 0 && independentCount === 0) {
        penalty += 0.15;
    }
    // Many claims, few satisfied requirements.
    const claimToVerifiedRatio = claims.length > 0 ? verification.satisfied.length / claims.length : 1;
    if (claimToVerifiedRatio < 0.5) {
        penalty += 0.1;
    }
    // Evidence exists, but every item is self-reported.
    if (totalItems > 0 && independentCount === 0) {
        penalty += 0.1;
    }
    return Math.min(penalty, 0.3);
}
312
/**
 * Penalty for evidence older than the staleness threshold of the risk tier.
 *
 * Each stale item costs 0.05, capped at 0.2. Items whose age cannot be
 * computed (unparseable timestamp) are not counted as stale.
 *
 * @param collected Object whose `items` array holds the evidence items.
 * @param risk Risk tier used to look up the staleness threshold.
 * @param nowIso Reference time as an ISO string.
 * @returns A penalty between 0 and 0.2.
 */
function computeStaleResultPenalty(collected, risk, nowIso) {
    const nowMs = new Date(nowIso).getTime();
    const thresholdMs = STALE_HOURS_BY_RISK[risk] * 60 * 60 * 1000;
    const staleItems = collected.items.filter((item) => {
        const ageMs = nowMs - new Date(item.timestamp).getTime();
        return ageMs > thresholdMs;
    });
    return Math.min(staleItems.length * 0.05, 0.2);
}
325
/**
 * Penalty for required evidence that could not be found.
 *
 * Each missing requirement costs 0.05, capped at 0.3. With no claims the
 * penalty is always 0.
 *
 * @param claims Extracted claims.
 * @param verification Verification result; only `missing.length` is read.
 * @returns A penalty between 0 and 0.3.
 */
function computeUnverifiableClaimPenalty(claims, verification) {
    return claims.length === 0 ? 0 : Math.min(verification.missing.length * 0.05, 0.3);
}
331
+ // ─── Verdict ───────────────────────────────────────────────────────────────
332
/**
 * Turns a score into a verdict.
 *
 * @param score Final trust score.
 * @returns "pass" at 0.75 or above, "warn" at 0.50 or above, otherwise "fail".
 */
function computeVerdict(score) {
    return score >= 0.75 ? "pass" : score >= 0.50 ? "warn" : "fail";
}
339
/**
 * Creates an Evidence Trust Score v2 engine.
 *
 * The engine's `evaluate(params)` extracts claims from `params.output`,
 * derives the evidence each claim requires, verifies it against
 * `params.runArtifacts`, and combines five weighted sub-scores minus three
 * penalties into a score clamped to [0, 1] and rounded to three decimals.
 *
 * Reference time: `params.now` wins, then `options.now`, then the current
 * time at the moment `evaluate` is called. The fallback used to be captured
 * once when the engine was created, so on a long-lived engine later evidence
 * appeared to come from the future and was always scored as fresh.
 *
 * @param options Optional `{ customWeights, now }`; `customWeights` entries
 *                override the default weights.
 * @returns An object with an async `evaluate(params)` method resolving to the
 *          score, the rounded sub-scores and penalties, the verdict, and a
 *          frozen list of reasons.
 */
export function createEvidenceTrustScoreV2Engine(options) {
    const weights = { ...WEIGHTS, ...options?.customWeights };
    const round3 = (value) => Math.round(value * 1000) / 1000;
    return {
        async evaluate(params) {
            // Resolve "now" per evaluation so a reused engine never works with a stale clock.
            const nowIso = params.now ?? options?.now ?? new Date().toISOString();
            const claims = extractClaims(params.output);
            const allRequired = [];
            for (const claim of claims) {
                allRequired.push(...requiredEvidenceForClaim(claim, params.taskType, params.risk));
            }
            const verification = verifyEvidence(allRequired, params.runArtifacts);
            const reproducibility = computeReproducibility(params.runArtifacts.meta);
            const independence = computeIndependence(params.runArtifacts);
            const coverageRelevance = computeCoverageRelevance(params.runArtifacts, params.dependencyGraphFiles);
            const provenanceIntegrity = computeProvenanceIntegrity(params.runArtifacts.meta);
            const freshness = computeFreshness(params.runArtifacts, params.risk, nowIso);
            const gamingPenalty = computeGamingPenalty(claims, params.runArtifacts, verification);
            const staleResultPenalty = computeStaleResultPenalty(params.runArtifacts, params.risk, nowIso);
            const unverifiableClaimPenalty = computeUnverifiableClaimPenalty(claims, verification);
            let score = weights.reproducibility * reproducibility +
                weights.independence * independence +
                weights.coverageRelevance * coverageRelevance +
                weights.provenanceIntegrity * provenanceIntegrity +
                weights.freshness * freshness -
                gamingPenalty -
                staleResultPenalty -
                unverifiableClaimPenalty;
            // Penalties can push the raw value below 0; clamp after rounding.
            score = Math.max(0, Math.min(1, round3(score)));
            const reasons = [];
            if (reproducibility < 0.5)
                reasons.push("reproducibility below 0.5");
            if (independence < 0.5)
                reasons.push("independence below 0.5");
            if (coverageRelevance < 0.5)
                reasons.push("coverage_relevance below 0.5");
            if (provenanceIntegrity < 0.5)
                reasons.push("provenance_integrity below 0.5");
            if (freshness < 0.5)
                reasons.push("freshness below 0.5");
            if (gamingPenalty > 0)
                reasons.push(`gaming_penalty=${gamingPenalty.toFixed(3)}`);
            if (staleResultPenalty > 0)
                reasons.push(`stale_result_penalty=${staleResultPenalty.toFixed(3)}`);
            if (unverifiableClaimPenalty > 0)
                reasons.push(`unverifiable_claim_penalty=${unverifiableClaimPenalty.toFixed(3)}`);
            if (verification.missing.length > 0)
                reasons.push(`missing evidence: ${verification.missing.length} items`);
            const verdict = computeVerdict(score);
            return {
                score,
                reproducibility: round3(reproducibility),
                independence: round3(independence),
                coverageRelevance: round3(coverageRelevance),
                provenanceIntegrity: round3(provenanceIntegrity),
                freshness: round3(freshness),
                gamingPenalty: round3(gamingPenalty),
                staleResultPenalty: round3(staleResultPenalty),
                unverifiableClaimPenalty: round3(unverifiableClaimPenalty),
                verdict,
                reasons: Object.freeze(reasons),
            };
        },
    };
}
403
+ // ─── Helpers ───────────────────────────────────────────────────────────────
404
/**
 * Tells whether a value is a non-null, non-array object.
 *
 * @param value Any value.
 * @returns true for plain and other non-array objects, false otherwise.
 */
function isObject(value) {
    if (value === null || Array.isArray(value)) {
        return false;
    }
    return typeof value === "object";
}
407
+ // ─── Backward-compat: re-export as EvidenceTrustScore for integration ──────
408
+ export { createEvidenceTrustScoreV2Engine as createEvidenceTrustScore };
@@ -15,5 +15,7 @@ export type { DecisionTraceStore } from "./decision-trace.js";
15
15
  export { createDecisionTraceStore } from "./decision-trace.js";
16
16
  export type { ProofTrustMvpEngine, ProofTrustResult } from "./proof-trust.js";
17
17
  export { createProofTrustMvpEngine } from "./proof-trust.js";
18
+ export type { EtsClaim, EtsClaimCategory, EtsTaskType, EtsRiskTier, RequiredEvidenceItem, RunArtifactMeta, CollectedEvidence, EvidenceVerificationResult, EtsV2Result, EtsV2Engine, EtsV2Params, EtsV2EngineOptions, } from "./evidence-trust-score.js";
19
+ export { extractClaims, requiredEvidenceForClaim, collectEvidenceFromRunDir, verifyEvidence, createEvidenceTrustScoreV2Engine, createEvidenceTrustScore, } from "./evidence-trust-score.js";
18
20
  export type { AlgorithmSpec, ReleaseCandidate, RegressionProofMatrixResult, RegressionProofMatrixEngine, RegressionProofMatrixOptions, } from "./regression-proof-matrix.js";
19
21
  export { createRegressionProofMatrixEngine } from "./regression-proof-matrix.js";
@@ -6,4 +6,5 @@ export { createRunTraceStore } from "./run-trace.js";
6
6
  export { decideRepair } from "../orchestration/repair-policy.js";
7
7
  export { createDecisionTraceStore } from "./decision-trace.js";
8
8
  export { createProofTrustMvpEngine } from "./proof-trust.js";
9
+ export { extractClaims, requiredEvidenceForClaim, collectEvidenceFromRunDir, verifyEvidence, createEvidenceTrustScoreV2Engine, createEvidenceTrustScore, } from "./evidence-trust-score.js";
9
10
  export { createRegressionProofMatrixEngine } from "./regression-proof-matrix.js";
Binary file
@@ -0,0 +1,91 @@
1
+ /**
2
+ * Merge Arbiter — patch scoring + conflict detection + winner selection.
3
+ *
4
+ * Pipeline:
5
+ * CollectCandidatePatches → NormalizeDiffs → RunEvidenceSuite → ScorePatch
6
+ * → DetectConflicts → SelectWinnerOrHybrid → ProduceMergeRationale
7
+ */
8
/**
 * A candidate patch as it moves through the merge-arbiter pipeline
 * (collection, diff normalisation, evidence suite, scoring, conflict
 * detection, winner selection).
 */
export interface CandidatePatch {
    id: string;
    name: string;
    path: string;
    /** Diff text; `normalizedDiff` holds its normalised counterpart. */
    diff: string;
    normalizedDiff: string;
    /** Presumably the files the diff touches — confirm against the implementation. */
    fileScopes: Array<string>;
    diffLines: number;
    canApply: boolean;
    /** Presumably ids of conflicting candidates — confirm against the implementation. */
    conflictsWith: Array<string>;
    evidence: PatchEvidence;
    scores: PatchScores;
    compositeScore: number;
}
22
/** Evidence gathered for one candidate patch. */
export interface PatchEvidence {
    testsPassed: boolean;
    lintPassed: boolean;
    typecheckPassed: boolean;
    /** Optional reviewer score; its scale is not visible here. */
    reviewerScore?: number;
    /** Optional reviewer explanation. */
    reviewerReason?: string;
    evidenceTrustScore: number;
}
30
/** Per-criterion scores of a candidate patch. */
export interface PatchScores {
    testPassScore: number;
    evidenceTrustScore: number;
    minimalityScore: number;
    lintTypecheckScore: number;
    conflictFreeScore: number;
    reviewerAgreementScore: number;
}
38
/** Outcome of a merge-arbiter run. */
export interface MergeArbiterResult {
    /** Selected candidate, or null when none was selected. */
    winner: CandidatePatch | null;
    requiresHumanApproval: boolean;
    rationale: MergeRationale;
    trace: MergeTrace;
}
44
/** Explanation attached to a merge decision. */
export interface MergeRationale {
    summary: string;
    /** Id of the winning candidate, or null when there is no winner. */
    winnerId: string | null;
    /** Numeric scores keyed by name. */
    scoreBreakdown: { [key: string]: number };
    conflicts: Array<string>;
    threshold: number;
    /** Optional reason why human approval is needed. */
    humanApprovalReason?: string;
}
52
/** Recorded steps of a merge decision together with a timestamp. */
export interface MergeTrace {
    steps: Array<MergeTraceStep>;
    timestamp: string;
}
56
/** One entry of a merge trace. */
export interface MergeTraceStep {
    step: string;
    candidateId: string;
    detail: string;
    /** Optional duration of the step in milliseconds. */
    durationMs?: number;
}
62
/** Tuning options accepted by the merge-arbiter functions; every field is optional. */
export interface MergeArbiterOptions {
    /** Minimum composite score (0–1) required for auto-approval. */
    threshold?: number;
    /** Diff size, in lines, at which the minimality score reaches zero. */
    maxDiffLines?: number;
    /** Timeout in milliseconds for test execution in worktrees. */
    testTimeoutMs?: number;
    /** Timeout in milliseconds for `git apply --check`. */
    applyCheckTimeoutMs?: number;
}
72
/** Pipeline step: collects candidate patches for the given worktrees directory and branch. */
export declare function collectCandidatePatches(
    worktreesDir: string,
    currentBranch: string,
    options?: MergeArbiterOptions,
): Promise<Array<CandidatePatch>>;
/** Pipeline step: returns the normalised form of a diff. */
export declare function normalizeDiff(diff: string): string;
/** Extracts file scopes from a diff. */
export declare function extractFileScopes(diff: string): Array<string>;
/** Pipeline step: runs the evidence suite for a candidate and resolves to a candidate. */
export declare function runEvidenceSuite(
    candidate: CandidatePatch,
    projectRoot: string,
    config: string,
    options?: MergeArbiterOptions,
): Promise<CandidatePatch>;
/** Pipeline step: scores a candidate and returns a candidate. */
export declare function scorePatch(candidate: CandidatePatch, options?: MergeArbiterOptions): CandidatePatch;
/** Pipeline step: conflict detection across candidates; returns candidates. */
export declare function detectConflicts(candidates: Array<CandidatePatch>): Array<CandidatePatch>;
/** Pipeline step: picks a winner (or none) and reports whether human approval is required. */
export declare function selectWinnerOrHybrid(
    candidates: Array<CandidatePatch>,
    options?: MergeArbiterOptions,
): {
    winner: CandidatePatch | null;
    requiresHumanApproval: boolean;
    reason?: string;
};
/** Pipeline step: builds the rationale and trace for a selection. */
export declare function produceMergeRationale(
    candidates: Array<CandidatePatch>,
    selection: {
        winner: CandidatePatch | null;
        requiresHumanApproval: boolean;
        reason?: string;
    },
    options?: MergeArbiterOptions,
): {
    rationale: MergeRationale;
    trace: MergeTrace;
};
/** Entry point: resolves to the winner, approval flag, rationale, and trace. */
export declare function runMergeArbiter(
    worktreesDir: string,
    currentBranch: string,
    projectRoot: string,
    config: string,
    options?: MergeArbiterOptions,
): Promise<MergeArbiterResult>;