open-multi-agent-kit 0.78.1 → 0.78.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +37 -0
- package/MATURITY.md +4 -0
- package/README.md +70 -1
- package/dist/benchmark/contracts.d.ts +116 -0
- package/dist/benchmark/contracts.js +6 -0
- package/dist/benchmark/fixtures.d.ts +11 -0
- package/dist/benchmark/fixtures.js +121 -0
- package/dist/benchmark/harness.d.ts +13 -0
- package/dist/benchmark/harness.js +191 -0
- package/dist/benchmark/shadow-mode.d.ts +17 -0
- package/dist/benchmark/shadow-mode.js +96 -0
- package/dist/cli/register-spec-agent-goal-commands.js +45 -0
- package/dist/cli/release-promotion-gate.d.ts +14 -0
- package/dist/cli/release-promotion-gate.js +71 -0
- package/dist/cli/v2/release-commands.d.ts +29 -0
- package/dist/cli/v2/release-commands.js +95 -0
- package/dist/commands/chat/native-root-loop.js +14 -1
- package/dist/commands/chat/slash/commands/session.js +19 -1
- package/dist/commands/goal-interview.d.ts +18 -0
- package/dist/commands/goal-interview.js +396 -0
- package/dist/commands/merge.js +102 -56
- package/dist/contracts/interview.d.ts +106 -0
- package/dist/contracts/interview.js +9 -0
- package/dist/contracts/provider-health.d.ts +37 -0
- package/dist/contracts/provider-health.js +49 -1
- package/dist/evidence/evidence-trust-score.d.ts +101 -0
- package/dist/evidence/evidence-trust-score.js +408 -0
- package/dist/evidence/index.d.ts +6 -0
- package/dist/evidence/index.js +3 -0
- package/dist/evidence/proof-trust-cli.d.ts +8 -0
- package/dist/evidence/proof-trust-cli.js +27 -0
- package/dist/evidence/proof-trust.d.ts +14 -0
- package/dist/evidence/proof-trust.js +381 -0
- package/dist/evidence/regression-proof-matrix.d.ts +42 -0
- package/dist/evidence/regression-proof-matrix.js +72 -0
- package/dist/goal/intent-frame.d.ts +6 -0
- package/dist/goal/intent-frame.js +21 -9
- package/dist/goal/interview-assimilation.d.ts +13 -0
- package/dist/goal/interview-assimilation.js +383 -0
- package/dist/goal/interview-question-bank.d.ts +11 -0
- package/dist/goal/interview-question-bank.js +225 -0
- package/dist/goal/interview-scoring.d.ts +31 -0
- package/dist/goal/interview-scoring.js +187 -0
- package/dist/goal/interview-session.d.ts +25 -0
- package/dist/goal/interview-session.js +116 -0
- package/dist/input/input-envelope.d.ts +22 -0
- package/dist/input/input-envelope.js +1 -0
- package/dist/orchestration/merge-arbiter.d.ts +91 -0
- package/dist/orchestration/merge-arbiter.js +376 -0
- package/dist/providers/health.d.ts +3 -0
- package/dist/providers/health.js +46 -0
- package/dist/providers/index.d.ts +1 -0
- package/dist/providers/index.js +1 -0
- package/dist/providers/provider-health.d.ts +8 -1
- package/dist/providers/provider-health.js +39 -0
- package/dist/providers/provider-task-runner.js +31 -0
- package/dist/providers/provider.d.ts +2 -0
- package/dist/providers/router.js +87 -3
- package/dist/providers/types.d.ts +4 -0
- package/dist/runtime/advanced-control-loop.d.ts +60 -0
- package/dist/runtime/advanced-control-loop.js +136 -0
- package/dist/runtime/agent-runtime.d.ts +10 -0
- package/dist/runtime/blast-radius.d.ts +10 -0
- package/dist/runtime/blast-radius.js +14 -0
- package/dist/runtime/contracts/evidence.d.ts +87 -0
- package/dist/runtime/contracts/evidence.js +7 -0
- package/dist/runtime/contracts/router-v2.d.ts +44 -0
- package/dist/runtime/contracts/router-v2.js +4 -0
- package/dist/runtime/contracts/weakness-remediation.d.ts +67 -0
- package/dist/runtime/contracts/weakness-remediation.js +36 -0
- package/dist/runtime/kimi-api-runtime.js +59 -1
- package/dist/runtime/proof-bundle-trust.d.ts +74 -0
- package/dist/runtime/proof-bundle-trust.js +100 -0
- package/dist/runtime/provider-maturity-gate.d.ts +43 -0
- package/dist/runtime/provider-maturity-gate.js +129 -0
- package/dist/runtime/public-surface.d.ts +93 -0
- package/dist/runtime/public-surface.js +146 -0
- package/dist/runtime/router-v2-scoring.d.ts +11 -0
- package/dist/runtime/router-v2-scoring.js +151 -0
- package/dist/runtime/tool-dispatch-contracts.d.ts +24 -3
- package/dist/runtime/tool-dispatch-contracts.js +42 -2
- package/dist/runtime/weakness-remediation-index.d.ts +27 -0
- package/dist/runtime/weakness-remediation-index.js +37 -0
- package/dist/safety/enforcement-engine.d.ts +89 -0
- package/dist/safety/enforcement-engine.js +279 -0
- package/dist/safety/tool-authority-gate.d.ts +40 -0
- package/dist/safety/tool-authority-gate.js +92 -0
- package/dist/schema/evidence.schema.d.ts +2 -2
- package/dist/schema/proof-bundle.schema.d.ts +28 -28
- package/dist/util/clipboard-image.d.ts +49 -0
- package/dist/util/clipboard-image.js +263 -0
- package/docs/2026-06-09/critical-issues.md +20 -0
- package/docs/2026-06-09/improvements.md +14 -0
- package/docs/2026-06-09/init-checklist.md +25 -0
- package/docs/2026-06-09/plan.md +20 -0
- package/docs/benchmark-design.md +122 -0
- package/docs/github-organic-promotion.md +127 -0
- package/docs/native-root-runtime-algorithms.md +301 -0
- package/package.json +8 -4
- package/readmeasset/ASSET_INDEX.md +1 -0
- package/templates/skills/agents/omk-agent-reach-websearch/SKILL.md +55 -0
- package/templates/skills/kimi/omk-agent-reach-websearch/SKILL.md +55 -0
|
@@ -0,0 +1,187 @@
|
|
|
1
|
+
// Module: src/goal/interview-scoring.ts
|
|
2
|
+
// OMK Deep Interview — pure deterministic scoring/ranking.
|
|
3
|
+
//
|
|
4
|
+
// 100% deterministic: no I/O, no Date.now, no randomness, no network/LLM calls.
|
|
5
|
+
// All 0..1 outputs are clamped to [0,1] and scores rounded to 2 decimals.
|
|
6
|
+
/**
 * Clamp a value into the closed interval [0, 1].
 *
 * Values that are not comparable as numbers (NaN, undefined, non-numeric
 * strings) make Math.max/Math.min yield NaN; those are mapped to 0 so the
 * documented "all 0..1 outputs are clamped" guarantee holds and callers that
 * sort by the result never receive NaN.
 */
const clamp01 = (x) => {
    const bounded = Math.min(1, Math.max(0, x));
    return Number.isNaN(bounded) ? 0 : bounded;
};
|
|
7
|
+
/** Round to the nearest hundredth by scaling by 100, applying Math.round, and scaling back. */
const round2 = (x) => {
    const hundredths = Math.round(x * 100);
    return hundredths / 100;
};
|
|
8
|
+
/**
 * Upper bound on the number of questions surfaced for each interview depth,
 * keyed by depth name.
 */
export const DEPTH_LIMITS = { light: 5, standard: 10, deep: 18 };
|
|
14
|
+
/**
 * Compute the ranking score of one candidate interview question.
 *
 * The score is a weighted blend of the candidate's own factors:
 *   +0.35 informationGain, +0.25 riskReduction, +0.20 dagImpact,
 *   +0.15 evidenceImpact, -0.05 userCost
 * clamped to [0,1] and rounded to two decimals.
 *
 * @param input candidate carrying the five numeric factors.
 * @returns a shallow copy of `input` with a `score` property added.
 */
export function scoreInterviewQuestion(input) {
    // Terms are accumulated in the documented order so the floating-point
    // result is the same as a single left-to-right expression.
    let blended = input.informationGain * 0.35;
    blended += input.riskReduction * 0.25;
    blended += input.dagImpact * 0.2;
    blended += input.evidenceImpact * 0.15;
    blended -= input.userCost * 0.05;
    return { ...input, score: round2(clamp01(blended)) };
}
|
|
28
|
+
/**
 * Select the highest-value questions for a depth, dropping questions whose
 * target field is already answered by the goal (unless marked required).
 *
 * Required questions are always kept, even when they exceed the limit;
 * optional questions fill whatever slots remain, best score first.
 *
 * @param goal optional goal; a non-blank `objective` and any non-empty list
 *   field (successCriteria, expectedArtifacts, constraints, nonGoals, risks)
 *   mark that target as answered. Absent list fields count as unanswered.
 * @param candidates scored questions (`score`, `required`, `targetField`).
 * @param depth key into DEPTH_LIMITS.
 * @param maxQuestions optional cap, further limited by the depth limit. A
 *   missing or non-finite value is ignored and the depth limit applies.
 * @returns a new array sorted by score (descending), required first on ties.
 */
export function selectInterviewQuestions(goal, candidates, depth, maxQuestions) {
    const depthLimit = DEPTH_LIMITS[depth];
    // A cap that is not a real number (e.g. NaN from a failed CLI parse) must
    // not silently discard every optional question, so it is ignored.
    const cap = maxQuestions != null ? Number(maxQuestions) : Number.NaN;
    const limit = Number.isFinite(cap) ? Math.min(cap, depthLimit) : depthLimit;
    const answeredTargets = new Set();
    if (goal) {
        if (goal.objective?.trim())
            answeredTargets.add("objective");
        const listFields = ["successCriteria", "expectedArtifacts", "constraints", "nonGoals", "risks"];
        for (const field of listFields) {
            // Optional chaining: a partially populated goal is treated as
            // "not answered" for that field instead of throwing.
            if ((goal[field]?.length ?? 0) > 0)
                answeredTargets.add(field);
        }
    }
    const eligible = candidates.filter((q) => q.required || !answeredTargets.has(q.targetField));
    const byScoreThenRequired = (a, b) => {
        const byScore = b.score - a.score;
        if (byScore !== 0)
            return byScore;
        return Number(b.required) - Number(a.required);
    };
    // Pin required questions so a small --max-questions/limit can never drop a
    // required axis; optional questions fill the remaining slots by score.
    // The required list needs no sort of its own: the final stable sort orders it.
    const required = eligible.filter((q) => q.required);
    const optional = eligible.filter((q) => !q.required).sort(byScoreThenRequired);
    const remaining = Math.max(0, limit - required.length);
    return [...required, ...optional.slice(0, remaining)].sort(byScoreThenRequired);
}
|
|
63
|
+
/**
 * Keyword/regex heuristics, one entry per ambiguity axis.
 *
 * Each axis has:
 *  - `weight`: its share of the ambiguity score (the weights sum to 1);
 *  - `pattern`: tested against the lower-cased raw prompt;
 *  - `goalPresent(seed)`: true when the seed's goal already covers the axis.
 */
const AMBIGUITY_AXES = [
    // Objective: an action verb or explicit goal wording.
    {
        weight: 0.18,
        pattern: /\b(objective|goal|implement|build|create|add|fix|refactor|design|develop|generate|support|enable)\b/,
        goalPresent: (s) => !!s.goal?.objective?.trim(),
    },
    // Success criteria.
    {
        weight: 0.18,
        pattern: /\b(acceptance|success criteria|criteria|criterion|definition of done|done when|must pass|requirement|expected result|should)\b/,
        goalPresent: (s) => (s.goal?.successCriteria.length ?? 0) > 0,
    },
    // Verification: covered on the goal only by a "command-pass" gated artifact.
    {
        weight: 0.13,
        pattern: /\b(test|verify|verification|check|lint|typecheck|build|coverage|assert|validate)\b/,
        goalPresent: (s) => (s.goal?.expectedArtifacts.some((a) => a.gate === "command-pass") ?? false),
    },
    // Artifacts: a file-extension-like token or file/path vocabulary.
    {
        weight: 0.12,
        pattern: /(\.[a-z]{2,4}\b|\bfile\b|\bpath\b|\bmodule\b|\bcomponent\b|\bdirectory\b|\bartifact\b|\boutput\b)/,
        goalPresent: (s) => (s.goal?.expectedArtifacts.length ?? 0) > 0,
    },
    // Constraints.
    {
        weight: 0.1,
        pattern: /\b(constraint|must not|only|do not|don't|limit|restrict|without|avoid|never|forbidden)\b/,
        goalPresent: (s) => (s.goal?.constraints.length ?? 0) > 0,
    },
    // Risk: goal risks or an explicit seed risk level.
    {
        weight: 0.08,
        pattern: /\b(risk|danger|safe|safety|destructive|irreversible|production|rollback|backup|prod)\b/,
        goalPresent: (s) => (s.goal?.risks.length ?? 0) > 0 || s.riskLevel != null,
    },
    // Authority / scope of allowed actions.
    {
        weight: 0.08,
        pattern: /\b(write|edit|modify|delete|shell|command|merge|commit|push|deploy|permission|read.only|authority|scope)\b/,
        goalPresent: (s) => !!s.goal?.intentFrame?.directives?.some((d) => d.kind === "read-only" || d.kind === "no-edits" || d.kind === "scope"),
    },
    // Non-goals.
    {
        weight: 0.07,
        pattern: /\b(non-goal|out of scope|exclude|except|not (include|touch|modify|change|do)|do not (edit|modify|change|touch))\b/,
        goalPresent: (s) => (s.goal?.nonGoals.length ?? 0) > 0,
    },
    // Dependencies: never satisfied by the goal, only by the prompt.
    // The stems take `\w*` so inflected forms ("depends", "dependency",
    // "dependencies", "integrate", "integration") match; with a bare stem
    // followed by `\b` they could never match.
    {
        weight: 0.06,
        pattern: /\b(depend\w*|requires|require|prerequisite|blocked by|relies on|integrat\w*|import)\b/,
        goalPresent: () => false,
    },
];
|
|
111
|
+
/**
 * Ambiguity in [0,1]; a higher value means more interviewing is needed.
 *
 * Adds up the weights of every axis in AMBIGUITY_AXES that is covered neither
 * by the lower-cased prompt (regex match) nor by the seed's goal, then clamps
 * and rounds to two decimals. Axis weights sum to 1.
 *
 * @param seed interview seed; `rawPrompt` may be missing and is then treated as "".
 * @returns the rounded ambiguity score.
 */
export function computeAmbiguity(seed) {
    const haystack = (seed.rawPrompt ?? "").toLowerCase();
    const uncovered = AMBIGUITY_AXES.reduce((sum, axis) => {
        const covered = axis.pattern.test(haystack) || axis.goalPresent(seed);
        return covered ? sum : sum + axis.weight;
    }, 0);
    return round2(clamp01(uncovered));
}
|
|
126
|
+
/**
 * Translate an ambiguity score into a recommended interview depth.
 *
 * Below 0.50 -> "light" (under 0.25 the caller may skip the interview),
 * below 0.75 -> "standard", anything else -> "deep".
 */
export function recommendDepth(ambiguity) {
    return ambiguity < 0.5 ? "light" : ambiguity < 0.75 ? "standard" : "deep";
}
|
|
137
|
+
/**
 * Completeness of a single axis.
 *
 * Contributes 0.6 when the goal field is populated and a further 0.4 when at
 * least one finding's `field` (compared as a string) is listed in `fields`.
 */
function axisScore(goalPopulated, fields, findings) {
    const matched = findings.some((finding) => fields.includes(String(finding.field)));
    const total = (goalPopulated ? 0.6 : 0) + (matched ? 0.4 : 0);
    return round2(clamp01(total));
}
|
|
147
|
+
/**
 * Score how complete a goal is, axis by axis, combining populated goal fields
 * with assimilated interview findings.
 *
 * @param goal goal to inspect; may be null/undefined (every goal check is then false).
 * @param findings interview findings; `field` is matched against axis names
 *   and a truthy `conflict` marks a contradiction.
 * @returns per-axis scores, the weighted `overall` score (objective 0.15,
 *   successCriteria 0.25, evidence 0.2, artifacts 0.15, constraints 0.1,
 *   risks 0.1, authority 0.05), `criticalMissing` (objective /
 *   successCriteria / evidence axes scoring below 0.5) and `contradictions`
 *   (field names of conflicting findings).
 */
export function computeCompleteness(goal, findings) {
    const isAuthorityDirective = (d) => d.kind === "read-only" || d.kind === "no-edits" || d.kind === "scope";
    const objective = axisScore(!!goal?.objective?.trim(), ["objective"], findings);
    const successCriteria = axisScore((goal?.successCriteria.length ?? 0) > 0, ["successCriteria"], findings);
    // Evidence counts as populated when any artifact has a gate or any
    // success criterion is marked "required".
    const evidence = axisScore((goal?.expectedArtifacts.some((a) => !!a.gate) ?? false) ||
        (goal?.successCriteria.some((c) => c.requirement === "required") ?? false), ["evidence"], findings);
    const artifacts = axisScore((goal?.expectedArtifacts.length ?? 0) > 0, ["expectedArtifacts", "artifact", "artifacts"], findings);
    const constraints = axisScore((goal?.constraints.length ?? 0) > 0, ["constraints"], findings);
    const risks = axisScore((goal?.risks.length ?? 0) > 0, ["risks", "riskLevel"], findings);
    const authority = axisScore(!!goal?.intentFrame?.directives?.some(isAuthorityDirective), ["authority", "intentFrame"], findings);
    // Summed left to right so the floating-point result is unchanged.
    const weighted = [
        objective * 0.15,
        successCriteria * 0.25,
        evidence * 0.2,
        artifacts * 0.15,
        constraints * 0.1,
        risks * 0.1,
        authority * 0.05,
    ].reduce((sum, term) => sum + term);
    const overall = round2(clamp01(weighted));
    const criticalMissing = [
        { axis: "objective", value: objective },
        { axis: "successCriteria", value: successCriteria },
        { axis: "evidence", value: evidence },
    ]
        .filter((entry) => entry.value < 0.5)
        .map((entry) => entry.axis);
    const contradictions = findings.flatMap((finding) => (finding.conflict ? [String(finding.field)] : []));
    return {
        overall,
        objective,
        successCriteria,
        evidence,
        artifacts,
        constraints,
        risks,
        authority,
        criticalMissing,
        contradictions,
    };
}
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
import type { InterviewAnswer, InterviewDepth, InterviewMode, InterviewSeed, InterviewSession } from "../contracts/interview.js";
|
|
2
|
+
/** Minimum overall completeness (inclusive, compared with >=) an interview must reach to be considered done. */
|
|
3
|
+
export declare const COMPLETENESS_THRESHOLD = 0.82;
|
|
4
|
+
/** Input accepted by `buildInterviewSession`. */
export interface BuildInterviewSessionInput {
|
|
5
|
+
/** Seed the session is built from; its `rawPrompt` and optional `goal` are read. */
seed: InterviewSeed;
|
|
6
|
+
/** Copied unchanged onto the resulting session's `mode`. */
mode: InterviewMode;
|
|
7
|
+
/** Interview depth; when omitted it is derived from the seed's ambiguity score via `recommendDepth`. */
depth?: InterviewDepth;
|
|
8
|
+
/** Optional question cap forwarded to `selectInterviewQuestions`, which also applies the per-depth limit. */
maxQuestions?: number;
|
|
9
|
+
/** Session id; when omitted a timestamp-based `iv-…` id is generated. */
sessionId?: string;
|
|
10
|
+
/** Goal id for the session and its spec delta; falls back to `seed.goal?.goalId` when omitted. */
goalId?: string;
|
|
11
|
+
}
|
|
12
|
+
/**
 * Build a fresh interview session: score ambiguity, rank deterministic
 * questions, and emit an empty spec delta. No answers are applied yet.
 *
 * @param input seed, mode and optional overrides (depth, question cap, ids).
 * @returns a session with status "open", empty `answers` and `findings`, a
 *   spec delta with no changes, completeness computed from the seed goal with
 *   no findings, and `rawPrompt` passed through `redactSecretText`.
 */
|
|
16
|
+
export declare function buildInterviewSession(input: BuildInterviewSessionInput): InterviewSession;
|
|
17
|
+
/**
 * Apply a batch of answers to an open session: assimilate into findings and a
 * spec delta, recompute completeness, and decide the terminal status.
 *
 * Termination (spec): complete when completeness >= 0.82 AND no critical
 * missing field AND no unresolved contradiction. Contradictions => blocked.
 * Otherwise the session stays "open".
 *
 * @param session session to extend; it is copied, not mutated. Its existing
 *   answers are merged with `newAnswers` by `questionId`, new answers winning.
 * @param seed seed whose goal is the base for projecting the spec delta; when
 *   the seed has no goal, one is created from `seed.rawPrompt`.
 * @param newAnswers answers to add; each answer text is passed through
 *   `redactSecretText` before it is used or stored.
 * @returns a new session with refreshed `updatedAt`, answers, findings,
 *   completeness, spec delta and status.
 */
|
|
24
|
+
export declare function ingestAnswers(session: InterviewSession, seed: InterviewSeed, newAnswers: InterviewAnswer[]): InterviewSession;
|
|
25
|
+
/**
 * Derive the session status from a completeness report.
 *
 * - "blocked" when `completeness.contradictions` is non-empty (checked first);
 * - "complete" when `completeness.overall >= COMPLETENESS_THRESHOLD` and
 *   `completeness.criticalMissing` is empty;
 * - "open" otherwise.
 */
export declare function decideStatus(completeness: InterviewSession["completeness"]): InterviewSession["status"];
|
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
// src/goal/interview-session.ts
|
|
2
|
+
// OMK Deep Interview — session orchestrator.
|
|
3
|
+
//
|
|
4
|
+
// Composes the three deterministic modules (question-bank, scoring,
|
|
5
|
+
// assimilation) into an InterviewSession lifecycle:
|
|
6
|
+
// seed -> ambiguity -> question ranking -> answers -> findings
|
|
7
|
+
// -> spec delta -> completeness -> status
|
|
8
|
+
//
|
|
9
|
+
// This module owns the critical integration path and is intentionally
|
|
10
|
+
// deterministic except for timestamps and the session id.
|
|
11
|
+
import { INTERVIEW_SCHEMA_VERSION, INTERVIEW_DELTA_SCHEMA_VERSION } from "../contracts/interview.js";
|
|
12
|
+
import { computeAmbiguity, computeCompleteness, recommendDepth, scoreInterviewQuestion, selectInterviewQuestions, } from "./interview-scoring.js";
|
|
13
|
+
import { buildInterviewQuestionBank } from "./interview-question-bank.js";
|
|
14
|
+
import { assimilateAnswers, applyInterviewDelta } from "./interview-assimilation.js";
|
|
15
|
+
import { createGoalSpec } from "./intake.js";
|
|
16
|
+
import { redactSecretText } from "./intent-frame.js";
|
|
17
|
+
/** Minimum overall completeness (inclusive, compared with >=) an interview must reach to be considered done. */
|
|
18
|
+
export const COMPLETENESS_THRESHOLD = 0.82;
|
|
19
|
+
/**
 * Build a timestamp-based session id: "iv-" followed by the current ISO-8601
 * instant with every ":" and "." replaced by "-".
 */
function generateSessionId() {
    const stamp = new Date().toISOString();
    return "iv-" + stamp.replace(/[:.]/g, "-");
}
|
|
22
|
+
/**
 * Create a new, unanswered interview session from a seed.
 *
 * Scores the seed's ambiguity, picks a depth (explicit `input.depth` wins over
 * the recommendation), scores and selects questions from the question bank,
 * and computes the initial completeness from the seed goal with no findings.
 * The returned session is "open" and carries an empty spec delta; the raw
 * prompt is stored after secret redaction.
 */
export function buildInterviewSession(input) {
    const { seed } = input;
    const timestamp = new Date().toISOString();
    const ambiguity = computeAmbiguity(seed);
    const depth = input.depth ?? recommendDepth(ambiguity);
    const ranked = buildInterviewQuestionBank(seed).map((candidate) => scoreInterviewQuestion(candidate));
    const questions = selectInterviewQuestions(seed.goal, ranked, depth, input.maxQuestions);
    const completeness = computeCompleteness(seed.goal, []);
    // The same goal id is used for the session and for its spec delta.
    const goalId = input.goalId ?? seed.goal?.goalId;
    return {
        schemaVersion: INTERVIEW_SCHEMA_VERSION,
        sessionId: input.sessionId ?? generateSessionId(),
        goalId,
        mode: input.mode,
        depth,
        createdAt: timestamp,
        updatedAt: timestamp,
        rawPrompt: redactSecretText(seed.rawPrompt),
        ambiguity,
        questions,
        answers: [],
        findings: [],
        completeness,
        specDelta: { schemaVersion: INTERVIEW_DELTA_SCHEMA_VERSION, goalId, changes: [] },
        status: "open",
    };
}
|
|
56
|
+
/**
 * Fold a batch of answers into a session and re-evaluate it.
 *
 * New answers are redacted, merged with the session's existing answers (by
 * question id), assimilated into findings and a spec delta, and scored.
 *
 * Termination (spec): complete when completeness >= 0.82 AND no critical
 * missing field AND no unresolved contradiction. Contradictions => blocked.
 *
 * @returns a new session object; `session` itself is not modified.
 */
export function ingestAnswers(session, seed, newAnswers) {
    // Scrub secrets up front so tokens never reach interview artifacts, the
    // GoalSpec, or anything echoed back to the user.
    const sanitized = newAnswers.map((entry) => {
        const answer = redactSecretText(entry.answer);
        return { ...entry, answer };
    });
    const answers = mergeAnswers(session.answers, sanitized);
    const assimilated = assimilateAnswers({
        seed,
        questions: session.questions,
        answers,
        goal: seed.goal,
    });
    // Score the PROJECTED goal (base + delta) rather than the bare seed goal,
    // so completeness reflects what the answers actually contributed and the
    // 0.82 termination threshold is meaningful.
    const base = seed.goal ?? createGoalSpec(seed.rawPrompt);
    const { goal: projectedGoal } = applyInterviewDelta(base, assimilated.specDelta);
    const completeness = computeCompleteness(projectedGoal, assimilated.findings);
    // Combine contradictions from scoring and from assimilation, without duplicates.
    completeness.contradictions = uniqueStrings([...completeness.contradictions, ...assimilated.contradictions]);
    const status = decideStatus(completeness);
    return {
        ...session,
        updatedAt: new Date().toISOString(),
        answers,
        findings: assimilated.findings,
        completeness,
        specDelta: { ...assimilated.specDelta, goalId: session.goalId ?? assimilated.specDelta.goalId },
        status,
    };
}
|
|
98
|
+
/**
 * Derive the session status from a completeness report.
 *
 * Any contradiction blocks the session. Otherwise it is complete once the
 * overall score reaches COMPLETENESS_THRESHOLD with no critical field missing,
 * and open in every other case.
 */
export function decideStatus(completeness) {
    const { contradictions, criticalMissing, overall } = completeness;
    if (contradictions.length > 0) {
        return "blocked";
    }
    const done = overall >= COMPLETENESS_THRESHOLD && criticalMissing.length === 0;
    return done ? "complete" : "open";
}
|
|
106
|
+
/**
 * Merge two answer lists keyed by `questionId`.
 *
 * Later entries replace earlier ones with the same id (so `incoming` wins),
 * while each id keeps the position of its first occurrence.
 */
function mergeAnswers(existing, incoming) {
    const latest = new Map();
    [...existing, ...incoming].forEach((answer) => {
        latest.set(answer.questionId, answer);
    });
    return Array.from(latest.values());
}
|
|
114
|
+
/** Drop empty strings, then remove duplicates while keeping first-seen order. */
function uniqueStrings(values) {
    const seen = new Set();
    for (const value of values.filter((candidate) => candidate.length > 0)) {
        seen.add(value);
    }
    return Array.from(seen);
}
|
|
@@ -1,4 +1,23 @@
|
|
|
1
1
|
export type InputKind = "plain-prompt" | "slash-command" | "goal-form" | "resume" | "verify" | "replan";
|
|
2
|
+
/**
|
|
3
|
+
* Image or file attached to a prompt (clipboard paste, --image flag, drag).
|
|
4
|
+
* Carries both the on-disk path and the base64 data URI for multimodal
|
|
5
|
+
* wire protocol use (image_url parts).
|
|
6
|
+
*/
|
|
7
|
+
export interface InputAttachment {
|
|
8
|
+
/** Original file name or "clipboard-image.png". */
|
|
9
|
+
name: string;
|
|
10
|
+
/** Absolute or project-relative path to the saved file. */
|
|
11
|
+
path: string;
|
|
12
|
+
/** MIME type: image/png, image/jpeg, image/webp, image/gif. */
|
|
13
|
+
mimeType: string;
|
|
14
|
+
/** Base64 data URI (data:image/png;base64,...) for wire protocol. */
|
|
15
|
+
dataUri: string;
|
|
16
|
+
/** Detected extension: png, jpg, webp, gif. */
|
|
17
|
+
ext: string;
|
|
18
|
+
/** Source of the attachment. */
|
|
19
|
+
source: "clipboard" | "file" | "drag";
|
|
20
|
+
}
|
|
2
21
|
export type InputSource = "chat" | "parallel" | "run" | "goal" | "api";
|
|
3
22
|
export type InputMcpScope = "all" | "project" | "none";
|
|
4
23
|
export interface InputRequestedArtifact {
|
|
@@ -32,6 +51,8 @@ export interface InputEnvelope {
|
|
|
32
51
|
theme?: string;
|
|
33
52
|
constraints: string[];
|
|
34
53
|
requestedArtifacts: InputRequestedArtifact[];
|
|
54
|
+
/** Images/files attached to this input (clipboard paste, --image, drag). */
|
|
55
|
+
attachments: InputAttachment[];
|
|
35
56
|
slashCommand?: InputSlashCommandEnvelope;
|
|
36
57
|
createdAt: string;
|
|
37
58
|
}
|
|
@@ -52,6 +73,7 @@ export interface BuildInputEnvelopeInput {
|
|
|
52
73
|
theme?: string;
|
|
53
74
|
constraints?: readonly string[];
|
|
54
75
|
requestedArtifacts?: readonly InputRequestedArtifact[];
|
|
76
|
+
attachments?: readonly InputAttachment[];
|
|
55
77
|
slashCommand?: InputSlashCommandEnvelope;
|
|
56
78
|
now?: () => Date;
|
|
57
79
|
}
|
|
@@ -60,6 +60,7 @@ export function buildInputEnvelope(input) {
|
|
|
60
60
|
theme: input.theme,
|
|
61
61
|
constraints: [...(input.constraints ?? [])],
|
|
62
62
|
requestedArtifacts: input.requestedArtifacts?.map((artifact) => ({ ...artifact })) ?? [],
|
|
63
|
+
attachments: input.attachments?.map((a) => ({ ...a })) ?? [],
|
|
63
64
|
slashCommand: input.slashCommand
|
|
64
65
|
? cloneSlashCommand(input.slashCommand)
|
|
65
66
|
: undefined,
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Merge Arbiter — patch scoring + conflict detection + winner selection.
|
|
3
|
+
*
|
|
4
|
+
* Pipeline:
|
|
5
|
+
* CollectCandidatePatches → NormalizeDiffs → RunEvidenceSuite → ScorePatch
|
|
6
|
+
* → DetectConflicts → SelectWinnerOrHybrid → ProduceMergeRationale
|
|
7
|
+
*/
|
|
8
|
+
/**
 * One candidate patch handled by the merge arbiter. This is the value type
 * accepted and returned by `runEvidenceSuite`, `scorePatch` and
 * `detectConflicts`, and the element type resolved by `collectCandidatePatches`.
 *
 * NOTE(review): `normalizedDiff` and `fileScopes` presumably hold the results
 * of `normalizeDiff` / `extractFileScopes`, and `conflictsWith` presumably
 * holds ids of other candidates — confirm against the implementation.
 */
export interface CandidatePatch {
|
|
9
|
+
id: string;
|
|
10
|
+
name: string;
|
|
11
|
+
path: string;
|
|
12
|
+
diff: string;
|
|
13
|
+
normalizedDiff: string;
|
|
14
|
+
fileScopes: string[];
|
|
15
|
+
diffLines: number;
|
|
16
|
+
canApply: boolean;
|
|
17
|
+
conflictsWith: string[];
|
|
18
|
+
evidence: PatchEvidence;
|
|
19
|
+
scores: PatchScores;
|
|
20
|
+
compositeScore: number;
|
|
21
|
+
}
|
|
22
|
+
/**
 * Evidence record stored on `CandidatePatch.evidence`: three pass/fail flags,
 * an optional reviewer score with reason, and an evidence trust score.
 */
export interface PatchEvidence {
|
|
23
|
+
testsPassed: boolean;
|
|
24
|
+
lintPassed: boolean;
|
|
25
|
+
typecheckPassed: boolean;
|
|
26
|
+
reviewerScore?: number;
|
|
27
|
+
reviewerReason?: string;
|
|
28
|
+
evidenceTrustScore: number;
|
|
29
|
+
}
|
|
30
|
+
/**
 * Per-criterion scores stored on `CandidatePatch.scores`; the combined value
 * lives separately in `CandidatePatch.compositeScore`.
 * NOTE(review): the numeric range of each score is not visible here — confirm.
 */
export interface PatchScores {
|
|
31
|
+
testPassScore: number;
|
|
32
|
+
evidenceTrustScore: number;
|
|
33
|
+
minimalityScore: number;
|
|
34
|
+
lintTypecheckScore: number;
|
|
35
|
+
conflictFreeScore: number;
|
|
36
|
+
reviewerAgreementScore: number;
|
|
37
|
+
}
|
|
38
|
+
/**
 * Value resolved by `runMergeArbiter`: the selected candidate (may be `null`),
 * the human-approval flag, and the rationale and trace for the decision.
 */
export interface MergeArbiterResult {
|
|
39
|
+
winner: CandidatePatch | null;
|
|
40
|
+
requiresHumanApproval: boolean;
|
|
41
|
+
rationale: MergeRationale;
|
|
42
|
+
trace: MergeTrace;
|
|
43
|
+
}
|
|
44
|
+
/**
 * Explanation of a merge decision; returned by `produceMergeRationale` and
 * embedded in `MergeArbiterResult.rationale`.
 */
export interface MergeRationale {
|
|
45
|
+
summary: string;
|
|
46
|
+
winnerId: string | null;
|
|
47
|
+
scoreBreakdown: Record<string, number>;
|
|
48
|
+
conflicts: string[];
|
|
49
|
+
threshold: number;
|
|
50
|
+
humanApprovalReason?: string;
|
|
51
|
+
}
|
|
52
|
+
/**
 * Ordered list of trace steps plus a timestamp string; returned by
 * `produceMergeRationale` and embedded in `MergeArbiterResult.trace`.
 */
export interface MergeTrace {
|
|
53
|
+
steps: MergeTraceStep[];
|
|
54
|
+
timestamp: string;
|
|
55
|
+
}
|
|
56
|
+
/** One entry of `MergeTrace.steps`: a step name, the candidate it concerns, free-text detail and an optional duration in milliseconds. */
export interface MergeTraceStep {
|
|
57
|
+
step: string;
|
|
58
|
+
candidateId: string;
|
|
59
|
+
detail: string;
|
|
60
|
+
durationMs?: number;
|
|
61
|
+
}
|
|
62
|
+
/**
 * Optional tuning knobs accepted by the merge-arbiter functions. Every field
 * is optional; the defaults are defined by the implementation and are not
 * visible in this declaration file.
 */
export interface MergeArbiterOptions {
|
|
63
|
+
/** Minimum composite score (0–1) for auto-approval. */
|
|
|
64
|
+
threshold?: number;
|
|
|
65
|
+
/** Max diff lines before minimality score hits zero. */
|
|
|
66
|
+
maxDiffLines?: number;
|
|
|
67
|
+
/** Timeout for test execution in worktrees (ms). */
|
|
|
68
|
+
testTimeoutMs?: number;
|
|
|
69
|
+
/** Timeout for git apply --check (ms). */
|
|
|
70
|
+
applyCheckTimeoutMs?: number;
|
|
|
71
|
+
}
|
|
72
|
+
/**
 * Pipeline step "CollectCandidatePatches" from the module header.
 * Asynchronous; resolves to the list of candidate patches.
 * NOTE(review): how `worktreesDir` and `currentBranch` are used is not visible
 * in this declaration file — confirm against the implementation.
 */
export declare function collectCandidatePatches(worktreesDir: string, currentBranch: string, options?: MergeArbiterOptions): Promise<CandidatePatch[]>;
|
|
73
|
+
/**
 * Pipeline step "NormalizeDiffs" from the module header: maps a diff string to
 * a normalized diff string. The normalization rules are not visible here.
 */
export declare function normalizeDiff(diff: string): string;
|
|
74
|
+
/**
 * Derive a list of strings from a diff.
 * NOTE(review): presumably the file paths the diff touches (cf. `CandidatePatch.fileScopes`) — confirm.
 */
export declare function extractFileScopes(diff: string): string[];
|
|
75
|
+
/**
 * Pipeline step "RunEvidenceSuite" from the module header.
 * Asynchronous; takes a candidate and resolves to a candidate.
 * NOTE(review): presumably fills `CandidatePatch.evidence`; the meaning of
 * `config` is not visible in this declaration file — confirm.
 */
export declare function runEvidenceSuite(candidate: CandidatePatch, projectRoot: string, config: string, options?: MergeArbiterOptions): Promise<CandidatePatch>;
|
|
76
|
+
/**
 * Pipeline step "ScorePatch" from the module header. Synchronous; takes a
 * candidate and returns a candidate.
 * NOTE(review): presumably fills `scores` and `compositeScore` — confirm.
 */
export declare function scorePatch(candidate: CandidatePatch, options?: MergeArbiterOptions): CandidatePatch;
|
|
77
|
+
/**
 * Pipeline step "DetectConflicts" from the module header. Takes the candidate
 * list and returns a candidate list.
 * NOTE(review): presumably fills each candidate's `conflictsWith` — confirm.
 */
export declare function detectConflicts(candidates: CandidatePatch[]): CandidatePatch[];
|
|
78
|
+
/**
 * Pipeline step "SelectWinnerOrHybrid" from the module header.
 *
 * @returns the selection: `winner` (may be `null`), whether human approval is
 *   required, and an optional `reason` string. The selection rules are not
 *   visible in this declaration file.
 */
export declare function selectWinnerOrHybrid(candidates: CandidatePatch[], options?: MergeArbiterOptions): {
|
|
79
|
+
winner: CandidatePatch | null;
|
|
80
|
+
requiresHumanApproval: boolean;
|
|
81
|
+
reason?: string;
|
|
82
|
+
};
|
|
83
|
+
/**
 * Pipeline step "ProduceMergeRationale" from the module header.
 *
 * @param candidates the candidate patches that were considered.
 * @param selection an object with the same shape as the return value of
 *   `selectWinnerOrHybrid`.
 * @returns the rationale and trace that `MergeArbiterResult` also carries.
 */
export declare function produceMergeRationale(candidates: CandidatePatch[], selection: {
|
|
84
|
+
winner: CandidatePatch | null;
|
|
85
|
+
requiresHumanApproval: boolean;
|
|
86
|
+
reason?: string;
|
|
87
|
+
}, options?: MergeArbiterOptions): {
|
|
88
|
+
rationale: MergeRationale;
|
|
89
|
+
trace: MergeTrace;
|
|
90
|
+
};
|
|
91
|
+
/**
 * Entry point of the merge arbiter. Asynchronous; resolves to a
 * `MergeArbiterResult` (winner, approval flag, rationale, trace).
 * NOTE(review): presumably runs the pipeline steps listed in the module header
 * in order — confirm against the implementation.
 */
export declare function runMergeArbiter(worktreesDir: string, currentBranch: string, projectRoot: string, config: string, options?: MergeArbiterOptions): Promise<MergeArbiterResult>;
|