@cat-factory/orchestration 0.19.2 → 0.21.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/container.d.ts +43 -6
- package/dist/container.d.ts.map +1 -1
- package/dist/container.js +63 -1
- package/dist/container.js.map +1 -1
- package/dist/index.d.ts +5 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +4 -0
- package/dist/index.js.map +1 -1
- package/dist/modules/execution/ExecutionService.d.ts +40 -8
- package/dist/modules/execution/ExecutionService.d.ts.map +1 -1
- package/dist/modules/execution/ExecutionService.js +111 -19
- package/dist/modules/execution/ExecutionService.js.map +1 -1
- package/dist/modules/incidentEnrichment/IncidentEnrichmentService.d.ts +35 -0
- package/dist/modules/incidentEnrichment/IncidentEnrichmentService.d.ts.map +1 -0
- package/dist/modules/incidentEnrichment/IncidentEnrichmentService.js +93 -0
- package/dist/modules/incidentEnrichment/IncidentEnrichmentService.js.map +1 -0
- package/dist/modules/requirements/requirements.logic.d.ts +2 -2
- package/dist/modules/requirements/requirements.logic.d.ts.map +1 -1
- package/dist/modules/requirements/requirements.logic.js +2 -13
- package/dist/modules/requirements/requirements.logic.js.map +1 -1
- package/dist/modules/review/IterativeReviewService.d.ts.map +1 -1
- package/dist/modules/review/IterativeReviewService.js +3 -6
- package/dist/modules/review/IterativeReviewService.js.map +1 -1
- package/dist/modules/sandbox/SandboxRunService.d.ts +48 -0
- package/dist/modules/sandbox/SandboxRunService.d.ts.map +1 -0
- package/dist/modules/sandbox/SandboxRunService.js +248 -0
- package/dist/modules/sandbox/SandboxRunService.js.map +1 -0
- package/dist/modules/sandbox/SandboxService.d.ts +94 -0
- package/dist/modules/sandbox/SandboxService.d.ts.map +1 -0
- package/dist/modules/sandbox/SandboxService.js +227 -0
- package/dist/modules/sandbox/SandboxService.js.map +1 -0
- package/dist/modules/sandbox/sandbox.logic.d.ts +37 -0
- package/dist/modules/sandbox/sandbox.logic.d.ts.map +1 -0
- package/dist/modules/sandbox/sandbox.logic.js +178 -0
- package/dist/modules/sandbox/sandbox.logic.js.map +1 -0
- package/dist/modules/settings/WorkspaceSettingsService.d.ts.map +1 -1
- package/dist/modules/settings/WorkspaceSettingsService.js +3 -0
- package/dist/modules/settings/WorkspaceSettingsService.js.map +1 -1
- package/package.json +9 -8
|
@@ -0,0 +1,227 @@
|
|
|
1
|
+
import { assertFound, ConflictError, requireWorkspace, ValidationError } from '@cat-factory/kernel';
|
|
2
|
+
import { cellCount, firstVersionFromBaseline, isRunnableMatrix, listBaselines, listBuiltinFixtures, nextVersion, SANDBOX_AGENT_KINDS, baselineVersionId, sandboxKindMeta, } from '@cat-factory/sandbox';
|
|
3
|
+
/** A safety ceiling on how many cells one experiment may expand to (cost guard). */
|
|
4
|
+
export const MAX_SANDBOX_CELLS = 100; // enforced in SandboxService.createExperiment; reported to the UI as overview().maxCells
|
|
5
|
+
/**
 * Fetch one experiment together with its result grid.
 *
 * The experiment is looked up first (a missing one is rejected by `assertFound`); its
 * runs and grades are then loaded concurrently. Both `SandboxService.getExperiment` and
 * the run-driver return this same shape, so the "detail" payload cannot drift apart.
 *
 * @param repos - Object exposing `sandboxExperimentRepository`, `sandboxRunRepository`
 *   and `sandboxGradeRepository`.
 * @param workspaceId - Workspace the experiment belongs to.
 * @param experimentId - Id of the experiment to load.
 * @returns `{ experiment, runs, grades }`.
 */
export async function composeExperimentDetail(repos, workspaceId, experimentId) {
    const stored = await repos.sandboxExperimentRepository.get(workspaceId, experimentId);
    const experiment = assertFound(stored, 'SandboxExperiment', experimentId);

    // Runs and grades are independent reads, so start both before awaiting either.
    const pendingRuns = repos.sandboxRunRepository.listByExperiment(workspaceId, experimentId);
    const pendingGrades = repos.sandboxGradeRepository.listByExperiment(workspaceId, experimentId);
    const [runs, grades] = await Promise.all([pendingRuns, pendingGrades]);

    return { experiment, runs, grades };
}
|
|
18
|
+
/**
 * Management CRUD for the Sandbox (the parallel prompt/model testing surface): the
 * shipped baselines + stored candidate prompt versions, the fixture library (builtins
 * seeded lazily on first list, plus workspace-authored ones), and experiment definitions
 * + their result grids. Running an experiment lives in {@link SandboxRunService}; this
 * service is the persistence-facing half. Everything is workspace-scoped: every public
 * method first calls `requireWorkspace` for the given workspace id.
 */
export class SandboxService {
    /**
     * Injected collaborators. This class reads `workspaceRepository`,
     * `sandboxPromptVersionRepository`, `sandboxFixtureRepository`,
     * `sandboxExperimentRepository`, `clock`, `idGenerator` and `defaultModelRef` from it.
     */
    deps;

    constructor(deps) {
        this.deps = deps;
    }

    /**
     * The full opt-in overview the UI loads when the Sandbox surface opens.
     *
     * @returns `{ agentKinds, prompts, fixtures, experiments, maxCells }`.
     */
    async overview(workspaceId) {
        await requireWorkspace(this.deps.workspaceRepository, workspaceId);
        const [prompts, fixtures, experiments] = await Promise.all([
            this.listPrompts(workspaceId),
            this.listFixtures(workspaceId),
            this.deps.sandboxExperimentRepository.list(workspaceId),
        ]);
        return {
            agentKinds: SANDBOX_AGENT_KINDS,
            prompts,
            fixtures,
            experiments,
            maxCells: MAX_SANDBOX_CELLS,
        };
    }

    // ---- prompt versions ------------------------------------------------------

    /**
     * The shipped baselines (synthetic) followed by stored candidate versions.
     *
     * @param agentKind - Optional filter; when given, both the baselines and the stored
     *   candidates are restricted to that agent kind.
     */
    async listPrompts(workspaceId, agentKind) {
        await requireWorkspace(this.deps.workspaceRepository, workspaceId);
        const baselines = listBaselines(this.deps.clock.now());
        const candidates = agentKind
            ? await this.deps.sandboxPromptVersionRepository.listByKind(workspaceId, agentKind)
            : await this.deps.sandboxPromptVersionRepository.list(workspaceId);
        const baseSlice = agentKind ? baselines.filter((b) => b.agentKind === agentKind) : baselines;
        return [...baseSlice, ...candidates];
    }

    /**
     * Clone a shipped baseline into a fresh editable candidate lineage at version 1.
     *
     * The lineage name is `input.name`, else the kind's label, else the raw agent kind.
     *
     * @throws ValidationError (via `resolveBaseline`) when no baseline matches.
     */
    async clonePrompt(workspaceId, input) {
        await requireWorkspace(this.deps.workspaceRepository, workspaceId);
        const source = this.resolveBaseline(input.agentKind, input.basePromptId);
        const meta = sandboxKindMeta(input.agentKind);
        const name = input.name ?? meta?.label ?? input.agentKind;
        const version = firstVersionFromBaseline({
            agentKind: source.agentKind,
            systemText: source.systemText,
            basePromptId: source.basePromptId,
        }, name, {
            id: this.deps.idGenerator.next('sbp'),
            createdAt: this.deps.clock.now(),
            createdBy: null,
            labels: input.labels ?? [],
        });
        await this.deps.sandboxPromptVersionRepository.upsert(workspaceId, version);
        return version;
    }

    /**
     * Append a new candidate version. The parent may be a baseline (starts a lineage) or a candidate.
     *
     * A `parentId` beginning with `baseline:` is resolved against the shipped baselines;
     * any other id is looked up in the prompt-version repository.
     *
     * @throws ValidationError when a `baseline:` parent id matches no shipped baseline.
     */
    async saveVersion(workspaceId, input) {
        await requireWorkspace(this.deps.workspaceRepository, workspaceId);
        const fields = {
            id: this.deps.idGenerator.next('sbp'),
            createdAt: this.deps.clock.now(),
            createdBy: null,
            labels: input.labels ?? [],
        };
        if (input.parentId.startsWith('baseline:')) {
            const baseline = listBaselines(this.deps.clock.now()).find((b) => b.id === input.parentId);
            if (!baseline) {
                throw new ValidationError(`Unknown baseline prompt "${input.parentId}"`);
            }
            const meta = sandboxKindMeta(baseline.agentKind);
            const version = firstVersionFromBaseline({
                agentKind: baseline.agentKind,
                systemText: input.systemText,
                basePromptId: baseline.basePromptId,
            }, meta?.label ?? baseline.agentKind, fields);
            await this.deps.sandboxPromptVersionRepository.upsert(workspaceId, version);
            return version;
        }
        const parent = assertFound(await this.deps.sandboxPromptVersionRepository.get(workspaceId, input.parentId), 'SandboxPromptVersion', input.parentId);
        const version = nextVersion(parent, input.systemText, fields);
        await this.deps.sandboxPromptVersionRepository.upsert(workspaceId, version);
        return version;
    }

    /** Replace a candidate version's labels and return the updated record. */
    async setLabels(workspaceId, id, input) {
        await requireWorkspace(this.deps.workspaceRepository, workspaceId);
        const existing = assertFound(await this.deps.sandboxPromptVersionRepository.get(workspaceId, id), 'SandboxPromptVersion', id);
        const updated = { ...existing, labels: input.labels };
        await this.deps.sandboxPromptVersionRepository.upsert(workspaceId, updated);
        return updated;
    }

    /** Soft-archive a candidate version (hidden from the default listing). */
    async archivePrompt(workspaceId, id) {
        await requireWorkspace(this.deps.workspaceRepository, workspaceId);
        await this.deps.sandboxPromptVersionRepository.archive(workspaceId, id, this.deps.clock.now());
    }

    // ---- fixtures -------------------------------------------------------------

    /** The fixture library, seeding the builtin fixtures on first use. */
    async listFixtures(workspaceId) {
        await requireWorkspace(this.deps.workspaceRepository, workspaceId);
        await this.ensureBuiltinFixtures(workspaceId);
        return this.deps.sandboxFixtureRepository.list(workspaceId);
    }

    /** Create a workspace-authored fixture (stored with `origin: 'custom'`). */
    async createFixture(workspaceId, input) {
        await requireWorkspace(this.deps.workspaceRepository, workspaceId);
        const fixture = {
            id: this.deps.idGenerator.next('sbf'),
            kind: input.kind,
            name: input.name,
            payload: input.payload ?? null,
            repoRef: input.repoRef ?? null,
            objective: input.objective ?? null,
            origin: 'custom',
            createdAt: this.deps.clock.now(),
        };
        await this.deps.sandboxFixtureRepository.upsert(workspaceId, fixture);
        return fixture;
    }

    /**
     * Remove a workspace-authored fixture. Builtin fixtures cannot be removed.
     *
     * @throws ConflictError when the stored fixture has `origin === 'builtin'`.
     */
    async removeFixture(workspaceId, id) {
        await requireWorkspace(this.deps.workspaceRepository, workspaceId);
        const existing = await this.deps.sandboxFixtureRepository.get(workspaceId, id);
        if (existing?.origin === 'builtin') {
            throw new ConflictError('Builtin fixtures cannot be deleted.');
        }
        await this.deps.sandboxFixtureRepository.remove(workspaceId, id);
    }

    // ---- experiments ----------------------------------------------------------

    /** All experiment definitions stored for the workspace. */
    async listExperiments(workspaceId) {
        await requireWorkspace(this.deps.workspaceRepository, workspaceId);
        return this.deps.sandboxExperimentRepository.list(workspaceId);
    }

    /** An experiment with its result grid (cells + grades). */
    async getExperiment(workspaceId, id) {
        await requireWorkspace(this.deps.workspaceRepository, workspaceId);
        return composeExperimentDetail(this.deps, workspaceId, id);
    }

    /**
     * Create a draft experiment. Launching it (the run-driver) expands + grades the matrix.
     *
     * @throws ValidationError when the agent kind is not Sandbox-testable, is a container
     *   kind, the matrix is not runnable, the matrix expands past `MAX_SANDBOX_CELLS`, or
     *   no `judgeModel` was given and no default model is configured.
     */
    async createExperiment(workspaceId, input) {
        await requireWorkspace(this.deps.workspaceRepository, workspaceId);
        const meta = sandboxKindMeta(input.agentKind);
        if (!meta) {
            throw new ValidationError(`"${input.agentKind}" is not a Sandbox-testable agent kind`);
        }
        // Refuse container kinds up front: the in-product run driver only runs inline cells,
        // so a container experiment could be persisted but never launched. Reject at create
        // time rather than leaving an un-launchable draft in the workspace.
        if (meta.bucket === 'container') {
            throw new ValidationError(`The "${input.agentKind}" agent runs in a container; container experiments are not yet supported in the Sandbox.`);
        }
        if (!isRunnableMatrix(input.matrix)) {
            throw new ValidationError('The experiment matrix needs at least one prompt, model and fixture');
        }
        const repeats = input.repeats ?? 1;
        const total = cellCount(input.matrix, repeats);
        if (total > MAX_SANDBOX_CELLS) {
            throw new ValidationError(`This matrix expands to ${total} cells; the limit is ${MAX_SANDBOX_CELLS}. Narrow the selection.`);
        }
        const experiment = {
            id: this.deps.idGenerator.next('sbx'),
            name: input.name,
            agentKind: input.agentKind,
            judgeModel: input.judgeModel ?? this.defaultJudgeModel(),
            repeats,
            status: 'draft',
            matrix: input.matrix,
            budgetTokens: input.budgetTokens ?? null,
            createdAt: this.deps.clock.now(),
            createdBy: null,
        };
        await this.deps.sandboxExperimentRepository.upsert(workspaceId, experiment);
        return experiment;
    }

    // ---- internals ------------------------------------------------------------

    /**
     * Resolve the shipped baseline a clone derives from (by base-prompt id, else by kind).
     *
     * An id that matches no baseline falls back to the first baseline of `agentKind`.
     *
     * @throws ValidationError when neither lookup finds a baseline.
     */
    resolveBaseline(agentKind, basePromptId) {
        const baselines = listBaselines(this.deps.clock.now());
        const wantedId = basePromptId ? `baseline:${basePromptId}` : baselineVersionId(agentKind);
        const source = baselines.find((b) => b.id === wantedId) ?? baselines.find((b) => b.agentKind === agentKind);
        if (!source) {
            throw new ValidationError(`No baseline prompt for agent kind "${agentKind}"`);
        }
        return source;
    }

    /**
     * Seed the builtin fixture library for a workspace that has no builtin fixture yet.
     * Idempotent.
     *
     * The guard looks for a stored fixture with `origin === 'builtin'` rather than for an
     * empty library: a workspace that authored a custom fixture before its first listing
     * would otherwise never receive the builtins.
     * NOTE(review): assumes `listBuiltinFixtures` marks its fixtures `origin: 'builtin'`
     * (the marker `removeFixture` checks) — confirm against `@cat-factory/sandbox`.
     */
    async ensureBuiltinFixtures(workspaceId) {
        const current = await this.deps.sandboxFixtureRepository.list(workspaceId);
        if (current.some((fixture) => fixture.origin === 'builtin')) {
            return;
        }
        for (const fixture of listBuiltinFixtures(this.deps.clock.now())) {
            await this.deps.sandboxFixtureRepository.upsert(workspaceId, fixture);
        }
    }

    /**
     * The judge model to use when the caller didn't pick one: the deployment's routing
     * default. We do NOT guess a provider — if no default is configured (e.g. a minimal
     * deployment), require an explicit `judgeModel` at create time rather than defaulting to
     * a vendor that may have no key, which would otherwise fail every cell's grade at launch.
     *
     * @returns The default model formatted as `provider:model`.
     * @throws ValidationError when `deps.defaultModelRef` is not set.
     */
    defaultJudgeModel() {
        const ref = this.deps.defaultModelRef;
        if (!ref) {
            throw new ValidationError('No default model is configured for the Sandbox judge; specify judgeModel explicitly.');
        }
        return `${ref.provider}:${ref.model}`;
    }
}
|
|
227
|
+
//# sourceMappingURL=SandboxService.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"SandboxService.js","sourceRoot":"","sources":["../../../src/modules/sandbox/SandboxService.ts"],"names":[],"mappings":"AAgBA,OAAO,EAAE,WAAW,EAAE,aAAa,EAAE,gBAAgB,EAAE,eAAe,EAAE,MAAM,qBAAqB,CAAA;AAQnG,OAAO,EACL,SAAS,EACT,wBAAwB,EACxB,gBAAgB,EAChB,aAAa,EACb,mBAAmB,EACnB,WAAW,EACX,mBAAmB,EAEnB,iBAAiB,EACjB,eAAe,GAChB,MAAM,sBAAsB,CAAA;AAE7B,oFAAoF;AACpF,MAAM,CAAC,MAAM,iBAAiB,GAAG,GAAG,CAAA;AAgBpC;;;;GAIG;AACH,MAAM,CAAC,KAAK,UAAU,uBAAuB,CAC3C,KAAgC,EAChC,WAAmB,EACnB,YAAoB;IAEpB,MAAM,UAAU,GAAG,WAAW,CAC5B,MAAM,KAAK,CAAC,2BAA2B,CAAC,GAAG,CAAC,WAAW,EAAE,YAAY,CAAC,EACtE,mBAAmB,EACnB,YAAY,CACb,CAAA;IACD,MAAM,CAAC,IAAI,EAAE,MAAM,CAAC,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC;QACvC,KAAK,CAAC,oBAAoB,CAAC,gBAAgB,CAAC,WAAW,EAAE,YAAY,CAAC;QACtE,KAAK,CAAC,sBAAsB,CAAC,gBAAgB,CAAC,WAAW,EAAE,YAAY,CAAC;KACzE,CAAC,CAAA;IACF,OAAO,EAAE,UAAU,EAAE,IAAI,EAAE,MAAM,EAAE,CAAA;AACrC,CAAC;AA6BD;;;;;;GAMG;AACH,MAAM,OAAO,cAAc;IACI,IAAI;IAAjC,YAA6B,IAAgC;oBAAhC,IAAI;IAA+B,CAAC;IAEjE,4EAA4E;IAC5E,KAAK,CAAC,QAAQ,CAAC,WAAmB;QAChC,MAAM,gBAAgB,CAAC,IAAI,CAAC,IAAI,CAAC,mBAAmB,EAAE,WAAW,CAAC,CAAA;QAClE,MAAM,CAAC,OAAO,EAAE,QAAQ,EAAE,WAAW,CAAC,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC;YACzD,IAAI,CAAC,WAAW,CAAC,WAAW,CAAC;YAC7B,IAAI,CAAC,YAAY,CAAC,WAAW,CAAC;YAC9B,IAAI,CAAC,IAAI,CAAC,2BAA2B,CAAC,IAAI,CAAC,WAAW,CAAC;SACxD,CAAC,CAAA;QACF,OAAO;YACL,UAAU,EAAE,mBAAmB;YAC/B,OAAO;YACP,QAAQ;YACR,WAAW;YACX,QAAQ,EAAE,iBAAiB;SAC5B,CAAA;IACH,CAAC;IAED,8EAA8E;IAE9E,+EAA+E;IAC/E,KAAK,CAAC,WAAW,CAAC,WAAmB,EAAE,SAAkB;QACvD,MAAM,gBAAgB,CAAC,IAAI,CAAC,IAAI,CAAC,mBAAmB,EAAE,WAAW,CAAC,CAAA;QAClE,MAAM,SAAS,GAAG,aAAa,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,EAAE,CAAC,CAAA;QACtD,MAAM,UAAU,GAAG,SAAS;YAC1B,CAAC,CAAC,MAAM,IAAI,CAAC,IAAI,CAAC,8BAA8B,CAAC,UAAU,CAAC,WAAW,EAAE,SAAS,CAAC;YACnF,CAAC,CAAC,MAAM,IAAI,CAAC,IAAI,CAAC,8BAA8B,CAAC,IAAI,CAAC,WAAW,CAAC,CAAA;QACpE,MAAM,SAAS,GAAG,SAAS,CAAC,CAAC,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,KAAK,SAAS,CAAC,CAAC,CAAC,CAAC,SAAS,CAAA;QAC5F,OAAO,CAAC,GAAG,SAAS,EAAE,GAAG,UAAU,CAAC
,CAAA;IACtC,CAAC;IAED,qFAAqF;IACrF,KAAK,CAAC,WAAW,CACf,WAAmB,EACnB,KAA8B;QAE9B,MAAM,gBAAgB,CAAC,IAAI,CAAC,IAAI,CAAC,mBAAmB,EAAE,WAAW,CAAC,CAAA;QAClE,MAAM,MAAM,GAAG,IAAI,CAAC,eAAe,CAAC,KAAK,CAAC,SAAS,EAAE,KAAK,CAAC,YAAY,CAAC,CAAA;QACxE,MAAM,IAAI,GAAG,eAAe,CAAC,KAAK,CAAC,SAAS,CAAC,CAAA;QAC7C,MAAM,IAAI,GAAG,KAAK,CAAC,IAAI,IAAI,IAAI,EAAE,KAAK,IAAI,KAAK,CAAC,SAAS,CAAA;QACzD,MAAM,OAAO,GAAG,wBAAwB,CACtC;YACE,SAAS,EAAE,MAAM,CAAC,SAAS;YAC3B,UAAU,EAAE,MAAM,CAAC,UAAU;YAC7B,YAAY,EAAE,MAAM,CAAC,YAAY;SAClC,EACD,IAAI,EACJ;YACE,EAAE,EAAE,IAAI,CAAC,IAAI,CAAC,WAAW,CAAC,IAAI,CAAC,KAAK,CAAC;YACrC,SAAS,EAAE,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,EAAE;YAChC,SAAS,EAAE,IAAI;YACf,MAAM,EAAE,KAAK,CAAC,MAAM,IAAI,EAAE;SAC3B,CACF,CAAA;QACD,MAAM,IAAI,CAAC,IAAI,CAAC,8BAA8B,CAAC,MAAM,CAAC,WAAW,EAAE,OAAO,CAAC,CAAA;QAC3E,OAAO,OAAO,CAAA;IAChB,CAAC;IAED,sGAAsG;IACtG,KAAK,CAAC,WAAW,CACf,WAAmB,EACnB,KAA8B;QAE9B,MAAM,gBAAgB,CAAC,IAAI,CAAC,IAAI,CAAC,mBAAmB,EAAE,WAAW,CAAC,CAAA;QAClE,MAAM,MAAM,GAAG;YACb,EAAE,EAAE,IAAI,CAAC,IAAI,CAAC,WAAW,CAAC,IAAI,CAAC,KAAK,CAAC;YACrC,SAAS,EAAE,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,EAAE;YAChC,SAAS,EAAE,IAAI;YACf,MAAM,EAAE,KAAK,CAAC,MAAM,IAAI,EAAE;SAC3B,CAAA;QACD,IAAI,KAAK,CAAC,QAAQ,CAAC,UAAU,CAAC,WAAW,CAAC,EAAE,CAAC;YAC3C,MAAM,QAAQ,GAAG,aAAa,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,KAAK,KAAK,CAAC,QAAQ,CAAC,CAAA;YAC1F,IAAI,CAAC,QAAQ;gBAAE,MAAM,IAAI,eAAe,CAAC,4BAA4B,KAAK,CAAC,QAAQ,GAAG,CAAC,CAAA;YACvF,MAAM,IAAI,GAAG,eAAe,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAA;YAChD,MAAM,OAAO,GAAG,wBAAwB,CACtC;gBACE,SAAS,EAAE,QAAQ,CAAC,SAAS;gBAC7B,UAAU,EAAE,KAAK,CAAC,UAAU;gBAC5B,YAAY,EAAE,QAAQ,CAAC,YAAY;aACpC,EACD,IAAI,EAAE,KAAK,IAAI,QAAQ,CAAC,SAAS,EACjC,MAAM,CACP,CAAA;YACD,MAAM,IAAI,CAAC,IAAI,CAAC,8BAA8B,CAAC,MAAM,CAAC,WAAW,EAAE,OAAO,CAAC,CAAA;YAC3E,OAAO,OAAO,CAAA;QAChB,CAAC;QACD,MAAM,MAAM,GAAG,WAAW,CACxB,MAAM,IAAI,CAAC,IAAI,CAAC,8BAA8B,CAAC,GAAG,CAAC,WAAW,EAAE,KAAK,CAAC,QAAQ,CAAC,EAC/E,sBAAsB,EACtB,KAAK,CAAC,QAAQ,CACf,CAAA;QACD,MAAM,OAAO,GAAG,WA
AW,CAAC,MAAM,EAAE,KAAK,CAAC,UAAU,EAAE,MAAM,CAAC,CAAA;QAC7D,MAAM,IAAI,CAAC,IAAI,CAAC,8BAA8B,CAAC,MAAM,CAAC,WAAW,EAAE,OAAO,CAAC,CAAA;QAC3E,OAAO,OAAO,CAAA;IAChB,CAAC;IAED,4CAA4C;IAC5C,KAAK,CAAC,SAAS,CACb,WAAmB,EACnB,EAAU,EACV,KAA4B;QAE5B,MAAM,gBAAgB,CAAC,IAAI,CAAC,IAAI,CAAC,mBAAmB,EAAE,WAAW,CAAC,CAAA;QAClE,MAAM,QAAQ,GAAG,WAAW,CAC1B,MAAM,IAAI,CAAC,IAAI,CAAC,8BAA8B,CAAC,GAAG,CAAC,WAAW,EAAE,EAAE,CAAC,EACnE,sBAAsB,EACtB,EAAE,CACH,CAAA;QACD,MAAM,OAAO,GAAG,EAAE,GAAG,QAAQ,EAAE,MAAM,EAAE,KAAK,CAAC,MAAM,EAAE,CAAA;QACrD,MAAM,IAAI,CAAC,IAAI,CAAC,8BAA8B,CAAC,MAAM,CAAC,WAAW,EAAE,OAAO,CAAC,CAAA;QAC3E,OAAO,OAAO,CAAA;IAChB,CAAC;IAED,0EAA0E;IAC1E,KAAK,CAAC,aAAa,CAAC,WAAmB,EAAE,EAAU;QACjD,MAAM,gBAAgB,CAAC,IAAI,CAAC,IAAI,CAAC,mBAAmB,EAAE,WAAW,CAAC,CAAA;QAClE,MAAM,IAAI,CAAC,IAAI,CAAC,8BAA8B,CAAC,OAAO,CAAC,WAAW,EAAE,EAAE,EAAE,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,EAAE,CAAC,CAAA;IAChG,CAAC;IAED,8EAA8E;IAE9E,sEAAsE;IACtE,KAAK,CAAC,YAAY,CAAC,WAAmB;QACpC,MAAM,gBAAgB,CAAC,IAAI,CAAC,IAAI,CAAC,mBAAmB,EAAE,WAAW,CAAC,CAAA;QAClE,MAAM,IAAI,CAAC,qBAAqB,CAAC,WAAW,CAAC,CAAA;QAC7C,OAAO,IAAI,CAAC,IAAI,CAAC,wBAAwB,CAAC,IAAI,CAAC,WAAW,CAAC,CAAA;IAC7D,CAAC;IAED,2CAA2C;IAC3C,KAAK,CAAC,aAAa,CACjB,WAAmB,EACnB,KAAgC;QAEhC,MAAM,gBAAgB,CAAC,IAAI,CAAC,IAAI,CAAC,mBAAmB,EAAE,WAAW,CAAC,CAAA;QAClE,MAAM,OAAO,GAAmB;YAC9B,EAAE,EAAE,IAAI,CAAC,IAAI,CAAC,WAAW,CAAC,IAAI,CAAC,KAAK,CAAC;YACrC,IAAI,EAAE,KAAK,CAAC,IAAI;YAChB,IAAI,EAAE,KAAK,CAAC,IAAI;YAChB,OAAO,EAAE,KAAK,CAAC,OAAO,IAAI,IAAI;YAC9B,OAAO,EAAE,KAAK,CAAC,OAAO,IAAI,IAAI;YAC9B,SAAS,EAAE,KAAK,CAAC,SAAS,IAAI,IAAI;YAClC,MAAM,EAAE,QAAQ;YAChB,SAAS,EAAE,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,EAAE;SACjC,CAAA;QACD,MAAM,IAAI,CAAC,IAAI,CAAC,wBAAwB,CAAC,MAAM,CAAC,WAAW,EAAE,OAAO,CAAC,CAAA;QACrE,OAAO,OAAO,CAAA;IAChB,CAAC;IAED,+EAA+E;IAC/E,KAAK,CAAC,aAAa,CAAC,WAAmB,EAAE,EAAU;QACjD,MAAM,gBAAgB,CAAC,IAAI,CAAC,IAAI,CAAC,mBAAmB,EAAE,WAAW,CAAC,CAAA;QAClE,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,IAAI,CAAC,wBAAwB,CAAC,GAAG,CAAC,WAAW,EAAE,EAAE,CAAC,CAAA;QAC9E,IAAI,QAAQ,EAAE,MAAM,KAAK,SAAS,EAAE,CAAC;YACnC,MAAM,
IAAI,aAAa,CAAC,qCAAqC,CAAC,CAAA;QAChE,CAAC;QACD,MAAM,IAAI,CAAC,IAAI,CAAC,wBAAwB,CAAC,MAAM,CAAC,WAAW,EAAE,EAAE,CAAC,CAAA;IAClE,CAAC;IAED,8EAA8E;IAE9E,KAAK,CAAC,eAAe,CAAC,WAAmB;QACvC,MAAM,gBAAgB,CAAC,IAAI,CAAC,IAAI,CAAC,mBAAmB,EAAE,WAAW,CAAC,CAAA;QAClE,OAAO,IAAI,CAAC,IAAI,CAAC,2BAA2B,CAAC,IAAI,CAAC,WAAW,CAAC,CAAA;IAChE,CAAC;IAED,2DAA2D;IAC3D,KAAK,CAAC,aAAa,CAAC,WAAmB,EAAE,EAAU;QACjD,MAAM,gBAAgB,CAAC,IAAI,CAAC,IAAI,CAAC,mBAAmB,EAAE,WAAW,CAAC,CAAA;QAClE,OAAO,uBAAuB,CAAC,IAAI,CAAC,IAAI,EAAE,WAAW,EAAE,EAAE,CAAC,CAAA;IAC5D,CAAC;IAED,4FAA4F;IAC5F,KAAK,CAAC,gBAAgB,CACpB,WAAmB,EACnB,KAAmC;QAEnC,MAAM,gBAAgB,CAAC,IAAI,CAAC,IAAI,CAAC,mBAAmB,EAAE,WAAW,CAAC,CAAA;QAClE,MAAM,IAAI,GAAG,eAAe,CAAC,KAAK,CAAC,SAAS,CAAC,CAAA;QAC7C,IAAI,CAAC,IAAI,EAAE,CAAC;YACV,MAAM,IAAI,eAAe,CAAC,IAAI,KAAK,CAAC,SAAS,wCAAwC,CAAC,CAAA;QACxF,CAAC;QACD,qFAAqF;QACrF,oFAAoF;QACpF,oEAAoE;QACpE,IAAI,IAAI,CAAC,MAAM,KAAK,WAAW,EAAE,CAAC;YAChC,MAAM,IAAI,eAAe,CACvB,QAAQ,KAAK,CAAC,SAAS,0FAA0F,CAClH,CAAA;QACH,CAAC;QACD,IAAI,CAAC,gBAAgB,CAAC,KAAK,CAAC,MAAM,CAAC,EAAE,CAAC;YACpC,MAAM,IAAI,eAAe,CACvB,oEAAoE,CACrE,CAAA;QACH,CAAC;QACD,MAAM,OAAO,GAAG,KAAK,CAAC,OAAO,IAAI,CAAC,CAAA;QAClC,MAAM,KAAK,GAAG,SAAS,CAAC,KAAK,CAAC,MAAM,EAAE,OAAO,CAAC,CAAA;QAC9C,IAAI,KAAK,GAAG,iBAAiB,EAAE,CAAC;YAC9B,MAAM,IAAI,eAAe,CACvB,0BAA0B,KAAK,wBAAwB,iBAAiB,yBAAyB,CAClG,CAAA;QACH,CAAC;QACD,MAAM,UAAU,GAAsB;YACpC,EAAE,EAAE,IAAI,CAAC,IAAI,CAAC,WAAW,CAAC,IAAI,CAAC,KAAK,CAAC;YACrC,IAAI,EAAE,KAAK,CAAC,IAAI;YAChB,SAAS,EAAE,KAAK,CAAC,SAAS;YAC1B,UAAU,EAAE,KAAK,CAAC,UAAU,IAAI,IAAI,CAAC,iBAAiB,EAAE;YACxD,OAAO;YACP,MAAM,EAAE,OAAO;YACf,MAAM,EAAE,KAAK,CAAC,MAAM;YACpB,YAAY,EAAE,KAAK,CAAC,YAAY,IAAI,IAAI;YACxC,SAAS,EAAE,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,EAAE;YAChC,SAAS,EAAE,IAAI;SAChB,CAAA;QACD,MAAM,IAAI,CAAC,IAAI,CAAC,2BAA2B,CAAC,MAAM,CAAC,WAAW,EAAE,UAAU,CAAC,CAAA;QAC3E,OAAO,UAAU,CAAA;IACnB,CAAC;IAED,8EAA8E;IAE9E,2FAA2F;IACnF,eAAe,CAAC,SAAiB,EAAE,YAA2B;QACpE,MAAM,SAAS,GAAG,aAAa,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,EAAE,CAAC,CAAA;QACtD,MAAM,QAAQ,GAAG,YAAY,CAAC,CA
AC,CAAC,YAAY,YAAY,EAAE,CAAC,CAAC,CAAC,iBAAiB,CAAC,SAAS,CAAC,CAAA;QACzF,MAAM,MAAM,GACV,SAAS,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,KAAK,QAAQ,CAAC,IAAI,SAAS,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,KAAK,SAAS,CAAC,CAAA;QAC9F,IAAI,CAAC,MAAM;YAAE,MAAM,IAAI,eAAe,CAAC,sCAAsC,SAAS,GAAG,CAAC,CAAA;QAC1F,OAAO,MAAM,CAAA;IACf,CAAC;IAED,sFAAsF;IAC9E,KAAK,CAAC,qBAAqB,CAAC,WAAmB;QACrD,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,IAAI,CAAC,wBAAwB,CAAC,IAAI,CAAC,WAAW,CAAC,CAAA;QAC1E,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC;YAAE,OAAM;QAC9B,KAAK,MAAM,OAAO,IAAI,mBAAmB,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,EAAE,CAAC,EAAE,CAAC;YACjE,MAAM,IAAI,CAAC,IAAI,CAAC,wBAAwB,CAAC,MAAM,CAAC,WAAW,EAAE,OAAO,CAAC,CAAA;QACvE,CAAC;IACH,CAAC;IAED;;;;;OAKG;IACK,iBAAiB;QACvB,MAAM,GAAG,GAAG,IAAI,CAAC,IAAI,CAAC,eAAe,CAAA;QACrC,IAAI,CAAC,GAAG,EAAE,CAAC;YACT,MAAM,IAAI,eAAe,CACvB,sFAAsF,CACvF,CAAA;QACH,CAAC;QACD,OAAO,GAAG,GAAG,CAAC,QAAQ,IAAI,GAAG,CAAC,KAAK,EAAE,CAAA;IACvC,CAAC;CACF"}
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
import type { SandboxExpectation, SandboxFixture, SandboxGradeDimension, SandboxObjectiveResult } from '@cat-factory/contracts';
|
|
2
|
+
import type { ModelRef } from '@cat-factory/kernel';
|
|
3
|
+
import { type ExpectationScore, type Rubric } from '@cat-factory/sandbox';
|
|
4
|
+
export { extractJson } from '@cat-factory/kernel';
|
|
5
|
+
/** Split a model catalog id (`provider:model`) into a {@link ModelRef}. The split happens at the first `:`; an id containing no `:` yields `{ provider: id, model: '' }`. */
|
|
6
|
+
export declare function parseModelCatalogId(id: string): ModelRef;
|
|
7
|
+
/**
|
|
8
|
+
* Render an inline fixture's payload into the task input the candidate reasons over (its
|
|
9
|
+
* system prompt carries the role instructions) and the judge grades against. Defensive:
|
|
10
|
+
* the payload is a `Record<string, unknown>` matching a `RequirementsContext` /
|
|
11
|
+
* `ClarityContext` (a `block` + optional `docs`/`tasks`) or a reviewer `AgentRunContext`
|
|
12
|
+
* (a `block` + the work-to-review in `priorOutputs`), so it reads each field tolerantly.
|
|
13
|
+
*/
|
|
14
|
+
export declare function renderFixtureInput(fixture: SandboxFixture): string;
|
|
15
|
+
/** System prompt for the Sandbox judge — a reference-free rubric grader. */
|
|
16
|
+
export declare const JUDGE_SYSTEM_PROMPT: string;
|
|
17
|
+
/** Build the judge user prompt for one cell: rubric + task input + candidate output + expectations. An empty task input or blank candidate output is replaced by placeholder text, and the expectations brief is left out when it renders empty. */
|
|
18
|
+
export declare function buildJudgePrompt(rubric: Rubric, taskInput: string, output: string, expectations: readonly SandboxExpectation[]): string;
|
|
19
|
+
/**
|
|
20
|
+
* Coerce a judge's raw JSON into one score per rubric dimension. Scores are clamped to
|
|
21
|
+
* [1,5]; a dimension the judge omitted (or scored non-numerically) defaults to 1 with a
|
|
22
|
+
* note, so the weighted mean never silently drops a dimension from its denominator.
|
|
23
|
+
*/
|
|
24
|
+
export declare function coerceJudgeScores(rubric: Rubric, raw: unknown): SandboxGradeDimension[];
|
|
25
|
+
/** Project the deterministic objective score into the wire `SandboxObjectiveResult` (findings). */
|
|
26
|
+
export declare function toFindingsObjectiveResult(score: ExpectationScore): SandboxObjectiveResult;
|
|
27
|
+
/** Score a candidate's output against a fixture's `findings` objective, if it declares one. */
|
|
28
|
+
export declare function objectiveFor(fixture: SandboxFixture, output: string): SandboxObjectiveResult | null;
|
|
29
|
+
/**
|
|
30
|
+
* How many rubric dimensions the judge actually scored with a usable numeric value. The
|
|
31
|
+
* run-driver treats a count of 0 as a grading FAILURE (record an error on the cell) rather
|
|
32
|
+
* than letting {@link coerceJudgeScores} silently floor every dimension to 1 — an
|
|
33
|
+
* unparseable / empty / reasoning-only judge reply must not masquerade as a confident
|
|
34
|
+
* bottom-of-scale grade.
|
|
35
|
+
*/
|
|
36
|
+
export declare function gradedDimensionCount(rubric: Rubric, raw: unknown): number;
|
|
37
|
+
//# sourceMappingURL=sandbox.logic.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"sandbox.logic.d.ts","sourceRoot":"","sources":["../../../src/modules/sandbox/sandbox.logic.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EACV,kBAAkB,EAClB,cAAc,EACd,qBAAqB,EACrB,sBAAsB,EACvB,MAAM,wBAAwB,CAAA;AAC/B,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,qBAAqB,CAAA;AAEnD,OAAO,EACL,KAAK,gBAAgB,EACrB,KAAK,MAAM,EAGZ,MAAM,sBAAsB,CAAA;AAK7B,OAAO,EAAE,WAAW,EAAE,MAAM,qBAAqB,CAAA;AAOjD,2EAA2E;AAC3E,wBAAgB,mBAAmB,CAAC,EAAE,EAAE,MAAM,GAAG,QAAQ,CAIxD;AAED;;;;;;GAMG;AACH,wBAAgB,kBAAkB,CAAC,OAAO,EAAE,cAAc,GAAG,MAAM,CAyClE;AAED,4EAA4E;AAC5E,eAAO,MAAM,mBAAmB,QAUrB,CAAA;AAEX,uGAAuG;AACvG,wBAAgB,gBAAgB,CAC9B,MAAM,EAAE,MAAM,EACd,SAAS,EAAE,MAAM,EACjB,MAAM,EAAE,MAAM,EACd,YAAY,EAAE,SAAS,kBAAkB,EAAE,GAC1C,MAAM,CAsBR;AAED;;;;GAIG;AACH,wBAAgB,iBAAiB,CAAC,MAAM,EAAE,MAAM,EAAE,GAAG,EAAE,OAAO,GAAG,qBAAqB,EAAE,CAavF;AAED,mGAAmG;AACnG,wBAAgB,yBAAyB,CAAC,KAAK,EAAE,gBAAgB,GAAG,sBAAsB,CAWzF;AAED,+FAA+F;AAC/F,wBAAgB,YAAY,CAC1B,OAAO,EAAE,cAAc,EACvB,MAAM,EAAE,MAAM,GACb,sBAAsB,GAAG,IAAI,CAI/B;AAED;;;;;;GAMG;AACH,wBAAgB,oBAAoB,CAAC,MAAM,EAAE,MAAM,EAAE,GAAG,EAAE,OAAO,GAAG,MAAM,CAOzE"}
|
|
@@ -0,0 +1,178 @@
|
|
|
1
|
+
import { FINAL_ANSWER_IN_REPLY } from '@cat-factory/agents';
|
|
2
|
+
import { renderExpectationBrief, scoreExpectations, } from '@cat-factory/sandbox';
|
|
3
|
+
// The robust LLM-reply JSON extractor lives in the kernel (one copy shared by the
|
|
4
|
+
// requirements reviewer, the document planner, and the Sandbox judge). Re-exported here
|
|
5
|
+
// so the run-driver imports it alongside the other judge helpers.
|
|
6
|
+
export { extractJson } from '@cat-factory/kernel';
|
|
7
|
+
// Pure helpers for the Sandbox run-driver + judge. Kept side-effect-free (no LLM/IO,
|
|
8
|
+
// no clock/identity) so the candidate-input rendering, judge-prompt assembly, score
|
|
9
|
+
// coercion and objective projection are deterministic and unit-testable; the service
|
|
10
|
+
// wraps them with the model-provider calls + persistence.
|
|
11
|
+
/**
 * Turn a model catalog id of the form `provider:model` into a {@link ModelRef}.
 *
 * Only the first `:` separates the two halves, so the model part may itself contain
 * colons. An id with no `:` is treated as a bare provider with an empty model.
 *
 * @param id - Catalog id string.
 * @returns `{ provider, model }`.
 */
export function parseModelCatalogId(id) {
    const separatorAt = id.indexOf(':');
    const hasSeparator = separatorAt !== -1;
    return hasSeparator
        ? { provider: id.slice(0, separatorAt), model: id.slice(separatorAt + 1) }
        : { provider: id, model: '' };
}
|
|
18
|
+
/**
 * Render an inline fixture's payload into the task input the candidate reasons over (its
 * system prompt carries the role instructions) and the judge grades against. Defensive:
 * the payload is a `Record<string, unknown>` matching a `RequirementsContext` /
 * `ClarityContext` (a `block` + optional `docs`/`tasks`) or a reviewer `AgentRunContext`
 * (a `block` + the work-to-review in `priorOutputs`), so it reads each field tolerantly.
 *
 * Sections are emitted in a fixed order — block heading/description, linked documents,
 * linked tracker issues, work from earlier agents — and joined by blank lines. A missing
 * payload, a non-array list, or a `null`/`undefined` list entry never throws: absent
 * values fall back to placeholder titles ("Untitled", "Document", "Issue", "agent").
 *
 * @param fixture - Fixture whose `payload` is rendered.
 * @returns The rendered markdown-style text (empty string when nothing is present).
 */
export function renderFixtureInput(fixture) {
    const payload = (fixture.payload ?? {});
    const parts = [];
    const block = payload.block;
    if (block) {
        const heading = block.type ? `${block.title ?? 'Untitled'} (${block.type})` : block.title;
        parts.push(`# ${heading ?? 'Untitled'}`);
        if (block.description) {
            parts.push(block.description);
        }
    }
    const docs = Array.isArray(payload.docs) ? payload.docs : [];
    if (docs.length > 0) {
        parts.push('## Linked documents');
        for (const doc of docs) {
            // A nullish entry is rendered like an empty record instead of throwing.
            const d = doc ?? {};
            parts.push(`### ${d.title ?? 'Document'}\n${d.body ?? d.content ?? ''}`.trim());
        }
    }
    const tasks = Array.isArray(payload.tasks) ? payload.tasks : [];
    if (tasks.length > 0) {
        parts.push('## Linked tracker issues');
        for (const task of tasks) {
            const t = task ?? {};
            parts.push(`- ${t.title ?? 'Issue'}${t.body ? `: ${t.body}` : ''}`);
        }
    }
    const priorOutputs = Array.isArray(payload.priorOutputs)
        ? payload.priorOutputs
        : [];
    if (priorOutputs.length > 0) {
        parts.push('## Work from earlier agents');
        for (const prior of priorOutputs) {
            const p = prior ?? {};
            parts.push(`### ${p.agentKind ?? 'agent'}\n${p.output ?? ''}`.trim());
        }
    }
    return parts.join('\n\n').trim();
}
|
|
63
|
+
/**
 * System prompt for the Sandbox judge — a reference-free rubric grader.
 *
 * Written one sentence per element and joined with single spaces.
 */
export const JUDGE_SYSTEM_PROMPT = [
    'You are a meticulous, impartial evaluator.',
    "You grade an AI agent's output for a given task against a fixed rubric, scoring each dimension from 1 (poor) to 5 (excellent).",
    'Judge ONLY against the task input and the candidate output you are given — never invent context.',
    'Be calibrated: reserve 5 for genuinely excellent work and 1 for output that fails the dimension outright.',
    'Your entire visible reply MUST be the requested JSON object and nothing else.',
    // The judge's deliverable IS its (parsed) final reply, so the shared directive that
    // keeps reasoning models from emitting the answer into a hidden thinking channel
    // goes last.
    FINAL_ANSWER_IN_REPLY,
].join(' ');
|
|
75
|
+
/**
 * Assemble the user prompt the judge receives for a single cell.
 *
 * The prompt lists, in order: the task input, the candidate output, the rubric
 * dimensions, an optional expectation brief, and the required JSON reply format.
 *
 * @param rubric Rubric providing `task` and the `dimensions` (`key`, `label`, `description`).
 * @param taskInput Rendered task input; a placeholder is used when it is empty.
 * @param output Candidate output; a placeholder is used when it is blank after trimming.
 * @param expectations Passed to `renderExpectationBrief`; a falsy brief is omitted.
 * @returns The prompt as a single newline-joined string.
 */
export function buildJudgePrompt(rubric, taskInput, output, expectations) {
    const dimensionList = rubric.dimensions
        .map(({ key, label, description }) => `- "${key}" — ${label}: ${description}`)
        .join('\n');
    const expectationBrief = renderExpectationBrief(expectations);
    const lines = [
        `You are grading an AI agent's output for a "${rubric.task}" task.`,
        '',
        '## Task input',
        taskInput || '(no task input was supplied)',
        '',
        '## Candidate output',
        output.trim() || '(the candidate produced no output)',
        '',
        '## Rubric — score every dimension from 1 to 5',
        dimensionList,
    ];
    // The brief gets its own blank-line-separated section only when there is one.
    if (expectationBrief) {
        lines.push('', expectationBrief);
    }
    lines.push(
        '',
        'Respond with ONLY this JSON object (no prose, no code fences):',
        '{"scores":[{"key":"<dimension key>","score":<1-5>,"rationale":"<one short sentence>"}]}',
        'Include every dimension key exactly once.',
    );
    return lines.join('\n');
}
|
|
99
|
+
/**
 * Turn the judge's raw JSON into exactly one score entry per rubric dimension.
 *
 * Each score goes through `clampScore`, which bounds it to [1,5]. When the judge left a
 * dimension out, or gave it a non-numeric score, the dimension is still returned with a
 * score of 1 so that it stays in the denominator of any mean computed afterwards; if the
 * judge also gave no rationale, an explanatory note is used as the rationale.
 *
 * @param rubric Rubric whose `dimensions` define the keys and order of the result.
 * @param raw Parsed judge reply, read through `scoreEntriesByKey`.
 * @returns An array of `{ key, score, rationale }`, one per dimension.
 */
export function coerceJudgeScores(rubric, raw) {
    const entries = scoreEntriesByKey(raw);
    return rubric.dimensions.map(({ key }) => {
        const entry = entries.get(key);
        const clamped = clampScore(entry?.score);
        let rationale = '';
        if (typeof entry?.rationale === 'string' && entry.rationale.trim()) {
            // The judge's own (non-blank) rationale always wins.
            rationale = entry.rationale.trim();
        }
        else if (clamped === null) {
            rationale = 'Judge did not score this dimension.';
        }
        return { key, score: clamped ?? 1, rationale };
    });
}
|
|
117
|
+
/**
 * Convert a deterministic objective score into the `findings` result object.
 *
 * The result passes only when no high-impact expectation was missed. `caught` and
 * `total` are counts (total = caught + missed), and `detail` is a one-line summary.
 *
 * @param score Object with `caught`, `missed` and `missedHighImpact` arrays plus
 *   `impactRecall` and `wowBonus` values.
 * @returns The `findings` objective result.
 */
export function toFindingsObjectiveResult(score) {
    const { caught, missed, missedHighImpact, impactRecall, wowBonus } = score;
    const caughtCount = caught.length;
    const total = caughtCount + missed.length;
    return {
        kind: 'findings',
        pass: missedHighImpact.length === 0,
        detail: `Caught ${caughtCount}/${total} expected findings; impact recall ${impactRecall}, wow bonus ${wowBonus}.`,
        impactRecall,
        wowBonus,
        caught: caughtCount,
        total,
        missedHighImpact,
    };
}
|
|
130
|
+
/**
 * Grade a candidate's output against the fixture's objective when that objective is of
 * kind `findings`.
 *
 * @param fixture Fixture whose optional `objective` is inspected.
 * @param output Candidate output handed to `scoreExpectations`.
 * @returns The findings objective result, or `null` when the fixture has no objective or
 *   its objective is of another kind.
 */
export function objectiveFor(fixture, output) {
    const { objective } = fixture;
    const isFindingsObjective = Boolean(objective) && objective.kind === 'findings';
    if (!isFindingsObjective) {
        return null;
    }
    const score = scoreExpectations(objective.expectations, output);
    return toFindingsObjectiveResult(score);
}
|
|
137
|
+
/**
 * Count the rubric dimensions for which the judge supplied a usable numeric score, i.e.
 * one that `clampScore` does not reject.
 *
 * A result of 0 lets the caller tell an unparseable, empty or reasoning-only judge reply
 * apart from a genuine bottom-of-scale grade, since {@link coerceJudgeScores} on its own
 * would floor every such dimension to 1.
 *
 * @param rubric Rubric whose `dimensions` are checked.
 * @param raw Parsed judge reply, read through `scoreEntriesByKey`.
 * @returns The number of dimensions with a usable score.
 */
export function gradedDimensionCount(rubric, raw) {
    const entries = scoreEntriesByKey(raw);
    const graded = [...rubric.dimensions].filter((dim) => clampScore(entries.get(dim.key)?.score) !== null);
    return graded.length;
}
|
|
153
|
+
/**
 * Index the judge's score entries by their `key`.
 *
 * Only entries that are objects with a string `key` are kept; when a key occurs more
 * than once, the last entry wins.
 *
 * @param raw Parsed judge reply, unwrapped by `extractScoreArray`.
 * @returns A `Map` from dimension key to the judge's entry for it.
 */
function scoreEntriesByKey(raw) {
    const keyedEntries = extractScoreArray(raw)
        .filter((entry) => Boolean(entry) && typeof entry === 'object' && typeof entry.key === 'string')
        .map((entry) => [entry.key, entry]);
    return new Map(keyedEntries);
}
|
|
164
|
+
/**
 * Find the array of score entries in a parsed judge reply.
 *
 * Two shapes are accepted: a bare array, or an object with an array-valued `scores`
 * property. Anything else produces an empty array.
 *
 * @param raw Parsed judge reply of unknown shape.
 * @returns The score array itself (not a copy), or a new empty array.
 */
function extractScoreArray(raw) {
    const isObject = typeof raw === 'object' && raw !== null;
    if (!isObject) {
        return [];
    }
    if (Array.isArray(raw)) {
        return raw;
    }
    return Array.isArray(raw.scores) ? raw.scores : [];
}
|
|
172
|
+
/**
 * Normalise one raw judge score to a number within [1,5].
 *
 * Numbers are used directly and numeric strings are converted with `Number`. Every other
 * input, including non-finite numbers, is treated as "not scored".
 *
 * @param value Raw score taken from the judge's reply.
 * @returns The score bounded to [1,5], or `null` when there is no usable numeric value.
 */
function clampScore(value) {
    let n = NaN;
    if (typeof value === 'number') {
        n = value;
    }
    else if (typeof value === 'string' && value.trim() !== '') {
        // Blank strings are rejected explicitly: `Number('')` is 0, which would otherwise
        // pass for a real grade and be clamped to 1.
        n = Number(value);
    }
    if (!Number.isFinite(n))
        return null;
    return Math.min(5, Math.max(1, n));
}
|
|
178
|
+
//# sourceMappingURL=sandbox.logic.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"sandbox.logic.js","sourceRoot":"","sources":["../../../src/modules/sandbox/sandbox.logic.ts"],"names":[],"mappings":"AAOA,OAAO,EAAE,qBAAqB,EAAE,MAAM,qBAAqB,CAAA;AAC3D,OAAO,EAGL,sBAAsB,EACtB,iBAAiB,GAClB,MAAM,sBAAsB,CAAA;AAE7B,kFAAkF;AAClF,wFAAwF;AACxF,kEAAkE;AAClE,OAAO,EAAE,WAAW,EAAE,MAAM,qBAAqB,CAAA;AAEjD,qFAAqF;AACrF,oFAAoF;AACpF,qFAAqF;AACrF,0DAA0D;AAE1D,2EAA2E;AAC3E,MAAM,UAAU,mBAAmB,CAAC,EAAU;IAC5C,MAAM,GAAG,GAAG,EAAE,CAAC,OAAO,CAAC,GAAG,CAAC,CAAA;IAC3B,IAAI,GAAG,KAAK,CAAC,CAAC;QAAE,OAAO,EAAE,QAAQ,EAAE,EAAE,EAAE,KAAK,EAAE,EAAE,EAAE,CAAA;IAClD,OAAO,EAAE,QAAQ,EAAE,EAAE,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,GAAG,GAAG,CAAC,CAAC,EAAE,CAAA;AACjE,CAAC;AAED;;;;;;GAMG;AACH,MAAM,UAAU,kBAAkB,CAAC,OAAuB;IACxD,MAAM,OAAO,GAAG,CAAC,OAAO,CAAC,OAAO,IAAI,EAAE,CAA4B,CAAA;IAClE,MAAM,KAAK,GAAa,EAAE,CAAA;IAE1B,MAAM,KAAK,GAAG,OAAO,CAAC,KAA4E,CAAA;IAClG,IAAI,KAAK,EAAE,CAAC;QACV,MAAM,OAAO,GAAG,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,GAAG,KAAK,CAAC,KAAK,IAAI,UAAU,KAAK,KAAK,CAAC,IAAI,GAAG,CAAC,CAAC,CAAC,KAAK,CAAC,KAAK,CAAA;QACzF,KAAK,CAAC,IAAI,CAAC,KAAK,OAAO,IAAI,UAAU,EAAE,CAAC,CAAA;QACxC,IAAI,KAAK,CAAC,WAAW;YAAE,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,WAAW,CAAC,CAAA;IACtD,CAAC;IAED,MAAM,IAAI,GAAG,KAAK,CAAC,OAAO,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAE,OAAO,CAAC,IAAkB,CAAC,CAAC,CAAC,EAAE,CAAA;IAC3E,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACpB,KAAK,CAAC,IAAI,CAAC,qBAAqB,CAAC,CAAA;QACjC,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;YACvB,MAAM,CAAC,GAAG,GAA0D,CAAA;YACpE,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,KAAK,IAAI,UAAU,KAAK,CAAC,CAAC,IAAI,IAAI,CAAC,CAAC,OAAO,IAAI,EAAE,EAAE,CAAC,IAAI,EAAE,CAAC,CAAA;QACjF,CAAC;IACH,CAAC;IAED,MAAM,KAAK,GAAG,KAAK,CAAC,OAAO,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,CAAE,OAAO,CAAC,KAAmB,CAAC,CAAC,CAAC,EAAE,CAAA;IAC9E,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACrB,KAAK,CAAC,IAAI,CAAC,0BAA0B,CAAC,CAAA;QACtC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;YACzB,MAAM,CAAC,GAAG,IAAyC,CAAA;YACnD,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,KAAK,IAAI,OAAO,GAAG,CAAC,CAAC,IAAI,CAAC
,CAAC,CAAC,KAAK,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAA;QACrE,CAAC;IACH,CAAC;IAED,MAAM,YAAY,GAAG,KAAK,CAAC,OAAO,CAAC,OAAO,CAAC,YAAY,CAAC;QACtD,CAAC,CAAE,OAAO,CAAC,YAA0B;QACrC,CAAC,CAAC,EAAE,CAAA;IACN,IAAI,YAAY,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC5B,KAAK,CAAC,IAAI,CAAC,6BAA6B,CAAC,CAAA;QACzC,KAAK,MAAM,KAAK,IAAI,YAAY,EAAE,CAAC;YACjC,MAAM,CAAC,GAAG,KAAgD,CAAA;YAC1D,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,SAAS,IAAI,OAAO,KAAK,CAAC,CAAC,MAAM,IAAI,EAAE,EAAE,CAAC,IAAI,EAAE,CAAC,CAAA;QACvE,CAAC;IACH,CAAC;IAED,OAAO,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,CAAA;AAClC,CAAC;AAED,4EAA4E;AAC5E,MAAM,CAAC,MAAM,mBAAmB,GAAG;IACjC,uFAAuF;IACvF,qFAAqF;IACrF,yFAAyF;IACzF,sFAAsF;IACtF,2FAA2F;IAC3F,mBAAmB;IACnB,sFAAsF;IACtF,uFAAuF;IACvF,qBAAqB;CACtB,CAAC,IAAI,CAAC,GAAG,CAAC,CAAA;AAEX,uGAAuG;AACvG,MAAM,UAAU,gBAAgB,CAC9B,MAAc,EACd,SAAiB,EACjB,MAAc,EACd,YAA2C;IAE3C,MAAM,IAAI,GAAG,MAAM,CAAC,UAAU;SAC3B,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,MAAM,CAAC,CAAC,GAAG,OAAO,CAAC,CAAC,KAAK,KAAK,CAAC,CAAC,WAAW,EAAE,CAAC;SACzD,IAAI,CAAC,IAAI,CAAC,CAAA;IACb,MAAM,KAAK,GAAG,sBAAsB,CAAC,YAAY,CAAC,CAAA;IAClD,OAAO;QACL,+CAA+C,MAAM,CAAC,IAAI,SAAS;QACnE,EAAE;QACF,eAAe;QACf,SAAS,IAAI,8BAA8B;QAC3C,EAAE;QACF,qBAAqB;QACrB,MAAM,CAAC,IAAI,EAAE,IAAI,oCAAoC;QACrD,EAAE;QACF,+CAA+C;QAC/C,IAAI;QACJ,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,EAAE,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;QAC7B,EAAE;QACF,gEAAgE;QAChE,yFAAyF;QACzF,2CAA2C;KAC5C,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;AACd,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,iBAAiB,CAAC,MAAc,EAAE,GAAY;IAC5D,MAAM,KAAK,GAAG,iBAAiB,CAAC,GAAG,CAAC,CAAA;IACpC,OAAO,MAAM,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE;QACnC,MAAM,KAAK,GAAG,KAAK,CAAC,GAAG,CAAC,GAAG,CAAC,GAAG,CAAC,CAAA;QAChC,MAAM,KAAK,GAAG,UAAU,CAAC,KAAK,EAAE,KAAK,CAAC,CAAA;QACtC,MAAM,SAAS,GACb,OAAO,KAAK,EAAE,SAAS,KAAK,QAAQ,IAAI,KAAK,CAAC,SAAS,CAAC,IAAI,EAAE;YAC5D,CAAC,CAAC,KAAK,CAAC,SAAS,CAAC,IAAI,EAAE;YACxB,CAAC,CAAC,KAAK,KAAK,IAAI;gBACd,CAAC,CAAC,qCAAqC;gBACvC,CAAC,CAAC,EAAE,CAAA;QACV,OAAO,EAAE,GAAG,EAAE,GAAG,CAAC,GAAG,EAAE,KAAK,EAAE,KAAK
,IAAI,CAAC,EAAE,SAAS,EAAE,CAAA;IACvD,CAAC,CAAC,CAAA;AACJ,CAAC;AAED,mGAAmG;AACnG,MAAM,UAAU,yBAAyB,CAAC,KAAuB;IAC/D,OAAO;QACL,IAAI,EAAE,UAAU;QAChB,IAAI,EAAE,KAAK,CAAC,gBAAgB,CAAC,MAAM,KAAK,CAAC;QACzC,MAAM,EAAE,UAAU,KAAK,CAAC,MAAM,CAAC,MAAM,IAAI,KAAK,CAAC,MAAM,CAAC,MAAM,GAAG,KAAK,CAAC,MAAM,CAAC,MAAM,qCAAqC,KAAK,CAAC,YAAY,eAAe,KAAK,CAAC,QAAQ,GAAG;QACzK,YAAY,EAAE,KAAK,CAAC,YAAY;QAChC,QAAQ,EAAE,KAAK,CAAC,QAAQ;QACxB,MAAM,EAAE,KAAK,CAAC,MAAM,CAAC,MAAM;QAC3B,KAAK,EAAE,KAAK,CAAC,MAAM,CAAC,MAAM,GAAG,KAAK,CAAC,MAAM,CAAC,MAAM;QAChD,gBAAgB,EAAE,KAAK,CAAC,gBAAgB;KACzC,CAAA;AACH,CAAC;AAED,+FAA+F;AAC/F,MAAM,UAAU,YAAY,CAC1B,OAAuB,EACvB,MAAc;IAEd,MAAM,SAAS,GAAG,OAAO,CAAC,SAAS,CAAA;IACnC,IAAI,CAAC,SAAS,IAAI,SAAS,CAAC,IAAI,KAAK,UAAU;QAAE,OAAO,IAAI,CAAA;IAC5D,OAAO,yBAAyB,CAAC,iBAAiB,CAAC,SAAS,CAAC,YAAY,EAAE,MAAM,CAAC,CAAC,CAAA;AACrF,CAAC;AAED;;;;;;GAMG;AACH,MAAM,UAAU,oBAAoB,CAAC,MAAc,EAAE,GAAY;IAC/D,MAAM,KAAK,GAAG,iBAAiB,CAAC,GAAG,CAAC,CAAA;IACpC,IAAI,KAAK,GAAG,CAAC,CAAA;IACb,KAAK,MAAM,GAAG,IAAI,MAAM,CAAC,UAAU,EAAE,CAAC;QACpC,IAAI,UAAU,CAAC,KAAK,CAAC,GAAG,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,KAAK,CAAC,KAAK,IAAI;YAAE,KAAK,EAAE,CAAA;IAC7D,CAAC;IACD,OAAO,KAAK,CAAA;AACd,CAAC;AAED,SAAS,iBAAiB,CAAC,GAAY;IACrC,MAAM,KAAK,GAAG,IAAI,GAAG,EAAoD,CAAA;IACzE,KAAK,MAAM,KAAK,IAAI,iBAAiB,CAAC,GAAG,CAAC,EAAE,CAAC;QAC3C,IACE,KAAK;YACL,OAAO,KAAK,KAAK,QAAQ;YACzB,OAAQ,KAA2B,CAAC,GAAG,KAAK,QAAQ,EACpD,CAAC;YACD,KAAK,CAAC,GAAG,CAAE,KAAyB,CAAC,GAAG,EAAE,KAAiD,CAAC,CAAA;QAC9F,CAAC;IACH,CAAC;IACD,OAAO,KAAK,CAAA;AACd,CAAC;AAED,SAAS,iBAAiB,CAAC,GAAY;IACrC,IAAI,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC;QAAE,OAAO,GAAG,CAAA;IAClC,IAAI,GAAG,IAAI,OAAO,GAAG,KAAK,QAAQ,IAAI,KAAK,CAAC,OAAO,CAAE,GAA4B,CAAC,MAAM,CAAC,EAAE,CAAC;QAC1F,OAAQ,GAA6B,CAAC,MAAM,CAAA;IAC9C,CAAC;IACD,OAAO,EAAE,CAAA;AACX,CAAC;AAED,SAAS,UAAU,CAAC,KAAc;IAChC,MAAM,CAAC,GAAG,OAAO,KAAK,KAAK,QAAQ,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,OAAO,KAAK,KAAK,QAAQ,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,CAAA;IAC7F,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC;QAAE,OAAO,IAAI,CAAA;IACpC
,OAAO,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAA;AACpC,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"WorkspaceSettingsService.d.ts","sourceRoot":"","sources":["../../../src/modules/settings/WorkspaceSettingsService.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EACV,4BAA4B,EAC5B,mBAAmB,EACnB,iBAAiB,EACjB,2BAA2B,EAC5B,MAAM,qBAAqB,CAAA;AAG5B,MAAM,WAAW,oCAAoC;IACnD,2BAA2B,EAAE,2BAA2B,CAAA;IACxD,mBAAmB,EAAE,mBAAmB,CAAA;CACzC;AAED;;;;;;GAMG;AACH,qBAAa,wBAAwB;IACnC,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAA6B;IACtD,OAAO,CAAC,QAAQ,CAAC,mBAAmB,CAAqB;IAEzD,YAAY,IAAI,EAAE,oCAAoC,EAGrD;IAED,0FAA0F;IACpF,GAAG,CAAC,WAAW,EAAE,MAAM,GAAG,OAAO,CAAC,iBAAiB,CAAC,CAEzD;IAED,kEAAkE;IAC5D,MAAM,CACV,WAAW,EAAE,MAAM,EACnB,KAAK,EAAE,4BAA4B,GAClC,OAAO,CAAC,iBAAiB,CAAC,
|
|
1
|
+
{"version":3,"file":"WorkspaceSettingsService.d.ts","sourceRoot":"","sources":["../../../src/modules/settings/WorkspaceSettingsService.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EACV,4BAA4B,EAC5B,mBAAmB,EACnB,iBAAiB,EACjB,2BAA2B,EAC5B,MAAM,qBAAqB,CAAA;AAG5B,MAAM,WAAW,oCAAoC;IACnD,2BAA2B,EAAE,2BAA2B,CAAA;IACxD,mBAAmB,EAAE,mBAAmB,CAAA;CACzC;AAED;;;;;;GAMG;AACH,qBAAa,wBAAwB;IACnC,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAA6B;IACtD,OAAO,CAAC,QAAQ,CAAC,mBAAmB,CAAqB;IAEzD,YAAY,IAAI,EAAE,oCAAoC,EAGrD;IAED,0FAA0F;IACpF,GAAG,CAAC,WAAW,EAAE,MAAM,GAAG,OAAO,CAAC,iBAAiB,CAAC,CAEzD;IAED,kEAAkE;IAC5D,MAAM,CACV,WAAW,EAAE,MAAM,EACnB,KAAK,EAAE,4BAA4B,GAClC,OAAO,CAAC,iBAAiB,CAAC,CA+B5B;CACF"}
|
|
@@ -26,6 +26,9 @@ export class WorkspaceSettingsService {
|
|
|
26
26
|
taskLimitMode: patch.taskLimitMode ?? current.taskLimitMode,
|
|
27
27
|
taskLimitShared: patch.taskLimitShared !== undefined ? patch.taskLimitShared : current.taskLimitShared,
|
|
28
28
|
taskLimitPerType: patch.taskLimitPerType !== undefined ? patch.taskLimitPerType : current.taskLimitPerType,
|
|
29
|
+
spendCurrency: patch.spendCurrency !== undefined ? patch.spendCurrency : current.spendCurrency,
|
|
30
|
+
spendMonthlyLimit: patch.spendMonthlyLimit !== undefined ? patch.spendMonthlyLimit : current.spendMonthlyLimit,
|
|
31
|
+
spendModelPrices: patch.spendModelPrices !== undefined ? patch.spendModelPrices : current.spendModelPrices,
|
|
29
32
|
};
|
|
30
33
|
// Keep the limit fields consistent with the mode so the enforcement logic + UI never
|
|
31
34
|
// read a stale cap from an inactive mode.
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"WorkspaceSettingsService.js","sourceRoot":"","sources":["../../../src/modules/settings/WorkspaceSettingsService.ts"],"names":[],"mappings":"AAMA,OAAO,EAAE,0BAA0B,EAAE,gBAAgB,EAAE,MAAM,qBAAqB,CAAA;AAOlF;;;;;;GAMG;AACH,MAAM,OAAO,wBAAwB;IAClB,QAAQ,CAA6B;IACrC,mBAAmB,CAAqB;IAEzD,YAAY,IAA0C;QACpD,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC,2BAA2B,CAAA;QAChD,IAAI,CAAC,mBAAmB,GAAG,IAAI,CAAC,mBAAmB,CAAA;IACrD,CAAC;IAED,0FAA0F;IAC1F,KAAK,CAAC,GAAG,CAAC,WAAmB;QAC3B,OAAO,CAAC,MAAM,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,WAAW,CAAC,CAAC,IAAI,EAAE,GAAG,0BAA0B,EAAE,CAAA;IACpF,CAAC;IAED,kEAAkE;IAClE,KAAK,CAAC,MAAM,CACV,WAAmB,EACnB,KAAmC;QAEnC,MAAM,gBAAgB,CAAC,IAAI,CAAC,mBAAmB,EAAE,WAAW,CAAC,CAAA;QAC7D,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,GAAG,CAAC,WAAW,CAAC,CAAA;QAC3C,MAAM,IAAI,GAAsB;YAC9B,wBAAwB,EAAE,KAAK,CAAC,wBAAwB,IAAI,OAAO,CAAC,wBAAwB;YAC5F,aAAa,EAAE,KAAK,CAAC,aAAa,IAAI,OAAO,CAAC,aAAa;YAC3D,eAAe,EACb,KAAK,CAAC,eAAe,KAAK,SAAS,CAAC,CAAC,CAAC,KAAK,CAAC,eAAe,CAAC,CAAC,CAAC,OAAO,CAAC,eAAe;YACvF,gBAAgB,EACd,KAAK,CAAC,gBAAgB,KAAK,SAAS,CAAC,CAAC,CAAC,KAAK,CAAC,gBAAgB,CAAC,CAAC,CAAC,OAAO,CAAC,gBAAgB;SAC3F,CAAA;QACD,qFAAqF;QACrF,0CAA0C;QAC1C,IAAI,IAAI,CAAC,aAAa,KAAK,KAAK,EAAE,CAAC;YACjC,IAAI,CAAC,eAAe,GAAG,IAAI,CAAA;YAC3B,IAAI,CAAC,gBAAgB,GAAG,IAAI,CAAA;QAC9B,CAAC;aAAM,IAAI,IAAI,CAAC,aAAa,KAAK,QAAQ,EAAE,CAAC;YAC3C,IAAI,CAAC,gBAAgB,GAAG,IAAI,CAAA;YAC5B,IAAI,IAAI,CAAC,eAAe,IAAI,IAAI;gBAAE,IAAI,CAAC,eAAe,GAAG,CAAC,CAAA;QAC5D,CAAC;aAAM,CAAC;YACN,IAAI,CAAC,eAAe,GAAG,IAAI,CAAA;YAC3B,IAAI,IAAI,CAAC,gBAAgB,IAAI,IAAI;gBAAE,IAAI,CAAC,gBAAgB,GAAG,EAAE,CAAA;QAC/D,CAAC;QACD,MAAM,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,WAAW,EAAE,IAAI,CAAC,CAAA;QAC7C,OAAO,IAAI,CAAA;IACb,CAAC;CACF"}
|
|
1
|
+
{"version":3,"file":"WorkspaceSettingsService.js","sourceRoot":"","sources":["../../../src/modules/settings/WorkspaceSettingsService.ts"],"names":[],"mappings":"AAMA,OAAO,EAAE,0BAA0B,EAAE,gBAAgB,EAAE,MAAM,qBAAqB,CAAA;AAOlF;;;;;;GAMG;AACH,MAAM,OAAO,wBAAwB;IAClB,QAAQ,CAA6B;IACrC,mBAAmB,CAAqB;IAEzD,YAAY,IAA0C;QACpD,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC,2BAA2B,CAAA;QAChD,IAAI,CAAC,mBAAmB,GAAG,IAAI,CAAC,mBAAmB,CAAA;IACrD,CAAC;IAED,0FAA0F;IAC1F,KAAK,CAAC,GAAG,CAAC,WAAmB;QAC3B,OAAO,CAAC,MAAM,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,WAAW,CAAC,CAAC,IAAI,EAAE,GAAG,0BAA0B,EAAE,CAAA;IACpF,CAAC;IAED,kEAAkE;IAClE,KAAK,CAAC,MAAM,CACV,WAAmB,EACnB,KAAmC;QAEnC,MAAM,gBAAgB,CAAC,IAAI,CAAC,mBAAmB,EAAE,WAAW,CAAC,CAAA;QAC7D,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,GAAG,CAAC,WAAW,CAAC,CAAA;QAC3C,MAAM,IAAI,GAAsB;YAC9B,wBAAwB,EAAE,KAAK,CAAC,wBAAwB,IAAI,OAAO,CAAC,wBAAwB;YAC5F,aAAa,EAAE,KAAK,CAAC,aAAa,IAAI,OAAO,CAAC,aAAa;YAC3D,eAAe,EACb,KAAK,CAAC,eAAe,KAAK,SAAS,CAAC,CAAC,CAAC,KAAK,CAAC,eAAe,CAAC,CAAC,CAAC,OAAO,CAAC,eAAe;YACvF,gBAAgB,EACd,KAAK,CAAC,gBAAgB,KAAK,SAAS,CAAC,CAAC,CAAC,KAAK,CAAC,gBAAgB,CAAC,CAAC,CAAC,OAAO,CAAC,gBAAgB;YAC1F,aAAa,EACX,KAAK,CAAC,aAAa,KAAK,SAAS,CAAC,CAAC,CAAC,KAAK,CAAC,aAAa,CAAC,CAAC,CAAC,OAAO,CAAC,aAAa;YACjF,iBAAiB,EACf,KAAK,CAAC,iBAAiB,KAAK,SAAS,CAAC,CAAC,CAAC,KAAK,CAAC,iBAAiB,CAAC,CAAC,CAAC,OAAO,CAAC,iBAAiB;YAC7F,gBAAgB,EACd,KAAK,CAAC,gBAAgB,KAAK,SAAS,CAAC,CAAC,CAAC,KAAK,CAAC,gBAAgB,CAAC,CAAC,CAAC,OAAO,CAAC,gBAAgB;SAC3F,CAAA;QACD,qFAAqF;QACrF,0CAA0C;QAC1C,IAAI,IAAI,CAAC,aAAa,KAAK,KAAK,EAAE,CAAC;YACjC,IAAI,CAAC,eAAe,GAAG,IAAI,CAAA;YAC3B,IAAI,CAAC,gBAAgB,GAAG,IAAI,CAAA;QAC9B,CAAC;aAAM,IAAI,IAAI,CAAC,aAAa,KAAK,QAAQ,EAAE,CAAC;YAC3C,IAAI,CAAC,gBAAgB,GAAG,IAAI,CAAA;YAC5B,IAAI,IAAI,CAAC,eAAe,IAAI,IAAI;gBAAE,IAAI,CAAC,eAAe,GAAG,CAAC,CAAA;QAC5D,CAAC;aAAM,CAAC;YACN,IAAI,CAAC,eAAe,GAAG,IAAI,CAAA;YAC3B,IAAI,IAAI,CAAC,gBAAgB,IAAI,IAAI;gBAAE,IAAI,CAAC,gBAAgB,GAAG,EAAE,CAAA;QAC/D,CAAC;QACD,MAAM,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,WAAW,EAAE,IAAI,CAAC,CAAA;QAC7C,OAAO,IAAI,CAAA;IACb,CAAC;CACF"}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@cat-factory/orchestration",
|
|
3
|
-
"version": "0.19.2",
|
|
3
|
+
"version": "0.21.0",
|
|
4
4
|
"description": "Delivery-workflow engine for the Agent Architecture Board (execution, bootstrap, pipelines, board, boardScan, requirements, and composition root).",
|
|
5
5
|
"repository": {
|
|
6
6
|
"type": "git",
|
|
@@ -25,13 +25,14 @@
|
|
|
25
25
|
},
|
|
26
26
|
"dependencies": {
|
|
27
27
|
"ai": "^6.0.209",
|
|
28
|
-
"@cat-factory/agents": "0.14.
|
|
29
|
-
"@cat-factory/contracts": "0.
|
|
30
|
-
"@cat-factory/integrations": "0.
|
|
31
|
-
"@cat-factory/kernel": "0.
|
|
32
|
-
"@cat-factory/prompt-fragments": "0.7.
|
|
33
|
-
"@cat-factory/
|
|
34
|
-
"@cat-factory/
|
|
28
|
+
"@cat-factory/agents": "0.14.7",
|
|
29
|
+
"@cat-factory/contracts": "0.25.0",
|
|
30
|
+
"@cat-factory/integrations": "0.20.0",
|
|
31
|
+
"@cat-factory/kernel": "0.28.0",
|
|
32
|
+
"@cat-factory/prompt-fragments": "0.7.22",
|
|
33
|
+
"@cat-factory/sandbox": "0.8.0",
|
|
34
|
+
"@cat-factory/spend": "0.9.1",
|
|
35
|
+
"@cat-factory/workspaces": "0.7.34"
|
|
35
36
|
},
|
|
36
37
|
"devDependencies": {
|
|
37
38
|
"typescript": "7.0.1-rc",
|