@cat-factory/sandbox 0.7.2 → 0.7.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,35 @@
1
+ import type { SandboxPromptVersion } from '@cat-factory/kernel';
2
+ import type { SandboxTaskType } from './rubrics.js';
3
+ export type SandboxAgentBucket = 'inline' | 'container'; // execution bucket: inline kinds run a single LLM call, container kinds need a real checkout
4
+ export interface SandboxAgentKindMeta { // one entry of the testable-kind catalog (SANDBOX_AGENT_KINDS)
5
+ /** The agent kind (matches `AgentKind` strings used across the product). */
6
+ agentKind: string;
7
+ /** A short human label for the Sandbox prompt browser. */
8
+ label: string;
9
+ /** Inline kinds run a single LLM call; container kinds need a real checkout. */
10
+ bucket: SandboxAgentBucket;
11
+ /** Which rubric the judge grades this kind's output against. */
12
+ rubric: SandboxTaskType;
13
+ /**
14
+ * The version-controlled baseline prompt id (a `PROMPT_VERSIONS` key) this kind's
15
+ * system prompt comes from. When null (or when the id is not a `PROMPT_VERSIONS` key),
16
+ * `baselinePromptText` reads the baseline text from `systemPromptFor(agentKind)` and
+ * labels it `<kind>@v1`.
17
+ */
18
+ basePromptId: string | null;
19
+ }
20
+ /**
+ * The testable-kind catalog, exposed as a read-only array. Ordered for stable display
+ * (inline-first, then container).
+ */
21
+ export declare const SANDBOX_AGENT_KINDS: readonly SandboxAgentKindMeta[];
22
+ /**
+ * Metadata for a testable agent kind, or undefined if the kind is not in the catalog.
+ *
+ * @param agentKind The agent-kind string to look up.
+ * @returns The matching catalog entry, or `undefined` when there is none.
+ */
23
+ export declare function sandboxKindMeta(agentKind: string): SandboxAgentKindMeta | undefined;
24
+ /**
+ * The current shipped system-prompt text + `id@vN` label for a catalog kind.
+ *
+ * @param meta The catalog entry whose baseline prompt is wanted.
+ * @returns The prompt `text` together with its display `label`.
+ */
25
+ export declare function baselinePromptText(meta: SandboxAgentKindMeta): {
26
+ text: string;
27
+ label: string;
28
+ };
29
+ /**
30
+ * Enumerate every shipped baseline as a synthetic (un-persisted) {@link SandboxPromptVersion}.
31
+ * These are version 0, origin `baseline`, with a null `parentId` and no lineage beyond
+ * themselves (`lineageId` equals `id`, both `baseline:<prompt id or agent kind>`) — the prompt
32
+ * browser groups them by agent kind and offers "clone" to start an editable candidate lineage.
+ *
+ * @param now Timestamp stamped onto every returned entry's `createdAt`.
+ * @returns One entry per catalog kind, in catalog order.
33
+ */
34
+ export declare function listBaselines(now: number): SandboxPromptVersion[];
35
+ //# sourceMappingURL=baselines.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"baselines.d.ts","sourceRoot":"","sources":["../src/baselines.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,oBAAoB,EAAE,MAAM,qBAAqB,CAAA;AAC/D,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,cAAc,CAAA;AAWnD,MAAM,MAAM,kBAAkB,GAAG,QAAQ,GAAG,WAAW,CAAA;AAEvD,MAAM,WAAW,oBAAoB;IACnC,4EAA4E;IAC5E,SAAS,EAAE,MAAM,CAAA;IACjB,0DAA0D;IAC1D,KAAK,EAAE,MAAM,CAAA;IACb,gFAAgF;IAChF,MAAM,EAAE,kBAAkB,CAAA;IAC1B,gEAAgE;IAChE,MAAM,EAAE,eAAe,CAAA;IACvB;;;;OAIG;IACH,YAAY,EAAE,MAAM,GAAG,IAAI,CAAA;CAC5B;AAED,4FAA4F;AAC5F,eAAO,MAAM,mBAAmB,EAAE,SAAS,oBAAoB,EAwC9D,CAAA;AAMD,0FAA0F;AAC1F,wBAAgB,eAAe,CAAC,SAAS,EAAE,MAAM,GAAG,oBAAoB,GAAG,SAAS,CAEnF;AAED,iFAAiF;AACjF,wBAAgB,kBAAkB,CAAC,IAAI,EAAE,oBAAoB,GAAG;IAAE,IAAI,EAAE,MAAM,CAAC;IAAC,KAAK,EAAE,MAAM,CAAA;CAAE,CAM9F;AAED;;;;GAIG;AACH,wBAAgB,aAAa,CAAC,GAAG,EAAE,MAAM,GAAG,oBAAoB,EAAE,CAmBjE"}
@@ -0,0 +1,82 @@
1
+ import { PROMPT_VERSIONS, promptVersionLabel, systemPromptFor } from '@cat-factory/agents';
2
+ /**
+ * The testable-kind catalog. Ordered for stable display (inline-first, then container).
+ * Each entry pairs an agent kind with its display label, execution bucket, judge rubric
+ * and baseline prompt id (null when the prompt text is read live, as for the
+ * `architect-companion` entry).
+ */
3
+ export const SANDBOX_AGENT_KINDS = [
4
+ {
5
+ agentKind: 'requirements-review', // plural here, while the rubric and prompt id below use the singular form
6
+ label: 'Requirements review',
7
+ bucket: 'inline',
8
+ rubric: 'requirement-review',
9
+ basePromptId: 'requirement-review',
10
+ },
11
+ {
12
+ agentKind: 'clarity-review',
13
+ label: 'Clarity (bug-report) review',
14
+ bucket: 'inline',
15
+ rubric: 'requirement-review',
16
+ basePromptId: 'clarity-review',
17
+ },
18
+ {
19
+ agentKind: 'reviewer',
20
+ label: 'Code reviewer',
21
+ bucket: 'inline',
22
+ rubric: 'code-review',
23
+ basePromptId: 'review',
24
+ },
25
+ {
26
+ // Reviews an `architect`'s design proposal (the architect-companion grades it). A
27
+ // proposal critique is graded on the same axes as a requirements review — gap
28
+ // coverage, no-hallucination, specificity. No numbered baseline prompt: the text is
29
+ // read live from `systemPromptFor('architect-companion')`.
30
+ agentKind: 'architect-companion',
31
+ label: 'Architecture-proposal review',
32
+ bucket: 'inline',
33
+ rubric: 'requirement-review',
34
+ basePromptId: null,
35
+ },
36
+ {
37
+ agentKind: 'coder',
38
+ label: 'Coder (implementation)',
39
+ bucket: 'container',
40
+ rubric: 'implementation',
41
+ basePromptId: 'build',
42
+ },
43
+ ];
44
+ const BY_KIND = new Map(SANDBOX_AGENT_KINDS.map((m) => [m.agentKind, m])); // lookup index of catalog entries keyed by agentKind, built once at module load
45
+ /**
+ * Metadata for a testable agent kind, or undefined if the kind is not in the catalog.
+ *
+ * @param agentKind The agent-kind string to look up in the `BY_KIND` index.
+ * @returns The matching catalog entry, or `undefined` when the kind is unknown.
+ */
46
+ export function sandboxKindMeta(agentKind) {
47
+ return BY_KIND.get(agentKind);
48
+ }
49
+ /**
+ * The current shipped system-prompt text + `id@vN` label for a catalog kind.
+ *
+ * Uses the `PROMPT_VERSIONS` entry when `meta.basePromptId` is non-empty and is a key of
+ * `PROMPT_VERSIONS`; otherwise falls back to `systemPromptFor(meta.agentKind)`, labelled
+ * as version 1 of the agent kind.
+ *
+ * NOTE(review): the `in` check also matches inherited property names; presumably no
+ * `basePromptId` ever collides with one — confirm if ids ever become user-supplied.
+ *
+ * @param meta Catalog entry to resolve.
+ * @returns The prompt `text` together with its display `label`.
+ */
50
+ export function baselinePromptText(meta) {
51
+ if (meta.basePromptId && meta.basePromptId in PROMPT_VERSIONS) {
52
+ const versioned = PROMPT_VERSIONS[meta.basePromptId];
53
+ return { text: versioned.text, label: promptVersionLabel(versioned.id, versioned.version) };
54
+ }
55
+ return { text: systemPromptFor(meta.agentKind), label: promptVersionLabel(meta.agentKind, 1) }; // live fallback, always labelled as version 1
56
+ }
57
+ /**
58
+ * Enumerate every shipped baseline as a synthetic (un-persisted) {@link SandboxPromptVersion}.
59
+ * These are version 0, origin `baseline`, with a null `parentId` and no lineage beyond
+ * themselves (`lineageId` equals `id`, both `baseline:<prompt id or agent kind>`) — the prompt
60
+ * browser groups them by agent kind and offers "clone" to start an editable candidate lineage.
+ *
+ * @param now Timestamp stamped onto every returned entry's `createdAt`.
+ * @returns One entry per `SANDBOX_AGENT_KINDS` item, in catalog order.
61
+ */
62
+ export function listBaselines(now) {
63
+ return SANDBOX_AGENT_KINDS.map((meta) => {
64
+ const { text, label } = baselinePromptText(meta);
65
+ return {
66
+ id: `baseline:${meta.basePromptId ?? meta.agentKind}`,
67
+ lineageId: `baseline:${meta.basePromptId ?? meta.agentKind}`,
68
+ agentKind: meta.agentKind,
69
+ name: label,
70
+ origin: 'baseline',
71
+ systemText: text,
72
+ basePromptId: meta.basePromptId,
73
+ version: 0,
74
+ parentId: null,
75
+ labels: [],
76
+ createdAt: now, // caller-supplied timestamp, identical for every entry of one call
77
+ createdBy: null,
78
+ archivedAt: null,
79
+ };
80
+ });
81
+ }
82
+ //# sourceMappingURL=baselines.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"baselines.js","sourceRoot":"","sources":["../src/baselines.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,kBAAkB,EAAE,eAAe,EAAE,MAAM,qBAAqB,CAAA;AAgC1F,4FAA4F;AAC5F,MAAM,CAAC,MAAM,mBAAmB,GAAoC;IAClE;QACE,SAAS,EAAE,qBAAqB;QAChC,KAAK,EAAE,qBAAqB;QAC5B,MAAM,EAAE,QAAQ;QAChB,MAAM,EAAE,oBAAoB;QAC5B,YAAY,EAAE,oBAAoB;KACnC;IACD;QACE,SAAS,EAAE,gBAAgB;QAC3B,KAAK,EAAE,6BAA6B;QACpC,MAAM,EAAE,QAAQ;QAChB,MAAM,EAAE,oBAAoB;QAC5B,YAAY,EAAE,gBAAgB;KAC/B;IACD;QACE,SAAS,EAAE,UAAU;QACrB,KAAK,EAAE,eAAe;QACtB,MAAM,EAAE,QAAQ;QAChB,MAAM,EAAE,aAAa;QACrB,YAAY,EAAE,QAAQ;KACvB;IACD;QACE,kFAAkF;QAClF,8EAA8E;QAC9E,oFAAoF;QACpF,2DAA2D;QAC3D,SAAS,EAAE,qBAAqB;QAChC,KAAK,EAAE,8BAA8B;QACrC,MAAM,EAAE,QAAQ;QAChB,MAAM,EAAE,oBAAoB;QAC5B,YAAY,EAAE,IAAI;KACnB;IACD;QACE,SAAS,EAAE,OAAO;QAClB,KAAK,EAAE,wBAAwB;QAC/B,MAAM,EAAE,WAAW;QACnB,MAAM,EAAE,gBAAgB;QACxB,YAAY,EAAE,OAAO;KACtB;CACF,CAAA;AAED,MAAM,OAAO,GAAG,IAAI,GAAG,CACrB,mBAAmB,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,SAAS,EAAE,CAAC,CAAC,CAAC,CACjD,CAAA;AAED,0FAA0F;AAC1F,MAAM,UAAU,eAAe,CAAC,SAAiB;IAC/C,OAAO,OAAO,CAAC,GAAG,CAAC,SAAS,CAAC,CAAA;AAC/B,CAAC;AAED,iFAAiF;AACjF,MAAM,UAAU,kBAAkB,CAAC,IAA0B;IAC3D,IAAI,IAAI,CAAC,YAAY,IAAI,IAAI,CAAC,YAAY,IAAI,eAAe,EAAE,CAAC;QAC9D,MAAM,SAAS,GAAG,eAAe,CAAC,IAAI,CAAC,YAA4C,CAAC,CAAA;QACpF,OAAO,EAAE,IAAI,EAAE,SAAS,CAAC,IAAI,EAAE,KAAK,EAAE,kBAAkB,CAAC,SAAS,CAAC,EAAE,EAAE,SAAS,CAAC,OAAO,CAAC,EAAE,CAAA;IAC7F,CAAC;IACD,OAAO,EAAE,IAAI,EAAE,eAAe,CAAC,IAAI,CAAC,SAAS,CAAC,EAAE,KAAK,EAAE,kBAAkB,CAAC,IAAI,CAAC,SAAS,EAAE,CAAC,CAAC,EAAE,CAAA;AAChG,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,aAAa,CAAC,GAAW;IACvC,OAAO,mBAAmB,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE;QACtC,MAAM,EAAE,IAAI,EAAE,KAAK,EAAE,GAAG,kBAAkB,CAAC,IAAI,CAAC,CAAA;QAChD,OAAO;YACL,EAAE,EAAE,YAAY,IAAI,CAAC,YAAY,IAAI,IAAI,CAAC,SAAS,EAAE;YACrD,SAAS,EAAE,YAAY,IAAI,CAAC,YAAY,IAAI,IAAI,CAAC,SAAS,EAAE;YAC5D,SAAS,EAAE,IAAI,CAAC,SAAS;YACzB,IAAI,EAAE,KAAK;YACX,MAAM,EAAE,UAAU;YAClB,UAAU,EAAE,IAAI;YAChB,YAAY,EAAE,IAAI,CAAC,YAAY;YAC/B,OAAO,EAAE,CAAC;YACV,QA
AQ,EAAE,IAAI;YACd,MAAM,EAAE,EAAE;YACV,SAAS,EAAE,GAAG;YACd,SAAS,EAAE,IAAI;YACf,UAAU,EAAE,IAAI;SACjB,CAAA;IACH,CAAC,CAAC,CAAA;AACJ,CAAC"}
@@ -0,0 +1,41 @@
1
+ import type { CreateSandboxExperimentInput, SandboxFixture } from '@cat-factory/contracts';
2
+ export { BUILTIN_SANDBOX_FIXTURES, builtinFixturesFor, builtinFixture, toSandboxFixture, type SandboxFixtureDefinition, type SandboxFixtureDifficulty, } from '@cat-factory/sandbox-fixtures';
3
+ /**
4
+ * The default-loaded builtin fixtures as wire `SandboxFixture`s (the runtime seeds these
5
+ * when a workspace has no custom fixtures yet). `now` stamps `createdAt`.
+ *
+ * @param now Timestamp handed to `toSandboxFixture` for every builtin definition.
+ * @returns One wire fixture per `BUILTIN_SANDBOX_FIXTURES` entry, in the same order.
6
+ */
7
+ export declare function listBuiltinFixtures(now: number): SandboxFixture[];
8
+ /**
+ * The synthetic baseline prompt-version id for a catalog agent kind (matches `listBaselines`).
+ *
+ * @param agentKind The agent kind to build the id for.
+ * @returns `baseline:<basePromptId>` when the kind is in the catalog with a non-null
+ * `basePromptId`, otherwise `baseline:<agentKind>`.
+ */
9
+ export declare function baselineVersionId(agentKind: string): string;
10
+ export interface SuggestExperimentInput { // the selection that suggestExperiment turns into an experiment-creation payload
11
+ /** The agent kind every cell exercises (a Sandbox catalog kind). */
12
+ agentKind: string;
13
+ /** Model catalog ids to test (the user's selection — e.g. `anthropic:claude-opus-4-8`). Must be non-empty. */
14
+ models: string[];
15
+ /** Fixture ids to run against (one or more; an empty list makes `suggestExperiment` throw). */
16
+ fixtureIds: string[];
17
+ /**
18
+ * Prompt-version ids to test. When omitted or empty, defaults to just the shipped baseline
+ * for the agent, so the
19
+ * suggestion answers "which model is best?" out of the box; pass candidate lineage ids to
20
+ * also answer "does a better prompt help?".
21
+ */
22
+ promptVersionIds?: string[];
23
+ /** Judge model catalog id; omit to let the API default it (latest Claude). */
24
+ judgeModel?: string;
25
+ /** Repeats per cell (variance); defaults to 1. */
26
+ repeats?: number;
27
+ /** Experiment name; defaults to `<catalog label, or the agent kind> — sandbox run`. */
28
+ name?: string;
29
+ /** Optional hard token budget for the whole experiment; omitted means null (no budget set). */
30
+ budgetTokens?: number | null;
31
+ }
32
+ /**
32
+ * Build a ready-to-create experiment for "run these selected models and prompts against
33
+ * these selected fixtures, mapped to this selected agent". Pure: it assembles a
34
+ * {@link CreateSandboxExperimentInput} (the matrix is the cartesian product of prompt
35
+ * versions × models × fixtures) without dispatching anything — the caller POSTs it to the
36
+ * experiments API. Throws on an empty model/fixture selection so a non-runnable suggestion
37
+ * can't be created.
+ *
+ * @param input The user's selection of agent kind, models, fixtures and optional overrides.
+ * @returns The experiment-creation payload.
+ * @throws {Error} When `input.models` or `input.fixtureIds` is empty.
38
+ */
39
+ export declare function suggestExperiment(input: SuggestExperimentInput): CreateSandboxExperimentInput;
41
+ //# sourceMappingURL=fixtures.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"fixtures.d.ts","sourceRoot":"","sources":["../src/fixtures.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,4BAA4B,EAAE,cAAc,EAAE,MAAM,wBAAwB,CAAA;AAS1F,OAAO,EACL,wBAAwB,EACxB,kBAAkB,EAClB,cAAc,EACd,gBAAgB,EAChB,KAAK,wBAAwB,EAC7B,KAAK,wBAAwB,GAC9B,MAAM,+BAA+B,CAAA;AAEtC;;;GAGG;AACH,wBAAgB,mBAAmB,CAAC,GAAG,EAAE,MAAM,GAAG,cAAc,EAAE,CAEjE;AAED,mGAAmG;AACnG,wBAAgB,iBAAiB,CAAC,SAAS,EAAE,MAAM,GAAG,MAAM,CAG3D;AAED,MAAM,WAAW,sBAAsB;IACrC,oEAAoE;IACpE,SAAS,EAAE,MAAM,CAAA;IACjB,2FAA2F;IAC3F,MAAM,EAAE,MAAM,EAAE,CAAA;IAChB,gDAAgD;IAChD,UAAU,EAAE,MAAM,EAAE,CAAA;IACpB;;;;OAIG;IACH,gBAAgB,CAAC,EAAE,MAAM,EAAE,CAAA;IAC3B,8EAA8E;IAC9E,UAAU,CAAC,EAAE,MAAM,CAAA;IACnB,kDAAkD;IAClD,OAAO,CAAC,EAAE,MAAM,CAAA;IAChB,mEAAmE;IACnE,IAAI,CAAC,EAAE,MAAM,CAAA;IACb,2DAA2D;IAC3D,YAAY,CAAC,EAAE,MAAM,GAAG,IAAI,CAAA;CAC7B;AAED;;;;;;;GAOG;AACH,wBAAgB,iBAAiB,CAAC,KAAK,EAAE,sBAAsB,GAAG,4BAA4B,CAwB7F"}
@@ -0,0 +1,50 @@
1
+ import { BUILTIN_SANDBOX_FIXTURES, toSandboxFixture } from '@cat-factory/sandbox-fixtures';
2
+ import { sandboxKindMeta } from './baselines.js';
3
+ // The Sandbox loads its builtin fixtures from the workspace `@cat-factory/sandbox-fixtures`
4
+ // package by default — that package is the single source of truth for the hand-authored,
5
+ // graded no-repo fixtures. Re-exported here so every consumer imports them (and the
6
+ // experiment-suggestion helper) from `@cat-factory/sandbox`.
7
+ export { BUILTIN_SANDBOX_FIXTURES, builtinFixturesFor, builtinFixture, toSandboxFixture, } from '@cat-factory/sandbox-fixtures';
8
+ /**
9
+ * The default-loaded builtin fixtures as wire `SandboxFixture`s (the runtime seeds these
10
+ * when a workspace has no custom fixtures yet). `now` stamps `createdAt`.
+ *
+ * @param now Timestamp handed to `toSandboxFixture` for every builtin definition.
+ * @returns One converted fixture per `BUILTIN_SANDBOX_FIXTURES` entry, in the same order.
11
+ */
12
+ export function listBuiltinFixtures(now) {
13
+ return BUILTIN_SANDBOX_FIXTURES.map((def) => toSandboxFixture(def, now));
14
+ }
15
+ /**
+ * The synthetic baseline prompt-version id for a catalog agent kind (matches `listBaselines`).
+ *
+ * @param agentKind The agent kind to build the id for.
+ * @returns `baseline:<basePromptId>` when the kind is in the catalog with a non-null
+ * `basePromptId`; otherwise (unknown kind, or null prompt id) `baseline:<agentKind>`.
+ */
16
+ export function baselineVersionId(agentKind) {
17
+ const meta = sandboxKindMeta(agentKind); // undefined for kinds outside the catalog
18
+ return `baseline:${meta?.basePromptId ?? agentKind}`;
19
+ }
20
+ /**
21
+ * Build a ready-to-create experiment for "run these selected models and prompts against
22
+ * these selected fixtures, mapped to this selected agent". Pure: it assembles a
23
+ * {@link CreateSandboxExperimentInput} (the matrix is the cartesian product of prompt
24
+ * versions × models × fixtures) without dispatching anything — the caller POSTs it to the
25
+ * experiments API. Throws on an empty model/fixture selection so a non-runnable suggestion
26
+ * can't be created.
+ *
+ * Defaults applied: an omitted or empty `promptVersionIds` becomes the agent's baseline id,
+ * `name` becomes `<catalog label, or the agent kind> — sandbox run`, `repeats` becomes 1,
+ * `budgetTokens` becomes null, and `judgeModel` is only included when it is truthy. The
+ * `models` and `fixtureIds` arrays are passed through by reference, not copied.
+ *
+ * @param input The user's selection of agent kind, models, fixtures and optional overrides.
+ * @returns The experiment-creation payload.
+ * @throws {Error} When `input.models` or `input.fixtureIds` is empty.
27
+ */
28
+ export function suggestExperiment(input) {
29
+ if (input.models.length === 0)
30
+ throw new Error('suggestExperiment: at least one model is required');
31
+ if (input.fixtureIds.length === 0)
32
+ throw new Error('suggestExperiment: at least one fixture is required');
33
+ const meta = sandboxKindMeta(input.agentKind); // may be undefined; only used for the default name
34
+ const promptVersionIds = input.promptVersionIds && input.promptVersionIds.length > 0
35
+ ? input.promptVersionIds
36
+ : [baselineVersionId(input.agentKind)];
37
+ return {
38
+ name: input.name ?? `${meta?.label ?? input.agentKind} — sandbox run`,
39
+ agentKind: input.agentKind,
40
+ matrix: {
41
+ promptVersionIds,
42
+ models: input.models,
43
+ fixtureIds: input.fixtureIds,
44
+ },
45
+ ...(input.judgeModel ? { judgeModel: input.judgeModel } : {}),
46
+ repeats: input.repeats ?? 1,
47
+ budgetTokens: input.budgetTokens ?? null,
48
+ };
49
+ }
50
+ //# sourceMappingURL=fixtures.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"fixtures.js","sourceRoot":"","sources":["../src/fixtures.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,wBAAwB,EAAE,gBAAgB,EAAE,MAAM,+BAA+B,CAAA;AAC1F,OAAO,EAAE,eAAe,EAAE,MAAM,gBAAgB,CAAA;AAEhD,4FAA4F;AAC5F,yFAAyF;AACzF,oFAAoF;AACpF,6DAA6D;AAE7D,OAAO,EACL,wBAAwB,EACxB,kBAAkB,EAClB,cAAc,EACd,gBAAgB,GAGjB,MAAM,+BAA+B,CAAA;AAEtC;;;GAGG;AACH,MAAM,UAAU,mBAAmB,CAAC,GAAW;IAC7C,OAAO,wBAAwB,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,gBAAgB,CAAC,GAAG,EAAE,GAAG,CAAC,CAAC,CAAA;AAC1E,CAAC;AAED,mGAAmG;AACnG,MAAM,UAAU,iBAAiB,CAAC,SAAiB;IACjD,MAAM,IAAI,GAAG,eAAe,CAAC,SAAS,CAAC,CAAA;IACvC,OAAO,YAAY,IAAI,EAAE,YAAY,IAAI,SAAS,EAAE,CAAA;AACtD,CAAC;AAyBD;;;;;;;GAOG;AACH,MAAM,UAAU,iBAAiB,CAAC,KAA6B;IAC7D,IAAI,KAAK,CAAC,MAAM,CAAC,MAAM,KAAK,CAAC;QAC3B,MAAM,IAAI,KAAK,CAAC,mDAAmD,CAAC,CAAA;IACtE,IAAI,KAAK,CAAC,UAAU,CAAC,MAAM,KAAK,CAAC;QAC/B,MAAM,IAAI,KAAK,CAAC,qDAAqD,CAAC,CAAA;IAExE,MAAM,IAAI,GAAG,eAAe,CAAC,KAAK,CAAC,SAAS,CAAC,CAAA;IAC7C,MAAM,gBAAgB,GACpB,KAAK,CAAC,gBAAgB,IAAI,KAAK,CAAC,gBAAgB,CAAC,MAAM,GAAG,CAAC;QACzD,CAAC,CAAC,KAAK,CAAC,gBAAgB;QACxB,CAAC,CAAC,CAAC,iBAAiB,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC,CAAA;IAE1C,OAAO;QACL,IAAI,EAAE,KAAK,CAAC,IAAI,IAAI,GAAG,IAAI,EAAE,KAAK,IAAI,KAAK,CAAC,SAAS,gBAAgB;QACrE,SAAS,EAAE,KAAK,CAAC,SAAS;QAC1B,MAAM,EAAE;YACN,gBAAgB;YAChB,MAAM,EAAE,KAAK,CAAC,MAAM;YACpB,UAAU,EAAE,KAAK,CAAC,UAAU;SAC7B;QACD,GAAG,CAAC,KAAK,CAAC,UAAU,CAAC,CAAC,CAAC,EAAE,UAAU,EAAE,KAAK,CAAC,UAAU,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;QAC7D,OAAO,EAAE,KAAK,CAAC,OAAO,IAAI,CAAC;QAC3B,YAAY,EAAE,KAAK,CAAC,YAAY,IAAI,IAAI;KACzC,CAAA;AACH,CAAC"}
@@ -0,0 +1,6 @@
1
+ export { type SandboxTaskType, type Rubric, type RubricDimension, type ExpectationScore, HIGH_IMPACT_THRESHOLD, TRICKY_THRESHOLD, rubricFor, weightedTotal, scoreExpectations, renderExpectationBrief, } from './rubrics.js';
2
+ export { type SandboxAgentBucket, type SandboxAgentKindMeta, SANDBOX_AGENT_KINDS, sandboxKindMeta, baselinePromptText, listBaselines, } from './baselines.js';
3
+ export { type NewVersionFields, firstVersionFromBaseline, nextVersion, versionLabel, filterByLabels, } from './promptVersions.logic.js';
4
+ export { type ExpandDeps, cellCount, expandMatrix, isRunnableMatrix } from './matrix.logic.js';
5
+ export { type SuggestExperimentInput, type SandboxFixtureDefinition, type SandboxFixtureDifficulty, BUILTIN_SANDBOX_FIXTURES, builtinFixturesFor, builtinFixture, toSandboxFixture, listBuiltinFixtures, baselineVersionId, suggestExperiment, } from './fixtures.js';
6
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAMA,OAAO,EACL,KAAK,eAAe,EACpB,KAAK,MAAM,EACX,KAAK,eAAe,EACpB,KAAK,gBAAgB,EACrB,qBAAqB,EACrB,gBAAgB,EAChB,SAAS,EACT,aAAa,EACb,iBAAiB,EACjB,sBAAsB,GACvB,MAAM,cAAc,CAAA;AAErB,OAAO,EACL,KAAK,kBAAkB,EACvB,KAAK,oBAAoB,EACzB,mBAAmB,EACnB,eAAe,EACf,kBAAkB,EAClB,aAAa,GACd,MAAM,gBAAgB,CAAA;AAEvB,OAAO,EACL,KAAK,gBAAgB,EACrB,wBAAwB,EACxB,WAAW,EACX,YAAY,EACZ,cAAc,GACf,MAAM,2BAA2B,CAAA;AAElC,OAAO,EAAE,KAAK,UAAU,EAAE,SAAS,EAAE,YAAY,EAAE,gBAAgB,EAAE,MAAM,mBAAmB,CAAA;AAE9F,OAAO,EACL,KAAK,sBAAsB,EAC3B,KAAK,wBAAwB,EAC7B,KAAK,wBAAwB,EAC7B,wBAAwB,EACxB,kBAAkB,EAClB,cAAc,EACd,gBAAgB,EAChB,mBAAmB,EACnB,iBAAiB,EACjB,iBAAiB,GAClB,MAAM,eAAe,CAAA"}
package/dist/index.js ADDED
@@ -0,0 +1,11 @@
1
+ // @cat-factory/sandbox — the parallel prompt/model testing surface. This package is
2
+ // deliberately isolated: it depends on kernel (ports), contracts (wire types) and
3
+ // agents (baseline prompts), and nothing in the core product depends on it, so the
4
+ // whole feature can be lifted out later. This entry re-exports the pure domain logic;
5
+ // the run driver + judge service (which consume the executor seams) build on top.
6
+ export { HIGH_IMPACT_THRESHOLD, TRICKY_THRESHOLD, rubricFor, weightedTotal, scoreExpectations, renderExpectationBrief, } from './rubrics.js';
7
+ export { SANDBOX_AGENT_KINDS, sandboxKindMeta, baselinePromptText, listBaselines, } from './baselines.js';
8
+ export { firstVersionFromBaseline, nextVersion, versionLabel, filterByLabels, } from './promptVersions.logic.js';
9
+ export { cellCount, expandMatrix, isRunnableMatrix } from './matrix.logic.js';
10
+ export { BUILTIN_SANDBOX_FIXTURES, builtinFixturesFor, builtinFixture, toSandboxFixture, listBuiltinFixtures, baselineVersionId, suggestExperiment, } from './fixtures.js';
11
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,oFAAoF;AACpF,kFAAkF;AAClF,mFAAmF;AACnF,sFAAsF;AACtF,kFAAkF;AAElF,OAAO,EAKL,qBAAqB,EACrB,gBAAgB,EAChB,SAAS,EACT,aAAa,EACb,iBAAiB,EACjB,sBAAsB,GACvB,MAAM,cAAc,CAAA;AAErB,OAAO,EAGL,mBAAmB,EACnB,eAAe,EACf,kBAAkB,EAClB,aAAa,GACd,MAAM,gBAAgB,CAAA;AAEvB,OAAO,EAEL,wBAAwB,EACxB,WAAW,EACX,YAAY,EACZ,cAAc,GACf,MAAM,2BAA2B,CAAA;AAElC,OAAO,EAAmB,SAAS,EAAE,YAAY,EAAE,gBAAgB,EAAE,MAAM,mBAAmB,CAAA;AAE9F,OAAO,EAIL,wBAAwB,EACxB,kBAAkB,EAClB,cAAc,EACd,gBAAgB,EAChB,mBAAmB,EACnB,iBAAiB,EACjB,iBAAiB,GAClB,MAAM,eAAe,CAAA"}
@@ -0,0 +1,19 @@
1
+ import type { SandboxExperiment, SandboxMatrix, SandboxRun } from '@cat-factory/kernel';
2
+ /**
+ * The number of cells a matrix expands to (for the pre-launch cost estimate).
+ *
+ * @param matrix The experiment matrix whose three axes are multiplied.
+ * @param repeats Repeats per (prompt version, model, fixture) combination.
+ * @returns Prompt versions × models × fixtures × repeats.
+ */
3
+ export declare function cellCount(matrix: SandboxMatrix, repeats: number): number;
4
+ export interface ExpandDeps { // injected dependencies of expandMatrix
5
+ /** Mint a unique run id; called once per cell (pass index so callers can vary it). */
6
+ makeId: (index: number) => string;
7
+ /** The frozen `name@vN` label for a prompt version id (resolved by the service). */
8
+ labelFor: (promptVersionId: string) => string;
9
+ now: number; // injected timestamp; NOTE(review): expandMatrix as shipped in this version never reads it
10
+ }
11
+ /**
12
+ * Expand an experiment into queued {@link SandboxRun} cells. The product (prompt ×
13
+ * model × fixture × repeat) is emitted in a stable order (prompt-major) so a results
14
+ * grid renders consistently. Each cell starts `queued` with all outcome fields null.
+ *
+ * @param experiment The experiment's `id`, `matrix` and `repeats`.
+ * @param deps Injected id minting and prompt-label lookup.
+ * @returns One run skeleton per cell, in emission order.
15
+ */
16
+ export declare function expandMatrix(experiment: Pick<SandboxExperiment, 'id' | 'matrix' | 'repeats'>, deps: ExpandDeps): SandboxRun[];
17
+ /**
+ * Whether a matrix is runnable, i.e. has at least one entry on every axis
+ * (prompt versions, models and fixtures).
+ *
+ * @param matrix The matrix to check.
+ * @returns `true` only when all three axes are non-empty.
+ */
18
+ export declare function isRunnableMatrix(matrix: SandboxMatrix): boolean;
19
+ //# sourceMappingURL=matrix.logic.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"matrix.logic.d.ts","sourceRoot":"","sources":["../src/matrix.logic.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,iBAAiB,EAAE,aAAa,EAAE,UAAU,EAAE,MAAM,qBAAqB,CAAA;AAOvF,kFAAkF;AAClF,wBAAgB,SAAS,CAAC,MAAM,EAAE,aAAa,EAAE,OAAO,EAAE,MAAM,GAAG,MAAM,CAExE;AAED,MAAM,WAAW,UAAU;IACzB,sFAAsF;IACtF,MAAM,EAAE,CAAC,KAAK,EAAE,MAAM,KAAK,MAAM,CAAA;IACjC,oFAAoF;IACpF,QAAQ,EAAE,CAAC,eAAe,EAAE,MAAM,KAAK,MAAM,CAAA;IAC7C,GAAG,EAAE,MAAM,CAAA;CACZ;AAED;;;;GAIG;AACH,wBAAgB,YAAY,CAC1B,UAAU,EAAE,IAAI,CAAC,iBAAiB,EAAE,IAAI,GAAG,QAAQ,GAAG,SAAS,CAAC,EAChE,IAAI,EAAE,UAAU,GACf,UAAU,EAAE,CAkCd;AAED,+DAA+D;AAC/D,wBAAgB,gBAAgB,CAAC,MAAM,EAAE,aAAa,GAAG,OAAO,CAI/D"}
@@ -0,0 +1,53 @@
1
+ // Pure expansion of an experiment's matrix into individual run cells. One cell per
2
+ // (prompt version × model × fixture × repeat). The durable fan-out driver consumes
3
+ // these queued skeletons; everything time/identity-dependent is injected so the
4
+ // expansion is deterministic and unit-testable.
5
+ /**
+ * The number of cells a matrix expands to (for the pre-launch cost estimate).
+ *
+ * @param matrix The experiment matrix whose three axes are multiplied.
+ * @param repeats Repeats per combination; used as given, without validation.
+ * @returns Prompt versions × models × fixtures × repeats.
+ */
6
+ export function cellCount(matrix, repeats) {
7
+ return matrix.promptVersionIds.length * matrix.models.length * matrix.fixtureIds.length * repeats;
8
+ }
9
+ /**
10
+ * Expand an experiment into queued {@link SandboxRun} cells. The product (prompt ×
11
+ * model × fixture × repeat) is emitted in a stable order (prompt-major) so a results
12
+ * grid renders consistently. Each cell starts `queued` with all outcome fields null.
+ *
+ * `deps.makeId` receives a running cell index that starts at 0 and increases by one per
+ * emitted cell; `deps.labelFor` is invoked once per cell, not once per prompt version.
+ * `deps.now` is not read here. A `repeats` of 0 (or less) yields no cells.
+ *
+ * @param experiment The experiment's `id`, `matrix` and `repeats`.
+ * @param deps Injected id minting and prompt-label lookup.
+ * @returns One run skeleton per cell, in emission order.
13
+ */
14
+ export function expandMatrix(experiment, deps) {
15
+ const { promptVersionIds, models, fixtureIds } = experiment.matrix;
16
+ const runs = [];
17
+ let index = 0; // running cell counter handed to deps.makeId
18
+ for (const promptVersionId of promptVersionIds) {
19
+ for (const model of models) {
20
+ for (const fixtureId of fixtureIds) {
21
+ for (let repeatIndex = 0; repeatIndex < experiment.repeats; repeatIndex++) {
22
+ runs.push({
23
+ id: deps.makeId(index),
24
+ experimentId: experiment.id,
25
+ promptVersionId,
26
+ model,
27
+ fixtureId,
28
+ repeatIndex,
29
+ status: 'queued',
30
+ outputText: null,
31
+ usage: null,
32
+ latencyMs: null,
33
+ branch: null,
34
+ prUrl: null,
35
+ diff: null,
36
+ error: null,
37
+ seedSha: null,
38
+ promptLabel: deps.labelFor(promptVersionId),
39
+ startedAt: null,
40
+ finishedAt: null,
41
+ });
42
+ index++;
43
+ }
44
+ }
45
+ }
46
+ }
47
+ return runs;
48
+ }
49
+ /**
+ * Whether a matrix is runnable, i.e. has at least one entry on every axis
+ * (prompt versions, models and fixtures).
+ *
+ * @param matrix The matrix to check.
+ * @returns `true` only when all three axes are non-empty.
+ */
50
+ export function isRunnableMatrix(matrix) {
51
+ return (matrix.promptVersionIds.length > 0 && matrix.models.length > 0 && matrix.fixtureIds.length > 0);
52
+ }
53
+ //# sourceMappingURL=matrix.logic.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"matrix.logic.js","sourceRoot":"","sources":["../src/matrix.logic.ts"],"names":[],"mappings":"AAEA,mFAAmF;AACnF,mFAAmF;AACnF,gFAAgF;AAChF,gDAAgD;AAEhD,kFAAkF;AAClF,MAAM,UAAU,SAAS,CAAC,MAAqB,EAAE,OAAe;IAC9D,OAAO,MAAM,CAAC,gBAAgB,CAAC,MAAM,GAAG,MAAM,CAAC,MAAM,CAAC,MAAM,GAAG,MAAM,CAAC,UAAU,CAAC,MAAM,GAAG,OAAO,CAAA;AACnG,CAAC;AAUD;;;;GAIG;AACH,MAAM,UAAU,YAAY,CAC1B,UAAgE,EAChE,IAAgB;IAEhB,MAAM,EAAE,gBAAgB,EAAE,MAAM,EAAE,UAAU,EAAE,GAAG,UAAU,CAAC,MAAM,CAAA;IAClE,MAAM,IAAI,GAAiB,EAAE,CAAA;IAC7B,IAAI,KAAK,GAAG,CAAC,CAAA;IACb,KAAK,MAAM,eAAe,IAAI,gBAAgB,EAAE,CAAC;QAC/C,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;YAC3B,KAAK,MAAM,SAAS,IAAI,UAAU,EAAE,CAAC;gBACnC,KAAK,IAAI,WAAW,GAAG,CAAC,EAAE,WAAW,GAAG,UAAU,CAAC,OAAO,EAAE,WAAW,EAAE,EAAE,CAAC;oBAC1E,IAAI,CAAC,IAAI,CAAC;wBACR,EAAE,EAAE,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC;wBACtB,YAAY,EAAE,UAAU,CAAC,EAAE;wBAC3B,eAAe;wBACf,KAAK;wBACL,SAAS;wBACT,WAAW;wBACX,MAAM,EAAE,QAAQ;wBAChB,UAAU,EAAE,IAAI;wBAChB,KAAK,EAAE,IAAI;wBACX,SAAS,EAAE,IAAI;wBACf,MAAM,EAAE,IAAI;wBACZ,KAAK,EAAE,IAAI;wBACX,IAAI,EAAE,IAAI;wBACV,KAAK,EAAE,IAAI;wBACX,OAAO,EAAE,IAAI;wBACb,WAAW,EAAE,IAAI,CAAC,QAAQ,CAAC,eAAe,CAAC;wBAC3C,SAAS,EAAE,IAAI;wBACf,UAAU,EAAE,IAAI;qBACjB,CAAC,CAAA;oBACF,KAAK,EAAE,CAAA;gBACT,CAAC;YACH,CAAC;QACH,CAAC;IACH,CAAC;IACD,OAAO,IAAI,CAAA;AACb,CAAC;AAED,+DAA+D;AAC/D,MAAM,UAAU,gBAAgB,CAAC,MAAqB;IACpD,OAAO,CACL,MAAM,CAAC,gBAAgB,CAAC,MAAM,GAAG,CAAC,IAAI,MAAM,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,IAAI,MAAM,CAAC,UAAU,CAAC,MAAM,GAAG,CAAC,CAC/F,CAAA;AACH,CAAC"}
@@ -0,0 +1,17 @@
1
+ import type { SandboxPromptVersion } from '@cat-factory/kernel';
2
+ export interface NewVersionFields { // caller-supplied identity and audit fields for a new prompt version
3
+ /** The id to assign the new version row. */
4
+ id: string;
5
+ createdAt: number; // copied verbatim onto the new version
6
+ createdBy: string | null; // copied verbatim onto the new version
7
+ labels?: string[]; // labels for the new version; an empty list is used when omitted
8
+ }
9
+ /**
+ * Clone a baseline (or any version's text) into a fresh candidate lineage at version 1.
+ *
+ * @param source The version whose `agentKind`, `systemText` and `basePromptId` are copied.
+ * @param name Display name of the new lineage.
+ * @param fields Id, audit fields and optional labels for the new version.
+ * @returns A `candidate` version 1 that roots its own lineage (`lineageId` equals `id`).
+ */
10
+ export declare function firstVersionFromBaseline(source: Pick<SandboxPromptVersion, 'agentKind' | 'systemText' | 'basePromptId'>, name: string, fields: NewVersionFields): SandboxPromptVersion;
11
+ /**
+ * Append a new version onto an existing lineage from an edited system prompt.
+ *
+ * @param parent The version being edited; supplies lineage, agent kind, name and base prompt id.
+ * @param systemText The edited system prompt.
+ * @param fields Id, audit fields and optional labels for the new version.
+ * @returns A `candidate` version numbered `parent.version + 1` whose `parentId` is `parent.id`.
+ */
12
+ export declare function nextVersion(parent: SandboxPromptVersion, systemText: string, fields: NewVersionFields): SandboxPromptVersion;
13
+ /**
+ * The canonical `name@vN` label for a stored version (frozen onto each run).
+ *
+ * @param version The version to label.
+ * @returns The version's `name`, then `@v`, then its `version` number.
+ */
14
+ export declare function versionLabel(version: SandboxPromptVersion): string;
15
+ /**
+ * Filter versions to those carrying every one of the given labels (AND semantics).
+ * Label comparison is case-insensitive.
+ *
+ * @param versions The versions to filter.
+ * @param labels Labels a version must all carry; an empty list disables filtering.
+ * @returns The versions that carry every requested label.
+ */
16
+ export declare function filterByLabels(versions: SandboxPromptVersion[], labels: string[]): SandboxPromptVersion[];
17
+ //# sourceMappingURL=promptVersions.logic.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"promptVersions.logic.d.ts","sourceRoot":"","sources":["../src/promptVersions.logic.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,oBAAoB,EAAE,MAAM,qBAAqB,CAAA;AAQ/D,MAAM,WAAW,gBAAgB;IAC/B,4CAA4C;IAC5C,EAAE,EAAE,MAAM,CAAA;IACV,SAAS,EAAE,MAAM,CAAA;IACjB,SAAS,EAAE,MAAM,GAAG,IAAI,CAAA;IACxB,MAAM,CAAC,EAAE,MAAM,EAAE,CAAA;CAClB;AAED,4FAA4F;AAC5F,wBAAgB,wBAAwB,CACtC,MAAM,EAAE,IAAI,CAAC,oBAAoB,EAAE,WAAW,GAAG,YAAY,GAAG,cAAc,CAAC,EAC/E,IAAI,EAAE,MAAM,EACZ,MAAM,EAAE,gBAAgB,GACvB,oBAAoB,CAgBtB;AAED,kFAAkF;AAClF,wBAAgB,WAAW,CACzB,MAAM,EAAE,oBAAoB,EAC5B,UAAU,EAAE,MAAM,EAClB,MAAM,EAAE,gBAAgB,GACvB,oBAAoB,CAgBtB;AAED,iFAAiF;AACjF,wBAAgB,YAAY,CAAC,OAAO,EAAE,oBAAoB,GAAG,MAAM,CAElE;AAED,uFAAuF;AACvF,wBAAgB,cAAc,CAC5B,QAAQ,EAAE,oBAAoB,EAAE,EAChC,MAAM,EAAE,MAAM,EAAE,GACf,oBAAoB,EAAE,CAOxB"}
@@ -0,0 +1,51 @@
1
+ /**
+ * Clone a baseline (or any version's text) into a fresh candidate lineage at version 1.
+ *
+ * @param source The version whose `agentKind`, `systemText` and `basePromptId` are copied.
+ * @param name Display name of the new lineage.
+ * @param fields Id, audit fields and optional labels; `fields.labels` is used as-is (not
+ * copied) and defaults to an empty list.
+ * @returns A `candidate` version 1 with no parent, rooting its own lineage.
+ */
2
+ export function firstVersionFromBaseline(source, name, fields) {
3
+ return {
4
+ id: fields.id,
5
+ lineageId: fields.id, // v1 roots its own lineage
6
+ agentKind: source.agentKind,
7
+ name,
8
+ origin: 'candidate',
9
+ systemText: source.systemText,
10
+ basePromptId: source.basePromptId,
11
+ version: 1,
12
+ parentId: null,
13
+ labels: fields.labels ?? [],
14
+ createdAt: fields.createdAt,
15
+ createdBy: fields.createdBy,
16
+ archivedAt: null,
17
+ };
18
+ }
19
+ /**
+ * Append a new version onto an existing lineage from an edited system prompt.
+ *
+ * Lineage, agent kind, name and base prompt id are inherited from `parent`; labels are
+ * NOT inherited — they come from `fields.labels` (empty list when omitted).
+ *
+ * @param parent The version being edited.
+ * @param systemText The edited system prompt.
+ * @param fields Id, audit fields and optional labels for the new version.
+ * @returns A `candidate` version numbered `parent.version + 1` whose `parentId` is `parent.id`.
+ */
20
+ export function nextVersion(parent, systemText, fields) {
21
+ return {
22
+ id: fields.id,
23
+ lineageId: parent.lineageId,
24
+ agentKind: parent.agentKind,
25
+ name: parent.name,
26
+ origin: 'candidate',
27
+ systemText,
28
+ basePromptId: parent.basePromptId,
29
+ version: parent.version + 1,
30
+ parentId: parent.id,
31
+ labels: fields.labels ?? [],
32
+ createdAt: fields.createdAt,
33
+ createdBy: fields.createdBy,
34
+ archivedAt: null,
35
+ };
36
+ }
37
+ /**
+ * The canonical `name@vN` label for a stored version (frozen onto each run).
+ *
+ * @param version The version to label.
+ * @returns The version's `name`, then `@v`, then its `version` number.
+ */
38
+ export function versionLabel(version) {
39
+ return `${version.name}@v${version.version}`;
40
+ }
41
/**
 * Filter versions to those carrying every one of the given labels (AND semantics).
 *
 * Labels are compared case-insensitively and with surrounding whitespace ignored on BOTH
 * sides. Previously only the requested labels were trimmed, so a stored label such as
 * `' fast '` could never be matched by any request.
 *
 * @param versions The versions to filter; the array is never mutated.
 * @param labels Labels a version must all carry. Blank entries are ignored.
 * @returns `versions` itself when `labels` is empty; otherwise a new array holding the
 * versions that carry every requested label (all versions when every requested label is
 * blank).
 */
export function filterByLabels(versions, labels) {
    if (labels.length === 0)
        return versions;
    // One normalizer for both sides keeps the comparison symmetric.
    const normalize = (label) => label.trim().toLowerCase();
    const wanted = labels.map(normalize).filter(Boolean);
    return versions.filter((vsn) => {
        const have = new Set(vsn.labels.map(normalize));
        return wanted.every((w) => have.has(w));
    });
}
51
+ //# sourceMappingURL=promptVersions.logic.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"promptVersions.logic.js","sourceRoot":"","sources":["../src/promptVersions.logic.ts"],"names":[],"mappings":"AAgBA,4FAA4F;AAC5F,MAAM,UAAU,wBAAwB,CACtC,MAA+E,EAC/E,IAAY,EACZ,MAAwB;IAExB,OAAO;QACL,EAAE,EAAE,MAAM,CAAC,EAAE;QACb,SAAS,EAAE,MAAM,CAAC,EAAE,EAAE,2BAA2B;QACjD,SAAS,EAAE,MAAM,CAAC,SAAS;QAC3B,IAAI;QACJ,MAAM,EAAE,WAAW;QACnB,UAAU,EAAE,MAAM,CAAC,UAAU;QAC7B,YAAY,EAAE,MAAM,CAAC,YAAY;QACjC,OAAO,EAAE,CAAC;QACV,QAAQ,EAAE,IAAI;QACd,MAAM,EAAE,MAAM,CAAC,MAAM,IAAI,EAAE;QAC3B,SAAS,EAAE,MAAM,CAAC,SAAS;QAC3B,SAAS,EAAE,MAAM,CAAC,SAAS;QAC3B,UAAU,EAAE,IAAI;KACjB,CAAA;AACH,CAAC;AAED,kFAAkF;AAClF,MAAM,UAAU,WAAW,CACzB,MAA4B,EAC5B,UAAkB,EAClB,MAAwB;IAExB,OAAO;QACL,EAAE,EAAE,MAAM,CAAC,EAAE;QACb,SAAS,EAAE,MAAM,CAAC,SAAS;QAC3B,SAAS,EAAE,MAAM,CAAC,SAAS;QAC3B,IAAI,EAAE,MAAM,CAAC,IAAI;QACjB,MAAM,EAAE,WAAW;QACnB,UAAU;QACV,YAAY,EAAE,MAAM,CAAC,YAAY;QACjC,OAAO,EAAE,MAAM,CAAC,OAAO,GAAG,CAAC;QAC3B,QAAQ,EAAE,MAAM,CAAC,EAAE;QACnB,MAAM,EAAE,MAAM,CAAC,MAAM,IAAI,EAAE;QAC3B,SAAS,EAAE,MAAM,CAAC,SAAS;QAC3B,SAAS,EAAE,MAAM,CAAC,SAAS;QAC3B,UAAU,EAAE,IAAI;KACjB,CAAA;AACH,CAAC;AAED,iFAAiF;AACjF,MAAM,UAAU,YAAY,CAAC,OAA6B;IACxD,OAAO,GAAG,OAAO,CAAC,IAAI,KAAK,OAAO,CAAC,OAAO,EAAE,CAAA;AAC9C,CAAC;AAED,uFAAuF;AACvF,MAAM,UAAU,cAAc,CAC5B,QAAgC,EAChC,MAAgB;IAEhB,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,QAAQ,CAAA;IACxC,MAAM,MAAM,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAA;IACxE,OAAO,QAAQ,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,EAAE;QAC7B,MAAM,IAAI,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,WAAW,EAAE,CAAC,CAAC,CAAA;QAC5D,OAAO,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAA;IACzC,CAAC,CAAC,CAAA;AACJ,CAAC"}
@@ -0,0 +1,63 @@
1
+ import type { SandboxExpectation } from '@cat-factory/contracts';
2
+ /** The grading task a Sandbox agent kind maps to (drives which rubric is used). */
3
+ export type SandboxTaskType = 'requirement-review' | 'code-review' | 'implementation';
4
+ export interface RubricDimension { // One weighted grading criterion inside a rubric.
5
+ key: string; // Identifier that per-dimension scores are matched against by `weightedTotal`.
6
+ label: string; // Short human-readable name of the criterion.
7
+ description: string; // One-sentence statement of what the criterion rewards.
8
+ weight: number; // Relative weight of this criterion in the weighted mean.
9
+ }
10
+ export interface Rubric { // The grading rubric for one task, as returned by `rubricFor`.
11
+ task: SandboxTaskType; // The grading task this rubric belongs to.
12
+ dimensions: RubricDimension[]; // The criteria graded for that task, each carrying its own weight.
13
+ }
14
+ export declare function rubricFor(task: SandboxTaskType): Rubric;
15
+ /** Weighted mean of dimension scores (1–5), using the rubric weights. */
16
+ export declare function weightedTotal(task: SandboxTaskType, scores: {
17
+ key: string;
18
+ score: number;
19
+ }[]): number;
20
+ /** An expectation is "high-impact" (a serious miss) at or above this impact rating. */
21
+ export declare const HIGH_IMPACT_THRESHOLD = 4;
22
+ /** An expectation is "tricky" (its catch earns the wow bonus) at or above this rating. */
23
+ export declare const TRICKY_THRESHOLD = 4;
24
+ export interface ExpectationScore {
25
+ /** Expectations the candidate output surfaced. */
26
+ caught: SandboxExpectation[];
27
+ /** Expectations the candidate output missed. */
28
+ missed: SandboxExpectation[];
29
+ /**
30
+ * Impact-weighted recall in [0,1]: `1 − Σ(impact of missed) / Σ(impact of all)`. Missing
31
+ * a high-impact item moves this far more than missing a low-impact one — the asymmetry the
32
+ * fixtures are graded on. 1 when there are no expectations.
33
+ */
34
+ impactRecall: number;
35
+ /**
36
+ * Trickiness-weighted "wow" bonus in [0,1]: `Σ(trickiness of caught tricky items) /
37
+ * Σ(trickiness of all tricky items)`. Only the genuinely tricky items (trickiness ≥
38
+ * {@link TRICKY_THRESHOLD}) contribute, so catching a hard-to-spot finding is rewarded
39
+ * while missing one is not penalized here (impact handles penalties). 1 when nothing is
40
+ * tricky (no wow on offer).
41
+ */
42
+ wowBonus: number;
43
+ /** Ids of missed expectations with impact ≥ {@link HIGH_IMPACT_THRESHOLD}. */
44
+ missedHighImpact: string[];
45
+ }
46
+ /**
47
+ * Deterministic, asymmetric objective score for `findings` fixtures. An expectation is
48
+ * "caught" when any of its `matchHints` (defaulting to its `summary`) appears in the
49
+ * candidate output as a contiguous run of word tokens — case/whitespace/punctuation
50
+ * insensitive, so `reset logic` does not match inside `preset logic`. Recorded ALONGSIDE
51
+ * the judge grade (never blended in); it intentionally does not penalize extra findings
52
+ * (that is the judge's `false_positives` dimension). The two signals are deliberately
53
+ * different: `impactRecall` punishes missing what matters, `wowBonus` rewards catching what
54
+ * is hard to spot. See {@link SandboxExpectation}.
55
+ */
56
+ export declare function scoreExpectations(expectations: readonly SandboxExpectation[], output: string): ExpectationScore;
57
+ /**
58
+ * Render the graded expectations into a Markdown section to append to the judge prompt —
59
+ * "what the judge should expect to see", with the scoring guidance the asymmetry implies.
60
+ * Returns an empty string when there are no expectations (an un-graded fixture).
61
+ */
62
+ export declare function renderExpectationBrief(expectations: readonly SandboxExpectation[]): string;
63
+ //# sourceMappingURL=rubrics.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"rubrics.d.ts","sourceRoot":"","sources":["../src/rubrics.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,kBAAkB,EAAE,MAAM,wBAAwB,CAAA;AAQhE,mFAAmF;AACnF,MAAM,MAAM,eAAe,GAAG,oBAAoB,GAAG,aAAa,GAAG,gBAAgB,CAAA;AAErF,MAAM,WAAW,eAAe;IAC9B,GAAG,EAAE,MAAM,CAAA;IACX,KAAK,EAAE,MAAM,CAAA;IACb,WAAW,EAAE,MAAM,CAAA;IACnB,MAAM,EAAE,MAAM,CAAA;CACf;AAED,MAAM,WAAW,MAAM;IACrB,IAAI,EAAE,eAAe,CAAA;IACrB,UAAU,EAAE,eAAe,EAAE,CAAA;CAC9B;AA6GD,wBAAgB,SAAS,CAAC,IAAI,EAAE,eAAe,GAAG,MAAM,CAEvD;AAED,yEAAyE;AACzE,wBAAgB,aAAa,CAC3B,IAAI,EAAE,eAAe,EACrB,MAAM,EAAE;IAAE,GAAG,EAAE,MAAM,CAAC;IAAC,KAAK,EAAE,MAAM,CAAA;CAAE,EAAE,GACvC,MAAM,CAYR;AAED,uFAAuF;AACvF,eAAO,MAAM,qBAAqB,IAAI,CAAA;AACtC,0FAA0F;AAC1F,eAAO,MAAM,gBAAgB,IAAI,CAAA;AAEjC,MAAM,WAAW,gBAAgB;IAC/B,kDAAkD;IAClD,MAAM,EAAE,kBAAkB,EAAE,CAAA;IAC5B,gDAAgD;IAChD,MAAM,EAAE,kBAAkB,EAAE,CAAA;IAC5B;;;;OAIG;IACH,YAAY,EAAE,MAAM,CAAA;IACpB;;;;;;OAMG;IACH,QAAQ,EAAE,MAAM,CAAA;IAChB,8EAA8E;IAC9E,gBAAgB,EAAE,MAAM,EAAE,CAAA;CAC3B;AAED;;;;;;;;;GASG;AACH,wBAAgB,iBAAiB,CAC/B,YAAY,EAAE,SAAS,kBAAkB,EAAE,EAC3C,MAAM,EAAE,MAAM,GACb,gBAAgB,CA2BlB;AAED;;;;GAIG;AACH,wBAAgB,sBAAsB,CAAC,YAAY,EAAE,SAAS,kBAAkB,EAAE,GAAG,MAAM,CAgB1F"}
@@ -0,0 +1,207 @@
1
+ const REQUIREMENT_REVIEW = [ // Rubric dimensions for the 'requirement-review' task; weights are relative and total 10.
2
+ {
3
+ key: 'gap_coverage',
4
+ label: 'Gap coverage',
5
+ description: 'Surfaces the genuine gaps, ambiguities and risks that would block confident implementation.',
6
+ weight: 3,
7
+ },
8
+ {
9
+ key: 'specificity',
10
+ label: 'Specificity & actionability',
11
+ description: 'Each item is concrete and phrased so a product owner can answer it directly.',
12
+ weight: 2,
13
+ },
14
+ {
15
+ key: 'no_hallucination',
16
+ label: 'No invented requirements',
17
+ description: 'Does not fabricate requirements or answers; raises questions instead of guessing.',
18
+ weight: 3,
19
+ },
20
+ {
21
+ key: 'severity_calibration',
22
+ label: 'Severity calibration',
23
+ description: 'Severity/category labels are sensible and ordered high-impact first.',
24
+ weight: 1,
25
+ },
26
+ {
27
+ key: 'signal_noise',
28
+ label: 'Signal vs noise',
29
+ description: 'Avoids trivial or duplicate items; volume matches the actual ambiguity.',
30
+ weight: 1,
31
+ },
32
+ ];
33
+ const CODE_REVIEW = [ // Rubric dimensions for the 'code-review' task; weights are relative and total 11.
34
+ {
35
+ key: 'issue_detection',
36
+ label: 'Real-issue detection',
37
+ description: 'Finds the genuine correctness, security and edge-case problems in the work.',
38
+ weight: 3,
39
+ },
40
+ {
41
+ key: 'correctness',
42
+ label: 'Correctness of findings',
43
+ description: 'Findings are technically accurate and the proposed fixes are sound.',
44
+ weight: 3,
45
+ },
46
+ {
47
+ key: 'severity_order',
48
+ label: 'Severity ordering',
49
+ description: 'Orders findings blocker → nit and separates must-fix from optional.',
50
+ weight: 1,
51
+ },
52
+ {
53
+ key: 'actionability',
54
+ label: 'Actionability',
55
+ description: 'References the specific code each finding concerns; fixes are concrete.',
56
+ weight: 2,
57
+ },
58
+ {
59
+ key: 'false_positives',
60
+ label: 'Few false positives',
61
+ description: 'Does not invent problems; acknowledges sound code rather than nit-picking.',
62
+ weight: 2,
63
+ },
64
+ ];
65
+ const IMPLEMENTATION = [ // Rubric dimensions for the 'implementation' task; weights are relative and total 10.
66
+ {
67
+ key: 'faithfulness',
68
+ label: 'Design faithfulness',
69
+ description: 'Implements the agreed design and resolved decisions without silent redesign.',
70
+ weight: 3,
71
+ },
72
+ {
73
+ key: 'correctness',
74
+ label: 'Correctness',
75
+ description: 'The diff is correct, handles errors/edge cases, and would plausibly pass CI.',
76
+ weight: 3,
77
+ },
78
+ {
79
+ key: 'completeness',
80
+ label: 'Completeness',
81
+ description: 'Covers the requested scope; no obvious missing pieces or stubs left behind.',
82
+ weight: 2,
83
+ },
84
+ {
85
+ key: 'scope_discipline',
86
+ label: 'Scope discipline',
87
+ description: 'Stays within scope; no speculative abstraction or unrelated churn.',
88
+ weight: 1,
89
+ },
90
+ {
91
+ key: 'code_quality',
92
+ label: 'Code quality',
93
+ description: 'Cohesive, readable, idiomatic to the surrounding codebase.',
94
+ weight: 1,
95
+ },
96
+ ];
97
+ const RUBRICS = { // Lookup from grading task name to its rubric dimensions; read by rubricFor and weightedTotal.
98
+ 'requirement-review': REQUIREMENT_REVIEW,
99
+ 'code-review': CODE_REVIEW,
100
+ implementation: IMPLEMENTATION,
101
+ };
102
/**
 * Look up the rubric for a grading task.
 *
 * @param task - The grading task whose rubric is wanted.
 * @returns The task paired with the dimension list registered for it in `RUBRICS`.
 */
export function rubricFor(task) {
    const dimensions = RUBRICS[task];
    return { task, dimensions };
}
105
/**
 * Weighted mean of dimension scores (1–5), using the rubric weights for `task`.
 *
 * Only rubric dimensions that have a usable score contribute: the divisor is the sum of the
 * weights of those dimensions, so a partially scored rubric is averaged over what was
 * actually graded. Scores whose key is not a dimension of the rubric are ignored.
 *
 * @param task - The grading task whose rubric supplies the dimension weights.
 * @param scores - Per-dimension scores; when a key repeats, the first entry is the one used.
 * @returns The weighted mean rounded to 2 decimals, or 0 when no dimension has a usable score.
 */
export function weightedTotal(task, scores) {
    const dims = RUBRICS[task];
    let sum = 0;
    let weight = 0;
    for (const dim of dims) {
        const score = scores.find((s) => s.key === dim.key)?.score;
        // Require a finite number: NaN and ±Infinity are also `typeof 'number'`, and letting a
        // single one through would turn the whole total into NaN/Infinity.
        if (Number.isFinite(score)) {
            sum += score * dim.weight;
            weight += dim.weight;
        }
    }
    return weight === 0 ? 0 : Math.round((sum / weight) * 100) / 100;
}
119
+ /** An expectation is "high-impact" (a serious miss) at or above this impact rating. */
120
+ export const HIGH_IMPACT_THRESHOLD = 4;
121
+ /** An expectation is "tricky" (its catch earns the wow bonus) at or above this rating. */
122
+ export const TRICKY_THRESHOLD = 4;
123
/**
 * Deterministic, asymmetric objective score for `findings` fixtures. An expectation is
 * "caught" when any of its `matchHints` appears in the candidate output as a contiguous run
 * of word tokens — case/whitespace/punctuation insensitive, so `reset logic` does not match
 * inside `preset logic`. When an expectation has no usable hint (the list is missing, empty,
 * or holds only blank/punctuation-only strings) its `summary` is matched instead, so a
 * degenerate hint list cannot make an expectation impossible to catch.
 *
 * Recorded ALONGSIDE the judge grade (never blended in); it intentionally does not penalize
 * extra findings (that is the judge's `false_positives` dimension). The two signals are
 * deliberately different: `impactRecall` punishes missing what matters, `wowBonus` rewards
 * catching what is hard to spot. See {@link SandboxExpectation}.
 *
 * @param expectations - The graded expectations of the fixture.
 * @param output - The candidate output text to search.
 * @returns `caught` / `missed` partitions of `expectations` (input order preserved),
 *   `impactRecall` = 1 − missed impact / total impact (1 when total impact is 0),
 *   `wowBonus` = caught tricky trickiness / all tricky trickiness (1 when nothing is tricky),
 *   and `missedHighImpact`, the ids of missed expectations at or above the high-impact threshold.
 */
export function scoreExpectations(expectations, output) {
    const haystack = tokenize(output);
    const caught = [];
    const missed = [];
    for (const expectation of expectations) {
        // Tokenize every hint once and discard those with no word tokens; they can never match.
        let needles = (expectation.matchHints ?? [])
            .map((hint) => tokenize(hint))
            .filter((needle) => needle.length > 0);
        // Nothing usable among the hints: match on the summary instead.
        if (needles.length === 0) {
            needles = [tokenize(expectation.summary)];
        }
        // containsSequence rejects an empty needle, so a token-less summary counts as a miss.
        const hit = needles.some((needle) => containsSequence(haystack, needle));
        (hit ? caught : missed).push(expectation);
    }
    const totalImpact = expectations.reduce((sum, e) => sum + e.impact, 0);
    const missedImpact = missed.reduce((sum, e) => sum + e.impact, 0);
    const impactRecall = totalImpact === 0 ? 1 : round2(1 - missedImpact / totalImpact);
    // Only items at or above the trickiness threshold take part in the wow bonus.
    const trickyTotal = expectations
        .filter((e) => e.trickiness >= TRICKY_THRESHOLD)
        .reduce((sum, e) => sum + e.trickiness, 0);
    const trickyCaught = caught
        .filter((e) => e.trickiness >= TRICKY_THRESHOLD)
        .reduce((sum, e) => sum + e.trickiness, 0);
    const wowBonus = trickyTotal === 0 ? 1 : round2(trickyCaught / trickyTotal);
    const missedHighImpact = missed.filter((e) => e.impact >= HIGH_IMPACT_THRESHOLD).map((e) => e.id);
    return { caught, missed, impactRecall, wowBonus, missedHighImpact };
}
158
/**
 * Build the Markdown "expected findings" section that is appended to the judge prompt.
 *
 * The section opens with a fixed heading and scoring guidance, then lists one bullet per
 * expectation (summary plus its impact and trickiness ratings), followed by a sub-bullet
 * with the trimmed detail when the detail is not blank.
 *
 * @param expectations - The graded expectations of the fixture.
 * @returns The section joined with newlines, or an empty string when there are no
 *   expectations (an un-graded fixture).
 */
export function renderExpectationBrief(expectations) {
    if (expectations.length === 0) {
        return '';
    }
    const preamble = [
        '## Expected findings (grading reference)',
        '',
        'A strong response should surface the following. Each is rated by **impact** (how bad it',
        'is to miss, 1–5) and **trickiness** (how hard it is to spot, 1–5). Reward catching',
        'high-trickiness items — those are the impressive catches. Penalize missing high-impact',
        'items most heavily; missing a merely tricky item is a smaller concern.',
        '',
    ];
    const bullets = expectations.flatMap((item) => {
        const headline = `- **${item.summary}** _(impact ${item.impact}, trickiness ${item.trickiness})_`;
        const note = item.detail.trim();
        // Blank details produce no sub-bullet.
        return note ? [headline, ` - ${note}`] : [headline];
    });
    return [...preamble, ...bullets].join('\n');
}
182
/**
 * Round a number to two decimal places.
 *
 * @param n - The value to round.
 * @returns `n` scaled to hundredths, rounded with `Math.round`, and scaled back.
 */
function round2(n) {
    const hundredths = Math.round(n * 100);
    return hundredths / 100;
}
186
/**
 * Split text into lowercase word tokens, dropping punctuation and whitespace.
 *
 * A token is a maximal run of Unicode letters, digits and combining marks, so text written
 * with accented or non-Latin characters yields whole words instead of truncated fragments or
 * nothing at all. The text is NFKC-normalized first so that composed and decomposed spellings
 * of the same character produce the same token. For plain ASCII text the tokens are exactly
 * the runs of `a-z` and `0-9` (underscores and other punctuation still separate tokens).
 *
 * @param text - The raw text to tokenize.
 * @returns The tokens in order of appearance; an empty array when the text has none.
 */
function tokenize(text) {
    return text.normalize('NFKC').toLowerCase().match(/[\p{L}\p{N}\p{M}]+/gu) ?? [];
}
190
/**
 * Test whether the tokens of `needle` occur, in order and with nothing in between, somewhere
 * inside `haystack`.
 *
 * @param haystack - Token list to search in.
 * @param needle - Token run to look for.
 * @returns `true` when a contiguous match exists; `false` otherwise, including when `needle`
 *   is empty or longer than `haystack`.
 */
function containsSequence(haystack, needle) {
    const span = needle.length;
    if (span === 0) {
        return false;
    }
    // Last index at which a run of `span` tokens still fits inside the haystack.
    const lastStart = haystack.length - span;
    for (let start = 0; start <= lastStart; start += 1) {
        const aligned = needle.every((token, offset) => haystack[start + offset] === token);
        if (aligned) {
            return true;
        }
    }
    return false;
}
207
+ //# sourceMappingURL=rubrics.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"rubrics.js","sourceRoot":"","sources":["../src/rubrics.ts"],"names":[],"mappings":"AAuBA,MAAM,kBAAkB,GAAsB;IAC5C;QACE,GAAG,EAAE,cAAc;QACnB,KAAK,EAAE,cAAc;QACrB,WAAW,EACT,6FAA6F;QAC/F,MAAM,EAAE,CAAC;KACV;IACD;QACE,GAAG,EAAE,aAAa;QAClB,KAAK,EAAE,6BAA6B;QACpC,WAAW,EAAE,8EAA8E;QAC3F,MAAM,EAAE,CAAC;KACV;IACD;QACE,GAAG,EAAE,kBAAkB;QACvB,KAAK,EAAE,0BAA0B;QACjC,WAAW,EACT,mFAAmF;QACrF,MAAM,EAAE,CAAC;KACV;IACD;QACE,GAAG,EAAE,sBAAsB;QAC3B,KAAK,EAAE,sBAAsB;QAC7B,WAAW,EAAE,sEAAsE;QACnF,MAAM,EAAE,CAAC;KACV;IACD;QACE,GAAG,EAAE,cAAc;QACnB,KAAK,EAAE,iBAAiB;QACxB,WAAW,EAAE,yEAAyE;QACtF,MAAM,EAAE,CAAC;KACV;CACF,CAAA;AAED,MAAM,WAAW,GAAsB;IACrC;QACE,GAAG,EAAE,iBAAiB;QACtB,KAAK,EAAE,sBAAsB;QAC7B,WAAW,EAAE,6EAA6E;QAC1F,MAAM,EAAE,CAAC;KACV;IACD;QACE,GAAG,EAAE,aAAa;QAClB,KAAK,EAAE,yBAAyB;QAChC,WAAW,EAAE,qEAAqE;QAClF,MAAM,EAAE,CAAC;KACV;IACD;QACE,GAAG,EAAE,gBAAgB;QACrB,KAAK,EAAE,mBAAmB;QAC1B,WAAW,EAAE,qEAAqE;QAClF,MAAM,EAAE,CAAC;KACV;IACD;QACE,GAAG,EAAE,eAAe;QACpB,KAAK,EAAE,eAAe;QACtB,WAAW,EAAE,yEAAyE;QACtF,MAAM,EAAE,CAAC;KACV;IACD;QACE,GAAG,EAAE,iBAAiB;QACtB,KAAK,EAAE,qBAAqB;QAC5B,WAAW,EAAE,4EAA4E;QACzF,MAAM,EAAE,CAAC;KACV;CACF,CAAA;AAED,MAAM,cAAc,GAAsB;IACxC;QACE,GAAG,EAAE,cAAc;QACnB,KAAK,EAAE,qBAAqB;QAC5B,WAAW,EAAE,8EAA8E;QAC3F,MAAM,EAAE,CAAC;KACV;IACD;QACE,GAAG,EAAE,aAAa;QAClB,KAAK,EAAE,aAAa;QACpB,WAAW,EAAE,8EAA8E;QAC3F,MAAM,EAAE,CAAC;KACV;IACD;QACE,GAAG,EAAE,cAAc;QACnB,KAAK,EAAE,cAAc;QACrB,WAAW,EAAE,6EAA6E;QAC1F,MAAM,EAAE,CAAC;KACV;IACD;QACE,GAAG,EAAE,kBAAkB;QACvB,KAAK,EAAE,kBAAkB;QACzB,WAAW,EAAE,oEAAoE;QACjF,MAAM,EAAE,CAAC;KACV;IACD;QACE,GAAG,EAAE,cAAc;QACnB,KAAK,EAAE,cAAc;QACrB,WAAW,EAAE,4DAA4D;QACzE,MAAM,EAAE,CAAC;KACV;CACF,CAAA;AAED,MAAM,OAAO,GAA+C;IAC1D,oBAAoB,EAAE,kBAAkB;IACxC,aAAa,EAAE,WAAW;IAC1B,cAAc,EAAE,cAAc;CAC/B,CAAA;AAED,MAAM,UAAU,SAAS,CAAC,IAAqB;IAC7C,OAAO,EAAE,IAAI,EAAE,UAAU,EAAE,OAAO,CAAC,IAAI,CAAC,EAAE,CAAA;AAC5C,CAAC;AAED,yEAAyE;AACzE,MAAM,UAAU,aAAa,CAC3B,IAAqB,EACrB,MAAwC;IAExC,MAAM,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC,CAAA;IAC1B,IAAI,GAAG,GAAG,CAAC,CA
AA;IACX,IAAI,MAAM,GAAG,CAAC,CAAA;IACd,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;QACvB,MAAM,KAAK,GAAG,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,GAAG,KAAK,GAAG,CAAC,GAAG,CAAC,EAAE,KAAK,CAAA;QAC1D,IAAI,OAAO,KAAK,KAAK,QAAQ,EAAE,CAAC;YAC9B,GAAG,IAAI,KAAK,GAAG,GAAG,CAAC,MAAM,CAAA;YACzB,MAAM,IAAI,GAAG,CAAC,MAAM,CAAA;QACtB,CAAC;IACH,CAAC;IACD,OAAO,MAAM,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,GAAG,GAAG,MAAM,CAAC,GAAG,GAAG,CAAC,GAAG,GAAG,CAAA;AAClE,CAAC;AAED,uFAAuF;AACvF,MAAM,CAAC,MAAM,qBAAqB,GAAG,CAAC,CAAA;AACtC,0FAA0F;AAC1F,MAAM,CAAC,MAAM,gBAAgB,GAAG,CAAC,CAAA;AAyBjC;;;;;;;;;GASG;AACH,MAAM,UAAU,iBAAiB,CAC/B,YAA2C,EAC3C,MAAc;IAEd,MAAM,QAAQ,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAA;IACjC,MAAM,MAAM,GAAyB,EAAE,CAAA;IACvC,MAAM,MAAM,GAAyB,EAAE,CAAA;IACvC,KAAK,MAAM,WAAW,IAAI,YAAY,EAAE,CAAC;QACvC,MAAM,KAAK,GAAG,WAAW,CAAC,UAAU,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,WAAW,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,WAAW,CAAC,OAAO,CAAC,CAAA;QAChG,MAAM,GAAG,GAAG,KAAK,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,EAAE;YAC9B,MAAM,MAAM,GAAG,QAAQ,CAAC,IAAI,CAAC,CAAA;YAC7B,OAAO,MAAM,CAAC,MAAM,GAAG,CAAC,IAAI,gBAAgB,CAAC,QAAQ,EAAE,MAAM,CAAC,CAAA;QAChE,CAAC,CAAC,CACD;QAAA,CAAC,GAAG,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,WAAW,CAAC,CAAA;IAC5C,CAAC;IAED,MAAM,WAAW,GAAG,YAAY,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC,CAAA;IACtE,MAAM,YAAY,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC,CAAA;IACjE,MAAM,YAAY,GAAG,WAAW,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,GAAG,YAAY,GAAG,WAAW,CAAC,CAAA;IAEnF,MAAM,WAAW,GAAG,YAAY;SAC7B,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,UAAU,IAAI,gBAAgB,CAAC;SAC/C,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,UAAU,EAAE,CAAC,CAAC,CAAA;IAC5C,MAAM,YAAY,GAAG,MAAM;SACxB,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,UAAU,IAAI,gBAAgB,CAAC;SAC/C,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,UAAU,EAAE,CAAC,CAAC,CAAA;IAC5C
,MAAM,QAAQ,GAAG,WAAW,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,YAAY,GAAG,WAAW,CAAC,CAAA;IAE3E,MAAM,gBAAgB,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,IAAI,qBAAqB,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,CAAA;IACjG,OAAO,EAAE,MAAM,EAAE,MAAM,EAAE,YAAY,EAAE,QAAQ,EAAE,gBAAgB,EAAE,CAAA;AACrE,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,sBAAsB,CAAC,YAA2C;IAChF,IAAI,YAAY,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAA;IACxC,MAAM,KAAK,GAAG;QACZ,0CAA0C;QAC1C,EAAE;QACF,yFAAyF;QACzF,oFAAoF;QACpF,wFAAwF;QACxF,wEAAwE;QACxE,EAAE;KACH,CAAA;IACD,KAAK,MAAM,CAAC,IAAI,YAAY,EAAE,CAAC;QAC7B,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,OAAO,eAAe,CAAC,CAAC,MAAM,gBAAgB,CAAC,CAAC,UAAU,IAAI,CAAC,CAAA;QACnF,IAAI,CAAC,CAAC,MAAM,CAAC,IAAI,EAAE;YAAE,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,MAAM,CAAC,IAAI,EAAE,EAAE,CAAC,CAAA;IAC3D,CAAC;IACD,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;AACzB,CAAC;AAED,iCAAiC;AACjC,SAAS,MAAM,CAAC,CAAS;IACvB,OAAO,IAAI,CAAC,KAAK,CAAC,CAAC,GAAG,GAAG,CAAC,GAAG,GAAG,CAAA;AAClC,CAAC;AAED,yEAAyE;AACzE,SAAS,QAAQ,CAAC,IAAY;IAC5B,OAAO,IAAI,CAAC,WAAW,EAAE,CAAC,KAAK,CAAC,YAAY,CAAC,IAAI,EAAE,CAAA;AACrD,CAAC;AAED,uFAAuF;AACvF,SAAS,gBAAgB,CAAC,QAAkB,EAAE,MAAgB;IAC5D,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,KAAK,CAAA;IACrC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,IAAI,QAAQ,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAC1D,IAAI,GAAG,GAAG,IAAI,CAAA;QACd,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACvC,IAAI,QAAQ,CAAC,CAAC,GAAG,CAAC,CAAC,KAAK,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC;gBAClC,GAAG,GAAG,KAAK,CAAA;gBACX,MAAK;YACP,CAAC;QACH,CAAC;QACD,IAAI,GAAG;YAAE,OAAO,IAAI,CAAA;IACtB,CAAC;IACD,OAAO,KAAK,CAAA;AACd,CAAC"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@cat-factory/sandbox",
3
- "version": "0.7.2",
3
+ "version": "0.7.3",
4
4
  "repository": {
5
5
  "type": "git",
6
6
  "url": "git+https://github.com/kibertoad/cat-factory.git",
@@ -24,10 +24,10 @@
24
24
  "access": "public"
25
25
  },
26
26
  "dependencies": {
27
- "@cat-factory/agents": "0.7.2",
27
+ "@cat-factory/contracts": "0.7.2",
28
28
  "@cat-factory/kernel": "0.7.2",
29
29
  "@cat-factory/sandbox-fixtures": "0.7.2",
30
- "@cat-factory/contracts": "0.7.2"
30
+ "@cat-factory/agents": "0.7.2"
31
31
  },
32
32
  "devDependencies": {
33
33
  "typescript": "7.0.1-rc",