agent-composer 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +148 -0
- package/composer.config.schema.json +79 -0
- package/dist/cli/init.d.ts +20 -0
- package/dist/cli/init.js +122 -0
- package/dist/cli/init.js.map +1 -0
- package/dist/config/env.d.ts +13 -0
- package/dist/config/env.js +65 -0
- package/dist/config/env.js.map +1 -0
- package/dist/config/loader.d.ts +3 -0
- package/dist/config/loader.js +34 -0
- package/dist/config/loader.js.map +1 -0
- package/dist/config/schema.d.ts +93 -0
- package/dist/config/schema.js +44 -0
- package/dist/config/schema.js.map +1 -0
- package/dist/evolve/budget.d.ts +23 -0
- package/dist/evolve/budget.js +55 -0
- package/dist/evolve/budget.js.map +1 -0
- package/dist/evolve/lengthPenalty.d.ts +3 -0
- package/dist/evolve/lengthPenalty.js +30 -0
- package/dist/evolve/lengthPenalty.js.map +1 -0
- package/dist/evolve/operators.d.ts +24 -0
- package/dist/evolve/operators.js +110 -0
- package/dist/evolve/operators.js.map +1 -0
- package/dist/evolve/pareto.d.ts +24 -0
- package/dist/evolve/pareto.js +153 -0
- package/dist/evolve/pareto.js.map +1 -0
- package/dist/evolve/plateau.d.ts +18 -0
- package/dist/evolve/plateau.js +45 -0
- package/dist/evolve/plateau.js.map +1 -0
- package/dist/evolve/postflight.d.ts +12 -0
- package/dist/evolve/postflight.js +61 -0
- package/dist/evolve/postflight.js.map +1 -0
- package/dist/evolve/preflight.d.ts +13 -0
- package/dist/evolve/preflight.js +39 -0
- package/dist/evolve/preflight.js.map +1 -0
- package/dist/evolve/reflection.d.ts +12 -0
- package/dist/evolve/reflection.js +41 -0
- package/dist/evolve/reflection.js.map +1 -0
- package/dist/evolve/runner.d.ts +62 -0
- package/dist/evolve/runner.js +202 -0
- package/dist/evolve/runner.js.map +1 -0
- package/dist/evolve/s2-deny.d.ts +26 -0
- package/dist/evolve/s2-deny.js +75 -0
- package/dist/evolve/s2-deny.js.map +1 -0
- package/dist/index.d.ts +2 -0
- package/dist/index.js +36 -0
- package/dist/index.js.map +1 -0
- package/dist/providers/AnthropicCompatibleProvider.d.ts +48 -0
- package/dist/providers/AnthropicCompatibleProvider.js +50 -0
- package/dist/providers/AnthropicCompatibleProvider.js.map +1 -0
- package/dist/providers/CLIProvider.d.ts +30 -0
- package/dist/providers/CLIProvider.js +106 -0
- package/dist/providers/CLIProvider.js.map +1 -0
- package/dist/providers/IProvider.d.ts +17 -0
- package/dist/providers/IProvider.js +4 -0
- package/dist/providers/IProvider.js.map +1 -0
- package/dist/providers/MockProvider.d.ts +28 -0
- package/dist/providers/MockProvider.js +66 -0
- package/dist/providers/MockProvider.js.map +1 -0
- package/dist/registry.d.ts +21 -0
- package/dist/registry.js +79 -0
- package/dist/registry.js.map +1 -0
- package/dist/server.d.ts +6 -0
- package/dist/server.js +85 -0
- package/dist/server.js.map +1 -0
- package/dist/util/slug.d.ts +1 -0
- package/dist/util/slug.js +7 -0
- package/dist/util/slug.js.map +1 -0
- package/package.json +56 -0
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
// Wave 3 Step 2 — preflight ecosystem snapshot via `agy` CLI (Gemini 3.1).
|
|
2
|
+
//
|
|
3
|
+
// Runs ONCE per /evolve session before any mutation. Result is surfaced
|
|
4
|
+
// into the reflection LM as `currentEcosystem` so mutators see what
|
|
5
|
+
// changed (new APIs, deprecations, best-practice shifts).
|
|
6
|
+
//
|
|
7
|
+
// Best-effort: if the CLI fails (network down, agy unavailable), we
|
|
8
|
+
// return an empty snapshot — the evolve loop continues without
|
|
9
|
+
// ecosystem grounding rather than refusing to run.
|
|
10
|
+
/**
 * Build the research prompt sent to the preflight provider.
 *
 * @param input - `{ skillDomain, lastEvolveDate? }`; when `lastEvolveDate`
 *   is null/undefined the prompt says "since never".
 * @returns The prompt as a single newline-joined string.
 */
export function buildPreflightPrompt(input) {
    const { lastEvolveDate, skillDomain } = input;
    // Only null/undefined fall back; an empty string is kept as-is.
    const sinceLabel = lastEvolveDate ?? "never";
    const promptLines = [];
    promptLines.push(`Research task: what changed in "${skillDomain}" since ${sinceLabel}?`);
    promptLines.push("", "Focus on:");
    promptLines.push("- newly released or stabilised APIs / SDK versions", "- deprecations and breaking changes", "- best-practice shifts the community has adopted");
    promptLines.push("", "Reply with a concise bulleted summary (≤ 25 bullets). No prose intro.");
    return promptLines.join("\n");
}
|
|
23
|
+
/**
 * Ask `provider` for an ecosystem snapshot, best-effort.
 *
 * The prompt is built outside the try block, so an error while building it
 * propagates. Any error from `provider.execute` (or from reading its result)
 * is converted into an empty snapshot carrying the error message.
 *
 * @param provider - Object exposing `execute({ prompt })`.
 * @param input - Forwarded to `buildPreflightPrompt`.
 * @returns `{ text, prompt, fetchedAt }`, plus `error` when the call failed.
 */
export async function runPreflight(provider, input) {
    const prompt = buildPreflightPrompt(input);
    // The timestamp is taken before the provider call is issued.
    const snapshotBase = { prompt, fetchedAt: new Date().toISOString() };
    try {
        const reply = await provider.execute({ prompt });
        return { text: reply.text, ...snapshotBase };
    }
    catch (failure) {
        const error = failure instanceof Error ? failure.message : String(failure);
        return { text: "", ...snapshotBase, error };
    }
}
|
|
39
|
+
//# sourceMappingURL=preflight.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"preflight.js","sourceRoot":"","sources":["../../src/evolve/preflight.ts"],"names":[],"mappings":"AAAA,2EAA2E;AAC3E,EAAE;AACF,wEAAwE;AACxE,oEAAoE;AACpE,0DAA0D;AAC1D,EAAE;AACF,oEAAoE;AACpE,+DAA+D;AAC/D,mDAAmD;AAgBnD,MAAM,UAAU,oBAAoB,CAAC,KAAqB;IACxD,MAAM,KAAK,GAAG,KAAK,CAAC,cAAc,IAAI,OAAO,CAAC;IAC9C,OAAO;QACL,mCAAmC,KAAK,CAAC,WAAW,WAAW,KAAK,GAAG;QACvE,EAAE;QACF,WAAW;QACX,oDAAoD;QACpD,qCAAqC;QACrC,kDAAkD;QAClD,EAAE;QACF,uEAAuE;KACxE,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AACf,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,YAAY,CAChC,QAAmB,EACnB,KAAqB;IAErB,MAAM,MAAM,GAAG,oBAAoB,CAAC,KAAK,CAAC,CAAC;IAC3C,MAAM,SAAS,GAAG,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;IAC3C,IAAI,CAAC;QACH,MAAM,GAAG,GAAG,MAAM,QAAQ,CAAC,OAAO,CAAC,EAAE,MAAM,EAAE,CAAC,CAAC;QAC/C,OAAO,EAAE,IAAI,EAAE,GAAG,CAAC,IAAI,EAAE,MAAM,EAAE,SAAS,EAAE,CAAC;IAC/C,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,OAAO;YACL,IAAI,EAAE,EAAE;YACR,MAAM;YACN,SAAS;YACT,KAAK,EAAE,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC;SACxD,CAAC;IACJ,CAAC;AACH,CAAC"}
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
import type { IProvider } from "../providers/IProvider.js";
|
|
2
|
+
/**
 * One task and its outcome, as rendered into the reflection prompt by
 * `buildReflectionPrompt` (a `- task:` line followed by an `outcome:` line).
 */
export interface TaskTranscript {
    /** Text printed after `- task:`. */
    task: string;
    /** Text printed after `outcome:`. */
    outcome: string;
}
|
|
6
|
+
/** Input used to render one reflection (rewrite) prompt. */
export interface ReflectionInput {
    /** Skill text to rewrite; placed under the "## Parent skill" heading. */
    parent: string;
    /** Entries listed under "## Recent failing transcripts"; may be empty. */
    taskTranscripts: ReadonlyArray<TaskTranscript>;
    /**
     * Optional ecosystem notes. The "## Current ecosystem" section is only
     * emitted when this is present and not blank after trimming.
     */
    currentEcosystem?: string;
}
|
|
11
|
+
/**
 * Render the reflection prompt: fixed instructions, the parent skill, the
 * transcripts, and (when non-blank) the current-ecosystem section, joined
 * with newlines.
 */
export declare function buildReflectionPrompt(input: ReflectionInput): string;
|
|
12
|
+
/**
 * Send the reflection prompt to `provider` and resolve with the trimmed
 * reply; if the whole reply is a single fenced block, the fence is removed.
 * Provider errors are not caught here and reject the returned promise.
 */
export declare function reflectViaProvider(provider: IProvider, input: ReflectionInput): Promise<string>;
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
// Wave 3 Step 2 — GEPA reflect_and_rewrite via GLM 5.1.
|
|
2
|
+
//
|
|
3
|
+
// The reflection LM sees: the parent skill, the failing task
|
|
4
|
+
// transcripts, and the preflight ecosystem snapshot. It returns a
|
|
5
|
+
// candidate rewrite. Temperature is fixed at 0 by the provider layer
|
|
6
|
+
// (AnthropicCompatibleProvider passes through whatever the request
|
|
7
|
+
// says; runner sets temperature=0 once per loop).
|
|
8
|
+
//
|
|
9
|
+
// Errors here ARE fatal to a candidate — unlike preflight/postflight
|
|
10
|
+
// (best-effort), if the mutator itself can't produce a rewrite, the
|
|
11
|
+
// runner should skip this round, not promote anything.
|
|
12
|
+
/**
 * Render the prompt given to the reflection LM.
 *
 * Layout: four instruction lines, a blank line, "## Parent skill" with the
 * parent text, a blank line, "## Recent failing transcripts" with one
 * task/outcome pair per transcript, and — only when `currentEcosystem` is
 * present and not blank — a blank line plus "## Current ecosystem".
 *
 * @param input - `{ parent, taskTranscripts, currentEcosystem? }`.
 * @returns The prompt, lines joined with "\n".
 */
export function buildReflectionPrompt(input) {
    const sections = [];
    sections.push("You are a GEPA reflection mutator. Rewrite the parent skill so the", "listed failures would have been avoided. Keep the same overall", "structure and length budget. Reply with ONLY the rewritten skill", "body (no preamble, no fenced block).");
    sections.push("", "## Parent skill", input.parent);
    sections.push("", "## Recent failing transcripts");
    for (const { task, outcome } of input.taskTranscripts) {
        sections.push(`- task: ${task}`, ` outcome: ${outcome}`);
    }
    const ecosystem = input.currentEcosystem;
    // Skip the section for undefined, empty, or whitespace-only notes.
    if (ecosystem && ecosystem.trim().length > 0) {
        sections.push("", "## Current ecosystem", ecosystem);
    }
    return sections.join("\n");
}
|
|
33
|
+
// Matches a reply that is exactly one fenced block (optional alphabetic
// language tag); capture group 1 is the text between the fences.
const FENCE_RE = /^```[a-zA-Z]*\n([\s\S]*?)\n```\s*$/;
/**
 * Ask the reflection provider for a rewritten skill.
 *
 * The reply is trimmed; if it consists solely of one fenced block, the
 * fence is removed and the inner text is trimmed again. Errors from
 * `provider.execute` are not caught and reject the returned promise.
 *
 * @param provider - Object exposing `execute({ prompt })` that resolves to `{ text }`.
 * @param input - Forwarded to `buildReflectionPrompt`.
 * @returns The rewritten skill text.
 */
export async function reflectViaProvider(provider, input) {
    const { text } = await provider.execute({ prompt: buildReflectionPrompt(input) });
    const reply = text.trim();
    const fenced = FENCE_RE.exec(reply);
    if (fenced === null) {
        return reply;
    }
    return fenced[1].trim();
}
|
|
41
|
+
//# sourceMappingURL=reflection.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"reflection.js","sourceRoot":"","sources":["../../src/evolve/reflection.ts"],"names":[],"mappings":"AAAA,wDAAwD;AACxD,EAAE;AACF,6DAA6D;AAC7D,kEAAkE;AAClE,qEAAqE;AACrE,mEAAmE;AACnE,kDAAkD;AAClD,EAAE;AACF,qEAAqE;AACrE,oEAAoE;AACpE,uDAAuD;AAevD,MAAM,UAAU,qBAAqB,CAAC,KAAsB;IAC1D,MAAM,KAAK,GAAa;QACtB,oEAAoE;QACpE,gEAAgE;QAChE,kEAAkE;QAClE,sCAAsC;QACtC,EAAE;QACF,iBAAiB;QACjB,KAAK,CAAC,MAAM;QACZ,EAAE;QACF,+BAA+B;KAChC,CAAC;IACF,KAAK,MAAM,CAAC,IAAI,KAAK,CAAC,eAAe,EAAE,CAAC;QACtC,KAAK,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;QAChC,KAAK,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC,OAAO,EAAE,CAAC,CAAC;IACxC,CAAC;IACD,IAAI,KAAK,CAAC,gBAAgB,IAAI,KAAK,CAAC,gBAAgB,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACvE,KAAK,CAAC,IAAI,CAAC,EAAE,EAAE,sBAAsB,EAAE,KAAK,CAAC,gBAAgB,CAAC,CAAC;IACjE,CAAC;IACD,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC1B,CAAC;AAED,MAAM,QAAQ,GAAG,oCAAoC,CAAC;AAEtD,MAAM,CAAC,KAAK,UAAU,kBAAkB,CACtC,QAAmB,EACnB,KAAsB;IAEtB,MAAM,MAAM,GAAG,qBAAqB,CAAC,KAAK,CAAC,CAAC;IAC5C,MAAM,GAAG,GAAG,MAAM,QAAQ,CAAC,OAAO,CAAC,EAAE,MAAM,EAAE,CAAC,CAAC;IAC/C,MAAM,OAAO,GAAG,GAAG,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;IAChC,MAAM,CAAC,GAAG,OAAO,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC;IAClC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAE,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC;AACpC,CAAC"}
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
import { type OperatorMeta } from "./operators.js";
|
|
2
|
+
import { type EvolveBudgetConfig } from "./budget.js";
|
|
3
|
+
import { type PreflightSnapshot } from "./preflight.js";
|
|
4
|
+
import { type Verdict } from "./postflight.js";
|
|
5
|
+
import type { IProvider } from "../providers/IProvider.js";
|
|
6
|
+
import type { TaskTranscript } from "./reflection.js";
|
|
7
|
+
/**
 * A task handed to `deps.evaluate` / `deps.reReplicate`. The runner only
 * partitions the task list by position and does not read these fields itself.
 */
export interface EvolveTask {
    id: string;
    description: string;
}
|
|
11
|
+
/** Result of scoring one skill text against a set of tasks. */
export interface EvalResult {
    /** Raw score; the runner adds `lengthPenalty(...)` to it before comparing. */
    score: number;
    /** Per-task transcripts. NOTE(review): runner.js as shown never reads this field. */
    transcripts: ReadonlyArray<TaskTranscript>;
}
|
|
15
|
+
/** Collaborators and settings injected into `runEvolve`. */
export interface EvolveDeps {
    /** Provider used by the `reflect` callback handed to mutation operators. */
    reflectionProvider: IProvider;
    /** Provider used for the preflight snapshot and the default postflight check. */
    researchProvider: IProvider;
    /** Scores a skill text against the given tasks. */
    evaluate: (skill: string, tasks: ReadonlyArray<EvolveTask>) => Promise<EvalResult>;
    /**
     * Called with the current winner and the holdout task once the plateau
     * detector reports it should stop; the result is passed to `plateau.terminate`.
     */
    reReplicate: (skill: string, tasks: ReadonlyArray<EvolveTask>) => Promise<boolean>;
    /** Forwarded to the preflight prompt as the research domain. */
    skillDomain: string;
    /** Forwarded to the preflight prompt; "never" is used when omitted. */
    lastEvolveDate?: string;
    /** Override postflight provider call entirely (test hook). */
    postflightOverride?: (winner: string, snap: PreflightSnapshot) => Promise<Verdict>;
    /** Estimated USD cost per worker call. */
    costPerCallUsd?: number;
    /** Override operator selection strategy; default = round-robin pickOperator. */
    pickOperator?: (round: number) => OperatorMeta;
}
|
|
29
|
+
/** Arguments to `runEvolve`. */
export interface EvolveOptions {
    /** Starting skill text; also the result when postflight does not accept the winner. */
    parent: string;
    /** Task pool, re-split every round into train+val and one holdout task. */
    tasks: ReadonlyArray<EvolveTask>;
    deps: EvolveDeps;
    /** Maximum number of rounds; defaults to 30. */
    maxRounds?: number;
    /** Number of re-evaluations a candidate must survive before promotion; defaults to 3. */
    reRunSamples?: number;
    /** Budget configuration; defaults to `DEFAULT_EVOLVE_BUDGET`. */
    budget?: EvolveBudgetConfig;
    /** Forwarded as the second argument of `lengthPenalty`. */
    lengthLambda?: number;
}
|
|
38
|
+
/** One entry of `EvolveResult.history`, describing a completed round. */
export interface EvolveRoundLog {
    /** Zero-based round index. */
    round: number;
    /** `name` of the operator that produced the candidate. */
    operator: string;
    /** Parent's train+val score with `lengthPenalty(...)` added. */
    parentScore: number;
    /** Candidate's train+val score with `lengthPenalty(...)` added. */
    candidateScore: number;
    /** True when the candidate replaced the winner in this round. */
    promoted: boolean;
    /** Reason text from the comparison, extended with the re-run outcome when one ran. */
    reason: string;
}
|
|
46
|
+
/** Outcome of a `runEvolve` session. */
export interface EvolveResult {
    /** Final skill text; the original parent when postflight did not accept the evolved winner. */
    winner: string;
    /** Per-round log entries, in round order. */
    history: EvolveRoundLog[];
    /** Why the loop ended. */
    stoppedAt: "plateau" | "budget" | "maxRounds";
    /** Snapshot returned by the preflight step. */
    preflight?: PreflightSnapshot;
    /** Postflight verdict; absent when the budget ran out right after preflight. */
    postflight?: Verdict;
    /** Holds the postflight verdict when it was not accepted; otherwise empty. */
    postflightRejections: Verdict[];
    /** Copy of the budget guard's `stats` at return time. */
    budgetStats: {
        calls: number;
        usd: number;
    };
}
|
|
58
|
+
/**
 * Split `tasks` into one holdout task and the remaining train+val tasks.
 * The holdout position cycles through the list as `round` increases.
 * Throws when fewer than 2 tasks are supplied.
 */
export declare function rotateHoldout(tasks: ReadonlyArray<EvolveTask>, round: number): {
    holdout: EvolveTask;
    trainVal: EvolveTask[];
};
|
|
62
|
+
/**
 * Run the evolve loop: one preflight call, up to `maxRounds` mutation
 * rounds, then a postflight check on the winner.
 */
export declare function runEvolve(opts: EvolveOptions): Promise<EvolveResult>;
|
|
@@ -0,0 +1,202 @@
|
|
|
1
|
+
// Wave 3 Step 2 — evolve loop orchestrator.
|
|
2
|
+
//
|
|
3
|
+
// Wires the rest of src/evolve/* into a single `runEvolve` call:
|
|
4
|
+
// preflight → for each round:
|
|
5
|
+
// - pick task split (train+val / holdout rotation)
|
|
6
|
+
// - pick operator (round-robin)
|
|
7
|
+
// - mutate via operator (reflect_and_rewrite uses reflectionProvider)
|
|
8
|
+
// - evaluate on train+val
|
|
9
|
+
// - candidateBeatsParent? if yes, re-run N times; promote only if it survives
|
|
10
|
+
// - lengthPenalty applied to score
|
|
11
|
+
// - plateau detector observes holdout score
|
|
12
|
+
// → on plateau or budget or maxRounds, exit
|
|
13
|
+
// postflight on winner → if reject, revert to parent
|
|
14
|
+
//
|
|
15
|
+
// All worker calls accounted in EvolveBudgetGuard. Provider-side costs
|
|
16
|
+
// are estimated per-call (default $0.025 per eval call, matches plan).
|
|
17
|
+
import { pickOperator } from "./operators.js";
|
|
18
|
+
import { lengthPenalty, estimateTokens } from "./lengthPenalty.js";
|
|
19
|
+
import { candidateBeatsParent } from "./pareto.js";
|
|
20
|
+
import { PlateauDetector } from "./plateau.js";
|
|
21
|
+
import { EvolveBudgetGuard, EvolveBudgetExceededError, DEFAULT_EVOLVE_BUDGET, } from "./budget.js";
|
|
22
|
+
import { runPreflight } from "./preflight.js";
|
|
23
|
+
import { runPostflight } from "./postflight.js";
|
|
24
|
+
import { reflectViaProvider } from "./reflection.js";
|
|
25
|
+
/**
 * Split `tasks` into one holdout task and the remaining train+val tasks.
 *
 * The holdout index is `round` modulo `tasks.length`, normalised so that
 * negative rounds also map to a valid index (-1 selects the last task).
 *
 * @param tasks - Task pool; must contain at least 2 entries.
 * @param round - Integer round counter.
 * @returns `{ holdout, trainVal }`; `trainVal` is a new array, in input order.
 * @throws {Error} When fewer than 2 tasks are supplied.
 * @throws {RangeError} When `round` is not an integer (a fractional or NaN
 *   round would otherwise yield an `undefined` holdout).
 */
export function rotateHoldout(tasks, round) {
    const count = tasks.length;
    if (count < 2) {
        throw new Error("rotateHoldout: need at least 2 tasks");
    }
    if (!Number.isInteger(round)) {
        throw new RangeError(`rotateHoldout: round must be an integer, got ${String(round)}`);
    }
    // `%` keeps the sign of the dividend, so fold negatives back into range.
    const idx = ((round % count) + count) % count;
    const holdout = tasks[idx];
    const trainVal = tasks.filter((_, i) => i !== idx);
    return { holdout, trainVal };
}
|
|
34
|
+
// Estimated USD charged per worker call when deps.costPerCallUsd is not set.
const DEFAULT_COST_PER_CALL = 0.025;
/**
 * Run the evolve loop: preflight once, then up to `maxRounds` rounds of
 * mutate → evaluate → (re-run) → promote, then postflight on the winner.
 *
 * Each round:
 *   1. `rotateHoldout` splits `tasks` into train+val and one holdout task.
 *   2. The operator chosen by `deps.pickOperator` (or the default
 *      `pickOperator`) produces a candidate from the current winner.
 *   3. Winner and candidate are evaluated on train+val; `lengthPenalty(...)`
 *      is added to each score before `candidateBeatsParent` compares them.
 *   4. A candidate that beats the parent is re-evaluated `reRunSamples`
 *      times and promoted only if those scores also beat the parent.
 *   5. The round is appended to `history`; the (possibly new) winner is then
 *      scored on the holdout task and that score is fed to the plateau
 *      detector.
 *
 * Budget: `guard.spent(costPerCall)` is invoked after the preflight call,
 * after the operator call and after every `deps.evaluate` call. When it
 * throws `EvolveBudgetExceededError` the loop ends with
 * `stoppedAt = "budget"`. The `deps.reReplicate` and postflight calls are
 * not charged.
 *
 * NOTE(review): the `reflect` callback always passes an empty
 * `taskTranscripts` list, so the reflection prompt never contains
 * transcripts — confirm whether that is intended.
 * NOTE(review): a non-budget error thrown by the operator aborts the whole
 * run, whereas the header comment of reflection.js says the runner should
 * skip the round — confirm the intended behaviour.
 *
 * @param opts - See `EvolveOptions`; defaults: `maxRounds` 30,
 *   `reRunSamples` 3, `budget` `DEFAULT_EVOLVE_BUDGET`.
 * @returns The final winner (the original `parent` when postflight does not
 *   accept), the round history, the stop reason, the preflight snapshot, the
 *   postflight verdict and the budget stats. When the budget is exhausted by
 *   the preflight charge, the result is returned immediately without
 *   `postflight`.
 * @throws Any non-budget error raised by the operator, `deps.evaluate`,
 *   `deps.reReplicate`, the budget guard or postflight; also the
 *   `rotateHoldout` error when a round starts with fewer than 2 tasks.
 */
export async function runEvolve(opts) {
    const { parent, tasks, deps, maxRounds = 30, reRunSamples = 3, budget = DEFAULT_EVOLVE_BUDGET, lengthLambda, } = opts;
    const guard = new EvolveBudgetGuard(budget);
    const plateau = new PlateauDetector();
    const costPerCall = deps.costPerCallUsd ?? DEFAULT_COST_PER_CALL;
    // Charge one call against the guard. Returns false when the guard reports
    // the budget as exceeded; any other error is rethrown unchanged.
    const tryCharge = () => {
        try {
            guard.spent(costPerCall);
            return true;
        }
        catch (e) {
            if (e instanceof EvolveBudgetExceededError) {
                return false;
            }
            throw e;
        }
    };
    const preflight = await runPreflight(deps.researchProvider, {
        skillDomain: deps.skillDomain,
        lastEvolveDate: deps.lastEvolveDate,
    });
    if (!tryCharge()) {
        return {
            winner: parent,
            history: [],
            stoppedAt: "budget",
            preflight,
            postflightRejections: [],
            budgetStats: guard.stats,
        };
    }
    let winner = parent;
    const history = [];
    const rejections = [];
    let stoppedAt = "maxRounds";
    for (let round = 0; round < maxRounds; round++) {
        const split = rotateHoldout(tasks, round);
        const op = (deps.pickOperator ?? pickOperator)(round);
        const ctx = {
            currentEcosystem: preflight.text,
            reflect: (text) => reflectViaProvider(deps.reflectionProvider, {
                parent: text,
                taskTranscripts: [],
                currentEcosystem: preflight.text,
            }),
        };
        let candidate;
        try {
            candidate = await op.apply(winner, ctx);
        }
        catch (e) {
            // A budget error raised from inside the operator also ends the run.
            if (e instanceof EvolveBudgetExceededError) {
                stoppedAt = "budget";
                break;
            }
            throw e;
        }
        if (!tryCharge()) {
            stoppedAt = "budget";
            break;
        }
        const parentEval = await deps.evaluate(winner, split.trainVal);
        if (!tryCharge()) {
            stoppedAt = "budget";
            break;
        }
        const candEval = await deps.evaluate(candidate, split.trainVal);
        if (!tryCharge()) {
            stoppedAt = "budget";
            break;
        }
        const pAdj = parentEval.score + lengthPenalty(winner, lengthLambda);
        const cAdj = candEval.score + lengthPenalty(candidate, lengthLambda);
        const beats = candidateBeatsParent([pAdj], [cAdj], {
            parentTokens: estimateTokens(winner),
            candidateTokens: estimateTokens(candidate),
        });
        let promoted = false;
        let reason = beats.reason;
        if (beats.beats) {
            // N re-run survival
            const survives = [];
            let budgetBroke = false;
            for (let n = 0; n < reRunSamples; n++) {
                const r = await deps.evaluate(candidate, split.trainVal);
                survives.push(r.score + lengthPenalty(candidate, lengthLambda));
                if (!tryCharge()) {
                    stoppedAt = "budget";
                    budgetBroke = true;
                    break;
                }
            }
            if (budgetBroke) {
                break;
            }
            const surviveBeats = candidateBeatsParent([pAdj], survives, {
                parentTokens: estimateTokens(winner),
                candidateTokens: estimateTokens(candidate),
            });
            if (surviveBeats.beats) {
                winner = candidate;
                promoted = true;
                reason = `${beats.reason}; re-run ${surviveBeats.reason}`;
            }
            else {
                reason = `beat parent once but failed re-run (${surviveBeats.reason})`;
            }
        }
        // Log the round before the holdout evaluation: a promotion above has
        // already changed `winner`, so the entry must exist even if the budget
        // runs out on the holdout call below.
        history.push({
            round,
            operator: op.name,
            parentScore: pAdj,
            candidateScore: cAdj,
            promoted,
            reason,
        });
        // Holdout observation drives plateau
        const holdoutResult = await deps.evaluate(winner, [split.holdout]);
        if (!tryCharge()) {
            stoppedAt = "budget";
            break;
        }
        plateau.observe(holdoutResult.score + lengthPenalty(winner, lengthLambda));
        if (plateau.shouldStop()) {
            const survived = await deps.reReplicate(winner, [split.holdout]);
            if (plateau.terminate(survived)) {
                stoppedAt = "plateau";
                break;
            }
        }
    }
    const postflight = deps.postflightOverride
        ? await deps.postflightOverride(winner, preflight)
        : await runPostflight(deps.researchProvider, {
            ecosystem: preflight.text,
            candidate: winner,
        });
    let finalWinner = winner;
    if (!postflight.accept) {
        // Rejected by postflight: record the verdict and fall back to the input skill.
        rejections.push(postflight);
        finalWinner = parent;
    }
    return {
        winner: finalWinner,
        history,
        stoppedAt,
        preflight,
        postflight,
        postflightRejections: rejections,
        budgetStats: guard.stats,
    };
}
|
|
202
|
+
//# sourceMappingURL=runner.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"runner.js","sourceRoot":"","sources":["../../src/evolve/runner.ts"],"names":[],"mappings":"AAAA,4CAA4C;AAC5C,EAAE;AACF,iEAAiE;AACjE,gCAAgC;AAChC,mEAAmE;AACnE,gDAAgD;AAChD,sFAAsF;AACtF,0CAA0C;AAC1C,gFAAgF;AAChF,mDAAmD;AACnD,4DAA4D;AAC5D,0DAA0D;AAC1D,uDAAuD;AACvD,EAAE;AACF,uEAAuE;AACvE,uEAAuE;AAEvE,OAAO,EAAE,YAAY,EAA2C,MAAM,gBAAgB,CAAC;AACvF,OAAO,EAAE,aAAa,EAAE,cAAc,EAAE,MAAM,oBAAoB,CAAC;AACnE,OAAO,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AACnD,OAAO,EAAE,eAAe,EAAE,MAAM,cAAc,CAAC;AAC/C,OAAO,EACL,iBAAiB,EACjB,yBAAyB,EACzB,qBAAqB,GAEtB,MAAM,aAAa,CAAC;AACrB,OAAO,EAAE,YAAY,EAA0B,MAAM,gBAAgB,CAAC;AACtE,OAAO,EAAE,aAAa,EAAgB,MAAM,iBAAiB,CAAC;AAC9D,OAAO,EAAE,kBAAkB,EAAE,MAAM,iBAAiB,CAAC;AA0DrD,MAAM,UAAU,aAAa,CAC3B,KAAgC,EAChC,KAAa;IAEb,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACrB,MAAM,IAAI,KAAK,CAAC,sCAAsC,CAAC,CAAC;IAC1D,CAAC;IACD,MAAM,GAAG,GAAG,KAAK,GAAG,KAAK,CAAC,MAAM,CAAC;IACjC,MAAM,OAAO,GAAG,KAAK,CAAC,GAAG,CAAE,CAAC;IAC5B,MAAM,QAAQ,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC;IACnD,OAAO,EAAE,OAAO,EAAE,QAAQ,EAAE,CAAC;AAC/B,CAAC;AAED,MAAM,qBAAqB,GAAG,KAAK,CAAC;AAEpC,MAAM,CAAC,KAAK,UAAU,SAAS,CAAC,IAAmB;IACjD,MAAM,EACJ,MAAM,EACN,KAAK,EACL,IAAI,EACJ,SAAS,GAAG,EAAE,EACd,YAAY,GAAG,CAAC,EAChB,MAAM,GAAG,qBAAqB,EAC9B,YAAY,GACb,GAAG,IAAI,CAAC;IAET,MAAM,KAAK,GAAG,IAAI,iBAAiB,CAAC,MAAM,CAAC,CAAC;IAC5C,MAAM,OAAO,GAAG,IAAI,eAAe,EAAE,CAAC;IACtC,MAAM,WAAW,GAAG,IAAI,CAAC,cAAc,IAAI,qBAAqB,CAAC;IACjE,MAAM,MAAM,GAAG,GAAG,EAAE,CAAC,KAAK,CAAC,KAAK,CAAC,WAAW,CAAC,CAAC;IAE9C,MAAM,SAAS,GAAG,MAAM,YAAY,CAAC,IAAI,CAAC,gBAAgB,EAAE;QAC1D,WAAW,EAAE,IAAI,CAAC,WAAW;QAC7B,cAAc,EAAE,IAAI,CAAC,cAAc;KACpC,CAAC,CAAC;IACH,IAAI,CAAC;QACH,MAAM,EAAE,CAAC;IACX,CAAC;IAAC,OAAO,CAAC,EAAE,CAAC;QACX,IAAI,CAAC,YAAY,yBAAyB,EAAE,CAAC;YAC3C,OAAO;gBACL,MAAM,EAAE,MAAM;gBACd,OAAO,EAAE,EAAE;gBACX,SAAS,EAAE,QAAQ;gBACnB,SAAS;gBACT,oBAAoB,EAAE,EAAE;gBACxB,WAAW,EAAE,KAAK,CAAC,KAAK;aACzB,CAAC;QACJ,CAAC;QACD,MAAM,CAAC,CAAC;IACV,CAAC;IAED,IAAI,MAAM,GAAG,MAAM,CAAC;IACpB,MAAM,OAAO,GAA
qB,EAAE,CAAC;IACrC,MAAM,UAAU,GAAc,EAAE,CAAC;IACjC,IAAI,SAAS,GAA8B,WAAW,CAAC;IAEvD,KAAK,IAAI,KAAK,GAAG,CAAC,EAAE,KAAK,GAAG,SAAS,EAAE,KAAK,EAAE,EAAE,CAAC;QAC/C,MAAM,KAAK,GAAG,aAAa,CAAC,KAAK,EAAE,KAAK,CAAC,CAAC;QAC1C,MAAM,EAAE,GAAG,CAAC,IAAI,CAAC,YAAY,IAAI,YAAY,CAAC,CAAC,KAAK,CAAC,CAAC;QACtD,MAAM,GAAG,GAAoB;YAC3B,gBAAgB,EAAE,SAAS,CAAC,IAAI;YAChC,OAAO,EAAE,CAAC,IAAI,EAAE,EAAE,CAChB,kBAAkB,CAAC,IAAI,CAAC,kBAAkB,EAAE;gBAC1C,MAAM,EAAE,IAAI;gBACZ,eAAe,EAAE,EAAE;gBACnB,gBAAgB,EAAE,SAAS,CAAC,IAAI;aACjC,CAAC;SACL,CAAC;QAEF,IAAI,SAAiB,CAAC;QACtB,IAAI,CAAC;YACH,SAAS,GAAG,MAAM,EAAE,CAAC,KAAK,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;YACxC,MAAM,EAAE,CAAC;QACX,CAAC;QAAC,OAAO,CAAC,EAAE,CAAC;YACX,IAAI,CAAC,YAAY,yBAAyB,EAAE,CAAC;gBAC3C,SAAS,GAAG,QAAQ,CAAC;gBACrB,MAAM;YACR,CAAC;YACD,MAAM,CAAC,CAAC;QACV,CAAC;QAED,MAAM,UAAU,GAAG,MAAM,IAAI,CAAC,QAAQ,CAAC,MAAM,EAAE,KAAK,CAAC,QAAQ,CAAC,CAAC;QAC/D,IAAI,CAAC;YAAC,MAAM,EAAE,CAAC;QAAC,CAAC;QAAC,OAAO,CAAC,EAAE,CAAC;YAC3B,IAAI,CAAC,YAAY,yBAAyB,EAAE,CAAC;gBAAC,SAAS,GAAG,QAAQ,CAAC;gBAAC,MAAM;YAAC,CAAC;YAC5E,MAAM,CAAC,CAAC;QACV,CAAC;QACD,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,QAAQ,CAAC,SAAS,EAAE,KAAK,CAAC,QAAQ,CAAC,CAAC;QAChE,IAAI,CAAC;YAAC,MAAM,EAAE,CAAC;QAAC,CAAC;QAAC,OAAO,CAAC,EAAE,CAAC;YAC3B,IAAI,CAAC,YAAY,yBAAyB,EAAE,CAAC;gBAAC,SAAS,GAAG,QAAQ,CAAC;gBAAC,MAAM;YAAC,CAAC;YAC5E,MAAM,CAAC,CAAC;QACV,CAAC;QAED,MAAM,IAAI,GAAG,UAAU,CAAC,KAAK,GAAG,aAAa,CAAC,MAAM,EAAE,YAAY,CAAC,CAAC;QACpE,MAAM,IAAI,GAAG,QAAQ,CAAC,KAAK,GAAG,aAAa,CAAC,SAAS,EAAE,YAAY,CAAC,CAAC;QAErE,MAAM,KAAK,GAAG,oBAAoB,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,IAAI,CAAC,EAAE;YACjD,YAAY,EAAE,cAAc,CAAC,MAAM,CAAC;YACpC,eAAe,EAAE,cAAc,CAAC,SAAS,CAAC;SAC3C,CAAC,CAAC;QAEH,IAAI,QAAQ,GAAG,KAAK,CAAC;QACrB,IAAI,MAAM,GAAG,KAAK,CAAC,MAAM,CAAC;QAC1B,IAAI,KAAK,CAAC,KAAK,EAAE,CAAC;YAChB,oBAAoB;YACpB,MAAM,QAAQ,GAAa,EAAE,CAAC;YAC9B,IAAI,WAAW,GAAG,KAAK,CAAC;YACxB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,YAAY,EAAE,CAAC,EAAE,EAAE,CAAC;gBACtC,MAAM,CAAC,GAAG,MAAM,IAAI,CAAC,QAAQ,CAAC,SAAS,EAAE,KAAK,CAAC,QAAQ,CAAC,CAAC;gBACzD,QAAQ,CAAC,IAAI,CAAC,
CAAC,CAAC,KAAK,GAAG,aAAa,CAAC,SAAS,EAAE,YAAY,CAAC,CAAC,CAAC;gBAChE,IAAI,CAAC;oBAAC,MAAM,EAAE,CAAC;gBAAC,CAAC;gBAAC,OAAO,CAAC,EAAE,CAAC;oBAC3B,IAAI,CAAC,YAAY,yBAAyB,EAAE,CAAC;wBAAC,SAAS,GAAG,QAAQ,CAAC;wBAAC,WAAW,GAAG,IAAI,CAAC;wBAAC,MAAM;oBAAC,CAAC;oBAChG,MAAM,CAAC,CAAC;gBACV,CAAC;YACH,CAAC;YACD,IAAI,WAAW;gBAAE,MAAM;YACvB,MAAM,YAAY,GAAG,oBAAoB,CAAC,CAAC,IAAI,CAAC,EAAE,QAAQ,EAAE;gBAC1D,YAAY,EAAE,cAAc,CAAC,MAAM,CAAC;gBACpC,eAAe,EAAE,cAAc,CAAC,SAAS,CAAC;aAC3C,CAAC,CAAC;YACH,IAAI,YAAY,CAAC,KAAK,EAAE,CAAC;gBACvB,MAAM,GAAG,SAAS,CAAC;gBACnB,QAAQ,GAAG,IAAI,CAAC;gBAChB,MAAM,GAAG,GAAG,KAAK,CAAC,MAAM,YAAY,YAAY,CAAC,MAAM,EAAE,CAAC;YAC5D,CAAC;iBAAM,CAAC;gBACN,MAAM,GAAG,uCAAuC,YAAY,CAAC,MAAM,GAAG,CAAC;YACzE,CAAC;QACH,CAAC;QAED,qCAAqC;QACrC,MAAM,aAAa,GAAG,MAAM,IAAI,CAAC,QAAQ,CAAC,MAAM,EAAE,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC;QACnE,IAAI,CAAC;YAAC,MAAM,EAAE,CAAC;QAAC,CAAC;QAAC,OAAO,CAAC,EAAE,CAAC;YAC3B,IAAI,CAAC,YAAY,yBAAyB,EAAE,CAAC;gBAAC,SAAS,GAAG,QAAQ,CAAC;gBAAC,MAAM;YAAC,CAAC;YAC5E,MAAM,CAAC,CAAC;QACV,CAAC;QACD,OAAO,CAAC,OAAO,CAAC,aAAa,CAAC,KAAK,GAAG,aAAa,CAAC,MAAM,EAAE,YAAY,CAAC,CAAC,CAAC;QAE3E,OAAO,CAAC,IAAI,CAAC;YACX,KAAK;YACL,QAAQ,EAAE,EAAE,CAAC,IAAI;YACjB,WAAW,EAAE,IAAI;YACjB,cAAc,EAAE,IAAI;YACpB,QAAQ;YACR,MAAM;SACP,CAAC,CAAC;QAEH,IAAI,OAAO,CAAC,UAAU,EAAE,EAAE,CAAC;YACzB,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,WAAW,CAAC,MAAM,EAAE,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC;YACjE,IAAI,OAAO,CAAC,SAAS,CAAC,QAAQ,CAAC,EAAE,CAAC;gBAChC,SAAS,GAAG,SAAS,CAAC;gBACtB,MAAM;YACR,CAAC;QACH,CAAC;IACH,CAAC;IAED,MAAM,UAAU,GAAG,IAAI,CAAC,kBAAkB;QACxC,CAAC,CAAC,MAAM,IAAI,CAAC,kBAAkB,CAAC,MAAM,EAAE,SAAS,CAAC;QAClD,CAAC,CAAC,MAAM,aAAa,CAAC,IAAI,CAAC,gBAAgB,EAAE;YACzC,SAAS,EAAE,SAAS,CAAC,IAAI;YACzB,SAAS,EAAE,MAAM;SAClB,CAAC,CAAC;IAEP,IAAI,WAAW,GAAG,MAAM,CAAC;IACzB,IAAI,CAAC,UAAU,CAAC,MAAM,EAAE,CAAC;QACvB,UAAU,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;QAC5B,WAAW,GAAG,MAAM,CAAC;IACvB,CAAC;IAED,OAAO;QACL,MAAM,EAAE,WAAW;QACnB,OAAO;QACP,SAAS;QACT,SAAS;QACT,UAAU;QACV,oBAAoB,EAAE,UAAU;QAChC,WAAW,EAAE,KAAK,CAAC,KAAK;KACzB,CAAC;AACJ,CAA
C"}
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
/** One entry of the S2 deny list. */
export interface S2Pattern {
    /** Stable identifier used in the reject reason. */
    id: string;
    /** Pattern to test against the candidate text. */
    pattern: RegExp;
    /** Human-readable description of why this pattern is denied. */
    description: string;
}
|
|
9
|
+
/** Ordered deny list checked by `s2DenyPrefilter`; the first matching entry wins. */
export declare const S2_DENY_PATTERNS: ReadonlyArray<S2Pattern>;
|
|
10
|
+
/** Result of running a candidate through the S2 deny list. */
export interface S2Result {
    /** True when no deny pattern matched. */
    allowed: boolean;
    /** Stable id of the first matching pattern, if any. */
    matchedPatternId?: string;
    /** Human-readable reason — same shape as Verdict.reason from postflight. */
    reason?: string;
}
|
|
17
|
+
/**
 * Test a candidate skill string against the S2 deny list.
 * Returns `{ allowed: true }` if no pattern matches; otherwise the first
 * match's id and a formatted reason.
 *
 * The order of patterns in {@link S2_DENY_PATTERNS} is deterministic; the
 * function returns on the first match, so put more-specific patterns first
 * if precision matters.
 *
 * @param candidate - Skill text to check.
 * @returns The verdict; `matchedPatternId` and `reason` are set only on a match.
 */
export declare function s2DenyPrefilter(candidate: string): S2Result;
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
// ADR 0003 S2 — deterministic deny-pattern prefilter for self-evolution.
|
|
2
|
+
//
|
|
3
|
+
// Runs BEFORE the LLM postflight researcher (agy CLI). Any regex hit is an
|
|
4
|
+
// automatic reject — the LLM cannot be talked out of these via prompt
|
|
5
|
+
// injection because the LLM is never called. The four-layer stat-gate also
|
|
6
|
+
// continues to apply, but S2 short-circuits ahead of S3 layer 3.
|
|
7
|
+
//
|
|
8
|
+
// The deny list is intentionally narrow: only escalation primitives and
|
|
9
|
+
// boundary-bypass directives that have NO legitimate place in an
|
|
10
|
+
// orchestrator skill. Current SKILL.md contains zero matches, verified at
|
|
11
|
+
// the time S2 shipped (HEAD ~de3ad2b).
|
|
12
|
+
/**
 * Ordered S2 deny list. Each entry has a stable `id`, the `pattern` tested
 * against the candidate text, and a human-readable `description`.
 *
 * None of the patterns uses the `g` or `y` flag, so `RegExp.prototype.test`
 * keeps no state between calls.
 */
export const S2_DENY_PATTERNS = [
    {
        id: "rm-rf",
        pattern: /\brm\s+-[rf]+[rf]\s+/i,
        description: "rm -rf — destructive recursive remove",
    },
    {
        id: "sudo",
        // The argument may start with an option, a path, a variable or a quote
        // (`sudo -u root …`, `sudo /bin/sh`, `sudo ./x`, `sudo $CMD`), not only
        // with a word character.
        pattern: /\bsudo\s+[\w\-./~$"']/i,
        description: "sudo invocation — privilege escalation",
    },
    {
        id: "chmod-777",
        // Allow leading options (`-R`, `--recursive`) and the 4-digit octal
        // spelling `0777` before the mode.
        pattern: /\bchmod\s+(?:-\S+\s+)*0?777\b/,
        description: "chmod 777 — world-writable permissions",
    },
    {
        id: "curl-pipe-shell",
        pattern: /\bcurl\b[^\n]*\|\s*(?:sh|bash|zsh)\b/i,
        description: "curl | sh — remote code execution primitive",
    },
    {
        id: "wget-pipe-shell",
        pattern: /\bwget\b[^\n]*\|\s*(?:sh|bash|zsh)\b/i,
        description: "wget | sh — remote code execution primitive",
    },
    {
        id: "dangerously-skip-permissions",
        pattern: /--dangerously-skip-permissions/,
        description: "claude flag that disables permission gating",
    },
    {
        id: "boundary-bypass-env",
        pattern: /COMPOSER_DANGEROUSLY_BYPASS_PERMISSIONS\s*=\s*["']?(?:1|true)["']?/i,
        description: "boundary_guard.sh bypass env var assignment",
    },
    {
        id: "boundary-disable-directive",
        pattern: /\b(?:disable|bypass|ignore|skip|circumvent)\b[^\n]{0,60}\bboundary[_-]?guard\b/i,
        description: "natural-language directive to disable the boundary guard",
    },
];
|
|
54
|
+
/**
 * Run a candidate skill string through the S2 deny list.
 *
 * Patterns are tried in the order they appear in {@link S2_DENY_PATTERNS};
 * only the first hit is reported, so more-specific patterns should be
 * listed first when precision matters.
 *
 * @param candidate - Skill text to check.
 * @returns `{ allowed: true }` when nothing matches; otherwise
 *   `{ allowed: false, matchedPatternId, reason }` for the first match.
 */
export function s2DenyPrefilter(candidate) {
    const hit = S2_DENY_PATTERNS.find((entry) => entry.pattern.test(candidate));
    if (hit === undefined) {
        return { allowed: true };
    }
    const { id, description } = hit;
    return {
        allowed: false,
        matchedPatternId: id,
        reason: `S2 deny-pattern matched: ${id} (${description})`,
    };
}
|
|
75
|
+
//# sourceMappingURL=s2-deny.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"s2-deny.js","sourceRoot":"","sources":["../../src/evolve/s2-deny.ts"],"names":[],"mappings":"AAAA,yEAAyE;AACzE,EAAE;AACF,2EAA2E;AAC3E,sEAAsE;AACtE,2EAA2E;AAC3E,iEAAiE;AACjE,EAAE;AACF,wEAAwE;AACxE,iEAAiE;AACjE,0EAA0E;AAC1E,uCAAuC;AAWvC,MAAM,CAAC,MAAM,gBAAgB,GAA6B;IACxD;QACE,EAAE,EAAE,OAAO;QACX,OAAO,EAAE,uBAAuB;QAChC,WAAW,EAAE,uCAAuC;KACrD;IACD;QACE,EAAE,EAAE,MAAM;QACV,OAAO,EAAE,cAAc;QACvB,WAAW,EAAE,wCAAwC;KACtD;IACD;QACE,EAAE,EAAE,WAAW;QACf,OAAO,EAAE,iBAAiB;QAC1B,WAAW,EAAE,wCAAwC;KACtD;IACD;QACE,EAAE,EAAE,iBAAiB;QACrB,OAAO,EAAE,uCAAuC;QAChD,WAAW,EAAE,6CAA6C;KAC3D;IACD;QACE,EAAE,EAAE,iBAAiB;QACrB,OAAO,EAAE,uCAAuC;QAChD,WAAW,EAAE,6CAA6C;KAC3D;IACD;QACE,EAAE,EAAE,8BAA8B;QAClC,OAAO,EAAE,gCAAgC;QACzC,WAAW,EAAE,6CAA6C;KAC3D;IACD;QACE,EAAE,EAAE,qBAAqB;QACzB,OAAO,EAAE,qEAAqE;QAC9E,WAAW,EAAE,6CAA6C;KAC3D;IACD;QACE,EAAE,EAAE,4BAA4B;QAChC,OAAO,EAAE,iFAAiF;QAC1F,WAAW,EAAE,0DAA0D;KACxE;CACF,CAAC;AAUF;;;;;;;;GAQG;AACH,MAAM,UAAU,eAAe,CAAC,SAAiB;IAC/C,KAAK,MAAM,CAAC,IAAI,gBAAgB,EAAE,CAAC;QACjC,IAAI,CAAC,CAAC,OAAO,CAAC,IAAI,CAAC,SAAS,CAAC,EAAE,CAAC;YAC9B,OAAO;gBACL,OAAO,EAAE,KAAK;gBACd,gBAAgB,EAAE,CAAC,CAAC,EAAE;gBACtB,MAAM,EAAE,4BAA4B,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC,WAAW,GAAG;aAC9D,CAAC;QACJ,CAAC;IACH,CAAC;IACD,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,CAAC;AAC3B,CAAC"}
|
package/dist/index.d.ts
ADDED
package/dist/index.js
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
// Wave 1 F2.1 — runtime entry point. Loads composer.config.json, wires the
|
|
3
|
+
// registry, and serves the three composer_* MCP tools over stdio.
|
|
4
|
+
//
|
|
5
|
+
// Override config path via COMPOSER_CONFIG env var.
|
|
6
|
+
// Errors at startup → exit 1 (fail-fast, instead of crashing mid-request).
|
|
7
|
+
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
|
|
8
|
+
import { loadConfig } from "./config/loader.js";
|
|
9
|
+
import { applyEnvJson } from "./config/env.js";
|
|
10
|
+
import { ProviderRegistry } from "./registry.js";
|
|
11
|
+
import { createComposerServer } from "./server.js";
|
|
12
|
+
import { runInit } from "./cli/init.js";
|
|
13
|
+
// Config file path handed to loadConfig(): the COMPOSER_CONFIG environment
// variable when it is defined, otherwise "composer.config.json". `??` only
// falls back on null/undefined, so an empty-string value is used as-is.
const CONFIG_PATH = process.env["COMPOSER_CONFIG"] ?? "composer.config.json";
|
|
14
|
+
// Path handed to applyEnvJson(): the COMPOSER_ENV environment variable when
// it is defined, otherwise ".env.json".
const ENV_PATH = process.env["COMPOSER_ENV"] ?? ".env.json";
|
|
15
|
+
/**
 * Entry point. With `init` as the first CLI argument it runs the init
 * command and returns; otherwise it applies the env JSON file, loads the
 * config, builds the provider registry and server, and connects the server
 * to a stdio transport.
 */
async function main() {
    // The `init` subcommand is handled on its own; no server is started.
    if (process.argv[2] === "init") {
        runInit({ cwd: process.cwd() });
        return;
    }
    // Env file is applied before the config is loaded.
    applyEnvJson(ENV_PATH);
    const server = createComposerServer(new ProviderRegistry(loadConfig(CONFIG_PATH)));
    await server.connect(new StdioServerTransport());
    // On a stdio transport stdout belongs to the MCP protocol, so the
    // status line is written to stderr instead.
    process.stderr.write(`composer MCP server connected (stdio) — config: ${CONFIG_PATH}\n`);
}
|
|
31
|
+
// Fail fast: any rejection from main() is reported on stderr and the process
// exits with status 1.
main().catch((failure) => {
    let detail;
    if (failure instanceof Error) {
        detail = failure.message;
    }
    else {
        detail = String(failure);
    }
    process.stderr.write(`composer MCP server startup failed: ${detail}\n`);
    process.exit(1);
});
|
|
36
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";AACA,2EAA2E;AAC3E,kEAAkE;AAClE,EAAE;AACF,oDAAoD;AACpD,2EAA2E;AAE3E,OAAO,EAAE,oBAAoB,EAAE,MAAM,2CAA2C,CAAC;AACjF,OAAO,EAAE,UAAU,EAAE,MAAM,oBAAoB,CAAC;AAChD,OAAO,EAAE,YAAY,EAAE,MAAM,iBAAiB,CAAC;AAC/C,OAAO,EAAE,gBAAgB,EAAE,MAAM,eAAe,CAAC;AACjD,OAAO,EAAE,oBAAoB,EAAE,MAAM,aAAa,CAAC;AACnD,OAAO,EAAE,OAAO,EAAE,MAAM,eAAe,CAAC;AAExC,MAAM,WAAW,GAAG,OAAO,CAAC,GAAG,CAAC,iBAAiB,CAAC,IAAI,sBAAsB,CAAC;AAC7E,MAAM,QAAQ,GAAG,OAAO,CAAC,GAAG,CAAC,cAAc,CAAC,IAAI,WAAW,CAAC;AAE5D,KAAK,UAAU,IAAI;IACjB,MAAM,UAAU,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IACnC,IAAI,UAAU,KAAK,MAAM,EAAE,CAAC;QAC1B,OAAO,CAAC,EAAE,GAAG,EAAE,OAAO,CAAC,GAAG,EAAE,EAAE,CAAC,CAAC;QAChC,OAAO;IACT,CAAC;IAED,YAAY,CAAC,QAAQ,CAAC,CAAC;IACvB,MAAM,MAAM,GAAG,UAAU,CAAC,WAAW,CAAC,CAAC;IACvC,MAAM,QAAQ,GAAG,IAAI,gBAAgB,CAAC,MAAM,CAAC,CAAC;IAC9C,MAAM,MAAM,GAAG,oBAAoB,CAAC,QAAQ,CAAC,CAAC;IAC9C,MAAM,SAAS,GAAG,IAAI,oBAAoB,EAAE,CAAC;IAC7C,MAAM,MAAM,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC;IAChC,wEAAwE;IACxE,2BAA2B;IAC3B,OAAO,CAAC,MAAM,CAAC,KAAK,CAClB,mDAAmD,WAAW,IAAI,CACnE,CAAC;AACJ,CAAC;AAED,IAAI,EAAE,CAAC,KAAK,CAAC,CAAC,GAAY,EAAE,EAAE;IAC5B,MAAM,GAAG,GAAG,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;IAC7D,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,uCAAuC,GAAG,IAAI,CAAC,CAAC;IACrE,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAClB,CAAC,CAAC,CAAC"}
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
import type { IProvider, IProviderExecuteInput, IProviderExecuteOutput, ProviderId } from "./IProvider.js";
|
|
2
|
+
/**
 * Minimal shape we need from the Anthropic client — eases DI in tests.
 *
 * Only `messages.create` is required: it accepts an
 * {@link AnthropicCreateParams} request and resolves to an
 * {@link AnthropicCreateResult}.
 */
export interface AnthropicLike {
    messages: { create: (params: AnthropicCreateParams) => Promise<AnthropicCreateResult> };
}
|
|
8
|
+
/** Request object accepted by {@link AnthropicLike}'s `messages.create`. */
export interface AnthropicCreateParams {
    /** Model identifier string. */
    model: string;
    /** Token limit sent with the request. */
    max_tokens: number;
    /** Message turns; each turn's content is a read-only list of text blocks. */
    messages: readonly {
        role: "user" | "assistant";
        content: readonly {
            type: "text";
            text: string;
        }[];
    }[];
}
|
|
19
|
+
/** Resolved value of {@link AnthropicLike}'s `messages.create`. */
export interface AnthropicCreateResult {
    /** Returned content blocks; `text` is optional on any block. */
    content: readonly { type: string; text?: string }[];
    /** Token counts reported with the response. */
    usage: { input_tokens: number; output_tokens: number };
}
|
|
29
|
+
/** Constructor options for `AnthropicCompatibleProvider`. */
export interface AnthropicCompatibleProviderOptions {
    /** Endpoint base URL; forwarded to the client factory as `baseURL`. */
    baseUrl: string;
    /** API key forwarded to the client factory. */
    apiKey: string;
    /** Model name; also exposed as the provider's `modelLabel`. */
    model: string;
    /** `max_tokens` used when an execute call does not specify one. */
    defaultMaxTokens?: number;
    /** Override Anthropic SDK construction. Used by tests. */
    clientFactory?: (opts: { baseURL: string; apiKey: string }) => AnthropicLike;
}
|
|
40
|
+
/**
 * {@link IProvider} implementation backed by an {@link AnthropicLike} client.
 */
export declare class AnthropicCompatibleProvider implements IProvider {
    /** Provider identifier. */
    readonly id: ProviderId;
    /** Model name this provider was constructed with. */
    readonly modelLabel: string;
    private readonly client;
    private readonly defaultMaxTokens;
    /** Builds the provider from the given options. */
    constructor(opts: AnthropicCompatibleProviderOptions);
    /** Resolves to a boolean health indicator. */
    healthCheck(): Promise<boolean>;
    /** Runs one request and resolves to the provider output. */
    execute(input: IProviderExecuteInput): Promise<IProviderExecuteOutput>;
}
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
// Wave 1 F1.1 — Anthropic-SDK-shaped provider, used against GLM's
|
|
2
|
+
// Anthropic-compatible endpoint and any future compatible host.
|
|
3
|
+
// Real network calls happen here; tests inject a fake via clientFactory.
|
|
4
|
+
import Anthropic from "@anthropic-ai/sdk";
|
|
5
|
+
// Default client factory: builds a real Anthropic SDK client from the given
// baseURL and apiKey. Used when the constructor receives no `clientFactory`.
const DEFAULT_FACTORY = ({ baseURL, apiKey }) => new Anthropic({ baseURL, apiKey });
|
|
6
|
+
// Fallback for the provider's `defaultMaxTokens` when the constructor options
// do not supply one.
const DEFAULT_MAX_TOKENS = 4096;
|
|
7
|
+
/**
 * Provider that sends prompts through an Anthropic-SDK-shaped
 * `messages.create` client. The client is produced by `opts.clientFactory`
 * when supplied, otherwise by the module's default factory.
 */
export class AnthropicCompatibleProvider {
    id = "anthropic";
    modelLabel;
    client;
    defaultMaxTokens;
    /**
     * @param opts Options object with `baseUrl`, `apiKey`, `model`, and the
     *   optional `defaultMaxTokens` / `clientFactory`.
     * @throws {Error} When `baseUrl`, `apiKey`, or `model` is falsy (checked
     *   in that order).
     */
    constructor(opts) {
        for (const required of ["baseUrl", "apiKey", "model"]) {
            if (!opts[required]) {
                throw new Error(`AnthropicCompatibleProvider: ${required} required`);
            }
        }
        this.modelLabel = opts.model;
        this.defaultMaxTokens = opts.defaultMaxTokens ?? DEFAULT_MAX_TOKENS;
        const buildClient = opts.clientFactory ?? DEFAULT_FACTORY;
        this.client = buildClient({ baseURL: opts.baseUrl, apiKey: opts.apiKey });
    }
    /**
     * Always resolves to `true`: no request is made, so this only shows that
     * construction succeeded.
     * @returns {Promise<boolean>}
     */
    async healthCheck() {
        return true;
    }
    /**
     * Sends a single user message and returns the concatenated text reply
     * together with the reported token counts.
     *
     * @param input Object with `prompt`, optional `context`, and optional
     *   `maxTokens` (falls back to the provider's `defaultMaxTokens`).
     * @returns {Promise<{text: string, tokensIn: number, tokensOut: number}>}
     */
    async execute(input) {
        // A truthy context becomes a leading "Context:" text block; the
        // prompt is always the final block.
        const promptParts = input.context
            ? [{ type: "text", text: `Context:\n${input.context}` }]
            : [];
        promptParts.push({ type: "text", text: input.prompt });
        const reply = await this.client.messages.create({
            model: this.modelLabel,
            max_tokens: input.maxTokens ?? this.defaultMaxTokens,
            messages: [{ role: "user", content: promptParts }],
        });
        // Keep only blocks that are text and actually carry a string.
        const text = reply.content.reduce((joined, part) => (part.type === "text" && typeof part.text === "string"
            ? joined + part.text
            : joined), "");
        return {
            text,
            tokensIn: reply.usage.input_tokens,
            tokensOut: reply.usage.output_tokens,
        };
    }
}
|
|
50
|
+
//# sourceMappingURL=AnthropicCompatibleProvider.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"AnthropicCompatibleProvider.js","sourceRoot":"","sources":["../../src/providers/AnthropicCompatibleProvider.ts"],"names":[],"mappings":"AAAA,kEAAkE;AAClE,gEAAgE;AAChE,yEAAyE;AAEzE,OAAO,SAAS,MAAM,mBAAmB,CAAC;AAsC1C,MAAM,eAAe,GAAG,CAAC,EAAE,OAAO,EAAE,MAAM,EAAuC,EAAiB,EAAE,CAClG,IAAI,SAAS,CAAC,EAAE,OAAO,EAAE,MAAM,EAAE,CAA6B,CAAC;AAEjE,MAAM,kBAAkB,GAAG,IAAI,CAAC;AAEhC,MAAM,OAAO,2BAA2B;IAC7B,EAAE,GAAe,WAAW,CAAC;IAC7B,UAAU,CAAS;IAEX,MAAM,CAAgB;IACtB,gBAAgB,CAAS;IAE1C,YAAY,IAAwC;QAClD,IAAI,CAAC,IAAI,CAAC,OAAO;YAAE,MAAM,IAAI,KAAK,CAAC,+CAA+C,CAAC,CAAC;QACpF,IAAI,CAAC,IAAI,CAAC,MAAM;YAAE,MAAM,IAAI,KAAK,CAAC,8CAA8C,CAAC,CAAC;QAClF,IAAI,CAAC,IAAI,CAAC,KAAK;YAAE,MAAM,IAAI,KAAK,CAAC,6CAA6C,CAAC,CAAC;QAChF,IAAI,CAAC,UAAU,GAAG,IAAI,CAAC,KAAK,CAAC;QAC7B,IAAI,CAAC,gBAAgB,GAAG,IAAI,CAAC,gBAAgB,IAAI,kBAAkB,CAAC;QACpE,MAAM,OAAO,GAAG,IAAI,CAAC,aAAa,IAAI,eAAe,CAAC;QACtD,IAAI,CAAC,MAAM,GAAG,OAAO,CAAC,EAAE,OAAO,EAAE,IAAI,CAAC,OAAO,EAAE,MAAM,EAAE,IAAI,CAAC,MAAM,EAAE,CAAC,CAAC;IACxE,CAAC;IAED,KAAK,CAAC,WAAW;QACf,oEAAoE;QACpE,kDAAkD;QAClD,OAAO,IAAI,CAAC;IACd,CAAC;IAED,KAAK,CAAC,OAAO,CACX,KAA4B;QAE5B,MAAM,WAAW,GAA0C,EAAE,CAAC;QAC9D,IAAI,KAAK,CAAC,OAAO,EAAE,CAAC;YAClB,WAAW,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,aAAa,KAAK,CAAC,OAAO,EAAE,EAAE,CAAC,CAAC;QACzE,CAAC;QACD,WAAW,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,KAAK,CAAC,MAAM,EAAE,CAAC,CAAC;QAEvD,MAAM,GAAG,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC;YAC5C,KAAK,EAAE,IAAI,CAAC,UAAU;YACtB,UAAU,EAAE,KAAK,CAAC,SAAS,IAAI,IAAI,CAAC,gBAAgB;YACpD,QAAQ,EAAE,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,WAAW,EAAE,CAAC;SACnD,CAAC,CAAC;QAEH,MAAM,IAAI,GAAG,GAAG,CAAC,OAAO;aACrB,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,KAAK,MAAM,IAAI,OAAO,CAAC,CAAC,IAAI,KAAK,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;aAC3E,IAAI,CAAC,EAAE,CAAC,CAAC;QAEZ,OAAO;YACL,IAAI;YACJ,QAAQ,EAAE,GAAG,CAAC,KAAK,CAAC,YAAY;YAChC,SAAS,EAAE,GAAG,CAAC,KAAK,CAAC,aAAa;SACnC,CAAC;IACJ,CAAC;CACF"}
|