@poolzin/pool-bot 2026.4.29 → 2026.4.31
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/build-info.json +3 -3
- package/dist/cli/config-cli.d.ts.map +1 -1
- package/dist/cli/config-cli.js +56 -0
- package/dist/control-ui/assets/{index-1DWuMa4l.js → index-CAx-3zc2.js} +532 -545
- package/dist/control-ui/assets/index-CAx-3zc2.js.map +1 -0
- package/dist/control-ui/index.html +1 -1
- package/dist/infra/heartbeat-runner.d.ts.map +1 -1
- package/dist/infra/heartbeat-runner.js +10 -2
- package/dist/infra/skill-evolution.d.ts +57 -0
- package/dist/infra/skill-evolution.d.ts.map +1 -0
- package/dist/infra/skill-evolution.js +183 -0
- package/dist/process/command-queue.d.ts +7 -0
- package/dist/process/command-queue.d.ts.map +1 -1
- package/dist/process/command-queue.js +26 -1
- package/package.json +1 -1
- package/dist/control-ui/assets/index-1DWuMa4l.js.map +0 -1
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
<title>Poolbot Control</title>
|
|
7
7
|
<meta name="color-scheme" content="dark light" />
|
|
8
8
|
<link rel="icon" href="./favicon.ico" sizes="any" />
|
|
9
|
-
<script type="module" crossorigin src="./assets/index-
|
|
9
|
+
<script type="module" crossorigin src="./assets/index-CAx-3zc2.js"></script>
|
|
10
10
|
<link rel="stylesheet" crossorigin href="./assets/index-Cm16MN6l.css">
|
|
11
11
|
</head>
|
|
12
12
|
<body>
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"heartbeat-runner.d.ts","sourceRoot":"","sources":["../../src/infra/heartbeat-runner.ts"],"names":[],"mappings":"AAsBA,OAAO,KAAK,EAAE,oBAAoB,EAAE,MAAM,8BAA8B,CAAC;AAEzE,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AAYzD,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,mCAAmC,CAAC;AAK7E,OAAO,EAAkB,KAAK,UAAU,EAAE,MAAM,eAAe,CAAC;AAIhE,OAAO,EAEL,iBAAiB,EAElB,MAAM,8BAA8B,CAAC;AAItC,OAAO,EACL,KAAK,kBAAkB,EAIxB,MAAM,qBAAqB,CAAC;AAC7B,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,uBAAuB,CAAC;AAQ9D,MAAM,MAAM,aAAa,GAAG,gBAAgB,GAC1C,oBAAoB,GAAG;IACrB,OAAO,CAAC,EAAE,UAAU,CAAC;IACrB,YAAY,CAAC,EAAE,CAAC,IAAI,CAAC,EAAE,MAAM,KAAK,MAAM,CAAC;IACzC,KAAK,CAAC,EAAE,MAAM,MAAM,CAAC;CACtB,CAAC;AAKJ,wBAAgB,oBAAoB,CAAC,OAAO,EAAE,OAAO,QAEpD;AAED,KAAK,eAAe,GAAG,mBAAmB,CAAC,WAAW,CAAC,CAAC;AAMxD,MAAM,MAAM,gBAAgB,GAAG;IAC7B,OAAO,EAAE,OAAO,CAAC;IACjB,KAAK,EAAE,MAAM,CAAC;IACd,OAAO,EAAE,MAAM,GAAG,IAAI,CAAC;IACvB,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,WAAW,EAAE,MAAM,CAAC;CACrB,CAAC;AAWF,OAAO,EAAE,iBAAiB,EAAE,CAAC;AAU7B,MAAM,MAAM,eAAe,GAAG;IAC5B,IAAI,EAAE,MAAM,IAAI,CAAC;IACjB,YAAY,EAAE,CAAC,GAAG,EAAE,aAAa,KAAK,IAAI,CAAC;CAC5C,CAAC;AAOF,wBAAgB,0BAA0B,CAAC,GAAG,EAAE,aAAa,EAAE,OAAO,CAAC,EAAE,MAAM,GAAG,OAAO,CAUxF;AAcD,wBAAgB,+BAA+B,CAC7C,GAAG,EAAE,aAAa,EAClB,OAAO,CAAC,EAAE,MAAM,GACf,gBAAgB,CA2ClB;AAiBD,wBAAgB,0BAA0B,CACxC,GAAG,EAAE,aAAa,EAClB,aAAa,CAAC,EAAE,MAAM,EACtB,SAAS,CAAC,EAAE,eAAe,iBAwB5B;AAED,wBAAgB,sBAAsB,CAAC,GAAG,EAAE,aAAa,EAAE,SAAS,CAAC,EAAE,eAAe,UAErF;AA2TD,wBAAsB,gBAAgB,CAAC,IAAI,EAAE;IAC3C,GAAG,CAAC,EAAE,aAAa,CAAC;IACpB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,SAAS,CAAC,EAAE,eAAe,CAAC;IAC5B,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,IAAI,CAAC,EAAE,aAAa,CAAC;CACtB,GAAG,OAAO,CAAC,kBAAkB,CAAC,
|
|
1
|
+
{"version":3,"file":"heartbeat-runner.d.ts","sourceRoot":"","sources":["../../src/infra/heartbeat-runner.ts"],"names":[],"mappings":"AAsBA,OAAO,KAAK,EAAE,oBAAoB,EAAE,MAAM,8BAA8B,CAAC;AAEzE,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AAYzD,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,mCAAmC,CAAC;AAK7E,OAAO,EAAkB,KAAK,UAAU,EAAE,MAAM,eAAe,CAAC;AAIhE,OAAO,EAEL,iBAAiB,EAElB,MAAM,8BAA8B,CAAC;AAItC,OAAO,EACL,KAAK,kBAAkB,EAIxB,MAAM,qBAAqB,CAAC;AAC7B,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,uBAAuB,CAAC;AAQ9D,MAAM,MAAM,aAAa,GAAG,gBAAgB,GAC1C,oBAAoB,GAAG;IACrB,OAAO,CAAC,EAAE,UAAU,CAAC;IACrB,YAAY,CAAC,EAAE,CAAC,IAAI,CAAC,EAAE,MAAM,KAAK,MAAM,CAAC;IACzC,KAAK,CAAC,EAAE,MAAM,MAAM,CAAC;CACtB,CAAC;AAKJ,wBAAgB,oBAAoB,CAAC,OAAO,EAAE,OAAO,QAEpD;AAED,KAAK,eAAe,GAAG,mBAAmB,CAAC,WAAW,CAAC,CAAC;AAMxD,MAAM,MAAM,gBAAgB,GAAG;IAC7B,OAAO,EAAE,OAAO,CAAC;IACjB,KAAK,EAAE,MAAM,CAAC;IACd,OAAO,EAAE,MAAM,GAAG,IAAI,CAAC;IACvB,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,WAAW,EAAE,MAAM,CAAC;CACrB,CAAC;AAWF,OAAO,EAAE,iBAAiB,EAAE,CAAC;AAU7B,MAAM,MAAM,eAAe,GAAG;IAC5B,IAAI,EAAE,MAAM,IAAI,CAAC;IACjB,YAAY,EAAE,CAAC,GAAG,EAAE,aAAa,KAAK,IAAI,CAAC;CAC5C,CAAC;AAOF,wBAAgB,0BAA0B,CAAC,GAAG,EAAE,aAAa,EAAE,OAAO,CAAC,EAAE,MAAM,GAAG,OAAO,CAUxF;AAcD,wBAAgB,+BAA+B,CAC7C,GAAG,EAAE,aAAa,EAClB,OAAO,CAAC,EAAE,MAAM,GACf,gBAAgB,CA2ClB;AAiBD,wBAAgB,0BAA0B,CACxC,GAAG,EAAE,aAAa,EAClB,aAAa,CAAC,EAAE,MAAM,EACtB,SAAS,CAAC,EAAE,eAAe,iBAwB5B;AAED,wBAAgB,sBAAsB,CAAC,GAAG,EAAE,aAAa,EAAE,SAAS,CAAC,EAAE,eAAe,UAErF;AA2TD,wBAAsB,gBAAgB,CAAC,IAAI,EAAE;IAC3C,GAAG,CAAC,EAAE,aAAa,CAAC;IACpB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,SAAS,CAAC,EAAE,eAAe,CAAC;IAC5B,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,IAAI,CAAC,EAAE,aAAa,CAAC;CACtB,GAAG,OAAO,CAAC,kBAAkB,CAAC,CAuY9B;AAED,wBAAgB,oBAAoB,CAAC,IAAI,EAAE;IACzC,GAAG,CAAC,EAAE,aAAa,CAAC;IACpB,OAAO,CAAC,EAAE,UAAU,CAAC;IACrB,WAAW,CAAC,EAAE,WAAW,CAAC;IAC1B,OAAO,CAAC,EAAE,OAAO,gBAAgB,CAAC;CACnC,GAAG,eAAe,CAqOlB"}
|
|
@@ -13,7 +13,7 @@ import { parseDurationMs } from "../cli/parse-duration.js";
|
|
|
13
13
|
import { loadConfig } from "../config/config.js";
|
|
14
14
|
import { canonicalizeMainSessionAlias, loadSessionStore, resolveAgentIdFromSessionKey, resolveAgentMainSessionKey, resolveSessionFilePath, resolveStorePath, saveSessionStore, updateSessionStore, } from "../config/sessions.js";
|
|
15
15
|
import { createSubsystemLogger } from "../logging/subsystem.js";
|
|
16
|
-
import { getQueueSize } from "../process/command-queue.js";
|
|
16
|
+
import { getQueueSize, pruneStuckTasks } from "../process/command-queue.js";
|
|
17
17
|
import { normalizeAgentId, toAgentStoreSessionKey } from "../routing/session-key.js";
|
|
18
18
|
import { defaultRuntime } from "../runtime.js";
|
|
19
19
|
import { escapeRegExp } from "../utils.js";
|
|
@@ -392,7 +392,15 @@ export async function runHeartbeatOnce(opts) {
|
|
|
392
392
|
}
|
|
393
393
|
const queueSize = (opts.deps?.getQueueSize ?? getQueueSize)("main" /* CommandLane.Main */);
|
|
394
394
|
if (queueSize > 0) {
|
|
395
|
-
|
|
395
|
+
// Prune stuck tasks (active > 15 min) that may permanently block the lane
|
|
396
|
+
const pruned = pruneStuckTasks();
|
|
397
|
+
if (pruned > 0) {
|
|
398
|
+
log.info(`heartbeat: pruned ${pruned} stuck task(s) from command lane`);
|
|
399
|
+
}
|
|
400
|
+
const queueSizeAfter = (opts.deps?.getQueueSize ?? getQueueSize)("main" /* CommandLane.Main */);
|
|
401
|
+
if (queueSizeAfter > 0) {
|
|
402
|
+
return { status: "skipped", reason: "requests-in-flight" };
|
|
403
|
+
}
|
|
396
404
|
}
|
|
397
405
|
// Preflight centralizes trigger classification, event inspection, and HEARTBEAT.md gating.
|
|
398
406
|
const preflight = await resolveHeartbeatPreflight({
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Self-evolving skill optimizer for PoolBot.
|
|
3
|
+
*
|
|
4
|
+
* Inspired by Hermes Agent Self-Evolution (DSPy + GEPA).
|
|
5
|
+
* Uses the agent's own LLM to propose and evaluate skill improvements.
|
|
6
|
+
*
|
|
7
|
+
* Flow:
|
|
8
|
+
* 1. Load a skill file (SKILL.md or similar)
|
|
9
|
+
* 2. Generate improvement candidates via LLM
|
|
10
|
+
* 3. Evaluate candidates via LLM judge
|
|
11
|
+
* 4. Save the best candidate if it beats the baseline
|
|
12
|
+
*/
|
|
13
|
+
export type SkillArtifact = {
|
|
14
|
+
path: string;
|
|
15
|
+
name: string;
|
|
16
|
+
content: string;
|
|
17
|
+
size: number;
|
|
18
|
+
};
|
|
19
|
+
export type EvalTask = {
|
|
20
|
+
input: string;
|
|
21
|
+
expectedBehavior: string;
|
|
22
|
+
};
|
|
23
|
+
export type EvalResult = {
|
|
24
|
+
correctness: number;
|
|
25
|
+
procedureFollowing: number;
|
|
26
|
+
conciseness: number;
|
|
27
|
+
lengthPenalty: number;
|
|
28
|
+
feedback: string;
|
|
29
|
+
};
|
|
30
|
+
export type CandidateScore = {
|
|
31
|
+
variant: string;
|
|
32
|
+
score: number;
|
|
33
|
+
eval: EvalResult;
|
|
34
|
+
};
|
|
35
|
+
export type EvolutionConfig = {
|
|
36
|
+
/** Max iterations (default: 5) */
|
|
37
|
+
iterations?: number;
|
|
38
|
+
/** Max skill size in chars (default: 15000) */
|
|
39
|
+
maxSize?: number;
|
|
40
|
+
/** Dry run — don't write changes (default: false) */
|
|
41
|
+
dryRun?: boolean;
|
|
42
|
+
};
|
|
43
|
+
export type LLMCallFn = (prompt: string, systemPrompt: string) => Promise<string>;
|
|
44
|
+
export declare function loadSkill(path: string): SkillArtifact;
|
|
45
|
+
export declare function findSkill(skillName: string, skillsDir: string): string | null;
|
|
46
|
+
export declare function generateCandidates(skill: SkillArtifact, count: number, llmCall: LLMCallFn): Promise<string[]>;
|
|
47
|
+
export declare function evaluateCandidate(original: SkillArtifact, candidate: string, llmCall: LLMCallFn): Promise<EvalResult>;
|
|
48
|
+
export declare function evolveSkill(params: {
|
|
49
|
+
skillPath: string;
|
|
50
|
+
llmCall: LLMCallFn;
|
|
51
|
+
config?: EvolutionConfig;
|
|
52
|
+
}): Promise<{
|
|
53
|
+
original: SkillArtifact;
|
|
54
|
+
bestCandidate: CandidateScore | null;
|
|
55
|
+
improved: boolean;
|
|
56
|
+
}>;
|
|
57
|
+
//# sourceMappingURL=skill-evolution.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"skill-evolution.d.ts","sourceRoot":"","sources":["../../src/infra/skill-evolution.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG;AAWH,MAAM,MAAM,aAAa,GAAG;IAC1B,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,EAAE,MAAM,CAAC;IAChB,IAAI,EAAE,MAAM,CAAC;CACd,CAAC;AAEF,MAAM,MAAM,QAAQ,GAAG;IACrB,KAAK,EAAE,MAAM,CAAC;IACd,gBAAgB,EAAE,MAAM,CAAC;CAC1B,CAAC;AAEF,MAAM,MAAM,UAAU,GAAG;IACvB,WAAW,EAAE,MAAM,CAAC;IACpB,kBAAkB,EAAE,MAAM,CAAC;IAC3B,WAAW,EAAE,MAAM,CAAC;IACpB,aAAa,EAAE,MAAM,CAAC;IACtB,QAAQ,EAAE,MAAM,CAAC;CAClB,CAAC;AAEF,MAAM,MAAM,cAAc,GAAG;IAC3B,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,MAAM,CAAC;IACd,IAAI,EAAE,UAAU,CAAC;CAClB,CAAC;AAEF,MAAM,MAAM,eAAe,GAAG;IAC5B,kCAAkC;IAClC,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,+CAA+C;IAC/C,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,qDAAqD;IACrD,MAAM,CAAC,EAAE,OAAO,CAAC;CAClB,CAAC;AAIF,MAAM,MAAM,SAAS,GAAG,CACtB,MAAM,EAAE,MAAM,EACd,YAAY,EAAE,MAAM,KACjB,OAAO,CAAC,MAAM,CAAC,CAAC;AAIrB,wBAAgB,SAAS,CAAC,IAAI,EAAE,MAAM,GAAG,aAAa,CAOrD;AAED,wBAAgB,SAAS,CAAC,SAAS,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI,CAgB7E;AAoBD,wBAAsB,kBAAkB,CACtC,KAAK,EAAE,aAAa,EACpB,KAAK,EAAE,MAAM,EACb,OAAO,EAAE,SAAS,GACjB,OAAO,CAAC,MAAM,EAAE,CAAC,CAgBnB;AA6BD,wBAAsB,iBAAiB,CACrC,QAAQ,EAAE,aAAa,EACvB,SAAS,EAAE,MAAM,EACjB,OAAO,EAAE,SAAS,GACjB,OAAO,CAAC,UAAU,CAAC,CAgCrB;AAUD,wBAAsB,WAAW,CAAC,MAAM,EAAE;IACxC,SAAS,EAAE,MAAM,CAAC;IAClB,OAAO,EAAE,SAAS,CAAC;IACnB,MAAM,CAAC,EAAE,eAAe,CAAC;CAC1B,GAAG,OAAO,CAAC;IACV,QAAQ,EAAE,aAAa,CAAC;IACxB,aAAa,EAAE,cAAc,GAAG,IAAI,CAAC;IACrC,QAAQ,EAAE,OAAO,CAAC;CACnB,CAAC,CAiED"}
|
|
@@ -0,0 +1,183 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Self-evolving skill optimizer for PoolBot.
|
|
3
|
+
*
|
|
4
|
+
* Inspired by Hermes Agent Self-Evolution (DSPy + GEPA).
|
|
5
|
+
* Uses the agent's own LLM to propose and evaluate skill improvements.
|
|
6
|
+
*
|
|
7
|
+
* Flow:
|
|
8
|
+
* 1. Load a skill file (SKILL.md or similar)
|
|
9
|
+
* 2. Generate improvement candidates via LLM
|
|
10
|
+
* 3. Evaluate candidates via LLM judge
|
|
11
|
+
* 4. Save the best candidate if it beats the baseline
|
|
12
|
+
*/
|
|
13
|
+
import { existsSync, readFileSync, statSync, writeFileSync } from "node:fs";
import { basename, join } from "node:path";
import { createSubsystemLogger } from "../logging/subsystem.js";
|
|
16
|
+
const log = createSubsystemLogger("skill-evolution");
|
|
17
|
+
// ── Skill Loading ──────────────────────────────────────────────────────
|
|
18
|
+
/**
 * Read a skill file from disk and wrap it in a skill artifact.
 *
 * @param {string} path - Location of the skill file.
 * @returns {{ path: string, name: string, content: string, size: number }}
 *   `path` as given, `name` (the file name without a trailing `.md`, `.txt`
 *   or `.skill` extension), the UTF-8 `content`, and `size` (the length of
 *   `content` in UTF-16 code units).
 * @throws {Error} When nothing exists at `path`.
 */
export function loadSkill(path) {
    if (!existsSync(path)) {
        throw new Error(`Skill file not found: ${path}`);
    }
    const content = readFileSync(path, "utf-8");
    // basename() honours the platform's path separator; splitting on "/" alone
    // left the entire path in `name` for Windows-style paths.
    const stem = basename(path).replace(/\.(md|txt|skill)$/, "");
    // A file called just ".md" has an empty stem; fall back to a placeholder.
    const name = stem || "unknown";
    return { path, name, content, size: content.length };
}
|
|
26
|
+
/**
 * Locate the file that holds a named skill inside a skills directory.
 *
 * Lookup order:
 *   1. `<skillsDir>/<skillName>` with the extensions `.md`, `.txt`, `.skill`,
 *      then no extension.
 *   2. `<skillsDir>/<skillName>/SKILL` with the same extensions.
 *
 * Only regular files match. Previously the extensionless probe in step 1 used
 * a bare existence check, so a skill *directory* was returned as if it were
 * the skill file and step 2 was never reached.
 *
 * @param {string} skillName - Skill name (file stem or directory name).
 * @param {string} skillsDir - Directory to search.
 * @returns {string | null} Path of the first matching file, or `null` when the
 *   directory does not exist or nothing matches.
 */
export function findSkill(skillName, skillsDir) {
    if (!existsSync(skillsDir)) {
        return null;
    }
    const extensions = [".md", ".txt", ".skill", ""];
    // A path that cannot be stat'ed (missing, unreadable, ...) is treated as
    // "not a skill file" rather than aborting the whole lookup.
    const isFile = (candidate) => {
        try {
            return statSync(candidate).isFile();
        }
        catch {
            return false;
        }
    };
    // Try exact match with common extensions
    for (const ext of extensions) {
        const candidate = join(skillsDir, `${skillName}${ext}`);
        if (isFile(candidate)) {
            return candidate;
        }
    }
    // Try subdirectory
    const subdir = join(skillsDir, skillName);
    for (const ext of extensions) {
        const candidate = join(subdir, `SKILL${ext}`);
        if (isFile(candidate)) {
            return candidate;
        }
    }
    return null;
}
|
|
46
|
+
// ── Candidate Generation ───────────────────────────────────────────────
|
|
47
|
+
// Prompt template for variant generation. `{currentSkill}` and `{count}` are
// placeholders filled in by generateCandidates().
const GENERATE_VARIANTS_PROMPT = `You are an expert prompt engineer optimizing agent skill instructions.

Current skill:
\`\`\`
{currentSkill}
\`\`\`

Generate {count} improved variants. Each variant should:
1. Be clearer and more actionable
2. Use specific, measurable language
3. Include edge case handling
4. Be concise (no unnecessary verbosity)
5. Maintain the original skill's purpose

Return ONLY the variants, numbered 1 to {count}, each wrapped in \`\`\` markers.`;
/**
 * Ask the LLM for rewritten variants of a skill.
 *
 * @param {{ content: string }} skill - Skill artifact; only `content` is read.
 * @param {number} count - Number of variants requested in the prompt. The
 *   number actually returned depends on the LLM response.
 * @param {(prompt: string, systemPrompt: string) => Promise<string>} llmCall
 *   Function that sends the prompts to the model and resolves to its reply.
 * @returns {Promise<string[]>} Trimmed, non-empty contents of every fenced
 *   code block found in the reply (empty when the reply has none).
 */
export async function generateCandidates(skill, count, llmCall) {
    // Fill both placeholders in one pass with a function replacer:
    //  - a string replacement would interpret `$&`, `$$`, `$1`... sequences
    //    that occur in the skill text (common in shell-oriented skills);
    //  - a second pass for `{count}` would also rewrite a literal `{count}`
    //    inside the skill text that was just inserted.
    const prompt = GENERATE_VARIANTS_PROMPT.replace(/\{(currentSkill|count)\}/g, (_match, key) => (key === "count" ? String(count) : skill.content));
    const systemPrompt = "You are an expert prompt engineer. Generate improved skill variants. " +
        "Return ONLY the numbered variants in code blocks. No explanations.";
    const response = await llmCall(prompt, systemPrompt);
    if (typeof response !== "string") {
        // Nothing to extract from a non-text reply.
        return [];
    }
    // Extract code blocks
    const blocks = response.match(/```[\s\S]*?```/g) ?? [];
    return blocks
        // Drop the opening fence (with optional language tag) and closing fence.
        .map((b) => b.replace(/^```[a-z]*\n?/i, "").replace(/```$/, "").trim())
        .filter((b) => b.length > 0);
}
|
|
75
|
+
// ── Evaluation ─────────────────────────────────────────────────────────
|
|
76
|
+
// Prompt template for the LLM judge. `{originalSkill}` and `{candidateSkill}`
// are placeholders filled in by evaluateCandidate().
const EVALUATE_PROMPT = `You are an expert evaluator of agent skill instructions.

Original skill:
\`\`\`
{originalSkill}
\`\`\`

Candidate variant:
\`\`\`
{candidateSkill}
\`\`\`

Evaluate the candidate on these dimensions (0.0 to 1.0 each):
1. **correctness**: Does it cover the same ground as the original? Would it produce correct agent behavior?
2. **clarity**: Is it clearer and less ambiguous than the original?
3. **conciseness**: Is it appropriately concise without losing important details?

Also provide a length penalty (0.0 to 1.0, where 0 = no penalty):
- If the candidate is significantly longer than the original without proportional improvement, apply a penalty.

Return your evaluation as JSON:
{"correctness": 0.85, "clarity": 0.9, "conciseness": 0.8, "lengthPenalty": 0.1, "feedback": "Specific feedback"}

Do NOT include any other text.`;
/**
 * Score a candidate skill text against the original using an LLM judge.
 *
 * The judge is asked for `correctness`, `clarity`, `conciseness`,
 * `lengthPenalty` and `feedback`; the judge's `clarity` is reported as
 * `procedureFollowing` in the result.
 *
 * @param {{ content: string }} original - Original skill; only `content` is read.
 * @param {string} candidate - Candidate skill text.
 * @param {(prompt: string, systemPrompt: string) => Promise<string>} llmCall
 *   Function that sends the prompts to the model and resolves to its reply.
 * @returns {Promise<{ correctness: number, procedureFollowing: number,
 *   conciseness: number, lengthPenalty: number, feedback: string }>}
 *   Scores clamped to [0, 1]. When the reply contains no parseable JSON object
 *   the fixed fallback (0.5 / 0.5 / 0.5, penalty 0.2) is returned.
 */
export async function evaluateCandidate(original, candidate, llmCall) {
    // Single pass with a function replacer: `$`-sequences in either text stay
    // literal, and a `{candidateSkill}` that happens to appear inside the
    // original skill text is not substituted a second time.
    const prompt = EVALUATE_PROMPT.replace(/\{(originalSkill|candidateSkill)\}/g, (_match, key) => (key === "originalSkill" ? original.content : candidate));
    const systemPrompt = "You are an expert evaluator. Score the candidate skill variant. " +
        "Return ONLY a JSON object with correctness, clarity, conciseness, lengthPenalty, and feedback.";
    const response = await llmCall(prompt, systemPrompt);
    // Extract JSON
    const jsonMatch = typeof response === "string" ? response.match(/\{[\s\S]*\}/) : null;
    let parsed = null;
    if (jsonMatch) {
        try {
            parsed = JSON.parse(jsonMatch[0]);
        }
        catch {
            // Brace-delimited text that is not valid JSON is handled exactly
            // like a reply without JSON (warned about below).
            parsed = null;
        }
    }
    if (parsed === null || typeof parsed !== "object") {
        log.warn("LLM judge did not return valid JSON, using fallback scores");
        return {
            correctness: 0.5,
            procedureFollowing: 0.5,
            conciseness: 0.5,
            lengthPenalty: 0.2,
            feedback: "LLM judge returned non-JSON response",
        };
    }
    // Accept only finite numbers and keep them inside the 0..1 range the prompt
    // asks for, so one out-of-range answer cannot dominate the fitness score.
    const unitScore = (value, fallback) => Number.isFinite(value) ? Math.min(1, Math.max(0, value)) : fallback;
    return {
        correctness: unitScore(parsed.correctness, 0.5),
        procedureFollowing: unitScore(parsed.clarity, 0.5),
        conciseness: unitScore(parsed.conciseness, 0.5),
        lengthPenalty: unitScore(parsed.lengthPenalty, 0.0),
        feedback: typeof parsed.feedback === "string" ? parsed.feedback : "No feedback provided",
    };
}
|
|
128
|
+
/**
 * Collapse a judge evaluation into a single fitness value.
 *
 * Weighted sum (50% correctness, 30% procedureFollowing, 20% conciseness)
 * minus the length penalty, floored at zero.
 *
 * @param {{ correctness: number, procedureFollowing: number,
 *   conciseness: number, lengthPenalty: number }} r - Evaluation to score.
 * @returns {number} Fitness value, never below 0.
 */
function computeFitness(r) {
    const { correctness, procedureFollowing, conciseness, lengthPenalty } = r;
    const weighted = 0.5 * correctness + 0.3 * procedureFollowing + 0.2 * conciseness;
    const penalized = weighted - lengthPenalty;
    return Math.max(0, penalized);
}
|
|
132
|
+
// ── Main Evolution Loop ────────────────────────────────────────────────
|
|
133
|
+
/**
 * Run the skill evolution loop: score the current skill, repeatedly ask the
 * LLM for variants, score each one, and overwrite the skill file with the
 * best variant when it beats the baseline.
 *
 * @param {object} params
 * @param {string} params.skillPath - Skill file to evolve (and overwrite).
 * @param {(prompt: string, systemPrompt: string) => Promise<string>} params.llmCall
 *   Function used for both generation and judging.
 * @param {{ iterations?: number, maxSize?: number, dryRun?: boolean }} [params.config]
 *   `iterations` (default 5) generation rounds of 3 requested variants each,
 *   `maxSize` (default 15000) maximum candidate length in characters,
 *   `dryRun` (default false) to skip writing the file.
 * @returns {Promise<{ original: object, bestCandidate: object | null, improved: boolean }>}
 *   The loaded skill, the best scored candidate (`null` if none was scored),
 *   and whether that candidate scored strictly above the baseline.
 * @throws {Error} When the skill file does not exist (from loadSkill); errors
 *   raised by `llmCall` are not caught here.
 */
export async function evolveSkill(params) {
    const { skillPath, llmCall, config = {} } = params;
    const iterations = config.iterations ?? 5;
    const maxSize = config.maxSize ?? 15_000;
    const dryRun = config.dryRun ?? false;
    // 1. Load skill
    const skill = loadSkill(skillPath);
    log.info(`Evolving skill: ${skill.name} (${skill.size} chars, ${iterations} iterations)`);
    if (skill.size > maxSize) {
        // The skill is sent to the LLM in full; the limit only filters
        // candidates. The previous message claimed a truncation that never
        // happened.
        log.warn(`Skill exceeds max size (${skill.size} > ${maxSize}); evolving it untruncated, only candidates within the limit are considered`);
    }
    // 2. Evaluate baseline
    const baselineEval = await evaluateCandidate(skill, skill.content, llmCall);
    const baselineScore = computeFitness(baselineEval);
    log.info(`Baseline fitness: ${baselineScore.toFixed(3)}`);
    // Candidates are extracted trimmed, so compare against the trimmed original.
    const originalText = skill.content.trim();
    let bestCandidate = null;
    // 3. Generate and evaluate candidates
    for (let i = 0; i < iterations; i++) {
        log.info(`Iteration ${i + 1}/${iterations}`);
        // Every round starts from the original skill, not from the best so far.
        const candidates = await generateCandidates(skill, 3, llmCall);
        log.info(`Generated ${candidates.length} candidates`);
        for (const candidate of candidates) {
            if (candidate.length > maxSize) {
                log.debug(`Candidate too long (${candidate.length} chars), skipping`);
                continue;
            }
            if (candidate === originalText) {
                // An unchanged text cannot be an improvement; scoring it would
                // spend an LLM call and let judge noise report a false "win".
                log.debug("Candidate identical to original, skipping");
                continue;
            }
            const evalResult = await evaluateCandidate(skill, candidate, llmCall);
            const score = computeFitness(evalResult);
            if (!bestCandidate || score > bestCandidate.score) {
                bestCandidate = { variant: candidate, score, eval: evalResult };
                log.info(`New best: fitness=${score.toFixed(3)} (${evalResult.feedback.slice(0, 80)}...)`);
            }
        }
    }
    // 4. Check if best candidate beats baseline
    const improved = bestCandidate !== null && bestCandidate.score > baselineScore;
    if (improved && bestCandidate) {
        log.info(`Improvement found! Baseline: ${baselineScore.toFixed(3)} → Best: ${bestCandidate.score.toFixed(3)} (+${((bestCandidate.score - baselineScore) * 100).toFixed(1)}%)`);
        if (!dryRun) {
            // NOTE(review): this replaces the skill file in place; no backup of
            // the previous content is written here.
            writeFileSync(skillPath, bestCandidate.variant, "utf-8");
            log.info(`Skill updated: ${skillPath}`);
        }
        else {
            log.info(`Dry run — would update: ${skillPath}`);
        }
    }
    else {
        log.info(`No improvement over baseline (${baselineScore.toFixed(3)})`);
    }
    return { original: skill, bestCandidate, improved };
}
|
|
@@ -38,6 +38,13 @@ export declare function resetAllLanes(): void;
|
|
|
38
38
|
* (excludes queued-but-not-started entries).
|
|
39
39
|
*/
|
|
40
40
|
export declare function getActiveTaskCount(): number;
|
|
41
|
+
/**
|
|
42
|
+
* Prune stuck active tasks that have been running longer than `maxAgeMs`.
|
|
43
|
+
* This prevents permanently blocked lanes when a task promise never settles
|
|
44
|
+
* (e.g. LLM timeout that leaves the task in active state without completion).
|
|
45
|
+
* Returns the number of pruned tasks.
|
|
46
|
+
*/
|
|
47
|
+
export declare function pruneStuckTasks(maxAgeMs?: number): number;
|
|
41
48
|
/**
|
|
42
49
|
* Wait for all currently active tasks across all lanes to finish.
|
|
43
50
|
* Polls at a short interval; resolves when no tasks are active or
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"command-queue.d.ts","sourceRoot":"","sources":["../../src/process/command-queue.ts"],"names":[],"mappings":"AAEA;;;;GAIG;AACH,qBAAa,uBAAwB,SAAQ,KAAK;gBACpC,IAAI,CAAC,EAAE,MAAM;CAI1B;
|
|
1
|
+
{"version":3,"file":"command-queue.d.ts","sourceRoot":"","sources":["../../src/process/command-queue.ts"],"names":[],"mappings":"AAEA;;;;GAIG;AACH,qBAAa,uBAAwB,SAAQ,KAAK;gBACpC,IAAI,CAAC,EAAE,MAAM;CAI1B;AA+GD,wBAAgB,yBAAyB,CAAC,IAAI,EAAE,MAAM,EAAE,aAAa,EAAE,MAAM,QAK5E;AAED,wBAAgB,oBAAoB,CAAC,CAAC,EACpC,IAAI,EAAE,MAAM,EACZ,IAAI,EAAE,MAAM,OAAO,CAAC,CAAC,CAAC,EACtB,IAAI,CAAC,EAAE;IACL,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,MAAM,CAAC,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,WAAW,EAAE,MAAM,KAAK,IAAI,CAAC;CACxD,GACA,OAAO,CAAC,CAAC,CAAC,CAgBZ;AAED,wBAAgB,cAAc,CAAC,CAAC,EAC9B,IAAI,EAAE,MAAM,OAAO,CAAC,CAAC,CAAC,EACtB,IAAI,CAAC,EAAE;IACL,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,MAAM,CAAC,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,WAAW,EAAE,MAAM,KAAK,IAAI,CAAC;CACxD,GACA,OAAO,CAAC,CAAC,CAAC,CAEZ;AAED,wBAAgB,YAAY,CAAC,IAAI,GAAE,MAAyB,UAO3D;AAED,wBAAgB,iBAAiB,WAMhC;AAED,wBAAgB,gBAAgB,CAAC,IAAI,GAAE,MAAyB,UAY/D;AAED;;;;;;;;;;;;;GAaG;AACH,wBAAgB,aAAa,IAAI,IAAI,CAcpC;AAED;;;GAGG;AACH,wBAAgB,kBAAkB,IAAI,MAAM,CAM3C;AAED;;;;;GAKG;AACH,wBAAgB,eAAe,CAAC,QAAQ,SAAc,GAAG,MAAM,CAiB9D;AAED;;;;;;;GAOG;AACH,wBAAgB,kBAAkB,CAAC,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC;IAAE,OAAO,EAAE,OAAO,CAAA;CAAE,CAAC,CA2CnF"}
|
|
@@ -21,6 +21,7 @@ function getLaneState(lane) {
|
|
|
21
21
|
lane,
|
|
22
22
|
queue: [],
|
|
23
23
|
activeTaskIds: new Set(),
|
|
24
|
+
activeTaskStartTimes: new Map(),
|
|
24
25
|
maxConcurrent: 1,
|
|
25
26
|
draining: false,
|
|
26
27
|
generation: 0,
|
|
@@ -29,10 +30,11 @@ function getLaneState(lane) {
|
|
|
29
30
|
return created;
|
|
30
31
|
}
|
|
31
32
|
function completeTask(state, taskId, taskGeneration) {
|
|
32
|
-
if (
|
|
33
|
+
if (state.generation !== taskGeneration) {
|
|
33
34
|
return false;
|
|
34
35
|
}
|
|
35
36
|
state.activeTaskIds.delete(taskId);
|
|
37
|
+
state.activeTaskStartTimes.delete(taskId);
|
|
36
38
|
return true;
|
|
37
39
|
}
|
|
38
40
|
function drainLane(lane) {
|
|
@@ -53,6 +55,7 @@ function drainLane(lane) {
|
|
|
53
55
|
const taskId = nextTaskId++;
|
|
54
56
|
const taskGeneration = state.generation;
|
|
55
57
|
state.activeTaskIds.add(taskId);
|
|
58
|
+
state.activeTaskStartTimes.set(taskId, Date.now());
|
|
56
59
|
void (async () => {
|
|
57
60
|
const startTime = Date.now();
|
|
58
61
|
try {
|
|
@@ -175,6 +178,28 @@ export function getActiveTaskCount() {
|
|
|
175
178
|
}
|
|
176
179
|
return total;
|
|
177
180
|
}
|
|
181
|
+
/**
 * Prune stuck active tasks that have been running longer than `maxAgeMs`.
 * This prevents permanently blocked lanes when a task promise never settles
 * (e.g. LLM timeout that leaves the task in active state without completion).
 *
 * Only the lane bookkeeping (active id and start time) is dropped.
 * NOTE(review): nothing here cancels the underlying task or restarts draining
 * of the lane's queue — confirm that callers do not rely on either.
 *
 * @param {number} [maxAgeMs] - Age threshold in milliseconds (default 15 minutes).
 * @returns {number} The number of pruned tasks.
 */
export function pruneStuckTasks(maxAgeMs = 15 * 60_000) {
    const now = Date.now();
    let pruned = 0;
    for (const state of lanes.values()) {
        // Deleting the entry currently being visited is safe while iterating a Set.
        for (const taskId of state.activeTaskIds) {
            const startedAt = state.activeTaskStartTimes.get(taskId);
            // Compare against undefined rather than truthiness: a start time of
            // 0 (e.g. under a mocked clock) is a valid timestamp.
            if (startedAt !== undefined && now - startedAt > maxAgeMs) {
                state.activeTaskIds.delete(taskId);
                state.activeTaskStartTimes.delete(taskId);
                pruned++;
                diag.warn(`lane stuck task pruned: lane=${state.lane} taskId=${taskId} ageMs=${now - startedAt}`);
            }
        }
    }
    return pruned;
}
|
|
178
203
|
/**
|
|
179
204
|
* Wait for all currently active tasks across all lanes to finish.
|
|
180
205
|
* Polls at a short interval; resolves when no tasks are active or
|