@caupulican/pi-adaptative 0.80.75 → 0.80.77
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +25 -0
- package/dist/core/agent-session.d.ts +38 -1
- package/dist/core/agent-session.d.ts.map +1 -1
- package/dist/core/agent-session.js +83 -3
- package/dist/core/agent-session.js.map +1 -1
- package/dist/core/cost-guard.d.ts +55 -0
- package/dist/core/cost-guard.d.ts.map +1 -0
- package/dist/core/cost-guard.js +50 -0
- package/dist/core/cost-guard.js.map +1 -0
- package/dist/core/learning/reflection-engine.d.ts +7 -0
- package/dist/core/learning/reflection-engine.d.ts.map +1 -1
- package/dist/core/learning/reflection-engine.js +22 -13
- package/dist/core/learning/reflection-engine.js.map +1 -1
- package/dist/core/learning/skill-curator.d.ts +71 -0
- package/dist/core/learning/skill-curator.d.ts.map +1 -0
- package/dist/core/learning/skill-curator.js +179 -0
- package/dist/core/learning/skill-curator.js.map +1 -0
- package/dist/core/settings-manager.d.ts +10 -0
- package/dist/core/settings-manager.d.ts.map +1 -1
- package/dist/core/settings-manager.js +7 -0
- package/dist/core/settings-manager.js.map +1 -1
- package/dist/core/slash-commands.d.ts.map +1 -1
- package/dist/core/slash-commands.js +1 -0
- package/dist/core/slash-commands.js.map +1 -1
- package/dist/modes/interactive/interactive-mode.d.ts +7 -0
- package/dist/modes/interactive/interactive-mode.d.ts.map +1 -1
- package/dist/modes/interactive/interactive-mode.js +38 -0
- package/dist/modes/interactive/interactive-mode.js.map +1 -1
- package/examples/extensions/custom-provider-anthropic/package-lock.json +2 -2
- package/examples/extensions/custom-provider-anthropic/package.json +1 -1
- package/examples/extensions/custom-provider-gitlab-duo/package.json +1 -1
- package/examples/extensions/sandbox/package-lock.json +2 -2
- package/examples/extensions/sandbox/package.json +1 -1
- package/examples/extensions/with-deps/package-lock.json +2 -2
- package/examples/extensions/with-deps/package.json +1 -1
- package/npm-shrinkwrap.json +12 -12
- package/package.json +4 -4
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Proactive per-turn token cost guard (Hermes-parity superiority item #34).
|
|
3
|
+
*
|
|
4
|
+
* Hermes (and pi today) only react to context growth by compressing AFTER it's expensive. This estimates
|
|
5
|
+
* the dollar cost of the NEXT LLM call BEFORE it is submitted, so the agent can warn the user or
|
|
6
|
+
* automatically reduce reasoning effort before a runaway billing spike — a proactive ceiling, not a
|
|
7
|
+
* reactive cleanup. Pure functions: no I/O, fully testable.
|
|
8
|
+
*/
|
|
9
|
+
/**
 * USD price of a single token for one model. These are per-token figures, as carried on
 * `Model.cost` — not per-million-token list prices.
 */
export interface ModelTokenCost {
	/** Price of one regular (uncached) input token. */
	input: number;
	/** Price of one output token. */
	output: number;
	/** Optional price of one input token served from the prompt cache. */
	cacheRead?: number;
	/** Optional per-token cache-write price. */
	cacheWrite?: number;
}
|
|
16
|
+
/**
 * Projects the USD cost of one turn before it is submitted.
 *
 * The entire current context counts as input and the model is assumed to spend its whole
 * output budget, so the figure is meant as a ceiling-style estimate rather than a forecast
 * of the typical bill.
 *
 * @param args.inputTokens Tokens in the current context, all billed as input.
 * @param args.maxOutputTokens Output budget; priced as if fully used.
 * @param args.cost Per-token prices for the model.
 * @param args.cachedInputTokens Portion of the input expected to hit the prefix cache; billed
 *   at the cache-read rate instead of the full input rate.
 * @returns Estimated cost of the turn in USD.
 */
export declare function estimateTurnCostUsd(args: {
	inputTokens: number;
	maxOutputTokens: number;
	cost: ModelTokenCost;
	cachedInputTokens?: number;
}): number;
|
|
28
|
+
/** Response applied once a turn's projected cost is above the configured ceiling. */
export type CostGuardAction = "warn" | "downgrade";
|
|
30
|
+
/** User-configurable knobs for the per-turn cost guard. */
export interface CostGuardSettings {
	/** Spending ceiling for a single turn, in USD. The default of `0` turns the guard off. */
	maxTurnUsd: number;
	/**
	 * Reaction when the ceiling is exceeded: `warn` surfaces a notice, `downgrade` additionally
	 * lowers the reasoning effort.
	 */
	action: CostGuardAction;
}
|
|
36
|
+
/** Out-of-the-box guard settings: a ceiling of `0` (guard disabled) with the `warn` action. */
export declare const DEFAULT_COST_GUARD_SETTINGS: CostGuardSettings;
|
|
37
|
+
/** Outcome of checking one projected turn cost against the guard settings. */
export interface CostGuardDecision {
	/** Set only when the guard is switched on and the projection is above the ceiling. */
	over: boolean;
	/** The projected turn cost that was evaluated, in USD. */
	estUsd: number;
	/** The ceiling the projection was compared against, in USD. */
	thresholdUsd: number;
	/** The action configured in the settings, echoed back for the caller. */
	action: CostGuardAction;
}
|
|
44
|
+
/**
 * Checks a projected turn cost against the guard settings.
 *
 * @param estUsd Projected cost of the turn in USD.
 * @param settings Ceiling and action to apply; a ceiling of `0` or less means the guard is
 *   disabled and the result is never `over`.
 * @returns The decision, echoing the estimate, the ceiling and the configured action.
 */
export declare function evaluateCostGuard(estUsd: number, settings: CostGuardSettings): CostGuardDecision;
|
|
46
|
+
/**
 * Reasoning-effort levels ordered from most to least expensive; a downgrade moves one
 * position toward the end of this tuple.
 */
declare const REASONING_LADDER: readonly [
	"xhigh",
	"high",
	"medium",
	"low",
	"minimal",
	"off"
];
/** Any single level named on the reasoning ladder. */
export type ReasoningLevel = (typeof REASONING_LADDER)[number];
|
|
49
|
+
/**
 * Moves one step toward cheaper reasoning on the ladder.
 *
 * The guard only ever lowers effort: a value that is already the lowest level, or that is not
 * on the ladder at all, is handed back unchanged.
 *
 * @param current The reasoning level currently in effect.
 * @returns The next-cheaper level, or `current` itself when no cheaper step applies.
 */
export declare function downgradeReasoning(current: string): ReasoningLevel | string;
|
|
54
|
+
export {};
|
|
55
|
+
//# sourceMappingURL=cost-guard.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"cost-guard.d.ts","sourceRoot":"","sources":["../../src/core/cost-guard.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAEH,8FAA8F;AAC9F,MAAM,WAAW,cAAc;IAC9B,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;IACf,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,UAAU,CAAC,EAAE,MAAM,CAAC;CACpB;AAED;;;;;GAKG;AACH,wBAAgB,mBAAmB,CAAC,IAAI,EAAE;IACzC,WAAW,EAAE,MAAM,CAAC;IACpB,eAAe,EAAE,MAAM,CAAC;IACxB,IAAI,EAAE,cAAc,CAAC;IACrB,iBAAiB,CAAC,EAAE,MAAM,CAAC;CAC3B,GAAG,MAAM,CAQT;AAED,qEAAqE;AACrE,MAAM,MAAM,eAAe,GAAG,MAAM,GAAG,WAAW,CAAC;AAEnD,MAAM,WAAW,iBAAiB;IACjC,uEAAuE;IACvE,UAAU,EAAE,MAAM,CAAC;IACnB,iGAAiG;IACjG,MAAM,EAAE,eAAe,CAAC;CACxB;AAED,eAAO,MAAM,2BAA2B,EAAE,iBAGzC,CAAC;AAEF,MAAM,WAAW,iBAAiB;IACjC,iFAAiF;IACjF,IAAI,EAAE,OAAO,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;IACf,YAAY,EAAE,MAAM,CAAC;IACrB,MAAM,EAAE,eAAe,CAAC;CACxB;AAED,0GAA0G;AAC1G,wBAAgB,iBAAiB,CAAC,MAAM,EAAE,MAAM,EAAE,QAAQ,EAAE,iBAAiB,GAAG,iBAAiB,CAQhG;AAED,qGAAqG;AACrG,QAAA,MAAM,gBAAgB,+DAAgE,CAAC;AACvF,MAAM,MAAM,cAAc,GAAG,CAAC,OAAO,gBAAgB,CAAC,CAAC,MAAM,CAAC,CAAC;AAE/D;;;GAGG;AACH,wBAAgB,kBAAkB,CAAC,OAAO,EAAE,MAAM,GAAG,cAAc,GAAG,MAAM,CAI3E","sourcesContent":["/**\n * Proactive per-turn token cost guard (Hermes-parity superiority item #34).\n *\n * Hermes (and pi today) only react to context growth by compressing AFTER it's expensive. This estimates\n * the dollar cost of the NEXT LLM call BEFORE it is submitted, so the agent can warn the user or\n * automatically reduce reasoning effort before a runaway billing spike — a proactive ceiling, not a\n * reactive cleanup. Pure functions: no I/O, fully testable.\n */\n\n/** Per-token USD prices (as carried on `Model.cost`, which is per-token, not per-million). */\nexport interface ModelTokenCost {\n\tinput: number;\n\toutput: number;\n\tcacheRead?: number;\n\tcacheWrite?: number;\n}\n\n/**\n * Estimate the USD cost of one turn: the whole current context is billed as input, plus up to\n * `maxOutputTokens` of output. 
`cachedInputTokens` (prefix-cache hits) are billed at the cheaper\n * cache-read rate instead of the full input rate. This is an UPPER bound on the turn (it assumes the\n * model emits its full output budget), which is what a spending ceiling should bound against.\n */\nexport function estimateTurnCostUsd(args: {\n\tinputTokens: number;\n\tmaxOutputTokens: number;\n\tcost: ModelTokenCost;\n\tcachedInputTokens?: number;\n}): number {\n\tconst { inputTokens, maxOutputTokens, cost } = args;\n\tconst cached = Math.max(0, Math.min(args.cachedInputTokens ?? 0, inputTokens));\n\tconst freshInput = inputTokens - cached;\n\tconst cacheReadRate = cost.cacheRead ?? cost.input;\n\tconst inputUsd = freshInput * cost.input + cached * cacheReadRate;\n\tconst outputUsd = Math.max(0, maxOutputTokens) * cost.output;\n\treturn inputUsd + outputUsd;\n}\n\n/** What to do when a turn's projected cost exceeds the threshold. */\nexport type CostGuardAction = \"warn\" | \"downgrade\";\n\nexport interface CostGuardSettings {\n\t/** Per-turn USD ceiling. `0` (default) disables the guard entirely. */\n\tmaxTurnUsd: number;\n\t/** Over the ceiling: `warn` (surface a notice) or `downgrade` (also reduce reasoning effort). */\n\taction: CostGuardAction;\n}\n\nexport const DEFAULT_COST_GUARD_SETTINGS: CostGuardSettings = {\n\tmaxTurnUsd: 0,\n\taction: \"warn\",\n};\n\nexport interface CostGuardDecision {\n\t/** True when the guard is enabled AND the projected cost exceeds the ceiling. */\n\tover: boolean;\n\testUsd: number;\n\tthresholdUsd: number;\n\taction: CostGuardAction;\n}\n\n/** Decide whether the projected turn cost trips the guard. Disabled (`maxTurnUsd<=0`) is never `over`. 
*/\nexport function evaluateCostGuard(estUsd: number, settings: CostGuardSettings): CostGuardDecision {\n\tconst enabled = settings.maxTurnUsd > 0;\n\treturn {\n\t\tover: enabled && estUsd > settings.maxTurnUsd,\n\t\testUsd,\n\t\tthresholdUsd: settings.maxTurnUsd,\n\t\taction: settings.action,\n\t};\n}\n\n/** Reasoning levels in descending cost order, used to pick the next-cheaper level on a downgrade. */\nconst REASONING_LADDER = [\"xhigh\", \"high\", \"medium\", \"low\", \"minimal\", \"off\"] as const;\nexport type ReasoningLevel = (typeof REASONING_LADDER)[number];\n\n/**\n * One step down the reasoning ladder (cost reduction) from `current`. Returns `current` unchanged when\n * already at the floor or unrecognized — the guard never raises effort, only lowers it.\n */\nexport function downgradeReasoning(current: string): ReasoningLevel | string {\n\tconst i = REASONING_LADDER.indexOf(current as ReasoningLevel);\n\tif (i < 0) return current;\n\treturn REASONING_LADDER[Math.min(i + 1, REASONING_LADDER.length - 1)];\n}\n"]}
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Proactive per-turn token cost guard (Hermes-parity superiority item #34).
|
|
3
|
+
*
|
|
4
|
+
* Hermes (and pi today) only react to context growth by compressing AFTER it's expensive. This estimates
|
|
5
|
+
* the dollar cost of the NEXT LLM call BEFORE it is submitted, so the agent can warn the user or
|
|
6
|
+
* automatically reduce reasoning effort before a runaway billing spike — a proactive ceiling, not a
|
|
7
|
+
* reactive cleanup. Pure functions: no I/O, fully testable.
|
|
8
|
+
*/
|
|
9
|
+
/**
 * Estimate the USD cost of one turn before it is submitted.
 *
 * The whole current context is billed as input and the model is assumed to emit its full
 * output budget, so the result is intended as a ceiling to compare against a spending limit.
 * Tokens reported as prefix-cache hits are billed at `cost.cacheRead`; when the model carries
 * no cache-read price they fall back to the regular input price.
 *
 * Negative token counts are treated as zero, so a bad count can never produce a negative
 * charge that cancels out part of the estimate.
 *
 * @param args.inputTokens Tokens in the current context (clamped to >= 0).
 * @param args.maxOutputTokens Output budget for the turn (clamped to >= 0).
 * @param args.cost Per-token USD prices: `input`, `output`, optional `cacheRead`.
 * @param args.cachedInputTokens Cached share of the input; clamped to `[0, inputTokens]`.
 * @returns Estimated turn cost in USD.
 */
export function estimateTurnCostUsd(args) {
	const { maxOutputTokens, cost } = args;
	// Clamp the input count the same way the output budget is clamped below.
	const inputTokens = Math.max(0, args.inputTokens);
	// The cached share can be neither negative nor larger than the input itself.
	const cached = Math.max(0, Math.min(args.cachedInputTokens ?? 0, inputTokens));
	const freshInput = inputTokens - cached;
	const cacheReadRate = cost.cacheRead ?? cost.input;
	const inputUsd = freshInput * cost.input + cached * cacheReadRate;
	const outputUsd = Math.max(0, maxOutputTokens) * cost.output;
	return inputUsd + outputUsd;
}
|
|
24
|
+
/** Guard defaults: a ceiling of 0 USD (which leaves the guard switched off) and the "warn" action. */
export const DEFAULT_COST_GUARD_SETTINGS = { maxTurnUsd: 0, action: "warn" };
|
|
28
|
+
/**
 * Compare a projected turn cost with the configured ceiling.
 *
 * A ceiling of zero or less means the guard is disabled, in which case the decision is never
 * `over`. The estimate, ceiling and configured action are echoed back on the result.
 */
export function evaluateCostGuard(estUsd, settings) {
	const { maxTurnUsd: thresholdUsd, action } = settings;
	// Only a strictly positive ceiling can be exceeded.
	const over = thresholdUsd > 0 ? estUsd > thresholdUsd : false;
	return { over, estUsd, thresholdUsd, action };
}
|
|
38
|
+
/** Reasoning levels listed from most to least expensive; a downgrade picks the entry after the current one. */
const REASONING_LADDER = ["xhigh", "high", "medium", "low", "minimal", "off"];
/**
 * Step `current` one rung down the reasoning ladder to reduce cost.
 *
 * The guard never raises effort: a level that is already the last rung, or a value that is
 * not on the ladder, comes back unchanged.
 */
export function downgradeReasoning(current) {
	const position = REASONING_LADDER.indexOf(current);
	if (position === -1) {
		// Unknown level: leave it alone rather than guess.
		return current;
	}
	const floor = REASONING_LADDER.length - 1;
	return position < floor ? REASONING_LADDER[position + 1] : REASONING_LADDER[floor];
}
|
|
50
|
+
//# sourceMappingURL=cost-guard.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"cost-guard.js","sourceRoot":"","sources":["../../src/core/cost-guard.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAUH;;;;;GAKG;AACH,MAAM,UAAU,mBAAmB,CAAC,IAKnC,EAAU;IACV,MAAM,EAAE,WAAW,EAAE,eAAe,EAAE,IAAI,EAAE,GAAG,IAAI,CAAC;IACpD,MAAM,MAAM,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,iBAAiB,IAAI,CAAC,EAAE,WAAW,CAAC,CAAC,CAAC;IAC/E,MAAM,UAAU,GAAG,WAAW,GAAG,MAAM,CAAC;IACxC,MAAM,aAAa,GAAG,IAAI,CAAC,SAAS,IAAI,IAAI,CAAC,KAAK,CAAC;IACnD,MAAM,QAAQ,GAAG,UAAU,GAAG,IAAI,CAAC,KAAK,GAAG,MAAM,GAAG,aAAa,CAAC;IAClE,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,eAAe,CAAC,GAAG,IAAI,CAAC,MAAM,CAAC;IAC7D,OAAO,QAAQ,GAAG,SAAS,CAAC;AAAA,CAC5B;AAYD,MAAM,CAAC,MAAM,2BAA2B,GAAsB;IAC7D,UAAU,EAAE,CAAC;IACb,MAAM,EAAE,MAAM;CACd,CAAC;AAUF,0GAA0G;AAC1G,MAAM,UAAU,iBAAiB,CAAC,MAAc,EAAE,QAA2B,EAAqB;IACjG,MAAM,OAAO,GAAG,QAAQ,CAAC,UAAU,GAAG,CAAC,CAAC;IACxC,OAAO;QACN,IAAI,EAAE,OAAO,IAAI,MAAM,GAAG,QAAQ,CAAC,UAAU;QAC7C,MAAM;QACN,YAAY,EAAE,QAAQ,CAAC,UAAU;QACjC,MAAM,EAAE,QAAQ,CAAC,MAAM;KACvB,CAAC;AAAA,CACF;AAED,qGAAqG;AACrG,MAAM,gBAAgB,GAAG,CAAC,OAAO,EAAE,MAAM,EAAE,QAAQ,EAAE,KAAK,EAAE,SAAS,EAAE,KAAK,CAAU,CAAC;AAGvF;;;GAGG;AACH,MAAM,UAAU,kBAAkB,CAAC,OAAe,EAA2B;IAC5E,MAAM,CAAC,GAAG,gBAAgB,CAAC,OAAO,CAAC,OAAyB,CAAC,CAAC;IAC9D,IAAI,CAAC,GAAG,CAAC;QAAE,OAAO,OAAO,CAAC;IAC1B,OAAO,gBAAgB,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,EAAE,gBAAgB,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC;AAAA,CACtE","sourcesContent":["/**\n * Proactive per-turn token cost guard (Hermes-parity superiority item #34).\n *\n * Hermes (and pi today) only react to context growth by compressing AFTER it's expensive. This estimates\n * the dollar cost of the NEXT LLM call BEFORE it is submitted, so the agent can warn the user or\n * automatically reduce reasoning effort before a runaway billing spike — a proactive ceiling, not a\n * reactive cleanup. Pure functions: no I/O, fully testable.\n */\n\n/** Per-token USD prices (as carried on `Model.cost`, which is per-token, not per-million). 
*/\nexport interface ModelTokenCost {\n\tinput: number;\n\toutput: number;\n\tcacheRead?: number;\n\tcacheWrite?: number;\n}\n\n/**\n * Estimate the USD cost of one turn: the whole current context is billed as input, plus up to\n * `maxOutputTokens` of output. `cachedInputTokens` (prefix-cache hits) are billed at the cheaper\n * cache-read rate instead of the full input rate. This is an UPPER bound on the turn (it assumes the\n * model emits its full output budget), which is what a spending ceiling should bound against.\n */\nexport function estimateTurnCostUsd(args: {\n\tinputTokens: number;\n\tmaxOutputTokens: number;\n\tcost: ModelTokenCost;\n\tcachedInputTokens?: number;\n}): number {\n\tconst { inputTokens, maxOutputTokens, cost } = args;\n\tconst cached = Math.max(0, Math.min(args.cachedInputTokens ?? 0, inputTokens));\n\tconst freshInput = inputTokens - cached;\n\tconst cacheReadRate = cost.cacheRead ?? cost.input;\n\tconst inputUsd = freshInput * cost.input + cached * cacheReadRate;\n\tconst outputUsd = Math.max(0, maxOutputTokens) * cost.output;\n\treturn inputUsd + outputUsd;\n}\n\n/** What to do when a turn's projected cost exceeds the threshold. */\nexport type CostGuardAction = \"warn\" | \"downgrade\";\n\nexport interface CostGuardSettings {\n\t/** Per-turn USD ceiling. `0` (default) disables the guard entirely. */\n\tmaxTurnUsd: number;\n\t/** Over the ceiling: `warn` (surface a notice) or `downgrade` (also reduce reasoning effort). */\n\taction: CostGuardAction;\n}\n\nexport const DEFAULT_COST_GUARD_SETTINGS: CostGuardSettings = {\n\tmaxTurnUsd: 0,\n\taction: \"warn\",\n};\n\nexport interface CostGuardDecision {\n\t/** True when the guard is enabled AND the projected cost exceeds the ceiling. */\n\tover: boolean;\n\testUsd: number;\n\tthresholdUsd: number;\n\taction: CostGuardAction;\n}\n\n/** Decide whether the projected turn cost trips the guard. Disabled (`maxTurnUsd<=0`) is never `over`. 
*/\nexport function evaluateCostGuard(estUsd: number, settings: CostGuardSettings): CostGuardDecision {\n\tconst enabled = settings.maxTurnUsd > 0;\n\treturn {\n\t\tover: enabled && estUsd > settings.maxTurnUsd,\n\t\testUsd,\n\t\tthresholdUsd: settings.maxTurnUsd,\n\t\taction: settings.action,\n\t};\n}\n\n/** Reasoning levels in descending cost order, used to pick the next-cheaper level on a downgrade. */\nconst REASONING_LADDER = [\"xhigh\", \"high\", \"medium\", \"low\", \"minimal\", \"off\"] as const;\nexport type ReasoningLevel = (typeof REASONING_LADDER)[number];\n\n/**\n * One step down the reasoning ladder (cost reduction) from `current`. Returns `current` unchanged when\n * already at the floor or unrecognized — the guard never raises effort, only lowers it.\n */\nexport function downgradeReasoning(current: string): ReasoningLevel | string {\n\tconst i = REASONING_LADDER.indexOf(current as ReasoningLevel);\n\tif (i < 0) return current;\n\treturn REASONING_LADDER[Math.min(i + 1, REASONING_LADDER.length - 1)];\n}\n"]}
|
|
@@ -51,6 +51,13 @@ export interface ReflectionResult {
|
|
|
51
51
|
usage: Usage;
|
|
52
52
|
rationale: string;
|
|
53
53
|
}
|
|
54
|
+
/**
|
|
55
|
+
* STATIC reflection system prompt (Hermes-parity #33). It is byte-identical across every reflection
|
|
56
|
+
* pass — the variable parts (existing memory snapshot + the turn transcript) live in the USER prompt —
|
|
57
|
+
* so the provider prompt-cache reuses this prefix instead of re-billing it each pass (cost guard).
|
|
58
|
+
* Do NOT interpolate per-call data into this constant or caching breaks.
|
|
59
|
+
*/
|
|
60
|
+
export declare const REFLECTION_SYSTEM_PROMPT = "You are a reflection engine. Your job is to analyze the recent conversation turn, compare it against the agent's existing memory, and decide if any memory updates are needed.\n\nMemory guidelines:\n- \"MEMORY\" is for project facts, configuration, repeatable workflows, and coding findings.\n- \"USER\" is for user preferences, patterns, and style specifications.\n- Avoid duplicate facts. If the fact is already represented, do not add it.\n- CONFRONT existing memory: if the new turn contradicts or updates an existing fact, use \"memory_replace\" or \"memory_remove\" to supersede the old fact rather than blindly appending.\n- Keep memories short, factual, and direct. No fluff.\n- Do NOT capture transient/environment-specific noise: tool/network failures, one-off errors, or a single narrative event. Persist only durable facts and preferences.\n- PROMOTE to behavior: if the turn established a REPEATABLE, multi-step PROCEDURE/workflow (not a one-off fact) that should govern a future class of tasks, emit a \"promote_skill\" instead of (or in addition to) a memory fact. Only promote a genuinely reusable procedure \u2014 never a single fact, a one-off narrative, or environment-specific noise. Prefer a memory fact when unsure.\n\nYou must output your analysis and writes in the following JSON format inside a ```json``` code fence:\n{\n \"rationale\": \"Explanation of your reasoning\",\n \"writes\": [\n { \"kind\": \"memory_add\", \"section\": \"MEMORY\" | \"USER\", \"text\": \"New direct fact to append\" },\n { \"kind\": \"memory_replace\", \"target\": \"Exact text substring to replace\", \"text\": \"New replacement text\" },\n { \"kind\": \"memory_remove\", \"target\": \"Exact text substring to remove\" },\n { \"kind\": \"promote_skill\", \"name\": \"kebab-case-skill-name\", \"description\": \"one line of when to use it\", \"body\": \"Markdown: the step-by-step procedure\" }\n ]\n}\n";
|
|
54
61
|
export declare class ReflectionEngine {
|
|
55
62
|
/**
|
|
56
63
|
* Build the reflection prompt, call the injected isolated complete(),
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"reflection-engine.d.ts","sourceRoot":"","sources":["../../../src/core/learning/reflection-engine.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,KAAK,EAAE,MAAM,mBAAmB,CAAC;AAE/C,MAAM,MAAM,UAAU,GAAG,MAAM,GAAG,SAAS,GAAG,SAAS,GAAG,OAAO,GAAG,MAAM,CAAC;AAE3E,MAAM,WAAW,wBAAwB;IACxC,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,KAAK,CAAC;IACb,UAAU,EAAE,UAAU,CAAC;CACvB;AAED,MAAM,MAAM,iBAAiB,GAAG,SAAS,GAAG,YAAY,GAAG,aAAa,GAAG,MAAM,CAAC;AAElF,MAAM,WAAW,aAAa;IAC7B,OAAO,EAAE,iBAAiB,CAAC;IAC3B,aAAa,EAAE,MAAM,CAAC;IACtB,aAAa,EAAE,OAAO,CAAC;IACvB,kBAAkB,EAAE,MAAM,CAAC;IAC3B,YAAY,EAAE,MAAM,CAAC;CACrB;AAED,MAAM,WAAW,UAAU;IAC1B,GAAG,EAAE,MAAM,GAAG,SAAS,CAAC;IACxB,MAAM,EAAE,MAAM,CAAC;IACf,WAAW,EAAE,MAAM,CAAC;CACpB;AAED;;;GAGG;AACH,wBAAgB,YAAY,CAAC,OAAO,EAAE,aAAa,GAAG,UAAU,CAyB/D;AAED,MAAM,WAAW,eAAe;IAC/B,cAAc,EAAE,MAAM,CAAC;IACvB,cAAc,EAAE,MAAM,CAAC;IACvB,IAAI,EAAE,UAAU,CAAC;IAEjB,QAAQ,EAAE,CAAC,YAAY,EAAE,MAAM,EAAE,UAAU,EAAE,MAAM,KAAK,OAAO,CAAC,wBAAwB,CAAC,CAAC;CAC1F;AAED,MAAM,MAAM,eAAe,GACxB;IAAE,IAAI,EAAE,YAAY,CAAC;IAAC,OAAO,EAAE,QAAQ,GAAG,MAAM,CAAC;IAAC,IAAI,EAAE,MAAM,CAAA;CAAE,GAChE;IAAE,IAAI,EAAE,gBAAgB,CAAC;IAAC,MAAM,EAAE,MAAM,CAAC;IAAC,IAAI,EAAE,MAAM,CAAA;CAAE,GACxD;IAAE,IAAI,EAAE,eAAe,CAAC;IAAC,MAAM,EAAE,MAAM,CAAA;CAAE,GAEzC;IAAE,IAAI,EAAE,eAAe,CAAC;IAAC,IAAI,EAAE,MAAM,CAAC;IAAC,WAAW,EAAE,MAAM,CAAC;IAAC,IAAI,EAAE,MAAM,CAAA;CAAE,CAAC;AAE9E,MAAM,WAAW,gBAAgB;IAChC,MAAM,EAAE,eAAe,EAAE,CAAC;IAC1B,KAAK,EAAE,KAAK,CAAC;IACb,SAAS,EAAE,MAAM,CAAC;CAClB;AAED,qBAAa,gBAAgB;IAC5B;;;;OAIG;IACG,OAAO,CAAC,KAAK,EAAE,eAAe,GAAG,OAAO,CAAC,gBAAgB,CAAC,
|
|
1
|
+
{"version":3,"file":"reflection-engine.d.ts","sourceRoot":"","sources":["../../../src/core/learning/reflection-engine.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,KAAK,EAAE,MAAM,mBAAmB,CAAC;AAE/C,MAAM,MAAM,UAAU,GAAG,MAAM,GAAG,SAAS,GAAG,SAAS,GAAG,OAAO,GAAG,MAAM,CAAC;AAE3E,MAAM,WAAW,wBAAwB;IACxC,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,KAAK,CAAC;IACb,UAAU,EAAE,UAAU,CAAC;CACvB;AAED,MAAM,MAAM,iBAAiB,GAAG,SAAS,GAAG,YAAY,GAAG,aAAa,GAAG,MAAM,CAAC;AAElF,MAAM,WAAW,aAAa;IAC7B,OAAO,EAAE,iBAAiB,CAAC;IAC3B,aAAa,EAAE,MAAM,CAAC;IACtB,aAAa,EAAE,OAAO,CAAC;IACvB,kBAAkB,EAAE,MAAM,CAAC;IAC3B,YAAY,EAAE,MAAM,CAAC;CACrB;AAED,MAAM,WAAW,UAAU;IAC1B,GAAG,EAAE,MAAM,GAAG,SAAS,CAAC;IACxB,MAAM,EAAE,MAAM,CAAC;IACf,WAAW,EAAE,MAAM,CAAC;CACpB;AAED;;;GAGG;AACH,wBAAgB,YAAY,CAAC,OAAO,EAAE,aAAa,GAAG,UAAU,CAyB/D;AAED,MAAM,WAAW,eAAe;IAC/B,cAAc,EAAE,MAAM,CAAC;IACvB,cAAc,EAAE,MAAM,CAAC;IACvB,IAAI,EAAE,UAAU,CAAC;IAEjB,QAAQ,EAAE,CAAC,YAAY,EAAE,MAAM,EAAE,UAAU,EAAE,MAAM,KAAK,OAAO,CAAC,wBAAwB,CAAC,CAAC;CAC1F;AAED,MAAM,MAAM,eAAe,GACxB;IAAE,IAAI,EAAE,YAAY,CAAC;IAAC,OAAO,EAAE,QAAQ,GAAG,MAAM,CAAC;IAAC,IAAI,EAAE,MAAM,CAAA;CAAE,GAChE;IAAE,IAAI,EAAE,gBAAgB,CAAC;IAAC,MAAM,EAAE,MAAM,CAAC;IAAC,IAAI,EAAE,MAAM,CAAA;CAAE,GACxD;IAAE,IAAI,EAAE,eAAe,CAAC;IAAC,MAAM,EAAE,MAAM,CAAA;CAAE,GAEzC;IAAE,IAAI,EAAE,eAAe,CAAC;IAAC,IAAI,EAAE,MAAM,CAAC;IAAC,WAAW,EAAE,MAAM,CAAC;IAAC,IAAI,EAAE,MAAM,CAAA;CAAE,CAAC;AAE9E,MAAM,WAAW,gBAAgB;IAChC,MAAM,EAAE,eAAe,EAAE,CAAC;IAC1B,KAAK,EAAE,KAAK,CAAC;IACb,SAAS,EAAE,MAAM,CAAC;CAClB;AAED;;;;;GAKG;AACH,eAAO,MAAM,wBAAwB,s4DAqBpC,CAAC;AAEF,qBAAa,gBAAgB;IAC5B;;;;OAIG;IACG,OAAO,CAAC,KAAK,EAAE,eAAe,GAAG,OAAO,CAAC,gBAAgB,CAAC,CA+E/D;CACD","sourcesContent":["import type { Usage } from \"@caupulican/pi-ai\";\n\nexport type StopReason = \"stop\" | \"toolUse\" | \"aborted\" | \"error\" | string;\n\nexport interface IsolatedCompletionResult {\n\ttext: string;\n\tusage: Usage;\n\tstopReason: StopReason;\n}\n\nexport type ReflectionTrigger = \"complex\" | \"corrective\" | \"session-end\" | \"none\";\n\nexport interface 
DemandSignals {\n\ttrigger: ReflectionTrigger;\n\ttoolCallCount: number;\n\thadCorrection: boolean;\n\tcontextHeadroomPct: number; // 0..100\n\tusefulLately: number; // 0..1 rolling score\n}\n\nexport interface DemandPlan {\n\tact: \"skip\" | \"reflect\";\n\treason: string;\n\ttokenBudget: number;\n}\n\n/**\n * Pure zero-I/O heuristic to decide whether the current turn justifies a reflection run\n * and determine the token budget under the cheap-tool net-negative doctrine.\n */\nexport function decideDemand(signals: DemandSignals): DemandPlan {\n\tif (signals.trigger === \"none\") {\n\t\treturn { act: \"skip\", reason: \"No trigger detected\", tokenBudget: 0 };\n\t}\n\tif (signals.contextHeadroomPct < 10) {\n\t\treturn { act: \"skip\", reason: \"Context headroom is critically low (< 10%)\", tokenBudget: 0 };\n\t}\n\n\t// Dynamic token budget based on headroom (keep reflection bounded between 500 and 1500 tokens)\n\tconst baseBudget = 1000;\n\tconst tokenBudget = Math.max(500, Math.min(1500, Math.round(baseBudget * (signals.contextHeadroomPct / 100))));\n\n\tif (signals.hadCorrection) {\n\t\treturn { act: \"reflect\", reason: \"Correction detected in the turn\", tokenBudget };\n\t}\n\tif (signals.trigger === \"session-end\") {\n\t\treturn { act: \"reflect\", reason: \"Session end reflection triggered\", tokenBudget };\n\t}\n\tif (signals.trigger === \"complex\") {\n\t\tif (signals.toolCallCount >= 3) {\n\t\t\treturn { act: \"reflect\", reason: `Complex turn with ${signals.toolCallCount} tool calls`, tokenBudget };\n\t\t}\n\t}\n\n\treturn { act: \"skip\", reason: \"Signals do not justify reflection overhead\", tokenBudget: 0 };\n}\n\nexport interface ReflectionInput {\n\trecentTurnText: string; // host serializes the just-finished turn\n\texistingMemory: string; // current MEMORY.md + USER.md snapshot\n\tplan: DemandPlan;\n\t// host-injected isolated completion function:\n\tcomplete: (systemPrompt: string, userPrompt: string) => 
Promise<IsolatedCompletionResult>;\n}\n\nexport type ReflectionWrite =\n\t| { kind: \"memory_add\"; section: \"MEMORY\" | \"USER\"; text: string }\n\t| { kind: \"memory_replace\"; target: string; text: string }\n\t| { kind: \"memory_remove\"; target: string }\n\t// R7 memory-to-behavior: promote a recurring procedural workflow into an executable skill.\n\t| { kind: \"promote_skill\"; name: string; description: string; body: string };\n\nexport interface ReflectionResult {\n\twrites: ReflectionWrite[];\n\tusage: Usage;\n\trationale: string;\n}\n\n/**\n * STATIC reflection system prompt (Hermes-parity #33). It is byte-identical across every reflection\n * pass — the variable parts (existing memory snapshot + the turn transcript) live in the USER prompt —\n * so the provider prompt-cache reuses this prefix instead of re-billing it each pass (cost guard).\n * Do NOT interpolate per-call data into this constant or caching breaks.\n */\nexport const REFLECTION_SYSTEM_PROMPT = `You are a reflection engine. Your job is to analyze the recent conversation turn, compare it against the agent's existing memory, and decide if any memory updates are needed.\n\nMemory guidelines:\n- \"MEMORY\" is for project facts, configuration, repeatable workflows, and coding findings.\n- \"USER\" is for user preferences, patterns, and style specifications.\n- Avoid duplicate facts. If the fact is already represented, do not add it.\n- CONFRONT existing memory: if the new turn contradicts or updates an existing fact, use \"memory_replace\" or \"memory_remove\" to supersede the old fact rather than blindly appending.\n- Keep memories short, factual, and direct. No fluff.\n- Do NOT capture transient/environment-specific noise: tool/network failures, one-off errors, or a single narrative event. 
Persist only durable facts and preferences.\n- PROMOTE to behavior: if the turn established a REPEATABLE, multi-step PROCEDURE/workflow (not a one-off fact) that should govern a future class of tasks, emit a \"promote_skill\" instead of (or in addition to) a memory fact. Only promote a genuinely reusable procedure — never a single fact, a one-off narrative, or environment-specific noise. Prefer a memory fact when unsure.\n\nYou must output your analysis and writes in the following JSON format inside a \\`\\`\\`json\\`\\`\\` code fence:\n{\n \"rationale\": \"Explanation of your reasoning\",\n \"writes\": [\n { \"kind\": \"memory_add\", \"section\": \"MEMORY\" | \"USER\", \"text\": \"New direct fact to append\" },\n { \"kind\": \"memory_replace\", \"target\": \"Exact text substring to replace\", \"text\": \"New replacement text\" },\n { \"kind\": \"memory_remove\", \"target\": \"Exact text substring to remove\" },\n { \"kind\": \"promote_skill\", \"name\": \"kebab-case-skill-name\", \"description\": \"one line of when to use it\", \"body\": \"Markdown: the step-by-step procedure\" }\n ]\n}\n`;\n\nexport class ReflectionEngine {\n\t/**\n\t * Build the reflection prompt, call the injected isolated complete(),\n\t * parse the response, confront existing memory, and return memory writes.\n\t * Zero direct I/O.\n\t */\n\tasync reflect(input: ReflectionInput): Promise<ReflectionResult> {\n\t\tconst systemPrompt = REFLECTION_SYSTEM_PROMPT;\n\n\t\t// Variable inputs go in the USER prompt so the system prefix above stays cache-stable (#33).\n\t\tconst userPrompt = `Existing Memory snapshot:\n${input.existingMemory}\n\nRecent turn transcript:\n${input.recentTurnText}\n\nAnalyze this turn against the existing memory and output your memory updates.`;\n\n\t\ttry {\n\t\t\tconst compResult = await input.complete(systemPrompt, userPrompt);\n\t\t\tconst text = compResult.text;\n\n\t\t\tconst jsonMatch = text.match(/```json\\s*([\\s\\S]*?)\\s*```/) || 
text.match(/{[\\s\\S]*}/);\n\t\t\tif (!jsonMatch) {\n\t\t\t\treturn {\n\t\t\t\t\twrites: [],\n\t\t\t\t\tusage: compResult.usage,\n\t\t\t\t\trationale: `Failed to locate JSON response. Raw text:\\n${text}`,\n\t\t\t\t};\n\t\t\t}\n\n\t\t\tconst parsed = JSON.parse(jsonMatch[1] || jsonMatch[0]);\n\t\t\tconst rationale = parsed.rationale || \"\";\n\t\t\tconst writes: ReflectionWrite[] = [];\n\n\t\t\tif (Array.isArray(parsed.writes)) {\n\t\t\t\tfor (const w of parsed.writes) {\n\t\t\t\t\tif (w && typeof w === \"object\") {\n\t\t\t\t\t\tif (\n\t\t\t\t\t\t\tw.kind === \"memory_add\" &&\n\t\t\t\t\t\t\t(w.section === \"MEMORY\" || w.section === \"USER\") &&\n\t\t\t\t\t\t\ttypeof w.text === \"string\"\n\t\t\t\t\t\t) {\n\t\t\t\t\t\t\twrites.push({ kind: \"memory_add\", section: w.section, text: w.text });\n\t\t\t\t\t\t} else if (\n\t\t\t\t\t\t\tw.kind === \"memory_replace\" &&\n\t\t\t\t\t\t\ttypeof w.target === \"string\" &&\n\t\t\t\t\t\t\ttypeof w.text === \"string\"\n\t\t\t\t\t\t) {\n\t\t\t\t\t\t\twrites.push({ kind: \"memory_replace\", target: w.target, text: w.text });\n\t\t\t\t\t\t} else if (w.kind === \"memory_remove\" && typeof w.target === \"string\") {\n\t\t\t\t\t\t\twrites.push({ kind: \"memory_remove\", target: w.target });\n\t\t\t\t\t\t} else if (\n\t\t\t\t\t\t\tw.kind === \"promote_skill\" &&\n\t\t\t\t\t\t\ttypeof w.name === \"string\" &&\n\t\t\t\t\t\t\ttypeof w.description === \"string\" &&\n\t\t\t\t\t\t\ttypeof w.body === \"string\"\n\t\t\t\t\t\t) {\n\t\t\t\t\t\t\twrites.push({ kind: \"promote_skill\", name: w.name, description: w.description, body: w.body });\n\t\t\t\t\t\t}\n\t\t\t\t\t}\n\t\t\t\t}\n\t\t\t}\n\n\t\t\treturn {\n\t\t\t\twrites,\n\t\t\t\tusage: compResult.usage,\n\t\t\t\trationale,\n\t\t\t};\n\t\t} catch (err) {\n\t\t\t// Zeroed/fallback usage representation\n\t\t\tconst emptyUsage: Usage = {\n\t\t\t\tinput: 0,\n\t\t\t\toutput: 0,\n\t\t\t\tcacheRead: 0,\n\t\t\t\tcacheWrite: 0,\n\t\t\t\ttotalTokens: 0,\n\t\t\t\tcost: { input: 0, output: 0, cacheRead: 
0, cacheWrite: 0, total: 0 },\n\t\t\t};\n\t\t\treturn {\n\t\t\t\twrites: [],\n\t\t\t\tusage: emptyUsage,\n\t\t\t\trationale: `Error during reflection: ${String(err)}`,\n\t\t\t};\n\t\t}\n\t}\n}\n"]}
|
|
@@ -25,17 +25,13 @@ export function decideDemand(signals) {
|
|
|
25
25
|
}
|
|
26
26
|
return { act: "skip", reason: "Signals do not justify reflection overhead", tokenBudget: 0 };
|
|
27
27
|
}
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
const systemPrompt = `You are a reflection engine. Your job is to analyze the recent conversation turn, compare it against the agent's existing memory, and decide if any memory updates are needed.
|
|
36
|
-
|
|
37
|
-
Existing Memory snapshot:
|
|
38
|
-
${input.existingMemory}
|
|
28
|
+
/**
|
|
29
|
+
* STATIC reflection system prompt (Hermes-parity #33). It is byte-identical across every reflection
|
|
30
|
+
* pass — the variable parts (existing memory snapshot + the turn transcript) live in the USER prompt —
|
|
31
|
+
* so the provider prompt-cache reuses this prefix instead of re-billing it each pass (cost guard).
|
|
32
|
+
* Do NOT interpolate per-call data into this constant or caching breaks.
|
|
33
|
+
*/
|
|
34
|
+
export const REFLECTION_SYSTEM_PROMPT = `You are a reflection engine. Your job is to analyze the recent conversation turn, compare it against the agent's existing memory, and decide if any memory updates are needed.
|
|
39
35
|
|
|
40
36
|
Memory guidelines:
|
|
41
37
|
- "MEMORY" is for project facts, configuration, repeatable workflows, and coding findings.
|
|
@@ -43,6 +39,7 @@ Memory guidelines:
|
|
|
43
39
|
- Avoid duplicate facts. If the fact is already represented, do not add it.
|
|
44
40
|
- CONFRONT existing memory: if the new turn contradicts or updates an existing fact, use "memory_replace" or "memory_remove" to supersede the old fact rather than blindly appending.
|
|
45
41
|
- Keep memories short, factual, and direct. No fluff.
|
|
42
|
+
- Do NOT capture transient/environment-specific noise: tool/network failures, one-off errors, or a single narrative event. Persist only durable facts and preferences.
|
|
46
43
|
- PROMOTE to behavior: if the turn established a REPEATABLE, multi-step PROCEDURE/workflow (not a one-off fact) that should govern a future class of tasks, emit a "promote_skill" instead of (or in addition to) a memory fact. Only promote a genuinely reusable procedure — never a single fact, a one-off narrative, or environment-specific noise. Prefer a memory fact when unsure.
|
|
47
44
|
|
|
48
45
|
You must output your analysis and writes in the following JSON format inside a \`\`\`json\`\`\` code fence:
|
|
@@ -56,10 +53,22 @@ You must output your analysis and writes in the following JSON format inside a \
|
|
|
56
53
|
]
|
|
57
54
|
}
|
|
58
55
|
`;
|
|
59
|
-
|
|
56
|
+
export class ReflectionEngine {
|
|
57
|
+
/**
|
|
58
|
+
* Build the reflection prompt, call the injected isolated complete(),
|
|
59
|
+
* parse the response, confront existing memory, and return memory writes.
|
|
60
|
+
* Zero direct I/O.
|
|
61
|
+
*/
|
|
62
|
+
async reflect(input) {
|
|
63
|
+
const systemPrompt = REFLECTION_SYSTEM_PROMPT;
|
|
64
|
+
// Variable inputs go in the USER prompt so the system prefix above stays cache-stable (#33).
|
|
65
|
+
const userPrompt = `Existing Memory snapshot:
|
|
66
|
+
${input.existingMemory}
|
|
67
|
+
|
|
68
|
+
Recent turn transcript:
|
|
60
69
|
${input.recentTurnText}
|
|
61
70
|
|
|
62
|
-
Analyze this turn and output your memory updates.`;
|
|
71
|
+
Analyze this turn against the existing memory and output your memory updates.`;
|
|
63
72
|
try {
|
|
64
73
|
const compResult = await input.complete(systemPrompt, userPrompt);
|
|
65
74
|
const text = compResult.text;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"reflection-engine.js","sourceRoot":"","sources":["../../../src/core/learning/reflection-engine.ts"],"names":[],"mappings":"AA0BA;;;GAGG;AACH,MAAM,UAAU,YAAY,CAAC,OAAsB,EAAc;IAChE,IAAI,OAAO,CAAC,OAAO,KAAK,MAAM,EAAE,CAAC;QAChC,OAAO,EAAE,GAAG,EAAE,MAAM,EAAE,MAAM,EAAE,qBAAqB,EAAE,WAAW,EAAE,CAAC,EAAE,CAAC;IACvE,CAAC;IACD,IAAI,OAAO,CAAC,kBAAkB,GAAG,EAAE,EAAE,CAAC;QACrC,OAAO,EAAE,GAAG,EAAE,MAAM,EAAE,MAAM,EAAE,4CAA4C,EAAE,WAAW,EAAE,CAAC,EAAE,CAAC;IAC9F,CAAC;IAED,+FAA+F;IAC/F,MAAM,UAAU,GAAG,IAAI,CAAC;IACxB,MAAM,WAAW,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,EAAE,IAAI,CAAC,GAAG,CAAC,IAAI,EAAE,IAAI,CAAC,KAAK,CAAC,UAAU,GAAG,CAAC,OAAO,CAAC,kBAAkB,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;IAE/G,IAAI,OAAO,CAAC,aAAa,EAAE,CAAC;QAC3B,OAAO,EAAE,GAAG,EAAE,SAAS,EAAE,MAAM,EAAE,iCAAiC,EAAE,WAAW,EAAE,CAAC;IACnF,CAAC;IACD,IAAI,OAAO,CAAC,OAAO,KAAK,aAAa,EAAE,CAAC;QACvC,OAAO,EAAE,GAAG,EAAE,SAAS,EAAE,MAAM,EAAE,kCAAkC,EAAE,WAAW,EAAE,CAAC;IACpF,CAAC;IACD,IAAI,OAAO,CAAC,OAAO,KAAK,SAAS,EAAE,CAAC;QACnC,IAAI,OAAO,CAAC,aAAa,IAAI,CAAC,EAAE,CAAC;YAChC,OAAO,EAAE,GAAG,EAAE,SAAS,EAAE,MAAM,EAAE,qBAAqB,OAAO,CAAC,aAAa,aAAa,EAAE,WAAW,EAAE,CAAC;QACzG,CAAC;IACF,CAAC;IAED,OAAO,EAAE,GAAG,EAAE,MAAM,EAAE,MAAM,EAAE,4CAA4C,EAAE,WAAW,EAAE,CAAC,EAAE,CAAC;AAAA,CAC7F;AAuBD,MAAM,OAAO,gBAAgB;IAC5B;;;;OAIG;IACH,KAAK,CAAC,OAAO,CAAC,KAAsB,EAA6B;QAChE,MAAM,YAAY,GAAG;;;EAGrB,KAAK,CAAC,cAAc;;;;;;;;;;;;;;;;;;;;CAoBrB,CAAC;QAEA,MAAM,UAAU,GAAG;EACnB,KAAK,CAAC,cAAc;;kDAE4B,CAAC;QAEjD,IAAI,CAAC;YACJ,MAAM,UAAU,GAAG,MAAM,KAAK,CAAC,QAAQ,CAAC,YAAY,EAAE,UAAU,CAAC,CAAC;YAClE,MAAM,IAAI,GAAG,UAAU,CAAC,IAAI,CAAC;YAE7B,MAAM,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,4BAA4B,CAAC,IAAI,IAAI,CAAC,KAAK,CAAC,WAAW,CAAC,CAAC;YACtF,IAAI,CAAC,SAAS,EAAE,CAAC;gBAChB,OAAO;oBACN,MAAM,EAAE,EAAE;oBACV,KAAK,EAAE,UAAU,CAAC,KAAK;oBACvB,SAAS,EAAE,8CAA8C,IAAI,EAAE;iBAC/D,CAAC;YACH,CAAC;YAED,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC,CAAC,IAAI,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC;YACxD,MAAM,SAAS,GAAG,MAAM,CAAC,SAAS,IAAI,EAAE,CAAC;YACzC,MAAM,MAAM,GAAsB,EAAE,CAAC;YAErC,IAAI,KAAK,
CAAC,OAAO,CAAC,MAAM,CAAC,MAAM,CAAC,EAAE,CAAC;gBAClC,KAAK,MAAM,CAAC,IAAI,MAAM,CAAC,MAAM,EAAE,CAAC;oBAC/B,IAAI,CAAC,IAAI,OAAO,CAAC,KAAK,QAAQ,EAAE,CAAC;wBAChC,IACC,CAAC,CAAC,IAAI,KAAK,YAAY;4BACvB,CAAC,CAAC,CAAC,OAAO,KAAK,QAAQ,IAAI,CAAC,CAAC,OAAO,KAAK,MAAM,CAAC;4BAChD,OAAO,CAAC,CAAC,IAAI,KAAK,QAAQ,EACzB,CAAC;4BACF,MAAM,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,YAAY,EAAE,OAAO,EAAE,CAAC,CAAC,OAAO,EAAE,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;wBACvE,CAAC;6BAAM,IACN,CAAC,CAAC,IAAI,KAAK,gBAAgB;4BAC3B,OAAO,CAAC,CAAC,MAAM,KAAK,QAAQ;4BAC5B,OAAO,CAAC,CAAC,IAAI,KAAK,QAAQ,EACzB,CAAC;4BACF,MAAM,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,gBAAgB,EAAE,MAAM,EAAE,CAAC,CAAC,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;wBACzE,CAAC;6BAAM,IAAI,CAAC,CAAC,IAAI,KAAK,eAAe,IAAI,OAAO,CAAC,CAAC,MAAM,KAAK,QAAQ,EAAE,CAAC;4BACvE,MAAM,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,eAAe,EAAE,MAAM,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC;wBAC1D,CAAC;6BAAM,IACN,CAAC,CAAC,IAAI,KAAK,eAAe;4BAC1B,OAAO,CAAC,CAAC,IAAI,KAAK,QAAQ;4BAC1B,OAAO,CAAC,CAAC,WAAW,KAAK,QAAQ;4BACjC,OAAO,CAAC,CAAC,IAAI,KAAK,QAAQ,EACzB,CAAC;4BACF,MAAM,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,eAAe,EAAE,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,WAAW,EAAE,CAAC,CAAC,WAAW,EAAE,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;wBAChG,CAAC;oBACF,CAAC;gBACF,CAAC;YACF,CAAC;YAED,OAAO;gBACN,MAAM;gBACN,KAAK,EAAE,UAAU,CAAC,KAAK;gBACvB,SAAS;aACT,CAAC;QACH,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACd,uCAAuC;YACvC,MAAM,UAAU,GAAU;gBACzB,KAAK,EAAE,CAAC;gBACR,MAAM,EAAE,CAAC;gBACT,SAAS,EAAE,CAAC;gBACZ,UAAU,EAAE,CAAC;gBACb,WAAW,EAAE,CAAC;gBACd,IAAI,EAAE,EAAE,KAAK,EAAE,CAAC,EAAE,MAAM,EAAE,CAAC,EAAE,SAAS,EAAE,CAAC,EAAE,UAAU,EAAE,CAAC,EAAE,KAAK,EAAE,CAAC,EAAE;aACpE,CAAC;YACF,OAAO;gBACN,MAAM,EAAE,EAAE;gBACV,KAAK,EAAE,UAAU;gBACjB,SAAS,EAAE,4BAA4B,MAAM,CAAC,GAAG,CAAC,EAAE;aACpD,CAAC;QACH,CAAC;IAAA,CACD;CACD","sourcesContent":["import type { Usage } from \"@caupulican/pi-ai\";\n\nexport type StopReason = \"stop\" | \"toolUse\" | \"aborted\" | \"error\" | string;\n\nexport interface IsolatedCompletionResult {\n\ttext: string;\n\tusage: 
Usage;\n\tstopReason: StopReason;\n}\n\nexport type ReflectionTrigger = \"complex\" | \"corrective\" | \"session-end\" | \"none\";\n\nexport interface DemandSignals {\n\ttrigger: ReflectionTrigger;\n\ttoolCallCount: number;\n\thadCorrection: boolean;\n\tcontextHeadroomPct: number; // 0..100\n\tusefulLately: number; // 0..1 rolling score\n}\n\nexport interface DemandPlan {\n\tact: \"skip\" | \"reflect\";\n\treason: string;\n\ttokenBudget: number;\n}\n\n/**\n * Pure zero-I/O heuristic to decide whether the current turn justifies a reflection run\n * and determine the token budget under the cheap-tool net-negative doctrine.\n */\nexport function decideDemand(signals: DemandSignals): DemandPlan {\n\tif (signals.trigger === \"none\") {\n\t\treturn { act: \"skip\", reason: \"No trigger detected\", tokenBudget: 0 };\n\t}\n\tif (signals.contextHeadroomPct < 10) {\n\t\treturn { act: \"skip\", reason: \"Context headroom is critically low (< 10%)\", tokenBudget: 0 };\n\t}\n\n\t// Dynamic token budget based on headroom (keep reflection bounded between 500 and 1500 tokens)\n\tconst baseBudget = 1000;\n\tconst tokenBudget = Math.max(500, Math.min(1500, Math.round(baseBudget * (signals.contextHeadroomPct / 100))));\n\n\tif (signals.hadCorrection) {\n\t\treturn { act: \"reflect\", reason: \"Correction detected in the turn\", tokenBudget };\n\t}\n\tif (signals.trigger === \"session-end\") {\n\t\treturn { act: \"reflect\", reason: \"Session end reflection triggered\", tokenBudget };\n\t}\n\tif (signals.trigger === \"complex\") {\n\t\tif (signals.toolCallCount >= 3) {\n\t\t\treturn { act: \"reflect\", reason: `Complex turn with ${signals.toolCallCount} tool calls`, tokenBudget };\n\t\t}\n\t}\n\n\treturn { act: \"skip\", reason: \"Signals do not justify reflection overhead\", tokenBudget: 0 };\n}\n\nexport interface ReflectionInput {\n\trecentTurnText: string; // host serializes the just-finished turn\n\texistingMemory: string; // current MEMORY.md + USER.md snapshot\n\tplan: 
DemandPlan;\n\t// host-injected isolated completion function:\n\tcomplete: (systemPrompt: string, userPrompt: string) => Promise<IsolatedCompletionResult>;\n}\n\nexport type ReflectionWrite =\n\t| { kind: \"memory_add\"; section: \"MEMORY\" | \"USER\"; text: string }\n\t| { kind: \"memory_replace\"; target: string; text: string }\n\t| { kind: \"memory_remove\"; target: string }\n\t// R7 memory-to-behavior: promote a recurring procedural workflow into an executable skill.\n\t| { kind: \"promote_skill\"; name: string; description: string; body: string };\n\nexport interface ReflectionResult {\n\twrites: ReflectionWrite[];\n\tusage: Usage;\n\trationale: string;\n}\n\nexport class ReflectionEngine {\n\t/**\n\t * Build the reflection prompt, call the injected isolated complete(),\n\t * parse the response, confront existing memory, and return memory writes.\n\t * Zero direct I/O.\n\t */\n\tasync reflect(input: ReflectionInput): Promise<ReflectionResult> {\n\t\tconst systemPrompt = `You are a reflection engine. Your job is to analyze the recent conversation turn, compare it against the agent's existing memory, and decide if any memory updates are needed.\n\nExisting Memory snapshot:\n${input.existingMemory}\n\nMemory guidelines:\n- \"MEMORY\" is for project facts, configuration, repeatable workflows, and coding findings.\n- \"USER\" is for user preferences, patterns, and style specifications.\n- Avoid duplicate facts. If the fact is already represented, do not add it.\n- CONFRONT existing memory: if the new turn contradicts or updates an existing fact, use \"memory_replace\" or \"memory_remove\" to supersede the old fact rather than blindly appending.\n- Keep memories short, factual, and direct. No fluff.\n- PROMOTE to behavior: if the turn established a REPEATABLE, multi-step PROCEDURE/workflow (not a one-off fact) that should govern a future class of tasks, emit a \"promote_skill\" instead of (or in addition to) a memory fact. 
Only promote a genuinely reusable procedure — never a single fact, a one-off narrative, or environment-specific noise. Prefer a memory fact when unsure.\n\nYou must output your analysis and writes in the following JSON format inside a \\`\\`\\`json\\`\\`\\` code fence:\n{\n \"rationale\": \"Explanation of your reasoning\",\n \"writes\": [\n { \"kind\": \"memory_add\", \"section\": \"MEMORY\" | \"USER\", \"text\": \"New direct fact to append\" },\n { \"kind\": \"memory_replace\", \"target\": \"Exact text substring to replace\", \"text\": \"New replacement text\" },\n { \"kind\": \"memory_remove\", \"target\": \"Exact text substring to remove\" },\n { \"kind\": \"promote_skill\", \"name\": \"kebab-case-skill-name\", \"description\": \"one line of when to use it\", \"body\": \"Markdown: the step-by-step procedure\" }\n ]\n}\n`;\n\n\t\tconst userPrompt = `Recent turn transcript:\n${input.recentTurnText}\n\nAnalyze this turn and output your memory updates.`;\n\n\t\ttry {\n\t\t\tconst compResult = await input.complete(systemPrompt, userPrompt);\n\t\t\tconst text = compResult.text;\n\n\t\t\tconst jsonMatch = text.match(/```json\\s*([\\s\\S]*?)\\s*```/) || text.match(/{[\\s\\S]*}/);\n\t\t\tif (!jsonMatch) {\n\t\t\t\treturn {\n\t\t\t\t\twrites: [],\n\t\t\t\t\tusage: compResult.usage,\n\t\t\t\t\trationale: `Failed to locate JSON response. 
Raw text:\\n${text}`,\n\t\t\t\t};\n\t\t\t}\n\n\t\t\tconst parsed = JSON.parse(jsonMatch[1] || jsonMatch[0]);\n\t\t\tconst rationale = parsed.rationale || \"\";\n\t\t\tconst writes: ReflectionWrite[] = [];\n\n\t\t\tif (Array.isArray(parsed.writes)) {\n\t\t\t\tfor (const w of parsed.writes) {\n\t\t\t\t\tif (w && typeof w === \"object\") {\n\t\t\t\t\t\tif (\n\t\t\t\t\t\t\tw.kind === \"memory_add\" &&\n\t\t\t\t\t\t\t(w.section === \"MEMORY\" || w.section === \"USER\") &&\n\t\t\t\t\t\t\ttypeof w.text === \"string\"\n\t\t\t\t\t\t) {\n\t\t\t\t\t\t\twrites.push({ kind: \"memory_add\", section: w.section, text: w.text });\n\t\t\t\t\t\t} else if (\n\t\t\t\t\t\t\tw.kind === \"memory_replace\" &&\n\t\t\t\t\t\t\ttypeof w.target === \"string\" &&\n\t\t\t\t\t\t\ttypeof w.text === \"string\"\n\t\t\t\t\t\t) {\n\t\t\t\t\t\t\twrites.push({ kind: \"memory_replace\", target: w.target, text: w.text });\n\t\t\t\t\t\t} else if (w.kind === \"memory_remove\" && typeof w.target === \"string\") {\n\t\t\t\t\t\t\twrites.push({ kind: \"memory_remove\", target: w.target });\n\t\t\t\t\t\t} else if (\n\t\t\t\t\t\t\tw.kind === \"promote_skill\" &&\n\t\t\t\t\t\t\ttypeof w.name === \"string\" &&\n\t\t\t\t\t\t\ttypeof w.description === \"string\" &&\n\t\t\t\t\t\t\ttypeof w.body === \"string\"\n\t\t\t\t\t\t) {\n\t\t\t\t\t\t\twrites.push({ kind: \"promote_skill\", name: w.name, description: w.description, body: w.body });\n\t\t\t\t\t\t}\n\t\t\t\t\t}\n\t\t\t\t}\n\t\t\t}\n\n\t\t\treturn {\n\t\t\t\twrites,\n\t\t\t\tusage: compResult.usage,\n\t\t\t\trationale,\n\t\t\t};\n\t\t} catch (err) {\n\t\t\t// Zeroed/fallback usage representation\n\t\t\tconst emptyUsage: Usage = {\n\t\t\t\tinput: 0,\n\t\t\t\toutput: 0,\n\t\t\t\tcacheRead: 0,\n\t\t\t\tcacheWrite: 0,\n\t\t\t\ttotalTokens: 0,\n\t\t\t\tcost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },\n\t\t\t};\n\t\t\treturn {\n\t\t\t\twrites: [],\n\t\t\t\tusage: emptyUsage,\n\t\t\t\trationale: `Error during reflection: 
${String(err)}`,\n\t\t\t};\n\t\t}\n\t}\n}\n"]}
|
|
1
|
+
{"version":3,"file":"reflection-engine.js","sourceRoot":"","sources":["../../../src/core/learning/reflection-engine.ts"],"names":[],"mappings":"AA0BA;;;GAGG;AACH,MAAM,UAAU,YAAY,CAAC,OAAsB,EAAc;IAChE,IAAI,OAAO,CAAC,OAAO,KAAK,MAAM,EAAE,CAAC;QAChC,OAAO,EAAE,GAAG,EAAE,MAAM,EAAE,MAAM,EAAE,qBAAqB,EAAE,WAAW,EAAE,CAAC,EAAE,CAAC;IACvE,CAAC;IACD,IAAI,OAAO,CAAC,kBAAkB,GAAG,EAAE,EAAE,CAAC;QACrC,OAAO,EAAE,GAAG,EAAE,MAAM,EAAE,MAAM,EAAE,4CAA4C,EAAE,WAAW,EAAE,CAAC,EAAE,CAAC;IAC9F,CAAC;IAED,+FAA+F;IAC/F,MAAM,UAAU,GAAG,IAAI,CAAC;IACxB,MAAM,WAAW,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,EAAE,IAAI,CAAC,GAAG,CAAC,IAAI,EAAE,IAAI,CAAC,KAAK,CAAC,UAAU,GAAG,CAAC,OAAO,CAAC,kBAAkB,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;IAE/G,IAAI,OAAO,CAAC,aAAa,EAAE,CAAC;QAC3B,OAAO,EAAE,GAAG,EAAE,SAAS,EAAE,MAAM,EAAE,iCAAiC,EAAE,WAAW,EAAE,CAAC;IACnF,CAAC;IACD,IAAI,OAAO,CAAC,OAAO,KAAK,aAAa,EAAE,CAAC;QACvC,OAAO,EAAE,GAAG,EAAE,SAAS,EAAE,MAAM,EAAE,kCAAkC,EAAE,WAAW,EAAE,CAAC;IACpF,CAAC;IACD,IAAI,OAAO,CAAC,OAAO,KAAK,SAAS,EAAE,CAAC;QACnC,IAAI,OAAO,CAAC,aAAa,IAAI,CAAC,EAAE,CAAC;YAChC,OAAO,EAAE,GAAG,EAAE,SAAS,EAAE,MAAM,EAAE,qBAAqB,OAAO,CAAC,aAAa,aAAa,EAAE,WAAW,EAAE,CAAC;QACzG,CAAC;IACF,CAAC;IAED,OAAO,EAAE,GAAG,EAAE,MAAM,EAAE,MAAM,EAAE,4CAA4C,EAAE,WAAW,EAAE,CAAC,EAAE,CAAC;AAAA,CAC7F;AAuBD;;;;;GAKG;AACH,MAAM,CAAC,MAAM,wBAAwB,GAAG;;;;;;;;;;;;;;;;;;;;;CAqBvC,CAAC;AAEF,MAAM,OAAO,gBAAgB;IAC5B;;;;OAIG;IACH,KAAK,CAAC,OAAO,CAAC,KAAsB,EAA6B;QAChE,MAAM,YAAY,GAAG,wBAAwB,CAAC;QAE9C,6FAA6F;QAC7F,MAAM,UAAU,GAAG;EACnB,KAAK,CAAC,cAAc;;;EAGpB,KAAK,CAAC,cAAc;;8EAEwD,CAAC;QAE7E,IAAI,CAAC;YACJ,MAAM,UAAU,GAAG,MAAM,KAAK,CAAC,QAAQ,CAAC,YAAY,EAAE,UAAU,CAAC,CAAC;YAClE,MAAM,IAAI,GAAG,UAAU,CAAC,IAAI,CAAC;YAE7B,MAAM,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,4BAA4B,CAAC,IAAI,IAAI,CAAC,KAAK,CAAC,WAAW,CAAC,CAAC;YACtF,IAAI,CAAC,SAAS,EAAE,CAAC;gBAChB,OAAO;oBACN,MAAM,EAAE,EAAE;oBACV,KAAK,EAAE,UAAU,CAAC,KAAK;oBACvB,SAAS,EAAE,8CAA8C,IAAI,EAAE;iBAC/D,CAAC;YACH,CAAC;YAED,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC,CAAC,IAAI,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC;YACxD,MAAM,SAAS,GAAG,MAAM,
CAAC,SAAS,IAAI,EAAE,CAAC;YACzC,MAAM,MAAM,GAAsB,EAAE,CAAC;YAErC,IAAI,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC,MAAM,CAAC,EAAE,CAAC;gBAClC,KAAK,MAAM,CAAC,IAAI,MAAM,CAAC,MAAM,EAAE,CAAC;oBAC/B,IAAI,CAAC,IAAI,OAAO,CAAC,KAAK,QAAQ,EAAE,CAAC;wBAChC,IACC,CAAC,CAAC,IAAI,KAAK,YAAY;4BACvB,CAAC,CAAC,CAAC,OAAO,KAAK,QAAQ,IAAI,CAAC,CAAC,OAAO,KAAK,MAAM,CAAC;4BAChD,OAAO,CAAC,CAAC,IAAI,KAAK,QAAQ,EACzB,CAAC;4BACF,MAAM,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,YAAY,EAAE,OAAO,EAAE,CAAC,CAAC,OAAO,EAAE,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;wBACvE,CAAC;6BAAM,IACN,CAAC,CAAC,IAAI,KAAK,gBAAgB;4BAC3B,OAAO,CAAC,CAAC,MAAM,KAAK,QAAQ;4BAC5B,OAAO,CAAC,CAAC,IAAI,KAAK,QAAQ,EACzB,CAAC;4BACF,MAAM,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,gBAAgB,EAAE,MAAM,EAAE,CAAC,CAAC,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;wBACzE,CAAC;6BAAM,IAAI,CAAC,CAAC,IAAI,KAAK,eAAe,IAAI,OAAO,CAAC,CAAC,MAAM,KAAK,QAAQ,EAAE,CAAC;4BACvE,MAAM,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,eAAe,EAAE,MAAM,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC;wBAC1D,CAAC;6BAAM,IACN,CAAC,CAAC,IAAI,KAAK,eAAe;4BAC1B,OAAO,CAAC,CAAC,IAAI,KAAK,QAAQ;4BAC1B,OAAO,CAAC,CAAC,WAAW,KAAK,QAAQ;4BACjC,OAAO,CAAC,CAAC,IAAI,KAAK,QAAQ,EACzB,CAAC;4BACF,MAAM,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,eAAe,EAAE,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,WAAW,EAAE,CAAC,CAAC,WAAW,EAAE,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;wBAChG,CAAC;oBACF,CAAC;gBACF,CAAC;YACF,CAAC;YAED,OAAO;gBACN,MAAM;gBACN,KAAK,EAAE,UAAU,CAAC,KAAK;gBACvB,SAAS;aACT,CAAC;QACH,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACd,uCAAuC;YACvC,MAAM,UAAU,GAAU;gBACzB,KAAK,EAAE,CAAC;gBACR,MAAM,EAAE,CAAC;gBACT,SAAS,EAAE,CAAC;gBACZ,UAAU,EAAE,CAAC;gBACb,WAAW,EAAE,CAAC;gBACd,IAAI,EAAE,EAAE,KAAK,EAAE,CAAC,EAAE,MAAM,EAAE,CAAC,EAAE,SAAS,EAAE,CAAC,EAAE,UAAU,EAAE,CAAC,EAAE,KAAK,EAAE,CAAC,EAAE;aACpE,CAAC;YACF,OAAO;gBACN,MAAM,EAAE,EAAE;gBACV,KAAK,EAAE,UAAU;gBACjB,SAAS,EAAE,4BAA4B,MAAM,CAAC,GAAG,CAAC,EAAE;aACpD,CAAC;QACH,CAAC;IAAA,CACD;CACD","sourcesContent":["import type { Usage } from \"@caupulican/pi-ai\";\n\nexport type StopReason = \"stop\" | \"toolUse\" | \"aborted\" | \"error\" | string;\n\nexport 
interface IsolatedCompletionResult {\n\ttext: string;\n\tusage: Usage;\n\tstopReason: StopReason;\n}\n\nexport type ReflectionTrigger = \"complex\" | \"corrective\" | \"session-end\" | \"none\";\n\nexport interface DemandSignals {\n\ttrigger: ReflectionTrigger;\n\ttoolCallCount: number;\n\thadCorrection: boolean;\n\tcontextHeadroomPct: number; // 0..100\n\tusefulLately: number; // 0..1 rolling score\n}\n\nexport interface DemandPlan {\n\tact: \"skip\" | \"reflect\";\n\treason: string;\n\ttokenBudget: number;\n}\n\n/**\n * Pure zero-I/O heuristic to decide whether the current turn justifies a reflection run\n * and determine the token budget under the cheap-tool net-negative doctrine.\n */\nexport function decideDemand(signals: DemandSignals): DemandPlan {\n\tif (signals.trigger === \"none\") {\n\t\treturn { act: \"skip\", reason: \"No trigger detected\", tokenBudget: 0 };\n\t}\n\tif (signals.contextHeadroomPct < 10) {\n\t\treturn { act: \"skip\", reason: \"Context headroom is critically low (< 10%)\", tokenBudget: 0 };\n\t}\n\n\t// Dynamic token budget based on headroom (keep reflection bounded between 500 and 1500 tokens)\n\tconst baseBudget = 1000;\n\tconst tokenBudget = Math.max(500, Math.min(1500, Math.round(baseBudget * (signals.contextHeadroomPct / 100))));\n\n\tif (signals.hadCorrection) {\n\t\treturn { act: \"reflect\", reason: \"Correction detected in the turn\", tokenBudget };\n\t}\n\tif (signals.trigger === \"session-end\") {\n\t\treturn { act: \"reflect\", reason: \"Session end reflection triggered\", tokenBudget };\n\t}\n\tif (signals.trigger === \"complex\") {\n\t\tif (signals.toolCallCount >= 3) {\n\t\t\treturn { act: \"reflect\", reason: `Complex turn with ${signals.toolCallCount} tool calls`, tokenBudget };\n\t\t}\n\t}\n\n\treturn { act: \"skip\", reason: \"Signals do not justify reflection overhead\", tokenBudget: 0 };\n}\n\nexport interface ReflectionInput {\n\trecentTurnText: string; // host serializes the just-finished turn\n\texistingMemory: 
string; // current MEMORY.md + USER.md snapshot\n\tplan: DemandPlan;\n\t// host-injected isolated completion function:\n\tcomplete: (systemPrompt: string, userPrompt: string) => Promise<IsolatedCompletionResult>;\n}\n\nexport type ReflectionWrite =\n\t| { kind: \"memory_add\"; section: \"MEMORY\" | \"USER\"; text: string }\n\t| { kind: \"memory_replace\"; target: string; text: string }\n\t| { kind: \"memory_remove\"; target: string }\n\t// R7 memory-to-behavior: promote a recurring procedural workflow into an executable skill.\n\t| { kind: \"promote_skill\"; name: string; description: string; body: string };\n\nexport interface ReflectionResult {\n\twrites: ReflectionWrite[];\n\tusage: Usage;\n\trationale: string;\n}\n\n/**\n * STATIC reflection system prompt (Hermes-parity #33). It is byte-identical across every reflection\n * pass — the variable parts (existing memory snapshot + the turn transcript) live in the USER prompt —\n * so the provider prompt-cache reuses this prefix instead of re-billing it each pass (cost guard).\n * Do NOT interpolate per-call data into this constant or caching breaks.\n */\nexport const REFLECTION_SYSTEM_PROMPT = `You are a reflection engine. Your job is to analyze the recent conversation turn, compare it against the agent's existing memory, and decide if any memory updates are needed.\n\nMemory guidelines:\n- \"MEMORY\" is for project facts, configuration, repeatable workflows, and coding findings.\n- \"USER\" is for user preferences, patterns, and style specifications.\n- Avoid duplicate facts. If the fact is already represented, do not add it.\n- CONFRONT existing memory: if the new turn contradicts or updates an existing fact, use \"memory_replace\" or \"memory_remove\" to supersede the old fact rather than blindly appending.\n- Keep memories short, factual, and direct. No fluff.\n- Do NOT capture transient/environment-specific noise: tool/network failures, one-off errors, or a single narrative event. 
Persist only durable facts and preferences.\n- PROMOTE to behavior: if the turn established a REPEATABLE, multi-step PROCEDURE/workflow (not a one-off fact) that should govern a future class of tasks, emit a \"promote_skill\" instead of (or in addition to) a memory fact. Only promote a genuinely reusable procedure — never a single fact, a one-off narrative, or environment-specific noise. Prefer a memory fact when unsure.\n\nYou must output your analysis and writes in the following JSON format inside a \\`\\`\\`json\\`\\`\\` code fence:\n{\n \"rationale\": \"Explanation of your reasoning\",\n \"writes\": [\n { \"kind\": \"memory_add\", \"section\": \"MEMORY\" | \"USER\", \"text\": \"New direct fact to append\" },\n { \"kind\": \"memory_replace\", \"target\": \"Exact text substring to replace\", \"text\": \"New replacement text\" },\n { \"kind\": \"memory_remove\", \"target\": \"Exact text substring to remove\" },\n { \"kind\": \"promote_skill\", \"name\": \"kebab-case-skill-name\", \"description\": \"one line of when to use it\", \"body\": \"Markdown: the step-by-step procedure\" }\n ]\n}\n`;\n\nexport class ReflectionEngine {\n\t/**\n\t * Build the reflection prompt, call the injected isolated complete(),\n\t * parse the response, confront existing memory, and return memory writes.\n\t * Zero direct I/O.\n\t */\n\tasync reflect(input: ReflectionInput): Promise<ReflectionResult> {\n\t\tconst systemPrompt = REFLECTION_SYSTEM_PROMPT;\n\n\t\t// Variable inputs go in the USER prompt so the system prefix above stays cache-stable (#33).\n\t\tconst userPrompt = `Existing Memory snapshot:\n${input.existingMemory}\n\nRecent turn transcript:\n${input.recentTurnText}\n\nAnalyze this turn against the existing memory and output your memory updates.`;\n\n\t\ttry {\n\t\t\tconst compResult = await input.complete(systemPrompt, userPrompt);\n\t\t\tconst text = compResult.text;\n\n\t\t\tconst jsonMatch = text.match(/```json\\s*([\\s\\S]*?)\\s*```/) || 
text.match(/{[\\s\\S]*}/);\n\t\t\tif (!jsonMatch) {\n\t\t\t\treturn {\n\t\t\t\t\twrites: [],\n\t\t\t\t\tusage: compResult.usage,\n\t\t\t\t\trationale: `Failed to locate JSON response. Raw text:\\n${text}`,\n\t\t\t\t};\n\t\t\t}\n\n\t\t\tconst parsed = JSON.parse(jsonMatch[1] || jsonMatch[0]);\n\t\t\tconst rationale = parsed.rationale || \"\";\n\t\t\tconst writes: ReflectionWrite[] = [];\n\n\t\t\tif (Array.isArray(parsed.writes)) {\n\t\t\t\tfor (const w of parsed.writes) {\n\t\t\t\t\tif (w && typeof w === \"object\") {\n\t\t\t\t\t\tif (\n\t\t\t\t\t\t\tw.kind === \"memory_add\" &&\n\t\t\t\t\t\t\t(w.section === \"MEMORY\" || w.section === \"USER\") &&\n\t\t\t\t\t\t\ttypeof w.text === \"string\"\n\t\t\t\t\t\t) {\n\t\t\t\t\t\t\twrites.push({ kind: \"memory_add\", section: w.section, text: w.text });\n\t\t\t\t\t\t} else if (\n\t\t\t\t\t\t\tw.kind === \"memory_replace\" &&\n\t\t\t\t\t\t\ttypeof w.target === \"string\" &&\n\t\t\t\t\t\t\ttypeof w.text === \"string\"\n\t\t\t\t\t\t) {\n\t\t\t\t\t\t\twrites.push({ kind: \"memory_replace\", target: w.target, text: w.text });\n\t\t\t\t\t\t} else if (w.kind === \"memory_remove\" && typeof w.target === \"string\") {\n\t\t\t\t\t\t\twrites.push({ kind: \"memory_remove\", target: w.target });\n\t\t\t\t\t\t} else if (\n\t\t\t\t\t\t\tw.kind === \"promote_skill\" &&\n\t\t\t\t\t\t\ttypeof w.name === \"string\" &&\n\t\t\t\t\t\t\ttypeof w.description === \"string\" &&\n\t\t\t\t\t\t\ttypeof w.body === \"string\"\n\t\t\t\t\t\t) {\n\t\t\t\t\t\t\twrites.push({ kind: \"promote_skill\", name: w.name, description: w.description, body: w.body });\n\t\t\t\t\t\t}\n\t\t\t\t\t}\n\t\t\t\t}\n\t\t\t}\n\n\t\t\treturn {\n\t\t\t\twrites,\n\t\t\t\tusage: compResult.usage,\n\t\t\t\trationale,\n\t\t\t};\n\t\t} catch (err) {\n\t\t\t// Zeroed/fallback usage representation\n\t\t\tconst emptyUsage: Usage = {\n\t\t\t\tinput: 0,\n\t\t\t\toutput: 0,\n\t\t\t\tcacheRead: 0,\n\t\t\t\tcacheWrite: 0,\n\t\t\t\ttotalTokens: 0,\n\t\t\t\tcost: { input: 0, output: 0, cacheRead: 
0, cacheWrite: 0, total: 0 },\n\t\t\t};\n\t\t\treturn {\n\t\t\t\twrites: [],\n\t\t\t\tusage: emptyUsage,\n\t\t\t\trationale: `Error during reflection: ${String(err)}`,\n\t\t\t};\n\t\t}\n\t}\n}\n"]}
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Skill curator (Hermes-parity #32). Reflection (R7) promotes recurring procedures into SKILL.md files;
|
|
3
|
+
* without curation they accumulate forever, bloating tool/context and raising per-turn cost. The curator
|
|
4
|
+
* tracks usage of PROMOTED skills (frontmatter `promoted: true`) and PROPOSES — never auto-applies —
|
|
5
|
+
* archiving stale ones and consolidating overlapping ones. Hand-authored user skills are never touched.
|
|
6
|
+
*
|
|
7
|
+
* Design (locked with agy): propose-only, session-start + idle triggers (not per-turn), restorable
|
|
8
|
+
* archive (non-destructive), and consolidation is a flagged suggestion (never an auto-merge).
|
|
9
|
+
*/
|
|
10
|
+
/** Per-promoted-skill signal the proposal logic reasons over. Pure data — no I/O. */
|
|
11
|
+
export interface PromotedSkillInfo {
|
|
12
|
+
name: string;
|
|
13
|
+
/** When the skill file was created (ms epoch); guards a freshly-promoted skill from instant archival. */
|
|
14
|
+
createdMs: number;
|
|
15
|
+
/** Last time the skill was loaded/used (ms epoch); 0 if never used. */
|
|
16
|
+
lastUsedMs: number;
|
|
17
|
+
useCount: number;
|
|
18
|
+
/** Tokens from name+description+body, for overlap detection. */
|
|
19
|
+
keywords: string[];
|
|
20
|
+
}
|
|
21
|
+
export interface CuratorOptions {
|
|
22
|
+
/** A promoted skill unused and older than this many days is proposed for archival. Default 30. */
|
|
23
|
+
staleDays: number;
|
|
24
|
+
/** Token-Jaccard ≥ this between two promoted skills flags them for consolidation. Default 0.5. */
|
|
25
|
+
overlapThreshold: number;
|
|
26
|
+
/** Current time (ms epoch); injected so the proposal logic stays pure/testable. */
|
|
27
|
+
now: number;
|
|
28
|
+
}
|
|
29
|
+
export declare const DEFAULT_CURATOR_OPTIONS: Omit<CuratorOptions, "now">;
|
|
30
|
+
export interface CurationProposals {
|
|
31
|
+
/** Promoted skills proposed for (restorable) archival, with a human reason. */
|
|
32
|
+
archive: Array<{
|
|
33
|
+
name: string;
|
|
34
|
+
reason: string;
|
|
35
|
+
}>;
|
|
36
|
+
/** Pairs of promoted skills that overlap enough to consider merging (flag only, never auto-merge). */
|
|
37
|
+
consolidate: Array<{
|
|
38
|
+
names: [string, string];
|
|
39
|
+
overlap: number;
|
|
40
|
+
}>;
|
|
41
|
+
}
|
|
42
|
+
/**
|
|
43
|
+
* Pure proposal logic: decide which promoted skills to PROPOSE archiving (stale + unused) and which pairs
|
|
44
|
+
* overlap enough to PROPOSE consolidating. Returns suggestions only; the caller applies them on approval.
|
|
45
|
+
*/
|
|
46
|
+
export declare function computeCurationProposals(skills: PromotedSkillInfo[], opts: CuratorOptions): CurationProposals;
|
|
47
|
+
/**
|
|
48
|
+
* Filesystem layer over {@link computeCurationProposals}: reads promoted SKILL.md files + the usage
|
|
49
|
+
* sidecar, and archives/restores skills non-destructively. The current time is injected so callers (and
|
|
50
|
+
* tests) control "now".
|
|
51
|
+
*/
|
|
52
|
+
export declare class SkillCurator {
|
|
53
|
+
private readonly skillsDir;
|
|
54
|
+
private readonly archiveDir;
|
|
55
|
+
private readonly usageFile;
|
|
56
|
+
constructor(skillsDir: string);
|
|
57
|
+
/** Record that a promoted skill was loaded/used (bumps count + last-used). Best-effort. */
|
|
58
|
+
recordUse(name: string, now: number): void;
|
|
59
|
+
/** Build the proposals from the current promoted-skill corpus. */
|
|
60
|
+
proposeCuration(now: number, options?: Partial<Omit<CuratorOptions, "now">>): CurationProposals;
|
|
61
|
+
/** Move a promoted skill into `.archive/` (restorable). Returns true if archived. */
|
|
62
|
+
archiveSkill(name: string): boolean;
|
|
63
|
+
/** Restore an archived skill back into the active skills dir. Returns true if restored. */
|
|
64
|
+
restoreSkill(name: string): boolean;
|
|
65
|
+
loadPromotedSkills(): PromotedSkillInfo[];
|
|
66
|
+
private isPromoted;
|
|
67
|
+
private loadUsage;
|
|
68
|
+
}
|
|
69
|
+
/** True if a SKILL.md's YAML frontmatter declares `promoted: true` (reflection-generated). */
|
|
70
|
+
export declare function isPromotedFrontmatter(content: string): boolean;
|
|
71
|
+
//# sourceMappingURL=skill-curator.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"skill-curator.d.ts","sourceRoot":"","sources":["../../../src/core/learning/skill-curator.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAMH,uFAAqF;AACrF,MAAM,WAAW,iBAAiB;IACjC,IAAI,EAAE,MAAM,CAAC;IACb,yGAAyG;IACzG,SAAS,EAAE,MAAM,CAAC;IAClB,uEAAuE;IACvE,UAAU,EAAE,MAAM,CAAC;IACnB,QAAQ,EAAE,MAAM,CAAC;IACjB,gEAAgE;IAChE,QAAQ,EAAE,MAAM,EAAE,CAAC;CACnB;AAED,MAAM,WAAW,cAAc;IAC9B,kGAAkG;IAClG,SAAS,EAAE,MAAM,CAAC;IAClB,oGAAkG;IAClG,gBAAgB,EAAE,MAAM,CAAC;IACzB,mFAAmF;IACnF,GAAG,EAAE,MAAM,CAAC;CACZ;AAED,eAAO,MAAM,uBAAuB,EAAE,IAAI,CAAC,cAAc,EAAE,KAAK,CAG/D,CAAC;AAEF,MAAM,WAAW,iBAAiB;IACjC,+EAA+E;IAC/E,OAAO,EAAE,KAAK,CAAC;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,MAAM,EAAE,MAAM,CAAA;KAAE,CAAC,CAAC;IACjD,sGAAsG;IACtG,WAAW,EAAE,KAAK,CAAC;QAAE,KAAK,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;QAAC,OAAO,EAAE,MAAM,CAAA;KAAE,CAAC,CAAC;CACjE;AAED;;;GAGG;AACH,wBAAgB,wBAAwB,CAAC,MAAM,EAAE,iBAAiB,EAAE,EAAE,IAAI,EAAE,cAAc,GAAG,iBAAiB,CAgC7G;AAWD;;;;GAIG;AACH,qBAAa,YAAY;IACxB,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAS;IACnC,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAS;IACpC,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAS;IAEnC,YAAY,SAAS,EAAE,MAAM,EAI5B;IAED,2FAA2F;IAC3F,SAAS,CAAC,IAAI,EAAE,MAAM,EAAE,GAAG,EAAE,MAAM,GAAG,IAAI,CASzC;IAED,kEAAkE;IAClE,eAAe,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,GAAE,OAAO,CAAC,IAAI,CAAC,cAAc,EAAE,KAAK,CAAC,CAAM,GAAG,iBAAiB,CAOlG;IAED,qFAAqF;IACrF,YAAY,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAUlC;IAED,2FAA2F;IAC3F,YAAY,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAUlC;IAED,kBAAkB,IAAI,iBAAiB,EAAE,CA+BxC;IAED,OAAO,CAAC,UAAU;IAQlB,OAAO,CAAC,SAAS;CAOjB;AAED,8FAA8F;AAC9F,wBAAgB,qBAAqB,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAI9D","sourcesContent":["/**\n * Skill curator (Hermes-parity #32). Reflection (R7) promotes recurring procedures into SKILL.md files;\n * without curation they accumulate forever, bloating tool/context and raising per-turn cost. The curator\n * tracks usage of PROMOTED skills (frontmatter `promoted: true`) and PROPOSES — never auto-applies —\n * archiving stale ones and consolidating overlapping ones. 
Hand-authored user skills are never touched.\n *\n * Design (locked with agy): propose-only, session-start + idle triggers (not per-turn), restorable\n * archive (non-destructive), and consolidation is a flagged suggestion (never an auto-merge).\n */\n\nimport { existsSync, mkdirSync, readdirSync, readFileSync, renameSync, statSync, writeFileSync } from \"node:fs\";\nimport { join } from \"node:path\";\nimport { jaccard, tokenize } from \"../tools/skill-audit.ts\";\n\n/** Per-promoted-skill signal the proposal logic reasons over. Pure data — no I/O. */\nexport interface PromotedSkillInfo {\n\tname: string;\n\t/** When the skill file was created (ms epoch); guards a freshly-promoted skill from instant archival. */\n\tcreatedMs: number;\n\t/** Last time the skill was loaded/used (ms epoch); 0 if never used. */\n\tlastUsedMs: number;\n\tuseCount: number;\n\t/** Tokens from name+description+body, for overlap detection. */\n\tkeywords: string[];\n}\n\nexport interface CuratorOptions {\n\t/** A promoted skill unused and older than this many days is proposed for archival. Default 30. */\n\tstaleDays: number;\n\t/** Token-Jaccard ≥ this between two promoted skills flags them for consolidation. Default 0.5. */\n\toverlapThreshold: number;\n\t/** Current time (ms epoch); injected so the proposal logic stays pure/testable. */\n\tnow: number;\n}\n\nexport const DEFAULT_CURATOR_OPTIONS: Omit<CuratorOptions, \"now\"> = {\n\tstaleDays: 30,\n\toverlapThreshold: 0.5,\n};\n\nexport interface CurationProposals {\n\t/** Promoted skills proposed for (restorable) archival, with a human reason. */\n\tarchive: Array<{ name: string; reason: string }>;\n\t/** Pairs of promoted skills that overlap enough to consider merging (flag only, never auto-merge). */\n\tconsolidate: Array<{ names: [string, string]; overlap: number }>;\n}\n\n/**\n * Pure proposal logic: decide which promoted skills to PROPOSE archiving (stale + unused) and which pairs\n * overlap enough to PROPOSE consolidating. 
Returns suggestions only; the caller applies them on approval.\n */\nexport function computeCurationProposals(skills: PromotedSkillInfo[], opts: CuratorOptions): CurationProposals {\n\tconst staleMs = opts.staleDays * 86_400_000;\n\tconst archive: CurationProposals[\"archive\"] = [];\n\tfor (const s of skills) {\n\t\t// \"Stale\" = never recently used AND not freshly promoted: measure age from the most recent of\n\t\t// last-use / creation so a brand-new skill isn't archived before it has had a chance to be used.\n\t\tconst lastSeen = Math.max(s.lastUsedMs, s.createdMs);\n\t\tconst ageMs = opts.now - lastSeen;\n\t\tif (ageMs > staleMs) {\n\t\t\tconst days = Math.floor(ageMs / 86_400_000);\n\t\t\tarchive.push({\n\t\t\t\tname: s.name,\n\t\t\t\treason: s.useCount === 0 ? `never used, ${days}d old` : `unused for ${days}d (${s.useCount} total uses)`,\n\t\t\t});\n\t\t}\n\t}\n\n\tconst consolidate: CurationProposals[\"consolidate\"] = [];\n\tconst archiving = new Set(archive.map((a) => a.name));\n\tfor (let i = 0; i < skills.length; i++) {\n\t\tfor (let j = i + 1; j < skills.length; j++) {\n\t\t\tconst a = skills[i];\n\t\t\tconst b = skills[j];\n\t\t\t// Don't propose consolidating something already proposed for archival.\n\t\t\tif (archiving.has(a.name) || archiving.has(b.name)) continue;\n\t\t\tconst overlap = jaccard(a.keywords, b.keywords);\n\t\t\tif (overlap >= opts.overlapThreshold) {\n\t\t\t\tconsolidate.push({ names: [a.name, b.name], overlap });\n\t\t\t}\n\t\t}\n\t}\n\treturn { archive, consolidate };\n}\n\ninterface UsageRecord {\n\tlastUsedMs: number;\n\tuseCount: number;\n}\ntype UsageMap = Record<string, UsageRecord>;\n\n/** Cap on how much of a skill body feeds keyword extraction (keeps overlap detection cheap). */\nconst KEYWORD_SOURCE_CAP = 4000;\n\n/**\n * Filesystem layer over {@link computeCurationProposals}: reads promoted SKILL.md files + the usage\n * sidecar, and archives/restores skills non-destructively. 
The current time is injected so callers (and\n * tests) control \"now\".\n */\nexport class SkillCurator {\n\tprivate readonly skillsDir: string;\n\tprivate readonly archiveDir: string;\n\tprivate readonly usageFile: string;\n\n\tconstructor(skillsDir: string) {\n\t\tthis.skillsDir = skillsDir;\n\t\tthis.archiveDir = join(skillsDir, \".archive\");\n\t\tthis.usageFile = join(skillsDir, \".usage.json\");\n\t}\n\n\t/** Record that a promoted skill was loaded/used (bumps count + last-used). Best-effort. */\n\trecordUse(name: string, now: number): void {\n\t\ttry {\n\t\t\tconst usage = this.loadUsage();\n\t\t\tconst prev = usage[name] ?? { lastUsedMs: 0, useCount: 0 };\n\t\t\tusage[name] = { lastUsedMs: now, useCount: prev.useCount + 1 };\n\t\t\twriteFileSync(this.usageFile, JSON.stringify(usage, null, 2), \"utf-8\");\n\t\t} catch {\n\t\t\t// usage tracking must never disrupt a turn\n\t\t}\n\t}\n\n\t/** Build the proposals from the current promoted-skill corpus. */\n\tproposeCuration(now: number, options: Partial<Omit<CuratorOptions, \"now\">> = {}): CurationProposals {\n\t\tconst skills = this.loadPromotedSkills();\n\t\treturn computeCurationProposals(skills, {\n\t\t\tnow,\n\t\t\tstaleDays: options.staleDays ?? DEFAULT_CURATOR_OPTIONS.staleDays,\n\t\t\toverlapThreshold: options.overlapThreshold ?? DEFAULT_CURATOR_OPTIONS.overlapThreshold,\n\t\t});\n\t}\n\n\t/** Move a promoted skill into `.archive/` (restorable). Returns true if archived. */\n\tarchiveSkill(name: string): boolean {\n\t\ttry {\n\t\t\tconst from = join(this.skillsDir, name);\n\t\t\tif (!existsSync(join(from, \"SKILL.md\")) || !this.isPromoted(name)) return false;\n\t\t\tmkdirSync(this.archiveDir, { recursive: true });\n\t\t\trenameSync(from, join(this.archiveDir, name));\n\t\t\treturn true;\n\t\t} catch {\n\t\t\treturn false;\n\t\t}\n\t}\n\n\t/** Restore an archived skill back into the active skills dir. Returns true if restored. 
*/\n\trestoreSkill(name: string): boolean {\n\t\ttry {\n\t\t\tconst from = join(this.archiveDir, name);\n\t\t\tconst to = join(this.skillsDir, name);\n\t\t\tif (!existsSync(join(from, \"SKILL.md\")) || existsSync(to)) return false;\n\t\t\trenameSync(from, to);\n\t\t\treturn true;\n\t\t} catch {\n\t\t\treturn false;\n\t\t}\n\t}\n\n\tloadPromotedSkills(): PromotedSkillInfo[] {\n\t\tconst out: PromotedSkillInfo[] = [];\n\t\tlet entries: string[];\n\t\ttry {\n\t\t\tentries = readdirSync(this.skillsDir);\n\t\t} catch {\n\t\t\treturn out;\n\t\t}\n\t\tconst usage = this.loadUsage();\n\t\tfor (const name of entries) {\n\t\t\tif (name.startsWith(\".\")) continue; // skip .archive, .usage.json\n\t\t\tconst file = join(this.skillsDir, name, \"SKILL.md\");\n\t\t\tlet raw: string;\n\t\t\tlet createdMs = 0;\n\t\t\ttry {\n\t\t\t\traw = readFileSync(file, \"utf-8\");\n\t\t\t\tcreatedMs = statSync(file).birthtimeMs || statSync(file).mtimeMs;\n\t\t\t} catch {\n\t\t\t\tcontinue;\n\t\t\t}\n\t\t\tif (!isPromotedFrontmatter(raw)) continue;\n\t\t\tconst u = usage[name] ?? { lastUsedMs: 0, useCount: 0 };\n\t\t\tout.push({\n\t\t\t\tname,\n\t\t\t\tcreatedMs,\n\t\t\t\tlastUsedMs: u.lastUsedMs,\n\t\t\t\tuseCount: u.useCount,\n\t\t\t\tkeywords: tokenize(raw.slice(0, KEYWORD_SOURCE_CAP)),\n\t\t\t});\n\t\t}\n\t\treturn out;\n\t}\n\n\tprivate isPromoted(name: string): boolean {\n\t\ttry {\n\t\t\treturn isPromotedFrontmatter(readFileSync(join(this.skillsDir, name, \"SKILL.md\"), \"utf-8\"));\n\t\t} catch {\n\t\t\treturn false;\n\t\t}\n\t}\n\n\tprivate loadUsage(): UsageMap {\n\t\ttry {\n\t\t\treturn JSON.parse(readFileSync(this.usageFile, \"utf-8\")) as UsageMap;\n\t\t} catch {\n\t\t\treturn {};\n\t\t}\n\t}\n}\n\n/** True if a SKILL.md's YAML frontmatter declares `promoted: true` (reflection-generated). 
*/\nexport function isPromotedFrontmatter(content: string): boolean {\n\tconst fm = content.match(/^---\\n([\\s\\S]*?)\\n---/);\n\tif (!fm) return false;\n\treturn /^\\s*promoted\\s*:\\s*true\\s*$/im.test(fm[1]);\n}\n"]}
|
|
@@ -0,0 +1,179 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Skill curator (Hermes-parity #32). Reflection (R7) promotes recurring procedures into SKILL.md files;
|
|
3
|
+
* without curation they accumulate forever, bloating tool/context and raising per-turn cost. The curator
|
|
4
|
+
* tracks usage of PROMOTED skills (frontmatter `promoted: true`) and PROPOSES — never auto-applies —
|
|
5
|
+
* archiving stale ones and consolidating overlapping ones. Hand-authored user skills are never touched.
|
|
6
|
+
*
|
|
7
|
+
* Design (locked with agy): propose-only, session-start + idle triggers (not per-turn), restorable
|
|
8
|
+
* archive (non-destructive), and consolidation is a flagged suggestion (never an auto-merge).
|
|
9
|
+
*/
|
|
10
|
+
import { existsSync, mkdirSync, readdirSync, readFileSync, renameSync, statSync, writeFileSync } from "node:fs";
|
|
11
|
+
import { join } from "node:path";
|
|
12
|
+
import { jaccard, tokenize } from "../tools/skill-audit.js";
|
|
13
|
+
/**
 * Fallback thresholds used by `SkillCurator.proposeCuration` for any option the caller leaves out.
 * - `staleDays`: idle window in days (multiplied by 86_400_000 ms in `computeCurationProposals`).
 * - `overlapThreshold`: minimum `jaccard` score (compared with `>=`) that flags a pair for consolidation.
 * There is no `now` entry here; the current time is always supplied by the caller.
 */
export const DEFAULT_CURATOR_OPTIONS = { staleDays: 30, overlapThreshold: 0.5 };
|
|
17
|
+
/**
 * Pure proposal logic (no I/O): works out which promoted skills to suggest archiving and which pairs to
 * suggest consolidating. Nothing is applied here; the result is a list of suggestions for the caller.
 *
 * @param skills Per-skill records with `name`, `createdMs`, `lastUsedMs`, `useCount` and `keywords`.
 * @param opts   `staleDays`, `overlapThreshold` and the current time `now` (ms epoch).
 * @returns `{ archive, consolidate }`: `archive` entries are `{ name, reason }`; `consolidate` entries are
 *          `{ names: [a, b], overlap }`, in the order the pairs appear in `skills`.
 */
export function computeCurationProposals(skills, opts) {
    const MS_PER_DAY = 86_400_000;
    const staleAfterMs = opts.staleDays * MS_PER_DAY;

    // Idle time is measured from whichever is later, last use or creation, so a skill that was only
    // just created is not flagged before it has had a chance to be used.
    const archive = skills.flatMap((skill) => {
        const idleMs = opts.now - Math.max(skill.lastUsedMs, skill.createdMs);
        if (!(idleMs > staleAfterMs)) {
            return [];
        }
        const idleDays = Math.floor(idleMs / MS_PER_DAY);
        const reason = skill.useCount === 0
            ? `never used, ${idleDays}d old`
            : `unused for ${idleDays}d (${skill.useCount} total uses)`;
        return [{ name: skill.name, reason }];
    });

    // A skill already proposed for archival is never also proposed for consolidation.
    const archivedNames = new Set(archive.map((entry) => entry.name));
    const consolidate = [];
    for (const [index, first] of skills.entries()) {
        if (archivedNames.has(first.name)) {
            continue;
        }
        for (let k = index + 1; k < skills.length; k++) {
            const second = skills[k];
            if (archivedNames.has(second.name)) {
                continue;
            }
            const overlap = jaccard(first.keywords, second.keywords);
            if (overlap >= opts.overlapThreshold) {
                consolidate.push({ names: [first.name, second.name], overlap });
            }
        }
    }
    return { archive, consolidate };
}
|
|
54
|
+
/**
 * Upper bound on how many leading characters of a SKILL.md file are handed to `tokenize` when building
 * a skill's keyword list (the file content is cut with `raw.slice(0, KEYWORD_SOURCE_CAP)`).
 */
const KEYWORD_SOURCE_CAP = 4_000;
|
|
56
|
+
/**
 * Filesystem layer over {@link computeCurationProposals}: reads promoted SKILL.md files plus the usage
 * sidecar, and archives/restores skills by renaming their directories (nothing is deleted). The current
 * time is always passed in by the caller, so callers and tests control "now".
 *
 * Paths derived from the constructor argument:
 *   <skillsDir>/<name>/SKILL.md   an active skill
 *   <skillsDir>/.archive/<name>/  an archived skill
 *   <skillsDir>/.usage.json       usage sidecar: skill name -> { lastUsedMs, useCount }
 */
export class SkillCurator {
    skillsDir;
    archiveDir;
    usageFile;
    /**
     * @param skillsDir Directory that holds one sub-directory per skill.
     */
    constructor(skillsDir) {
        this.skillsDir = skillsDir;
        this.archiveDir = join(skillsDir, ".archive");
        this.usageFile = join(skillsDir, ".usage.json");
    }
    /**
     * Record that a skill was loaded/used: sets its last-used time to `now` and bumps its use count,
     * then rewrites the usage sidecar. Best-effort: every failure is swallowed.
     *
     * @param name Skill name (key in the usage sidecar).
     * @param now  Current time in ms since the epoch.
     */
    recordUse(name, now) {
        try {
            const usage = this.loadUsage();
            const prev = usage[name] ?? { lastUsedMs: 0, useCount: 0 };
            usage[name] = { lastUsedMs: now, useCount: prev.useCount + 1 };
            writeFileSync(this.usageFile, JSON.stringify(usage, null, 2), "utf-8");
        }
        catch {
            // usage tracking must never disrupt a turn
        }
    }
    /**
     * Build the proposals from the promoted skills currently on disk.
     *
     * @param now     Current time in ms since the epoch.
     * @param options Optional overrides; anything omitted falls back to DEFAULT_CURATOR_OPTIONS.
     * @returns The `{ archive, consolidate }` suggestions from `computeCurationProposals`.
     */
    proposeCuration(now, options = {}) {
        const skills = this.loadPromotedSkills();
        return computeCurationProposals(skills, {
            now,
            staleDays: options.staleDays ?? DEFAULT_CURATOR_OPTIONS.staleDays,
            overlapThreshold: options.overlapThreshold ?? DEFAULT_CURATOR_OPTIONS.overlapThreshold,
        });
    }
    /**
     * Move a promoted skill's directory into `.archive/` (restorable).
     *
     * @param name Skill directory name; must be a single path segment.
     * @returns true if the directory was moved; false if the name is not a plain segment, the skill has
     *          no SKILL.md, it is not marked promoted, or the move failed.
     */
    archiveSkill(name) {
        // Refuse anything that could resolve outside the skills directory ("..", "a/b", ...).
        if (!this.isPlainSkillName(name))
            return false;
        try {
            const from = join(this.skillsDir, name);
            if (!existsSync(join(from, "SKILL.md")) || !this.isPromoted(name))
                return false;
            mkdirSync(this.archiveDir, { recursive: true });
            renameSync(from, join(this.archiveDir, name));
            return true;
        }
        catch {
            return false;
        }
    }
    /**
     * Move an archived skill's directory back into the active skills directory.
     *
     * @param name Skill directory name; must be a single path segment.
     * @returns true if restored; false if the name is not a plain segment, there is no archived
     *          SKILL.md for it, an active entry with that name already exists, or the move failed.
     */
    restoreSkill(name) {
        if (!this.isPlainSkillName(name))
            return false;
        try {
            const from = join(this.archiveDir, name);
            const to = join(this.skillsDir, name);
            // Never overwrite an active skill that has since taken the same name.
            if (!existsSync(join(from, "SKILL.md")) || existsSync(to))
                return false;
            renameSync(from, to);
            return true;
        }
        catch {
            return false;
        }
    }
    /**
     * Scan the skills directory and return one record per promoted skill.
     * Entries whose name starts with "." are skipped, as are entries without a readable SKILL.md and
     * skills whose frontmatter is not marked promoted.
     *
     * @returns Array of `{ name, createdMs, lastUsedMs, useCount, keywords }`; empty if the directory
     *          cannot be listed.
     */
    loadPromotedSkills() {
        const out = [];
        let entries;
        try {
            entries = readdirSync(this.skillsDir);
        }
        catch {
            return out;
        }
        const usage = this.loadUsage();
        for (const name of entries) {
            if (name.startsWith("."))
                continue; // skip .archive, .usage.json
            const file = join(this.skillsDir, name, "SKILL.md");
            let raw;
            let createdMs = 0;
            try {
                raw = readFileSync(file, "utf-8");
                // Stat once; fall back to mtime when the birth time is reported as 0.
                const stats = statSync(file);
                createdMs = stats.birthtimeMs || stats.mtimeMs;
            }
            catch {
                continue;
            }
            if (!isPromotedFrontmatter(raw))
                continue;
            const u = usage[name] ?? { lastUsedMs: 0, useCount: 0 };
            out.push({
                name,
                createdMs,
                lastUsedMs: u.lastUsedMs,
                useCount: u.useCount,
                keywords: tokenize(raw.slice(0, KEYWORD_SOURCE_CAP)),
            });
        }
        return out;
    }
    /** True if `<skillsDir>/<name>/SKILL.md` is readable and its frontmatter is marked promoted. */
    isPromoted(name) {
        try {
            return isPromotedFrontmatter(readFileSync(join(this.skillsDir, name, "SKILL.md"), "utf-8"));
        }
        catch {
            return false;
        }
    }
    /**
     * Internal guard: true only for a non-empty string that is a single path segment, i.e. not "." or
     * ".." and containing neither "/" nor "\".
     */
    isPlainSkillName(name) {
        return typeof name === "string" && name !== "" && name !== "." && name !== ".." && !/[\\/]/.test(name);
    }
    /**
     * Read and normalise the usage sidecar.
     *
     * The file is external input, so its shape is checked rather than trusted: a missing/unparsable
     * file or a non-object top level yields an empty map, non-object records are dropped, and
     * non-finite or negative numbers are replaced by 0. The map has no prototype, so a skill named
     * e.g. "constructor" or "toString" cannot resolve to an inherited Object member.
     *
     * @returns Map of skill name -> `{ lastUsedMs, useCount }`.
     */
    loadUsage() {
        const usage = Object.create(null);
        let parsed;
        try {
            parsed = JSON.parse(readFileSync(this.usageFile, "utf-8"));
        }
        catch {
            return usage;
        }
        if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed))
            return usage;
        for (const [name, record] of Object.entries(parsed)) {
            if (record === null || typeof record !== "object")
                continue;
            const { lastUsedMs, useCount } = record;
            usage[name] = {
                lastUsedMs: Number.isFinite(lastUsedMs) && lastUsedMs > 0 ? lastUsedMs : 0,
                useCount: Number.isFinite(useCount) && useCount > 0 ? useCount : 0,
            };
        }
        return usage;
    }
}
|
|
172
|
+
/**
 * True if a SKILL.md's leading YAML frontmatter declares `promoted: true`.
 *
 * The frontmatter is the text between a `---` line at the very start of the content and the next
 * `---` line. A leading UTF-8 byte-order mark and CRLF line endings are tolerated, so files saved by
 * Windows editors are recognised too. The key and value are matched case-insensitively and must be
 * alone on their line.
 *
 * @param content Full text of a SKILL.md file.
 * @returns true only when the frontmatter block exists and contains a `promoted: true` line.
 */
export function isPromotedFrontmatter(content) {
    // \uFEFF? skips an optional BOM; \r?\n accepts both LF and CRLF around the delimiters.
    const fm = content.match(/^\uFEFF?---\r?\n([\s\S]*?)\r?\n---/);
    if (!fm)
        return false;
    return /^\s*promoted\s*:\s*true\s*$/im.test(fm[1]);
}
|
|
179
|
+
//# sourceMappingURL=skill-curator.js.map
|