@caupulican/pi-adaptative 0.80.88 → 0.80.89
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +29 -0
- package/dist/core/agent-session.d.ts +35 -0
- package/dist/core/agent-session.d.ts.map +1 -1
- package/dist/core/agent-session.js +262 -0
- package/dist/core/agent-session.js.map +1 -1
- package/dist/core/context/brain-curator.d.ts +88 -0
- package/dist/core/context/brain-curator.d.ts.map +1 -0
- package/dist/core/context/brain-curator.js +192 -0
- package/dist/core/context/brain-curator.js.map +1 -0
- package/dist/core/context/context-composition.d.ts +122 -0
- package/dist/core/context/context-composition.d.ts.map +1 -0
- package/dist/core/context/context-composition.js +163 -0
- package/dist/core/context/context-composition.js.map +1 -0
- package/dist/core/context/context-prompt-enforcement.d.ts +13 -0
- package/dist/core/context/context-prompt-enforcement.d.ts.map +1 -1
- package/dist/core/context/context-prompt-enforcement.js +17 -2
- package/dist/core/context/context-prompt-enforcement.js.map +1 -1
- package/dist/core/context-gc.d.ts +13 -0
- package/dist/core/context-gc.d.ts.map +1 -1
- package/dist/core/context-gc.js +6 -0
- package/dist/core/context-gc.js.map +1 -1
- package/dist/core/research/model-fitness.d.ts +3 -0
- package/dist/core/research/model-fitness.d.ts.map +1 -1
- package/dist/core/research/model-fitness.js +54 -3
- package/dist/core/research/model-fitness.js.map +1 -1
- package/dist/core/settings-manager.d.ts +13 -0
- package/dist/core/settings-manager.d.ts.map +1 -1
- package/dist/core/settings-manager.js +19 -0
- package/dist/core/settings-manager.js.map +1 -1
- package/dist/core/slash-commands.d.ts.map +1 -1
- package/dist/core/slash-commands.js +6 -1
- package/dist/core/slash-commands.js.map +1 -1
- package/dist/modes/interactive/components/fitness-role-selector.d.ts +13 -0
- package/dist/modes/interactive/components/fitness-role-selector.d.ts.map +1 -0
- package/dist/modes/interactive/components/fitness-role-selector.js +65 -0
- package/dist/modes/interactive/components/fitness-role-selector.js.map +1 -0
- package/dist/modes/interactive/components/settings-selector.d.ts +4 -1
- package/dist/modes/interactive/components/settings-selector.d.ts.map +1 -1
- package/dist/modes/interactive/components/settings-selector.js +84 -0
- package/dist/modes/interactive/components/settings-selector.js.map +1 -1
- package/dist/modes/interactive/interactive-mode.d.ts +5 -0
- package/dist/modes/interactive/interactive-mode.d.ts.map +1 -1
- package/dist/modes/interactive/interactive-mode.js +91 -0
- package/dist/modes/interactive/interactive-mode.js.map +1 -1
- package/examples/extensions/custom-provider-anthropic/package-lock.json +2 -2
- package/examples/extensions/custom-provider-anthropic/package.json +1 -1
- package/examples/extensions/custom-provider-gitlab-duo/package.json +1 -1
- package/examples/extensions/sandbox/package-lock.json +2 -2
- package/examples/extensions/sandbox/package.json +1 -1
- package/examples/extensions/with-deps/package-lock.json +2 -2
- package/examples/extensions/with-deps/package.json +1 -1
- package/npm-shrinkwrap.json +12 -12
- package/package.json +4 -4
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Brain-assisted context curation (see docs/model-router-rework/brain-context-curation-design.md):
|
|
3
|
+
* a SIDECAR curator that consumes reports the context pipeline already produces and feeds back
|
|
4
|
+
* small, typed advisories. It is never a pipeline stage: every consumer must behave byte-for-byte
|
|
5
|
+
* identically when a result is absent (missing digest -> today's stub; missing relevance ->
|
|
6
|
+
* today's enforcement decision). The curator itself is provider-free — the completion executor is
|
|
7
|
+
* injected per drain, so it works against any registered local model and faux providers in tests.
|
|
8
|
+
*
|
|
9
|
+
* Memory bounds are explicit: the queue and result map are both capped, and drops are counted in
|
|
10
|
+
* telemetry rather than silent. Results are keyed for idempotency (digests by the GC record's
|
|
11
|
+
* content hash, relevance by the audit item id), so re-enqueueing the same work is free.
|
|
12
|
+
*/
|
|
13
|
+
/** System prompt sent with `stub_digest` jobs; asks the model for strict JSON of the form `{"digest": "..."}`. */
export declare const CURATION_DIGEST_SYSTEM_PROMPT: string;
/** System prompt sent with `relevance` jobs; asks the model for strict JSON of the form `{"relevant": boolean, "confidence": 0..1}`. */
export declare const CURATION_RELEVANCE_SYSTEM_PROMPT: string;
|
|
15
|
+
/** One unit of work for the curator: either digest a chunk or judge its relevance to a goal. */
export interface CurationJob {
    /** Which prompt/parser pair the job is run through. */
    kind: "stub_digest" | "relevance";
    /** Idempotency key: digest jobs use the GC record's content hash, relevance jobs the item id. */
    key: string;
    /** Bounded chunk the local model must actually be able to process (sliced on enqueue). */
    content: string;
    /** Relevance jobs only: the goal/intent line the chunk is judged against. */
    goal?: string;
}
|
|
24
|
+
/** Outcome of running one {@link CurationJob}; stored by the curator under the job's key. */
export interface CurationResult {
    /** Key of the job that produced this result. */
    key: string;
    /** Kind of the job that produced this result. */
    kind: CurationJob["kind"];
    /** True only when the completion succeeded AND its reply parsed into the expected shape. */
    ok: boolean;
    /** Set on successful `stub_digest` results: the whitespace-normalized digest text. */
    digest?: string;
    /** Set on successful `relevance` results: the model's verdict. */
    relevant?: boolean;
    /** Set on successful `relevance` results: confidence clamped to the range 0..1. */
    confidence?: number;
    /** Elapsed time for the job, measured with the clock passed to `drain` (defaults to `Date.now`). */
    ms: number;
}
|
|
33
|
+
/** Point-in-time counters returned by `BrainCurator.telemetry()`. */
export interface CurationTelemetrySnapshot {
    /** Jobs that were handed to the completer, whether or not they produced a usable result. */
    jobsRun: number;
    /** Jobs that ended not-ok: this covers failed completions as well as unparseable replies. */
    parseFailures: number;
    /** Queued jobs evicted by `enqueue` because the queue was at capacity. */
    droppedJobs: number;
    /** Chars processed locally (an honest proxy for frontier tokens NOT spent on this work). */
    localChars: number;
    /** Jobs currently waiting in the queue. */
    queued: number;
    /** Results (ok and not-ok) currently retained. */
    resultsHeld: number;
}
|
|
42
|
+
/**
 * Completion executor injected into `BrainCurator.drain`. The curator supplies one of its two
 * system prompts, a user prompt built from the job, and an abort signal; it reads only `text`
 * from the resolved value.
 */
export type CurationComplete = (input: {
    systemPrompt: string;
    userPrompt: string;
    signal?: AbortSignal;
}) => Promise<{
    /** Raw model reply; expected to contain the strict-JSON answer. */
    text: string;
    costUsd: number;
    stopReason: string;
}>;
|
|
51
|
+
/**
 * Exported confidence threshold. It is not referenced inside this module.
 * NOTE(review): presumably consumers compare `getRelevance().confidence` against it — confirm at call sites.
 */
export declare const CURATION_RELEVANCE_MIN_CONFIDENCE = 0.8;
/**
 * Parse a digest reply. Returns the whitespace-normalized `digest` string, or `undefined` when
 * no JSON object is found, `digest` is not a string, or the normalized text is empty or longer
 * than 240 characters.
 */
export declare function parseCurationDigest(text: string): string | undefined;
/**
 * Parse a relevance reply. Returns `undefined` when no JSON object is found or `relevant` is not
 * a boolean. `confidence` is clamped to 0..1 and defaults to 0 when missing or not a finite number.
 */
export declare function parseCurationRelevance(text: string): {
    relevant: boolean;
    confidence: number;
} | undefined;
|
|
57
|
+
/**
 * Bounded job queue plus result cache for brain-assisted context curation. Jobs and results
 * share one key space: a key that is already queued or already has a stored result is ignored
 * by `enqueue`.
 */
export declare class BrainCurator {
    private readonly _queue;
    private readonly _results;
    private _jobsRun;
    private _parseFailures;
    private _droppedJobs;
    private _localChars;
    private _draining;
    /**
     * Queue a job unless its key is already queued or already has a result. When the queue is
     * full the oldest queued job is dropped (and counted). Content is truncated on the way in.
     */
    enqueue(job: CurationJob): void;
    /** Digest stored for `key`, or `undefined` unless a successful `stub_digest` result exists. */
    getDigest(key: string): string | undefined;
    /** Verdict stored for `key`, or `undefined` unless a successful `relevance` result exists. */
    getRelevance(key: string): {
        relevant: boolean;
        confidence: number;
    } | undefined;
    /** True when at least one job is queued. */
    hasWork(): boolean;
    /** True while a `drain` call is in flight. */
    get isDraining(): boolean;
    /** Snapshot of the counters and current queue/result sizes. */
    telemetry(): CurationTelemetrySnapshot;
    /**
     * Run up to `maxJobs` queued jobs through the injected local-model completer. Single-flight:
     * a concurrent drain call returns [] immediately rather than double-running jobs. Every call
     * is wall-clock bounded; a failed/unparseable job is recorded as a not-ok result (so it is
     * not retried forever) and counted in telemetry.
     */
    drain(args: {
        complete: CurationComplete;
        maxJobs: number;
        signal?: AbortSignal;
        now?: () => number;
    }): Promise<CurationResult[]>;
    private _storeResult;
}
|
|
88
|
+
//# sourceMappingURL=brain-curator.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"brain-curator.d.ts","sourceRoot":"","sources":["../../../src/core/context/brain-curator.ts"],"names":[],"mappings":"AAEA;;;;;;;;;;;GAWG;AAEH,eAAO,MAAM,6BAA6B,QAK9B,CAAC;AAEb,eAAO,MAAM,gCAAgC,QAMjC,CAAC;AAEb,MAAM,WAAW,WAAW;IAC3B,IAAI,EAAE,aAAa,GAAG,WAAW,CAAC;IAClC,iGAAiG;IACjG,GAAG,EAAE,MAAM,CAAC;IACZ,0FAA0F;IAC1F,OAAO,EAAE,MAAM,CAAC;IAChB,6EAA6E;IAC7E,IAAI,CAAC,EAAE,MAAM,CAAC;CACd;AAED,MAAM,WAAW,cAAc;IAC9B,GAAG,EAAE,MAAM,CAAC;IACZ,IAAI,EAAE,WAAW,CAAC,MAAM,CAAC,CAAC;IAC1B,EAAE,EAAE,OAAO,CAAC;IACZ,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,QAAQ,CAAC,EAAE,OAAO,CAAC;IACnB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,EAAE,EAAE,MAAM,CAAC;CACX;AAED,MAAM,WAAW,yBAAyB;IACzC,OAAO,EAAE,MAAM,CAAC;IAChB,aAAa,EAAE,MAAM,CAAC;IACtB,WAAW,EAAE,MAAM,CAAC;IACpB,4FAA4F;IAC5F,UAAU,EAAE,MAAM,CAAC;IACnB,MAAM,EAAE,MAAM,CAAC;IACf,WAAW,EAAE,MAAM,CAAC;CACpB;AAED,MAAM,MAAM,gBAAgB,GAAG,CAAC,KAAK,EAAE;IACtC,YAAY,EAAE,MAAM,CAAC;IACrB,UAAU,EAAE,MAAM,CAAC;IACnB,MAAM,CAAC,EAAE,WAAW,CAAC;CACrB,KAAK,OAAO,CAAC;IAAE,IAAI,EAAE,MAAM,CAAC;IAAC,OAAO,EAAE,MAAM,CAAC;IAAC,UAAU,EAAE,MAAM,CAAA;CAAE,CAAC,CAAC;AAOrE,eAAO,MAAM,iCAAiC,MAAM,CAAC;AAqBrD,wBAAgB,mBAAmB,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,GAAG,SAAS,CAQpE;AAED,wBAAgB,sBAAsB,CAAC,IAAI,EAAE,MAAM,GAAG;IAAE,QAAQ,EAAE,OAAO,CAAC;IAAC,UAAU,EAAE,MAAM,CAAA;CAAE,GAAG,SAAS,CAU1G;AAED,qBAAa,YAAY;IACxB,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAkC;IACzD,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAqC;IAC9D,OAAO,CAAC,QAAQ,CAAK;IACrB,OAAO,CAAC,cAAc,CAAK;IAC3B,OAAO,CAAC,YAAY,CAAK;IACzB,OAAO,CAAC,WAAW,CAAK;IACxB,OAAO,CAAC,SAAS,CAAS;IAE1B,OAAO,CAAC,GAAG,EAAE,WAAW,GAAG,IAAI,CAS9B;IAED,SAAS,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM,GAAG,SAAS,CAGzC;IAED,YAAY,CAAC,GAAG,EAAE,MAAM,GAAG;QAAE,QAAQ,EAAE,OAAO,CAAC;QAAC,UAAU,EAAE,MAAM,CAAA;KAAE,GAAG,SAAS,CAI/E;IAED,OAAO,IAAI,OAAO,CAEjB;IAED,IAAI,UAAU,IAAI,OAAO,CAExB;IAED,SAAS,IAAI,yBAAyB,CASrC;IAED;;;;;OAKG;IACG,KAAK,CAAC,IAAI,EAAE;QACjB,QAAQ,EAAE,gBAAgB,CAAC;QAC3B,OAAO,EAAE,MAAM,CAAC;QAChB,MAAM,CAAC,EAAE,WAAW,CAAC;QACrB,GAAG,CAAC,EAAE,MAAM,MAAM,CAAC;KACnB,GAAG,OAAO,CAAC,cAAc,EAAE,CAAC,CAi
D5B;IAED,OAAO,CAAC,YAAY;CAOpB","sourcesContent":["import { runBoundedCompletion } from \"../autonomy/bounded-completion.ts\";\n\n/**\n * Brain-assisted context curation (see docs/model-router-rework/brain-context-curation-design.md):\n * a SIDECAR curator that consumes reports the context pipeline already produces and feeds back\n * small, typed advisories. It is never a pipeline stage: every consumer must behave byte-for-byte\n * identically when a result is absent (missing digest -> today's stub; missing relevance ->\n * today's enforcement decision). The curator itself is provider-free — the completion executor is\n * injected per drain, so it works against any registered local model and faux providers in tests.\n *\n * Memory bounds are explicit: the queue and result map are both capped, and drops are counted in\n * telemetry rather than silent. Results are keyed for idempotency (digests by the GC record's\n * content hash, relevance by the audit item id), so re-enqueueing the same work is free.\n */\n\nexport const CURATION_DIGEST_SYSTEM_PROMPT = [\n\t\"You digest tool-output chunks for a coding agent's context curator. You never solve the task.\",\n\t\"Given a chunk, respond with STRICT JSON only - no prose:\",\n\t'{\"digest\":\"<one or two sentences, max 200 characters, keeping exact identifiers>\"}',\n\t\"Keep exact file paths, symbol names, error codes, and version strings verbatim.\",\n].join(\"\\n\");\n\nexport const CURATION_RELEVANCE_SYSTEM_PROMPT = [\n\t\"You judge whether a stale tool output is still relevant to the user's current goal.\",\n\t\"You never solve the task. 
Respond with STRICT JSON only - no prose:\",\n\t'{\"relevant\":true|false,\"confidence\":<0..1>}',\n\t\"relevant=false means the chunk is about something the current goal no longer needs.\",\n\t\"When uncertain, answer relevant=true with low confidence - keeping content is the safe default.\",\n].join(\"\\n\");\n\nexport interface CurationJob {\n\tkind: \"stub_digest\" | \"relevance\";\n\t/** Idempotency key: digest jobs use the GC record's content hash, relevance jobs the item id. */\n\tkey: string;\n\t/** Bounded chunk the local model must actually be able to process (sliced on enqueue). */\n\tcontent: string;\n\t/** Relevance jobs only: the goal/intent line the chunk is judged against. */\n\tgoal?: string;\n}\n\nexport interface CurationResult {\n\tkey: string;\n\tkind: CurationJob[\"kind\"];\n\tok: boolean;\n\tdigest?: string;\n\trelevant?: boolean;\n\tconfidence?: number;\n\tms: number;\n}\n\nexport interface CurationTelemetrySnapshot {\n\tjobsRun: number;\n\tparseFailures: number;\n\tdroppedJobs: number;\n\t/** Chars processed locally (an honest proxy for frontier tokens NOT spent on this work). 
*/\n\tlocalChars: number;\n\tqueued: number;\n\tresultsHeld: number;\n}\n\nexport type CurationComplete = (input: {\n\tsystemPrompt: string;\n\tuserPrompt: string;\n\tsignal?: AbortSignal;\n}) => Promise<{ text: string; costUsd: number; stopReason: string }>;\n\nconst MAX_QUEUE = 32;\nconst MAX_RESULTS = 200;\nconst MAX_JOB_CONTENT_CHARS = 8_000;\nconst DIGEST_MAX_WALL_CLOCK_MS = 20_000;\nconst RELEVANCE_MAX_WALL_CLOCK_MS = 8_000;\nexport const CURATION_RELEVANCE_MIN_CONFIDENCE = 0.8;\n\nfunction extractJsonObject(text: string): unknown | undefined {\n\tconst trimmed = text.trim();\n\tconst candidates: string[] = [trimmed];\n\tconst fenced = /```(?:json)?\\s*([\\s\\S]*?)```/.exec(trimmed);\n\tif (fenced?.[1]) candidates.push(fenced[1].trim());\n\tconst start = trimmed.indexOf(\"{\");\n\tconst end = trimmed.lastIndexOf(\"}\");\n\tif (start >= 0 && end > start) candidates.push(trimmed.slice(start, end + 1));\n\tfor (const candidate of candidates) {\n\t\ttry {\n\t\t\tconst parsed = JSON.parse(candidate);\n\t\t\tif (parsed && typeof parsed === \"object\" && !Array.isArray(parsed)) return parsed;\n\t\t} catch {\n\t\t\t// try next candidate\n\t\t}\n\t}\n\treturn undefined;\n}\n\nexport function parseCurationDigest(text: string): string | undefined {\n\tconst parsed = extractJsonObject(text);\n\tif (!parsed) return undefined;\n\tconst digest = (parsed as { digest?: unknown }).digest;\n\tif (typeof digest !== \"string\") return undefined;\n\tconst trimmed = digest.trim().replace(/\\s+/g, \" \");\n\tif (trimmed.length === 0 || trimmed.length > 240) return undefined;\n\treturn trimmed;\n}\n\nexport function parseCurationRelevance(text: string): { relevant: boolean; confidence: number } | undefined {\n\tconst parsed = extractJsonObject(text);\n\tif (!parsed) return undefined;\n\tconst record = parsed as { relevant?: unknown; confidence?: unknown };\n\tif (typeof record.relevant !== \"boolean\") return undefined;\n\tconst confidence =\n\t\ttypeof record.confidence === 
\"number\" && Number.isFinite(record.confidence)\n\t\t\t? Math.max(0, Math.min(1, record.confidence))\n\t\t\t: 0;\n\treturn { relevant: record.relevant, confidence };\n}\n\nexport class BrainCurator {\n\tprivate readonly _queue = new Map<string, CurationJob>();\n\tprivate readonly _results = new Map<string, CurationResult>();\n\tprivate _jobsRun = 0;\n\tprivate _parseFailures = 0;\n\tprivate _droppedJobs = 0;\n\tprivate _localChars = 0;\n\tprivate _draining = false;\n\n\tenqueue(job: CurationJob): void {\n\t\tif (this._results.has(job.key) || this._queue.has(job.key)) return;\n\t\tif (this._queue.size >= MAX_QUEUE) {\n\t\t\t// Drop the OLDEST queued job (newer work reflects the current goal better) and count it.\n\t\t\tconst oldest = this._queue.keys().next().value;\n\t\t\tif (oldest !== undefined) this._queue.delete(oldest);\n\t\t\tthis._droppedJobs++;\n\t\t}\n\t\tthis._queue.set(job.key, { ...job, content: job.content.slice(0, MAX_JOB_CONTENT_CHARS) });\n\t}\n\n\tgetDigest(key: string): string | undefined {\n\t\tconst result = this._results.get(key);\n\t\treturn result?.ok && result.kind === \"stub_digest\" ? result.digest : undefined;\n\t}\n\n\tgetRelevance(key: string): { relevant: boolean; confidence: number } | undefined {\n\t\tconst result = this._results.get(key);\n\t\tif (!result?.ok || result.kind !== \"relevance\" || result.relevant === undefined) return undefined;\n\t\treturn { relevant: result.relevant, confidence: result.confidence ?? 
0 };\n\t}\n\n\thasWork(): boolean {\n\t\treturn this._queue.size > 0;\n\t}\n\n\tget isDraining(): boolean {\n\t\treturn this._draining;\n\t}\n\n\ttelemetry(): CurationTelemetrySnapshot {\n\t\treturn {\n\t\t\tjobsRun: this._jobsRun,\n\t\t\tparseFailures: this._parseFailures,\n\t\t\tdroppedJobs: this._droppedJobs,\n\t\t\tlocalChars: this._localChars,\n\t\t\tqueued: this._queue.size,\n\t\t\tresultsHeld: this._results.size,\n\t\t};\n\t}\n\n\t/**\n\t * Run up to `maxJobs` queued jobs through the injected local-model completer. Single-flight:\n\t * a concurrent drain call returns [] immediately rather than double-running jobs. Every call\n\t * is wall-clock bounded; a failed/unparseable job is recorded as a not-ok result (so it is\n\t * not retried forever) and counted in telemetry.\n\t */\n\tasync drain(args: {\n\t\tcomplete: CurationComplete;\n\t\tmaxJobs: number;\n\t\tsignal?: AbortSignal;\n\t\tnow?: () => number;\n\t}): Promise<CurationResult[]> {\n\t\tif (this._draining) return [];\n\t\tthis._draining = true;\n\t\tconst now = args.now ?? Date.now;\n\t\tconst completed: CurationResult[] = [];\n\t\ttry {\n\t\t\tconst jobs = [...this._queue.values()].slice(0, Math.max(0, args.maxJobs));\n\t\t\tfor (const job of jobs) {\n\t\t\t\tif (args.signal?.aborted) break;\n\t\t\t\tthis._queue.delete(job.key);\n\t\t\t\tconst started = now();\n\t\t\t\tconst bounded = await runBoundedCompletion({\n\t\t\t\t\tmaxWallClockMs: job.kind === \"stub_digest\" ? DIGEST_MAX_WALL_CLOCK_MS : RELEVANCE_MAX_WALL_CLOCK_MS,\n\t\t\t\t\tsignal: args.signal,\n\t\t\t\t\texecute: (signal) =>\n\t\t\t\t\t\targs.complete({\n\t\t\t\t\t\t\tsystemPrompt:\n\t\t\t\t\t\t\t\tjob.kind === \"stub_digest\" ? CURATION_DIGEST_SYSTEM_PROMPT : CURATION_RELEVANCE_SYSTEM_PROMPT,\n\t\t\t\t\t\t\tuserPrompt:\n\t\t\t\t\t\t\t\tjob.kind === \"stub_digest\"\n\t\t\t\t\t\t\t\t\t? job.content\n\t\t\t\t\t\t\t\t\t: `Current goal: ${job.goal ?? 
\"(unknown)\"}\\n\\nStale chunk:\\n${job.content}`,\n\t\t\t\t\t\t\tsignal,\n\t\t\t\t\t\t}),\n\t\t\t\t});\n\t\t\t\tconst ms = now() - started;\n\t\t\t\tthis._jobsRun++;\n\t\t\t\tthis._localChars += job.content.length;\n\t\t\t\tlet result: CurationResult = { key: job.key, kind: job.kind, ok: false, ms };\n\t\t\t\tif (bounded.completion && !bounded.failure) {\n\t\t\t\t\tif (job.kind === \"stub_digest\") {\n\t\t\t\t\t\tconst digest = parseCurationDigest(bounded.completion.text);\n\t\t\t\t\t\tresult = digest !== undefined ? { ...result, ok: true, digest } : result;\n\t\t\t\t\t} else {\n\t\t\t\t\t\tconst relevance = parseCurationRelevance(bounded.completion.text);\n\t\t\t\t\t\tresult =\n\t\t\t\t\t\t\trelevance !== undefined\n\t\t\t\t\t\t\t\t? { ...result, ok: true, relevant: relevance.relevant, confidence: relevance.confidence }\n\t\t\t\t\t\t\t\t: result;\n\t\t\t\t\t}\n\t\t\t\t}\n\t\t\t\tif (!result.ok) this._parseFailures++;\n\t\t\t\tthis._storeResult(result);\n\t\t\t\tcompleted.push(result);\n\t\t\t}\n\t\t} finally {\n\t\t\tthis._draining = false;\n\t\t}\n\t\treturn completed;\n\t}\n\n\tprivate _storeResult(result: CurationResult): void {\n\t\tif (this._results.size >= MAX_RESULTS) {\n\t\t\tconst oldest = this._results.keys().next().value;\n\t\t\tif (oldest !== undefined) this._results.delete(oldest);\n\t\t}\n\t\tthis._results.set(result.key, result);\n\t}\n}\n"]}
|
|
@@ -0,0 +1,192 @@
|
|
|
1
|
+
import { runBoundedCompletion } from "../autonomy/bounded-completion.js";
|
|
2
|
+
/**
|
|
3
|
+
* Brain-assisted context curation (see docs/model-router-rework/brain-context-curation-design.md):
|
|
4
|
+
* a SIDECAR curator that consumes reports the context pipeline already produces and feeds back
|
|
5
|
+
* small, typed advisories. It is never a pipeline stage: every consumer must behave byte-for-byte
|
|
6
|
+
* identically when a result is absent (missing digest -> today's stub; missing relevance ->
|
|
7
|
+
* today's enforcement decision). The curator itself is provider-free — the completion executor is
|
|
8
|
+
* injected per drain, so it works against any registered local model and faux providers in tests.
|
|
9
|
+
*
|
|
10
|
+
* Memory bounds are explicit: the queue and result map are both capped, and drops are counted in
|
|
11
|
+
* telemetry rather than silent. Results are keyed for idempotency (digests by the GC record's
|
|
12
|
+
* content hash, relevance by the audit item id), so re-enqueueing the same work is free.
|
|
13
|
+
*/
|
|
14
|
+
/**
 * System prompt for `stub_digest` jobs. One instruction per array entry, joined with newlines.
 * The JSON shape named here is the one `parseCurationDigest` reads back (`digest` string).
 */
export const CURATION_DIGEST_SYSTEM_PROMPT = [
    "You digest tool-output chunks for a coding agent's context curator. You never solve the task.",
    "Given a chunk, respond with STRICT JSON only - no prose:",
    '{"digest":"<one or two sentences, max 200 characters, keeping exact identifiers>"}',
    "Keep exact file paths, symbol names, error codes, and version strings verbatim.",
].join("\n");
|
|
20
|
+
/**
 * System prompt for `relevance` jobs. One instruction per array entry, joined with newlines.
 * The JSON shape named here is the one `parseCurationRelevance` reads back
 * (`relevant` boolean, `confidence` number).
 */
export const CURATION_RELEVANCE_SYSTEM_PROMPT = [
    "You judge whether a stale tool output is still relevant to the user's current goal.",
    "You never solve the task. Respond with STRICT JSON only - no prose:",
    '{"relevant":true|false,"confidence":<0..1>}',
    "relevant=false means the chunk is about something the current goal no longer needs.",
    "When uncertain, answer relevant=true with low confidence - keeping content is the safe default.",
].join("\n");
|
|
27
|
+
/** Maximum number of jobs held in the queue; enqueueing beyond this evicts the oldest queued job. */
const MAX_QUEUE = 32;
/** Maximum number of results retained; storing beyond this evicts the oldest stored result. */
const MAX_RESULTS = 200;
/** Job content is truncated to this many characters when it is enqueued. */
const MAX_JOB_CONTENT_CHARS = 8_000;
/** Wall-clock budget, in milliseconds, passed to the bounded completion for a digest job. */
const DIGEST_MAX_WALL_CLOCK_MS = 20_000;
/** Wall-clock budget, in milliseconds, passed to the bounded completion for a relevance job. */
const RELEVANCE_MAX_WALL_CLOCK_MS = 8_000;
/**
 * Exported confidence threshold; not referenced inside this module.
 * NOTE(review): presumably consumers compare relevance confidence against it — confirm at call sites.
 */
export const CURATION_RELEVANCE_MIN_CONFIDENCE = 0.8;
|
|
33
|
+
/**
 * Best-effort extraction of a single JSON object from a model reply.
 *
 * Candidates are tried in order and the first one that parses to a plain (non-array) object wins:
 *   1. the whole trimmed reply,
 *   2. the body of the first ``` / ```json fenced block, if any,
 *   3. the span from the first "{" to the last "}".
 *
 * @param text Raw reply text from the completer.
 * @returns The parsed object, or `undefined` when no candidate yields one.
 */
function extractJsonObject(text) {
    // A completer that resolves without usable text must read as "nothing to parse",
    // not as a TypeError thrown from `.trim()`.
    if (typeof text !== "string")
        return undefined;
    const trimmed = text.trim();
    const candidates = [trimmed];
    const fenced = /```(?:json)?\s*([\s\S]*?)```/.exec(trimmed);
    if (fenced?.[1])
        candidates.push(fenced[1].trim());
    const start = trimmed.indexOf("{");
    const end = trimmed.lastIndexOf("}");
    if (start >= 0 && end > start)
        candidates.push(trimmed.slice(start, end + 1));
    for (const candidate of candidates) {
        try {
            const parsed = JSON.parse(candidate);
            // Reject null, primitives and arrays: callers only read named properties.
            if (parsed && typeof parsed === "object" && !Array.isArray(parsed))
                return parsed;
        }
        catch {
            // Not valid JSON in this form; try the next candidate.
        }
    }
    return undefined;
}
|
|
55
|
+
/**
 * Parse the reply to a `stub_digest` job.
 *
 * @param text Raw reply text from the completer.
 * @returns The digest with surrounding whitespace removed and internal whitespace runs collapsed
 *   to single spaces, or `undefined` when the reply holds no JSON object, `digest` is not a
 *   string, or the normalized digest is empty or longer than 240 characters.
 */
export function parseCurationDigest(text) {
    const parsed = extractJsonObject(text);
    if (!parsed)
        return undefined;
    const digest = parsed.digest;
    if (typeof digest !== "string")
        return undefined;
    const normalized = digest.trim().replace(/\s+/g, " ");
    // The prompt asks for at most 200 characters; the accepted ceiling here is 240.
    if (normalized.length === 0 || normalized.length > 240)
        return undefined;
    return normalized;
}
|
|
67
|
+
/**
 * Parse the reply to a `relevance` job.
 *
 * @param text Raw reply text from the completer.
 * @returns `{ relevant, confidence }`, or `undefined` when the reply holds no JSON object or
 *   `relevant` is not a boolean. `confidence` is clamped to 0..1; a missing, non-numeric or
 *   non-finite confidence becomes 0.
 */
export function parseCurationRelevance(text) {
    const parsed = extractJsonObject(text);
    if (!parsed)
        return undefined;
    const record = parsed;
    if (typeof record.relevant !== "boolean")
        return undefined;
    const hasUsableConfidence = typeof record.confidence === "number" && Number.isFinite(record.confidence);
    const confidence = hasUsableConfidence
        ? Math.max(0, Math.min(1, record.confidence))
        : 0;
    return { relevant: record.relevant, confidence };
}
|
|
79
|
+
/**
 * Bounded job queue plus result cache for brain-assisted context curation.
 *
 * Jobs and results share one key space: `enqueue` ignores a key that is already queued or that
 * already has a stored result (ok or not-ok). Both maps rely on `Map` insertion order, so
 * "oldest" always means "first inserted".
 */
export class BrainCurator {
    /** Pending jobs by key, in arrival order. */
    _queue = new Map();
    /** Stored results (ok and not-ok) by key, in completion order. */
    _results = new Map();
    _jobsRun = 0;
    _parseFailures = 0;
    _droppedJobs = 0;
    _localChars = 0;
    _draining = false;
    /**
     * Queue a job unless its key is already queued or already has a result.
     * When the queue is full, the oldest queued job is evicted and counted in `droppedJobs`.
     * The stored copy has its content truncated to MAX_JOB_CONTENT_CHARS.
     */
    enqueue(job) {
        if (this._results.has(job.key) || this._queue.has(job.key))
            return;
        if (this._queue.size >= MAX_QUEUE) {
            // Drop the OLDEST queued job (newer work reflects the current goal better) and count it.
            const oldest = this._queue.keys().next().value;
            if (oldest !== undefined)
                this._queue.delete(oldest);
            this._droppedJobs++;
        }
        this._queue.set(job.key, { ...job, content: job.content.slice(0, MAX_JOB_CONTENT_CHARS) });
    }
    /** Digest stored for `key`, or `undefined` unless a successful `stub_digest` result exists. */
    getDigest(key) {
        const result = this._results.get(key);
        return result?.ok && result.kind === "stub_digest" ? result.digest : undefined;
    }
    /** Verdict stored for `key`, or `undefined` unless a successful `relevance` result exists. */
    getRelevance(key) {
        const result = this._results.get(key);
        if (!result?.ok || result.kind !== "relevance" || result.relevant === undefined)
            return undefined;
        return { relevant: result.relevant, confidence: result.confidence ?? 0 };
    }
    /** True when at least one job is queued. */
    hasWork() {
        return this._queue.size > 0;
    }
    /** True while a `drain` call is in flight. */
    get isDraining() {
        return this._draining;
    }
    /** Snapshot of the counters and of the current queue/result sizes. */
    telemetry() {
        return {
            jobsRun: this._jobsRun,
            parseFailures: this._parseFailures,
            droppedJobs: this._droppedJobs,
            localChars: this._localChars,
            queued: this._queue.size,
            resultsHeld: this._results.size,
        };
    }
    /**
     * Run up to `maxJobs` queued jobs through the injected local-model completer. Single-flight:
     * a concurrent drain call returns [] immediately rather than double-running jobs. Every call
     * is wall-clock bounded; a failed/unparseable job is recorded as a not-ok result (so it is
     * not retried forever) and counted in telemetry.
     *
     * Only jobs that were queued when the drain started are considered. A job that `enqueue`
     * evicts while an earlier job is still awaiting is skipped, so it is never both counted as
     * dropped and run.
     *
     * NOTE(review): if `runBoundedCompletion` reports a failure when `signal` aborts mid-job
     * (its behavior is not visible here), that job is stored as not-ok and its key will not be
     * accepted by `enqueue` again while the result is held — confirm this is intended for aborts.
     *
     * @param args.complete Completion executor to call for each job.
     * @param args.maxJobs Upper bound on jobs run by this call; values below 0 run nothing.
     * @param args.signal Optional abort signal; checked before each job and forwarded to the completion.
     * @param args.now Optional clock used to measure per-job `ms`; defaults to `Date.now`.
     * @returns The results produced by this call, in execution order.
     */
    async drain(args) {
        if (this._draining)
            return [];
        this._draining = true;
        const now = args.now ?? Date.now;
        const completed = [];
        try {
            // Snapshot KEYS, not job objects: the queue can change during the awaits below.
            const keys = [...this._queue.keys()].slice(0, Math.max(0, args.maxJobs));
            for (const key of keys) {
                if (args.signal?.aborted)
                    break;
                const job = this._queue.get(key);
                // Evicted by enqueue() since the snapshot; already counted in droppedJobs.
                if (job === undefined)
                    continue;
                // Remove before running so the job cannot be picked up twice.
                this._queue.delete(key);
                const isDigest = job.kind === "stub_digest";
                const started = now();
                const bounded = await runBoundedCompletion({
                    maxWallClockMs: isDigest ? DIGEST_MAX_WALL_CLOCK_MS : RELEVANCE_MAX_WALL_CLOCK_MS,
                    signal: args.signal,
                    execute: (signal) => args.complete({
                        systemPrompt: isDigest ? CURATION_DIGEST_SYSTEM_PROMPT : CURATION_RELEVANCE_SYSTEM_PROMPT,
                        userPrompt: isDigest
                            ? job.content
                            : `Current goal: ${job.goal ?? "(unknown)"}\n\nStale chunk:\n${job.content}`,
                        signal,
                    }),
                });
                const ms = now() - started;
                this._jobsRun++;
                this._localChars += job.content.length;
                // Start from a not-ok result; it is upgraded only when the reply parses.
                let result = { key: job.key, kind: job.kind, ok: false, ms };
                if (bounded.completion && !bounded.failure) {
                    if (isDigest) {
                        const digest = parseCurationDigest(bounded.completion.text);
                        if (digest !== undefined)
                            result = { ...result, ok: true, digest };
                    }
                    else {
                        const relevance = parseCurationRelevance(bounded.completion.text);
                        if (relevance !== undefined) {
                            result = {
                                ...result,
                                ok: true,
                                relevant: relevance.relevant,
                                confidence: relevance.confidence,
                            };
                        }
                    }
                }
                // Counts every not-ok outcome: failed completions as well as unparseable replies.
                if (!result.ok)
                    this._parseFailures++;
                this._storeResult(result);
                completed.push(result);
            }
        }
        finally {
            this._draining = false;
        }
        return completed;
    }
    /** Store a result, evicting the oldest stored result first when the cache is at MAX_RESULTS. */
    _storeResult(result) {
        if (this._results.size >= MAX_RESULTS) {
            const oldest = this._results.keys().next().value;
            if (oldest !== undefined)
                this._results.delete(oldest);
        }
        this._results.set(result.key, result);
    }
}
|
|
192
|
+
//# sourceMappingURL=brain-curator.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"brain-curator.js","sourceRoot":"","sources":["../../../src/core/context/brain-curator.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,oBAAoB,EAAE,MAAM,mCAAmC,CAAC;AAEzE;;;;;;;;;;;GAWG;AAEH,MAAM,CAAC,MAAM,6BAA6B,GAAG;IAC5C,+FAA+F;IAC/F,0DAA0D;IAC1D,oFAAoF;IACpF,iFAAiF;CACjF,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAEb,MAAM,CAAC,MAAM,gCAAgC,GAAG;IAC/C,qFAAqF;IACrF,qEAAqE;IACrE,6CAA6C;IAC7C,qFAAqF;IACrF,iGAAiG;CACjG,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAsCb,MAAM,SAAS,GAAG,EAAE,CAAC;AACrB,MAAM,WAAW,GAAG,GAAG,CAAC;AACxB,MAAM,qBAAqB,GAAG,KAAK,CAAC;AACpC,MAAM,wBAAwB,GAAG,MAAM,CAAC;AACxC,MAAM,2BAA2B,GAAG,KAAK,CAAC;AAC1C,MAAM,CAAC,MAAM,iCAAiC,GAAG,GAAG,CAAC;AAErD,SAAS,iBAAiB,CAAC,IAAY,EAAuB;IAC7D,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;IAC5B,MAAM,UAAU,GAAa,CAAC,OAAO,CAAC,CAAC;IACvC,MAAM,MAAM,GAAG,8BAA8B,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;IAC5D,IAAI,MAAM,EAAE,CAAC,CAAC,CAAC;QAAE,UAAU,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;IACnD,MAAM,KAAK,GAAG,OAAO,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;IACnC,MAAM,GAAG,GAAG,OAAO,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC;IACrC,IAAI,KAAK,IAAI,CAAC,IAAI,GAAG,GAAG,KAAK;QAAE,UAAU,CAAC,IAAI,CAAC,OAAO,CAAC,KAAK,CAAC,KAAK,EAAE,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC;IAC9E,KAAK,MAAM,SAAS,IAAI,UAAU,EAAE,CAAC;QACpC,IAAI,CAAC;YACJ,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC;YACrC,IAAI,MAAM,IAAI,OAAO,MAAM,KAAK,QAAQ,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC;gBAAE,OAAO,MAAM,CAAC;QACnF,CAAC;QAAC,MAAM,CAAC;YACR,qBAAqB;QACtB,CAAC;IACF,CAAC;IACD,OAAO,SAAS,CAAC;AAAA,CACjB;AAED,MAAM,UAAU,mBAAmB,CAAC,IAAY,EAAsB;IACrE,MAAM,MAAM,GAAG,iBAAiB,CAAC,IAAI,CAAC,CAAC;IACvC,IAAI,CAAC,MAAM;QAAE,OAAO,SAAS,CAAC;IAC9B,MAAM,MAAM,GAAI,MAA+B,CAAC,MAAM,CAAC;IACvD,IAAI,OAAO,MAAM,KAAK,QAAQ;QAAE,OAAO,SAAS,CAAC;IACjD,MAAM,OAAO,GAAG,MAAM,CAAC,IAAI,EAAE,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;IACnD,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC,IAAI,OAAO,CAAC,MAAM,GAAG,GAAG;QAAE,OAAO,SAAS,CAAC;IACnE,OAAO,OAAO,CAAC;AAAA,CACf;AAED,MAAM,UAAU,sBAAsB,CAAC,IAAY,EAAyD;IAC3G,MAAM,MAAM,GAAG,iBAAiB,CAAC,IAAI,CAAC,CAAC;IACvC,I
AAI,CAAC,MAAM;QAAE,OAAO,SAAS,CAAC;IAC9B,MAAM,MAAM,GAAG,MAAsD,CAAC;IACtE,IAAI,OAAO,MAAM,CAAC,QAAQ,KAAK,SAAS;QAAE,OAAO,SAAS,CAAC;IAC3D,MAAM,UAAU,GACf,OAAO,MAAM,CAAC,UAAU,KAAK,QAAQ,IAAI,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC,UAAU,CAAC;QAC1E,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,MAAM,CAAC,UAAU,CAAC,CAAC;QAC7C,CAAC,CAAC,CAAC,CAAC;IACN,OAAO,EAAE,QAAQ,EAAE,MAAM,CAAC,QAAQ,EAAE,UAAU,EAAE,CAAC;AAAA,CACjD;AAED,MAAM,OAAO,YAAY;IACP,MAAM,GAAG,IAAI,GAAG,EAAuB,CAAC;IACxC,QAAQ,GAAG,IAAI,GAAG,EAA0B,CAAC;IACtD,QAAQ,GAAG,CAAC,CAAC;IACb,cAAc,GAAG,CAAC,CAAC;IACnB,YAAY,GAAG,CAAC,CAAC;IACjB,WAAW,GAAG,CAAC,CAAC;IAChB,SAAS,GAAG,KAAK,CAAC;IAE1B,OAAO,CAAC,GAAgB,EAAQ;QAC/B,IAAI,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,GAAG,CAAC,GAAG,CAAC,IAAI,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,GAAG,CAAC,GAAG,CAAC;YAAE,OAAO;QACnE,IAAI,IAAI,CAAC,MAAM,CAAC,IAAI,IAAI,SAAS,EAAE,CAAC;YACnC,yFAAyF;YACzF,MAAM,MAAM,GAAG,IAAI,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC;YAC/C,IAAI,MAAM,KAAK,SAAS;gBAAE,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;YACrD,IAAI,CAAC,YAAY,EAAE,CAAC;QACrB,CAAC;QACD,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,GAAG,CAAC,GAAG,EAAE,EAAE,GAAG,GAAG,EAAE,OAAO,EAAE,GAAG,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,qBAAqB,CAAC,EAAE,CAAC,CAAC;IAAA,CAC3F;IAED,SAAS,CAAC,GAAW,EAAsB;QAC1C,MAAM,MAAM,GAAG,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;QACtC,OAAO,MAAM,EAAE,EAAE,IAAI,MAAM,CAAC,IAAI,KAAK,aAAa,CAAC,CAAC,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,SAAS,CAAC;IAAA,CAC/E;IAED,YAAY,CAAC,GAAW,EAAyD;QAChF,MAAM,MAAM,GAAG,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;QACtC,IAAI,CAAC,MAAM,EAAE,EAAE,IAAI,MAAM,CAAC,IAAI,KAAK,WAAW,IAAI,MAAM,CAAC,QAAQ,KAAK,SAAS;YAAE,OAAO,SAAS,CAAC;QAClG,OAAO,EAAE,QAAQ,EAAE,MAAM,CAAC,QAAQ,EAAE,UAAU,EAAE,MAAM,CAAC,UAAU,IAAI,CAAC,EAAE,CAAC;IAAA,CACzE;IAED,OAAO,GAAY;QAClB,OAAO,IAAI,CAAC,MAAM,CAAC,IAAI,GAAG,CAAC,CAAC;IAAA,CAC5B;IAED,IAAI,UAAU,GAAY;QACzB,OAAO,IAAI,CAAC,SAAS,CAAC;IAAA,CACtB;IAED,SAAS,GAA8B;QACtC,OAAO;YACN,OAAO,EAAE,IAAI,CAAC,QAAQ;YACtB,aAAa,EAAE,IAAI,CAAC,cAAc;YAClC,WAAW,EAAE,IAAI,CAAC,
YAAY;YAC9B,UAAU,EAAE,IAAI,CAAC,WAAW;YAC5B,MAAM,EAAE,IAAI,CAAC,MAAM,CAAC,IAAI;YACxB,WAAW,EAAE,IAAI,CAAC,QAAQ,CAAC,IAAI;SAC/B,CAAC;IAAA,CACF;IAED;;;;;OAKG;IACH,KAAK,CAAC,KAAK,CAAC,IAKX,EAA6B;QAC7B,IAAI,IAAI,CAAC,SAAS;YAAE,OAAO,EAAE,CAAC;QAC9B,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC;QACtB,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,IAAI,IAAI,CAAC,GAAG,CAAC;QACjC,MAAM,SAAS,GAAqB,EAAE,CAAC;QACvC,IAAI,CAAC;YACJ,MAAM,IAAI,GAAG,CAAC,GAAG,IAAI,CAAC,MAAM,CAAC,MAAM,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC;YAC3E,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;gBACxB,IAAI,IAAI,CAAC,MAAM,EAAE,OAAO;oBAAE,MAAM;gBAChC,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;gBAC5B,MAAM,OAAO,GAAG,GAAG,EAAE,CAAC;gBACtB,MAAM,OAAO,GAAG,MAAM,oBAAoB,CAAC;oBAC1C,cAAc,EAAE,GAAG,CAAC,IAAI,KAAK,aAAa,CAAC,CAAC,CAAC,wBAAwB,CAAC,CAAC,CAAC,2BAA2B;oBACnG,MAAM,EAAE,IAAI,CAAC,MAAM;oBACnB,OAAO,EAAE,CAAC,MAAM,EAAE,EAAE,CACnB,IAAI,CAAC,QAAQ,CAAC;wBACb,YAAY,EACX,GAAG,CAAC,IAAI,KAAK,aAAa,CAAC,CAAC,CAAC,6BAA6B,CAAC,CAAC,CAAC,gCAAgC;wBAC9F,UAAU,EACT,GAAG,CAAC,IAAI,KAAK,aAAa;4BACzB,CAAC,CAAC,GAAG,CAAC,OAAO;4BACb,CAAC,CAAC,iBAAiB,GAAG,CAAC,IAAI,IAAI,WAAW,qBAAqB,GAAG,CAAC,OAAO,EAAE;wBAC9E,MAAM;qBACN,CAAC;iBACH,CAAC,CAAC;gBACH,MAAM,EAAE,GAAG,GAAG,EAAE,GAAG,OAAO,CAAC;gBAC3B,IAAI,CAAC,QAAQ,EAAE,CAAC;gBAChB,IAAI,CAAC,WAAW,IAAI,GAAG,CAAC,OAAO,CAAC,MAAM,CAAC;gBACvC,IAAI,MAAM,GAAmB,EAAE,GAAG,EAAE,GAAG,CAAC,GAAG,EAAE,IAAI,EAAE,GAAG,CAAC,IAAI,EAAE,EAAE,EAAE,KAAK,EAAE,EAAE,EAAE,CAAC;gBAC7E,IAAI,OAAO,CAAC,UAAU,IAAI,CAAC,OAAO,CAAC,OAAO,EAAE,CAAC;oBAC5C,IAAI,GAAG,CAAC,IAAI,KAAK,aAAa,EAAE,CAAC;wBAChC,MAAM,MAAM,GAAG,mBAAmB,CAAC,OAAO,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC;wBAC5D,MAAM,GAAG,MAAM,KAAK,SAAS,CAAC,CAAC,CAAC,EAAE,GAAG,MAAM,EAAE,EAAE,EAAE,IAAI,EAAE,MAAM,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC;oBAC1E,CAAC;yBAAM,CAAC;wBACP,MAAM,SAAS,GAAG,sBAAsB,CAAC,OAAO,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC;wBAClE,MAAM;4BACL,SAAS,KAAK,SAAS;gCACtB,CAAC,CAAC,EAAE,GAAG,MAAM,EAAE,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,SAAS,CAAC,QAAQ,EAAE,UAAU,EAAE,SAAS,CAAC,
UAAU,EAAE;gCACzF,CAAC,CAAC,MAAM,CAAC;oBACZ,CAAC;gBACF,CAAC;gBACD,IAAI,CAAC,MAAM,CAAC,EAAE;oBAAE,IAAI,CAAC,cAAc,EAAE,CAAC;gBACtC,IAAI,CAAC,YAAY,CAAC,MAAM,CAAC,CAAC;gBAC1B,SAAS,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;YACxB,CAAC;QACF,CAAC;gBAAS,CAAC;YACV,IAAI,CAAC,SAAS,GAAG,KAAK,CAAC;QACxB,CAAC;QACD,OAAO,SAAS,CAAC;IAAA,CACjB;IAEO,YAAY,CAAC,MAAsB,EAAQ;QAClD,IAAI,IAAI,CAAC,QAAQ,CAAC,IAAI,IAAI,WAAW,EAAE,CAAC;YACvC,MAAM,MAAM,GAAG,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC;YACjD,IAAI,MAAM,KAAK,SAAS;gBAAE,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;QACxD,CAAC;QACD,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,MAAM,CAAC,GAAG,EAAE,MAAM,CAAC,CAAC;IAAA,CACtC;CACD","sourcesContent":["import { runBoundedCompletion } from \"../autonomy/bounded-completion.ts\";\n\n/**\n * Brain-assisted context curation (see docs/model-router-rework/brain-context-curation-design.md):\n * a SIDECAR curator that consumes reports the context pipeline already produces and feeds back\n * small, typed advisories. It is never a pipeline stage: every consumer must behave byte-for-byte\n * identically when a result is absent (missing digest -> today's stub; missing relevance ->\n * today's enforcement decision). The curator itself is provider-free — the completion executor is\n * injected per drain, so it works against any registered local model and faux providers in tests.\n *\n * Memory bounds are explicit: the queue and result map are both capped, and drops are counted in\n * telemetry rather than silent. Results are keyed for idempotency (digests by the GC record's\n * content hash, relevance by the audit item id), so re-enqueueing the same work is free.\n */\n\nexport const CURATION_DIGEST_SYSTEM_PROMPT = [\n\t\"You digest tool-output chunks for a coding agent's context curator. 
You never solve the task.\",\n\t\"Given a chunk, respond with STRICT JSON only - no prose:\",\n\t'{\"digest\":\"<one or two sentences, max 200 characters, keeping exact identifiers>\"}',\n\t\"Keep exact file paths, symbol names, error codes, and version strings verbatim.\",\n].join(\"\\n\");\n\nexport const CURATION_RELEVANCE_SYSTEM_PROMPT = [\n\t\"You judge whether a stale tool output is still relevant to the user's current goal.\",\n\t\"You never solve the task. Respond with STRICT JSON only - no prose:\",\n\t'{\"relevant\":true|false,\"confidence\":<0..1>}',\n\t\"relevant=false means the chunk is about something the current goal no longer needs.\",\n\t\"When uncertain, answer relevant=true with low confidence - keeping content is the safe default.\",\n].join(\"\\n\");\n\nexport interface CurationJob {\n\tkind: \"stub_digest\" | \"relevance\";\n\t/** Idempotency key: digest jobs use the GC record's content hash, relevance jobs the item id. */\n\tkey: string;\n\t/** Bounded chunk the local model must actually be able to process (sliced on enqueue). */\n\tcontent: string;\n\t/** Relevance jobs only: the goal/intent line the chunk is judged against. */\n\tgoal?: string;\n}\n\nexport interface CurationResult {\n\tkey: string;\n\tkind: CurationJob[\"kind\"];\n\tok: boolean;\n\tdigest?: string;\n\trelevant?: boolean;\n\tconfidence?: number;\n\tms: number;\n}\n\nexport interface CurationTelemetrySnapshot {\n\tjobsRun: number;\n\tparseFailures: number;\n\tdroppedJobs: number;\n\t/** Chars processed locally (an honest proxy for frontier tokens NOT spent on this work). 
*/\n\tlocalChars: number;\n\tqueued: number;\n\tresultsHeld: number;\n}\n\nexport type CurationComplete = (input: {\n\tsystemPrompt: string;\n\tuserPrompt: string;\n\tsignal?: AbortSignal;\n}) => Promise<{ text: string; costUsd: number; stopReason: string }>;\n\nconst MAX_QUEUE = 32;\nconst MAX_RESULTS = 200;\nconst MAX_JOB_CONTENT_CHARS = 8_000;\nconst DIGEST_MAX_WALL_CLOCK_MS = 20_000;\nconst RELEVANCE_MAX_WALL_CLOCK_MS = 8_000;\nexport const CURATION_RELEVANCE_MIN_CONFIDENCE = 0.8;\n\nfunction extractJsonObject(text: string): unknown | undefined {\n\tconst trimmed = text.trim();\n\tconst candidates: string[] = [trimmed];\n\tconst fenced = /```(?:json)?\\s*([\\s\\S]*?)```/.exec(trimmed);\n\tif (fenced?.[1]) candidates.push(fenced[1].trim());\n\tconst start = trimmed.indexOf(\"{\");\n\tconst end = trimmed.lastIndexOf(\"}\");\n\tif (start >= 0 && end > start) candidates.push(trimmed.slice(start, end + 1));\n\tfor (const candidate of candidates) {\n\t\ttry {\n\t\t\tconst parsed = JSON.parse(candidate);\n\t\t\tif (parsed && typeof parsed === \"object\" && !Array.isArray(parsed)) return parsed;\n\t\t} catch {\n\t\t\t// try next candidate\n\t\t}\n\t}\n\treturn undefined;\n}\n\nexport function parseCurationDigest(text: string): string | undefined {\n\tconst parsed = extractJsonObject(text);\n\tif (!parsed) return undefined;\n\tconst digest = (parsed as { digest?: unknown }).digest;\n\tif (typeof digest !== \"string\") return undefined;\n\tconst trimmed = digest.trim().replace(/\\s+/g, \" \");\n\tif (trimmed.length === 0 || trimmed.length > 240) return undefined;\n\treturn trimmed;\n}\n\nexport function parseCurationRelevance(text: string): { relevant: boolean; confidence: number } | undefined {\n\tconst parsed = extractJsonObject(text);\n\tif (!parsed) return undefined;\n\tconst record = parsed as { relevant?: unknown; confidence?: unknown };\n\tif (typeof record.relevant !== \"boolean\") return undefined;\n\tconst confidence =\n\t\ttypeof record.confidence === 
\"number\" && Number.isFinite(record.confidence)\n\t\t\t? Math.max(0, Math.min(1, record.confidence))\n\t\t\t: 0;\n\treturn { relevant: record.relevant, confidence };\n}\n\nexport class BrainCurator {\n\tprivate readonly _queue = new Map<string, CurationJob>();\n\tprivate readonly _results = new Map<string, CurationResult>();\n\tprivate _jobsRun = 0;\n\tprivate _parseFailures = 0;\n\tprivate _droppedJobs = 0;\n\tprivate _localChars = 0;\n\tprivate _draining = false;\n\n\tenqueue(job: CurationJob): void {\n\t\tif (this._results.has(job.key) || this._queue.has(job.key)) return;\n\t\tif (this._queue.size >= MAX_QUEUE) {\n\t\t\t// Drop the OLDEST queued job (newer work reflects the current goal better) and count it.\n\t\t\tconst oldest = this._queue.keys().next().value;\n\t\t\tif (oldest !== undefined) this._queue.delete(oldest);\n\t\t\tthis._droppedJobs++;\n\t\t}\n\t\tthis._queue.set(job.key, { ...job, content: job.content.slice(0, MAX_JOB_CONTENT_CHARS) });\n\t}\n\n\tgetDigest(key: string): string | undefined {\n\t\tconst result = this._results.get(key);\n\t\treturn result?.ok && result.kind === \"stub_digest\" ? result.digest : undefined;\n\t}\n\n\tgetRelevance(key: string): { relevant: boolean; confidence: number } | undefined {\n\t\tconst result = this._results.get(key);\n\t\tif (!result?.ok || result.kind !== \"relevance\" || result.relevant === undefined) return undefined;\n\t\treturn { relevant: result.relevant, confidence: result.confidence ?? 
0 };\n\t}\n\n\thasWork(): boolean {\n\t\treturn this._queue.size > 0;\n\t}\n\n\tget isDraining(): boolean {\n\t\treturn this._draining;\n\t}\n\n\ttelemetry(): CurationTelemetrySnapshot {\n\t\treturn {\n\t\t\tjobsRun: this._jobsRun,\n\t\t\tparseFailures: this._parseFailures,\n\t\t\tdroppedJobs: this._droppedJobs,\n\t\t\tlocalChars: this._localChars,\n\t\t\tqueued: this._queue.size,\n\t\t\tresultsHeld: this._results.size,\n\t\t};\n\t}\n\n\t/**\n\t * Run up to `maxJobs` queued jobs through the injected local-model completer. Single-flight:\n\t * a concurrent drain call returns [] immediately rather than double-running jobs. Every call\n\t * is wall-clock bounded; a failed/unparseable job is recorded as a not-ok result (so it is\n\t * not retried forever) and counted in telemetry.\n\t */\n\tasync drain(args: {\n\t\tcomplete: CurationComplete;\n\t\tmaxJobs: number;\n\t\tsignal?: AbortSignal;\n\t\tnow?: () => number;\n\t}): Promise<CurationResult[]> {\n\t\tif (this._draining) return [];\n\t\tthis._draining = true;\n\t\tconst now = args.now ?? Date.now;\n\t\tconst completed: CurationResult[] = [];\n\t\ttry {\n\t\t\tconst jobs = [...this._queue.values()].slice(0, Math.max(0, args.maxJobs));\n\t\t\tfor (const job of jobs) {\n\t\t\t\tif (args.signal?.aborted) break;\n\t\t\t\tthis._queue.delete(job.key);\n\t\t\t\tconst started = now();\n\t\t\t\tconst bounded = await runBoundedCompletion({\n\t\t\t\t\tmaxWallClockMs: job.kind === \"stub_digest\" ? DIGEST_MAX_WALL_CLOCK_MS : RELEVANCE_MAX_WALL_CLOCK_MS,\n\t\t\t\t\tsignal: args.signal,\n\t\t\t\t\texecute: (signal) =>\n\t\t\t\t\t\targs.complete({\n\t\t\t\t\t\t\tsystemPrompt:\n\t\t\t\t\t\t\t\tjob.kind === \"stub_digest\" ? CURATION_DIGEST_SYSTEM_PROMPT : CURATION_RELEVANCE_SYSTEM_PROMPT,\n\t\t\t\t\t\t\tuserPrompt:\n\t\t\t\t\t\t\t\tjob.kind === \"stub_digest\"\n\t\t\t\t\t\t\t\t\t? job.content\n\t\t\t\t\t\t\t\t\t: `Current goal: ${job.goal ?? 
\"(unknown)\"}\\n\\nStale chunk:\\n${job.content}`,\n\t\t\t\t\t\t\tsignal,\n\t\t\t\t\t\t}),\n\t\t\t\t});\n\t\t\t\tconst ms = now() - started;\n\t\t\t\tthis._jobsRun++;\n\t\t\t\tthis._localChars += job.content.length;\n\t\t\t\tlet result: CurationResult = { key: job.key, kind: job.kind, ok: false, ms };\n\t\t\t\tif (bounded.completion && !bounded.failure) {\n\t\t\t\t\tif (job.kind === \"stub_digest\") {\n\t\t\t\t\t\tconst digest = parseCurationDigest(bounded.completion.text);\n\t\t\t\t\t\tresult = digest !== undefined ? { ...result, ok: true, digest } : result;\n\t\t\t\t\t} else {\n\t\t\t\t\t\tconst relevance = parseCurationRelevance(bounded.completion.text);\n\t\t\t\t\t\tresult =\n\t\t\t\t\t\t\trelevance !== undefined\n\t\t\t\t\t\t\t\t? { ...result, ok: true, relevant: relevance.relevant, confidence: relevance.confidence }\n\t\t\t\t\t\t\t\t: result;\n\t\t\t\t\t}\n\t\t\t\t}\n\t\t\t\tif (!result.ok) this._parseFailures++;\n\t\t\t\tthis._storeResult(result);\n\t\t\t\tcompleted.push(result);\n\t\t\t}\n\t\t} finally {\n\t\t\tthis._draining = false;\n\t\t}\n\t\treturn completed;\n\t}\n\n\tprivate _storeResult(result: CurationResult): void {\n\t\tif (this._results.size >= MAX_RESULTS) {\n\t\t\tconst oldest = this._results.keys().next().value;\n\t\t\tif (oldest !== undefined) this._results.delete(oldest);\n\t\t}\n\t\tthis._results.set(result.key, result);\n\t}\n}\n"]}
|
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
import type { AgentMessage } from "@caupulican/pi-agent-core";
|
|
2
|
+
import type { CurationTelemetrySnapshot } from "./brain-curator.ts";
|
|
3
|
+
/**
|
|
4
|
+
* Context composition dashboard (user-facing): decomposes EVERYTHING that rides along on every
|
|
5
|
+
* request — system prompt, active tool schemas, extension contributions, injected blocks
|
|
6
|
+
* (memory recall pages, evidence blocks), and the session messages themselves (raw vs. GC-packed
|
|
7
|
+
* vs. policy-stubbed) — so a user integrating their own tools/extensions can see exactly what
|
|
8
|
+
* each addition costs per request and where cleaning is (or is not) working.
|
|
9
|
+
*
|
|
10
|
+
* Honesty contract: everything here is an ESTIMATE (chars/4) EXCEPT `providerReportedTokens`,
|
|
11
|
+
* which is what the provider actually billed. The dashboard always shows both and the delta —
|
|
12
|
+
* the delta is the measure of how much the estimates can be trusted, never hidden.
|
|
13
|
+
*
|
|
14
|
+
* Known exclusions (named, not hidden): extension `context` handlers may rewrite messages at
|
|
15
|
+
* send time in ways this view cannot see. The memory evidence block and enforcement stubbing
|
|
16
|
+
* are ALSO send-time-only, but those are modeled explicitly via `adjustments`.
|
|
17
|
+
*/
|
|
18
|
+
/** One entry of the per-tool breakdown carried by the context composition report. */
export interface ToolCompositionRow {
    name: string;
    /** Estimated token cost of the tool's name, description, and schema as sent to the provider. */
    schemaTokens: number;
    /** Whether the row is attributed to a built-in tool or to an extension. */
    source: "built-in" | "extension";
}
|
|
24
|
+
/** One entry of the per-extension breakdown carried by the context composition report. */
export interface ExtensionCompositionRow {
    name: string;
    path: string;
    toolCount: number;
    commandCount: number;
    /** Per-request cost of this extension: estimated schema tokens of its ACTIVE tools. */
    activeToolSchemaTokens: number;
}
|
|
32
|
+
/** Aggregate for one class of session messages: a label plus its message count and token total. */
export interface MessageClassRow {
    label: string;
    count: number;
    tokens: number;
}
|
|
37
|
+
/**
 * Full per-request context breakdown returned by `buildContextCompositionReport`.
 * Token figures are estimates except `providerReportedTokens`.
 */
export interface ContextCompositionReport {
    /** Estimated token size of the system prompt, which is sent with every request. */
    systemPromptTokens: number;
    systemPromptChars: number;
    /** Estimated token size of ALL active tool schemas, which are sent with every request. */
    toolSchemaTokens: number;
    tools: ToolCompositionRow[];
    extensions: ExtensionCompositionRow[];
    /** Session messages grouped by class (raw/user/assistant/stubs/recall pages), heaviest first. */
    messageClasses: MessageClassRow[];
    messageTokens: number;
    messageCount: number;
    /** Estimated per-request total: system prompt + tool schemas + messages. */
    estimatedRequestTokens: number;
    /** Token count the provider actually reported for the current context, when known. */
    providerReportedTokens: number | null;
    contextWindow: number | null;
    // NOTE(review): the nullable sections below presumably are null when the caller supplied
    // no data for them — confirm against the implementation.
    gc: { packedCount: number; savedTokens: number } | null;
    enforcement: { enforcedCount: number; advisoryEvictions: number } | null;
    curation: {
        enabled: boolean;
        telemetry: CurationTelemetrySnapshot;
        lastSkipReason?: string;
    } | null;
    /** Background/side-channel spend: it does NOT ride in this context but still bills the account. */
    spawned: { cost: number; reports: number } | null;
    /** Send-time-only deltas already folded into estimatedRequestTokens: +evidence block, -policy stubs. */
    adjustments: {
        memoryEvidenceTokens: number;
        enforcementSavedTokens: number;
    };
    /** Bounded list of actionable observations derived from the numbers above. */
    observations: string[];
}
|
|
80
|
+
/** Argument object accepted by `buildContextCompositionReport`. */
export interface BuildContextCompositionInput {
    systemPrompt: string;
    tools: {
        name: string;
        description?: string;
        parameters?: unknown;
        source?: "built-in" | "extension";
    }[];
    extensions: {
        name: string;
        path: string;
        toolNames: string[];
        commandCount: number;
    }[];
    messages: AgentMessage[];
    providerReportedTokens: number | null;
    contextWindow: number | null;
    // The remaining sections are optional.
    // NOTE(review): they share field shapes with the same-named report sections; whether
    // they are passed through unchanged should be confirmed against the implementation.
    gc?: { packedCount: number; savedTokens: number };
    enforcement?: { enforcedCount: number; advisoryEvictions: number };
    curation?: {
        enabled: boolean;
        telemetry: CurationTelemetrySnapshot;
        lastSkipReason?: string;
    };
    spawned?: { cost: number; reports: number };
    adjustments?: {
        memoryEvidenceTokens: number;
        enforcementSavedTokens: number;
    };
}
|
|
119
|
+
/**
 * Builds a {@link ContextCompositionReport} from the supplied {@link BuildContextCompositionInput}.
 *
 * Per this file's header comment, every token figure in the report is an estimate (chars/4)
 * except `providerReportedTokens`.
 *
 * @param input System prompt, tools, extensions, messages, and optional telemetry sections.
 * @returns The composed report.
 */
export declare function buildContextCompositionReport(input: BuildContextCompositionInput): ContextCompositionReport;
|
|
120
|
+
/** Bounded plain-text dashboard (interactive `/context` command and tests). */
|
|
121
|
+
// Takes a ContextCompositionReport and returns the dashboard as a single string.
// NOTE(review): `maxToolRows` is optional; presumably it caps how many tool rows are listed — confirm against the implementation.
export declare function formatContextCompositionDashboard(report: ContextCompositionReport, maxToolRows?: number): string;
|
|
122
|
+
//# sourceMappingURL=context-composition.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"context-composition.d.ts","sourceRoot":"","sources":["../../../src/core/context/context-composition.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,2BAA2B,CAAC;AAE9D,OAAO,KAAK,EAAE,yBAAyB,EAAE,MAAM,oBAAoB,CAAC;AAEpE;;;;;;;;;;;;;;GAcG;AAEH,MAAM,WAAW,kBAAkB;IAClC,IAAI,EAAE,MAAM,CAAC;IACb,uFAAuF;IACvF,YAAY,EAAE,MAAM,CAAC;IACrB,MAAM,EAAE,UAAU,GAAG,WAAW,CAAC;CACjC;AAED,MAAM,WAAW,uBAAuB;IACvC,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,CAAC;IACb,SAAS,EAAE,MAAM,CAAC;IAClB,YAAY,EAAE,MAAM,CAAC;IACrB,uFAAuF;IACvF,sBAAsB,EAAE,MAAM,CAAC;CAC/B;AAED,MAAM,WAAW,eAAe;IAC/B,KAAK,EAAE,MAAM,CAAC;IACd,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;CACf;AAED,MAAM,WAAW,wBAAwB;IACxC,mEAAmE;IACnE,kBAAkB,EAAE,MAAM,CAAC;IAC3B,iBAAiB,EAAE,MAAM,CAAC;IAC1B,yEAAyE;IACzE,gBAAgB,EAAE,MAAM,CAAC;IACzB,KAAK,EAAE,kBAAkB,EAAE,CAAC;IAC5B,UAAU,EAAE,uBAAuB,EAAE,CAAC;IACtC,uFAAuF;IACvF,cAAc,EAAE,eAAe,EAAE,CAAC;IAClC,aAAa,EAAE,MAAM,CAAC;IACtB,YAAY,EAAE,MAAM,CAAC;IACrB,iFAAiF;IACjF,sBAAsB,EAAE,MAAM,CAAC;IAC/B,+EAA+E;IAC/E,sBAAsB,EAAE,MAAM,GAAG,IAAI,CAAC;IACtC,aAAa,EAAE,MAAM,GAAG,IAAI,CAAC;IAC7B,EAAE,EAAE;QAAE,WAAW,EAAE,MAAM,CAAC;QAAC,WAAW,EAAE,MAAM,CAAA;KAAE,GAAG,IAAI,CAAC;IACxD,WAAW,EAAE;QAAE,aAAa,EAAE,MAAM,CAAC;QAAC,iBAAiB,EAAE,MAAM,CAAA;KAAE,GAAG,IAAI,CAAC;IACzE,QAAQ,EAAE;QAAE,OAAO,EAAE,OAAO,CAAC;QAAC,SAAS,EAAE,yBAAyB,CAAC;QAAC,cAAc,CAAC,EAAE,MAAM,CAAA;KAAE,GAAG,IAAI,CAAC;IACrG,8FAA8F;IAC9F,OAAO,EAAE;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,OAAO,EAAE,MAAM,CAAA;KAAE,GAAG,IAAI,CAAC;IAClD,gGAAgG;IAChG,WAAW,EAAE;QAAE,oBAAoB,EAAE,MAAM,CAAC;QAAC,sBAAsB,EAAE,MAAM,CAAA;KAAE,CAAC;IAC9E,uEAAuE;IACvE,YAAY,EAAE,MAAM,EAAE,CAAC;CACvB;AAED,MAAM,WAAW,4BAA4B;IAC5C,YAAY,EAAE,MAAM,CAAC;IACrB,KAAK,EAAE,KAAK,CAAC;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,WAAW,CAAC,EAAE,MAAM,CAAC;QAAC,UAAU,CAAC,EAAE,OAAO,CAAC;QAAC,MAAM,CAAC,EAAE,UAAU,GAAG,WAAW,CAAA;KAAE,CAAC,CAAC;IAC9G,UAAU,EAAE,KAAK,CAAC;QACjB,IAAI,EAAE,MAAM,CAAC;QACb,IAAI,EAAE,MAAM,CAAC;QACb,SAAS,EAAE,MAAM,EAAE,CAAC;QACpB,YAAY,EAAE,MAAM,CAAC;KACrB,CAAC,CAAC;IACH,QAAQ,E
AAE,YAAY,EAAE,CAAC;IACzB,sBAAsB,EAAE,MAAM,GAAG,IAAI,CAAC;IACtC,aAAa,EAAE,MAAM,GAAG,IAAI,CAAC;IAC7B,EAAE,CAAC,EAAE;QAAE,WAAW,EAAE,MAAM,CAAC;QAAC,WAAW,EAAE,MAAM,CAAA;KAAE,CAAC;IAClD,WAAW,CAAC,EAAE;QAAE,aAAa,EAAE,MAAM,CAAC;QAAC,iBAAiB,EAAE,MAAM,CAAA;KAAE,CAAC;IACnE,QAAQ,CAAC,EAAE;QAAE,OAAO,EAAE,OAAO,CAAC;QAAC,SAAS,EAAE,yBAAyB,CAAC;QAAC,cAAc,CAAC,EAAE,MAAM,CAAA;KAAE,CAAC;IAC/F,OAAO,CAAC,EAAE;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,OAAO,EAAE,MAAM,CAAA;KAAE,CAAC;IAC5C,WAAW,CAAC,EAAE;QAAE,oBAAoB,EAAE,MAAM,CAAC;QAAC,sBAAsB,EAAE,MAAM,CAAA;KAAE,CAAC;CAC/E;AAiCD,wBAAgB,6BAA6B,CAAC,KAAK,EAAE,4BAA4B,GAAG,wBAAwB,CAoG3G;AAED,+EAA+E;AAC/E,wBAAgB,iCAAiC,CAAC,MAAM,EAAE,wBAAwB,EAAE,WAAW,SAAK,GAAG,MAAM,CAmE5G","sourcesContent":["import type { AgentMessage } from \"@caupulican/pi-agent-core\";\nimport { estimateTokens } from \"../compaction/compaction.ts\";\nimport type { CurationTelemetrySnapshot } from \"./brain-curator.ts\";\n\n/**\n * Context composition dashboard (user-facing): decomposes EVERYTHING that rides along on every\n * request — system prompt, active tool schemas, extension contributions, injected blocks\n * (memory recall pages, evidence blocks), and the session messages themselves (raw vs. GC-packed\n * vs. policy-stubbed) — so a user integrating their own tools/extensions can see exactly what\n * each addition costs per request and where cleaning is (or is not) working.\n *\n * Honesty contract: everything here is an ESTIMATE (chars/4) EXCEPT `providerReportedTokens`,\n * which is what the provider actually billed. The dashboard always shows both and the delta —\n * the delta is the measure of how much the estimates can be trusted, never hidden.\n *\n * Known exclusions (named, not hidden): extension `context` handlers may rewrite messages at\n * send time in ways this view cannot see. 
The memory evidence block and enforcement stubbing\n * are ALSO send-time-only, but those are modeled explicitly via `adjustments`.\n */\n\nexport interface ToolCompositionRow {\n\tname: string;\n\t/** Estimated tokens for the tool's name+description+schema as sent to the provider. */\n\tschemaTokens: number;\n\tsource: \"built-in\" | \"extension\";\n}\n\nexport interface ExtensionCompositionRow {\n\tname: string;\n\tpath: string;\n\ttoolCount: number;\n\tcommandCount: number;\n\t/** Estimated schema tokens of this extension's ACTIVE tools (its per-request cost). */\n\tactiveToolSchemaTokens: number;\n}\n\nexport interface MessageClassRow {\n\tlabel: string;\n\tcount: number;\n\ttokens: number;\n}\n\nexport interface ContextCompositionReport {\n\t/** Estimated tokens of the system prompt sent on every request. */\n\tsystemPromptTokens: number;\n\tsystemPromptChars: number;\n\t/** Estimated tokens of ALL active tool schemas sent on every request. */\n\ttoolSchemaTokens: number;\n\ttools: ToolCompositionRow[];\n\textensions: ExtensionCompositionRow[];\n\t/** Session message classes (raw/user/assistant/stubs/recall pages), heaviest first. */\n\tmessageClasses: MessageClassRow[];\n\tmessageTokens: number;\n\tmessageCount: number;\n\t/** Estimated total sent per request: system prompt + tool schemas + messages. */\n\testimatedRequestTokens: number;\n\t/** What the provider actually reported for the current context, when known. */\n\tproviderReportedTokens: number | null;\n\tcontextWindow: number | null;\n\tgc: { packedCount: number; savedTokens: number } | null;\n\tenforcement: { enforcedCount: number; advisoryEvictions: number } | null;\n\tcuration: { enabled: boolean; telemetry: CurationTelemetrySnapshot; lastSkipReason?: string } | null;\n\t/** Background/side-channel spend that does NOT ride in this context but bills the account. 
*/\n\tspawned: { cost: number; reports: number } | null;\n\t/** Send-time-only deltas folded into estimatedRequestTokens: +evidence block, -policy stubs. */\n\tadjustments: { memoryEvidenceTokens: number; enforcementSavedTokens: number };\n\t/** Actionable, bounded observations derived from the numbers above. */\n\tobservations: string[];\n}\n\nexport interface BuildContextCompositionInput {\n\tsystemPrompt: string;\n\ttools: Array<{ name: string; description?: string; parameters?: unknown; source?: \"built-in\" | \"extension\" }>;\n\textensions: Array<{\n\t\tname: string;\n\t\tpath: string;\n\t\ttoolNames: string[];\n\t\tcommandCount: number;\n\t}>;\n\tmessages: AgentMessage[];\n\tproviderReportedTokens: number | null;\n\tcontextWindow: number | null;\n\tgc?: { packedCount: number; savedTokens: number };\n\tenforcement?: { enforcedCount: number; advisoryEvictions: number };\n\tcuration?: { enabled: boolean; telemetry: CurationTelemetrySnapshot; lastSkipReason?: string };\n\tspawned?: { cost: number; reports: number };\n\tadjustments?: { memoryEvidenceTokens: number; enforcementSavedTokens: number };\n}\n\nfunction estimateTextTokens(text: string): number {\n\treturn Math.ceil(text.length / 4);\n}\n\nfunction messageText(message: AgentMessage): string {\n\tconst content = (message as { content?: unknown }).content;\n\tif (typeof content === \"string\") return content;\n\tif (!Array.isArray(content)) return \"\";\n\treturn content\n\t\t.filter((part): part is { type: \"text\"; text: string } => (part as { type?: string }).type === \"text\")\n\t\t.map((part) => part.text)\n\t\t.join(\"\\n\");\n}\n\nfunction classifyMessage(message: AgentMessage): string {\n\tconst details = (\n\t\tmessage as { details?: { contextGc?: { packed?: unknown }; promptPolicy?: { enforced?: unknown } } }\n\t).details;\n\tif (details?.contextGc?.packed === true) return \"gc-packed stub\";\n\tif (details?.promptPolicy?.enforced === true) return \"policy stub\";\n\tif (message.role === 
\"custom\") {\n\t\tconst customType = (message as { customType?: string }).customType ?? \"\";\n\t\tif (customType === \"memory_context\" || messageText(message).includes(\"<memory_context\")) {\n\t\t\treturn \"memory recall page\";\n\t\t}\n\t\treturn `custom (${customType || \"unknown\"})`;\n\t}\n\tif (message.role === \"toolResult\") return `toolResult (${(message as { toolName?: string }).toolName ?? \"?\"})`;\n\treturn message.role;\n}\n\nexport function buildContextCompositionReport(input: BuildContextCompositionInput): ContextCompositionReport {\n\tconst systemPromptTokens = estimateTextTokens(input.systemPrompt);\n\n\tconst tools: ToolCompositionRow[] = input.tools\n\t\t.map((tool) => ({\n\t\t\tname: tool.name,\n\t\t\tschemaTokens: estimateTextTokens(\n\t\t\t\tJSON.stringify({ name: tool.name, description: tool.description ?? \"\", parameters: tool.parameters ?? {} }),\n\t\t\t),\n\t\t\tsource: tool.source ?? (\"built-in\" as const),\n\t\t}))\n\t\t.sort((a, b) => b.schemaTokens - a.schemaTokens);\n\tconst toolSchemaTokens = tools.reduce((sum, tool) => sum + tool.schemaTokens, 0);\n\tconst toolTokensByName = new Map(tools.map((tool) => [tool.name, tool.schemaTokens]));\n\n\tconst extensions: ExtensionCompositionRow[] = input.extensions\n\t\t.map((extension) => ({\n\t\t\tname: extension.name,\n\t\t\tpath: extension.path,\n\t\t\ttoolCount: extension.toolNames.length,\n\t\t\tcommandCount: extension.commandCount,\n\t\t\tactiveToolSchemaTokens: extension.toolNames.reduce(\n\t\t\t\t(sum, toolName) => sum + (toolTokensByName.get(toolName) ?? 0),\n\t\t\t\t0,\n\t\t\t),\n\t\t}))\n\t\t.sort((a, b) => b.activeToolSchemaTokens - a.activeToolSchemaTokens);\n\n\tconst classes = new Map<string, MessageClassRow>();\n\tlet messageTokens = 0;\n\tfor (const message of input.messages) {\n\t\tconst label = classifyMessage(message);\n\t\tconst tokens = estimateTokens(message);\n\t\tmessageTokens += tokens;\n\t\tconst row = classes.get(label) ?? 
{ label, count: 0, tokens: 0 };\n\t\trow.count++;\n\t\trow.tokens += tokens;\n\t\tclasses.set(label, row);\n\t}\n\tconst messageClasses = [...classes.values()].sort((a, b) => b.tokens - a.tokens);\n\n\tconst adjustments = input.adjustments ?? { memoryEvidenceTokens: 0, enforcementSavedTokens: 0 };\n\tconst estimatedRequestTokens = Math.max(\n\t\t0,\n\t\tsystemPromptTokens +\n\t\t\ttoolSchemaTokens +\n\t\t\tmessageTokens +\n\t\t\tadjustments.memoryEvidenceTokens -\n\t\t\tadjustments.enforcementSavedTokens,\n\t);\n\n\tconst observations: string[] = [];\n\tconst heaviestTool = tools[0];\n\tif (heaviestTool && toolSchemaTokens > 0 && heaviestTool.schemaTokens > Math.max(500, toolSchemaTokens * 0.3)) {\n\t\tobservations.push(\n\t\t\t`tool \"${heaviestTool.name}\" alone is ~${heaviestTool.schemaTokens} tokens of schema on EVERY request — trim its description/schema if you own it`,\n\t\t);\n\t}\n\tconst recall = messageClasses.find((row) => row.label === \"memory recall page\");\n\tif (recall && recall.tokens > 1500) {\n\t\tobservations.push(\n\t\t\t`${recall.count} memory recall page(s) hold ~${recall.tokens} tokens — verify context GC is packing stale ones (gc packed: ${input.gc?.packedCount ?? 0})`,\n\t\t);\n\t}\n\tif (input.contextWindow && systemPromptTokens + toolSchemaTokens > input.contextWindow * 0.35) {\n\t\tobservations.push(\n\t\t\t`fixed per-request overhead (system+tools) is ~${Math.round(((systemPromptTokens + toolSchemaTokens) / input.contextWindow) * 100)}% of the context window before any conversation`,\n\t\t);\n\t}\n\tif (input.providerReportedTokens !== null) {\n\t\tconst delta = input.providerReportedTokens - estimatedRequestTokens;\n\t\tif (Math.abs(delta) > Math.max(2000, estimatedRequestTokens * 0.25)) {\n\t\t\tobservations.push(\n\t\t\t\t`provider-reported context (${input.providerReportedTokens}) differs from the estimate by ${delta > 0 ? 
\"+\" : \"\"}${delta} tokens — treat estimates as directional`,\n\t\t\t);\n\t\t}\n\t}\n\tif (input.curation?.enabled && input.curation.lastSkipReason) {\n\t\tobservations.push(`curation is enabled but idle: ${input.curation.lastSkipReason}`);\n\t}\n\n\treturn {\n\t\tsystemPromptTokens,\n\t\tsystemPromptChars: input.systemPrompt.length,\n\t\ttoolSchemaTokens,\n\t\ttools,\n\t\textensions,\n\t\tmessageClasses,\n\t\tmessageTokens,\n\t\tmessageCount: input.messages.length,\n\t\testimatedRequestTokens,\n\t\tproviderReportedTokens: input.providerReportedTokens,\n\t\tcontextWindow: input.contextWindow,\n\t\tgc: input.gc ?? null,\n\t\tenforcement: input.enforcement ?? null,\n\t\tcuration: input.curation ?? null,\n\t\tspawned: input.spawned ?? null,\n\t\tadjustments,\n\t\tobservations,\n\t};\n}\n\n/** Bounded plain-text dashboard (interactive `/context` command and tests). */\nexport function formatContextCompositionDashboard(report: ContextCompositionReport, maxToolRows = 10): string {\n\tconst pct = (tokens: number) =>\n\t\treport.contextWindow ? ` (${((tokens / report.contextWindow) * 100).toFixed(1)}% of window)` : \"\";\n\tconst lines: string[] = [\n\t\t\"Context composition — what rides on EVERY request\",\n\t\t`estimated request total: ~${report.estimatedRequestTokens} tokens${pct(report.estimatedRequestTokens)}${\n\t\t\treport.providerReportedTokens !== null ? 
` · provider-reported: ${report.providerReportedTokens}` : \"\"\n\t\t}`,\n\t\t\"\",\n\t\t`system prompt: ~${report.systemPromptTokens} tokens (${report.systemPromptChars} chars)`,\n\t\t`tool schemas: ~${report.toolSchemaTokens} tokens across ${report.tools.length} active tool(s)`,\n\t];\n\tfor (const tool of report.tools.slice(0, maxToolRows)) {\n\t\tlines.push(` - ${tool.name}: ~${tool.schemaTokens} tok [${tool.source}]`);\n\t}\n\tif (report.tools.length > maxToolRows) {\n\t\tconst rest = report.tools.slice(maxToolRows).reduce((sum, tool) => sum + tool.schemaTokens, 0);\n\t\tlines.push(` - (+${report.tools.length - maxToolRows} more: ~${rest} tok)`);\n\t}\n\tif (report.extensions.length > 0) {\n\t\tlines.push(\"\", \"extensions:\");\n\t\tfor (const extension of report.extensions.slice(0, 8)) {\n\t\t\tlines.push(\n\t\t\t\t` - ${extension.name}: ${extension.toolCount} tool(s), ${extension.commandCount} command(s), ~${extension.activeToolSchemaTokens} tok of active schemas`,\n\t\t\t);\n\t\t}\n\t}\n\tlines.push(\"\", `session messages: ${report.messageCount} row(s), ~${report.messageTokens} tokens`);\n\tif (report.adjustments.memoryEvidenceTokens > 0 || report.adjustments.enforcementSavedTokens > 0) {\n\t\tlines.push(\n\t\t\t`send-time adjustments: +${report.adjustments.memoryEvidenceTokens} memory evidence, -${report.adjustments.enforcementSavedTokens} policy stubs (applied when the request is built)`,\n\t\t);\n\t}\n\tfor (const row of report.messageClasses.slice(0, 10)) {\n\t\tlines.push(` - ${row.label}: ${row.count} row(s), ~${row.tokens} tok`);\n\t}\n\tif (report.gc) {\n\t\tlines.push(\n\t\t\t\"\",\n\t\t\t`context GC: ${report.gc.packedCount} row(s) packed, ~${report.gc.savedTokens} tokens saved this pass`,\n\t\t);\n\t}\n\tif (report.enforcement) {\n\t\tlines.push(\n\t\t\t`prompt policy: ${report.enforcement.enforcedCount} stub(s) this turn (${report.enforcement.advisoryEvictions} via brain advisory)`,\n\t\t);\n\t}\n\tif (report.curation) {\n\t\tconst t = 
report.curation.telemetry;\n\t\tlines.push(\n\t\t\t`brain curation: ${report.curation.enabled ? \"enabled\" : \"disabled\"} — ${t.jobsRun} job(s) run, ${t.parseFailures} parse failure(s), ${t.queued} queued, ~${Math.ceil(t.localChars / 4)} tokens processed locally${\n\t\t\t\treport.curation.lastSkipReason ? ` · last skip: ${report.curation.lastSkipReason}` : \"\"\n\t\t\t}`,\n\t\t);\n\t}\n\tif (report.spawned && report.spawned.reports > 0) {\n\t\tlines.push(\n\t\t\t`spawned/background spend (NOT in this context): ${report.spawned.reports} report(s), $${report.spawned.cost.toFixed(4)}`,\n\t\t);\n\t}\n\tif (report.observations.length > 0) {\n\t\tlines.push(\"\", \"observations:\");\n\t\tfor (const observation of report.observations.slice(0, 5)) {\n\t\t\tlines.push(` ! ${observation}`);\n\t\t}\n\t}\n\treturn lines.join(\"\\n\");\n}\n"]}
|