maestro-agent-sdk 0.1.27 → 0.1.28
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/core/agent.d.ts +28 -0
- package/dist/core/agent.d.ts.map +1 -1
- package/dist/core/agent.js +2 -0
- package/dist/core/agent.js.map +1 -1
- package/dist/core/is-abort-error.d.ts +18 -0
- package/dist/core/is-abort-error.d.ts.map +1 -1
- package/dist/core/is-abort-error.js +34 -0
- package/dist/core/is-abort-error.js.map +1 -1
- package/dist/core/loop.d.ts.map +1 -1
- package/dist/core/loop.js +69 -14
- package/dist/core/loop.js.map +1 -1
- package/dist/core/tool-result-truncation.d.ts +34 -0
- package/dist/core/tool-result-truncation.d.ts.map +1 -0
- package/dist/core/tool-result-truncation.js +162 -0
- package/dist/core/tool-result-truncation.js.map +1 -0
- package/dist/index.d.ts +6 -4
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +4 -3
- package/dist/index.js.map +1 -1
- package/dist/memory/active-task-template.d.ts +7 -4
- package/dist/memory/active-task-template.d.ts.map +1 -1
- package/dist/memory/active-task-template.js +19 -4
- package/dist/memory/active-task-template.js.map +1 -1
- package/dist/memory/aux-model-map.d.ts +49 -0
- package/dist/memory/aux-model-map.d.ts.map +1 -0
- package/dist/memory/aux-model-map.js +103 -0
- package/dist/memory/aux-model-map.js.map +1 -0
- package/dist/memory/compressor.d.ts +34 -59
- package/dist/memory/compressor.d.ts.map +1 -1
- package/dist/memory/compressor.js +222 -89
- package/dist/memory/compressor.js.map +1 -1
- package/dist/provider.d.ts +2 -2
- package/dist/provider.d.ts.map +1 -1
- package/dist/provider.js +32 -4
- package/dist/provider.js.map +1 -1
- package/dist/providers/codex-auth.d.ts.map +1 -1
- package/dist/providers/codex-auth.js +20 -0
- package/dist/providers/codex-auth.js.map +1 -1
- package/dist/providers/codex.d.ts +29 -1
- package/dist/providers/codex.d.ts.map +1 -1
- package/dist/providers/codex.js +94 -11
- package/dist/providers/codex.js.map +1 -1
- package/dist/types.d.ts +24 -0
- package/dist/types.d.ts.map +1 -1
- package/dist/types.js.map +1 -1
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -11,6 +11,7 @@ export const MAESTRO_UPSTREAM_SNAPSHOT = "v0.13.0 (2026-05-07)";
|
|
|
11
11
|
export { AIAgent } from "./core/agent.js";
|
|
12
12
|
export { runConversation } from "./core/loop.js";
|
|
13
13
|
export { ACTIVE_TASK_TEMPLATE, wrapCompactedSummary } from "./memory/active-task-template.js";
|
|
14
|
+
export { resolveAuxModel } from "./memory/aux-model-map.js";
|
|
14
15
|
// ─── Memory / compression ────────────────────────────────────────────────────
|
|
15
16
|
export { compressIfNeeded } from "./memory/compressor.js";
|
|
16
17
|
export { hashToolContent } from "./memory/hash.js";
|
|
@@ -20,14 +21,14 @@ export { onShutdown, runShutdown } from "./platform/lifecycle.js";
|
|
|
20
21
|
// ─── Host integration points (dependency injection) ──────────────────────────
|
|
21
22
|
export { setLogger } from "./platform/logger.js";
|
|
22
23
|
export { setMcpResolver } from "./platform/mcp-config.js";
|
|
23
|
-
export { applySkillAllowlist, DEFAULT_MAX_ITERATIONS, isAbortError, iterationBudgetLine, MAESTRO_DEFAULT_SKILL_KEY, maestroProvider, providerForModel, resolveSkillsDir, wrapUpOverlayLine, } from "./provider.js";
|
|
24
|
+
export { applySkillAllowlist, DEFAULT_MAX_ITERATIONS, isAbortError, isTimeoutError, iterationBudgetLine, MAESTRO_DEFAULT_SKILL_KEY, maestroProvider, providerForModel, resolveSkillsDir, wrapUpOverlayLine, } from "./provider.js";
|
|
24
25
|
export { AnthropicProvider, applyThinkingBudget, buildCacheableMessages, buildCacheableSystem, buildCacheableTools, detectThinkingKeyword, effortToPersonaPrompt, effortToThinkingBudget, isWrapUpZone, thinkingBudgetForTurn, } from "./providers/anthropic.js";
|
|
25
|
-
export { DeepseekProvider, effortForDeepseek, translateMessagesToOpenAI, translateToolsToOpenAI, } from "./providers/deepseek.js";
|
|
26
26
|
// ─── Codex Responses API (ChatGPT OAuth) ─────────────────────────────────────
|
|
27
27
|
export { CodexResponsesProvider, effortForCodex, } from "./providers/codex.js";
|
|
28
28
|
export { accessTokenExpiresAt, accessTokenIsExpiring, CodexAuthError, cloudflareHeaders as codexCloudflareHeaders, codexAuthPath, decodeJwtClaims, extractAccountId, readCodexAuth, refreshAccessToken, resolveAccessToken, writeRefreshedTokens, } from "./providers/codex-auth.js";
|
|
29
|
-
export { parseCodexStream
|
|
29
|
+
export { parseCodexStream } from "./providers/codex-stream.js";
|
|
30
30
|
export { translateMessagesToResponses, translateMessageToResponsesItems, translateToolsToResponses, } from "./providers/codex-translators.js";
|
|
31
|
+
export { DeepseekProvider, effortForDeepseek, translateMessagesToOpenAI, translateToolsToOpenAI, } from "./providers/deepseek.js";
|
|
31
32
|
// ─── Maestro registry + top-level provider entry point ───────────────────────
|
|
32
33
|
export { maestroRegistry } from "./registry.js";
|
|
33
34
|
// ─── Session store ───────────────────────────────────────────────────────────
|
package/dist/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAEH,OAAO,EAAE,mBAAmB,EAAE,MAAM,oBAAoB,CAAC;AACzD,MAAM,CAAC,MAAM,yBAAyB,GAAG,sBAA+B,CAAC;AAUzE,gFAAgF;AAChF,OAAO,EAAE,OAAO,EAAsB,MAAM,cAAc,CAAC;AAC3D,OAAO,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAEH,OAAO,EAAE,mBAAmB,EAAE,MAAM,oBAAoB,CAAC;AACzD,MAAM,CAAC,MAAM,yBAAyB,GAAG,sBAA+B,CAAC;AAUzE,gFAAgF;AAChF,OAAO,EAAE,OAAO,EAAsB,MAAM,cAAc,CAAC;AAC3D,OAAO,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAS9C,OAAO,EAAE,oBAAoB,EAAE,oBAAoB,EAAE,MAAM,+BAA+B,CAAC;AAC3F,OAAO,EAAE,eAAe,EAAE,MAAM,wBAAwB,CAAC;AACzD,gFAAgF;AAChF,OAAO,EAAE,gBAAgB,EAAE,MAAM,qBAAqB,CAAC;AACvD,OAAO,EAAE,eAAe,EAAE,MAAM,eAAe,CAAC;AAChD,OAAO,EAAE,mBAAmB,EAAE,MAAM,mBAAmB,CAAC;AACxD,OAAO,EAAE,cAAc,EAAE,MAAM,yBAAyB,CAAC;AACzD,OAAO,EAAE,UAAU,EAAE,WAAW,EAAE,MAAM,sBAAsB,CAAC;AAC/D,gFAAgF;AAChF,OAAO,EAA2B,SAAS,EAAE,MAAM,mBAAmB,CAAC;AACvE,OAAO,EAAuC,cAAc,EAAE,MAAM,uBAAuB,CAAC;AAC5F,OAAO,EACL,mBAAmB,EACnB,sBAAsB,EACtB,YAAY,EACZ,cAAc,EACd,mBAAmB,EACnB,yBAAyB,EACzB,eAAe,EACf,gBAAgB,EAChB,gBAAgB,EAChB,iBAAiB,GAClB,MAAM,YAAY,CAAC;AACpB,OAAO,EACL,iBAAiB,EACjB,mBAAmB,EACnB,sBAAsB,EACtB,oBAAoB,EACpB,mBAAmB,EACnB,qBAAqB,EACrB,qBAAqB,EACrB,sBAAsB,EACtB,YAAY,EACZ,qBAAqB,GACtB,MAAM,uBAAuB,CAAC;AAe/B,gFAAgF;AAChF,OAAO,EACL,sBAAsB,EACtB,cAAc,GACf,MAAM,mBAAmB,CAAC;AAC3B,OAAO,EACL,oBAAoB,EACpB,qBAAqB,EACrB,cAAc,EAEd,iBAAiB,IAAI,sBAAsB,EAC3C,aAAa,EACb,eAAe,EACf,gBAAgB,EAChB,aAAa,EACb,kBAAkB,EAClB,kBAAkB,EAClB,oBAAoB,GACrB,MAAM,wBAAwB,CAAC;AAChC,OAAO,EAAE,gBAAgB,EAAE,MAAM,0BAA0B,CAAC;AAC5D,OAAO,EAIL,4BAA4B,EAC5B,gCAAgC,EAChC,yBAAyB,GAC1B,MAAM,+BAA+B,CAAC;AACvC,OAAO,EACL,gBAAgB,EAChB,iBAAiB,EACjB,yBAAyB,EACzB,sBAAsB,GACvB,MAAM,sBAAsB,CAAC;AAC9B,gFAAgF;AAChF,OAAO,EAAE,eAAe,EAAE,MAAM,YAAY,CAAC;AAC7C,gFAAgF;AAChF,OAAO,EACL,2BAA2B,EAC3B,8BAA8B,EAC9B,oBAAoB,EAGpB,aAAa,EACb,sBAAsB,EAEtB,kBAAkB,GACnB,MAAM,iBAAiB,CAAC;AACzB,OAAO,EAAE,YAAY,EAAE,MAAM,kBAAkB,CAAC;AAChD,OAAO,EAAE,gBAAgB,EAAE,MAAM,wBAAwB,CAAC;AAC1D,gFAAgF;AAChF,OAAO,EAAE,eAAe,EAAE,gBAAgB,EAAmB,MAAM,iBAAiB,CAAC;AACrF,OAAO,EAAE,QAAQ,EAAE,SAAS,EAAsB,MAAM,gBAAgB,CAAC;AACzE,gFAAgF;AAChF,OAAO,EAAE,aAAa,EAAE,YAAY,EAAmC,MAAM,eAAe,CAAC;AAE7F,OAAO,EAA2B,qBAAqB,EAAE,MAAM,yBAAyB,CAAC;AAC
zF,OAAO,EAAE,eAAe,EAAE,MAAM,uBAAuB,CAAC;AACxD,gFAAgF;AAChF,OAAO,EAAE,QAAQ,EAAE,cAAc,EAAE,MAAM,sBAAsB,CAAC;AAChE,OAAO,EAEL,4BAA4B,EAC5B,oBAAoB,EACpB,kBAAkB,GACnB,MAAM,iCAAiC,CAAC;AACzC,OAAO,EAAE,cAAc,EAAE,MAAM,sBAAsB,CAAC;AACtD,OAAO,EAAE,WAAW,EAAE,QAAQ,EAAE,MAAM,sBAAsB,CAAC;AAC7D,OAAO,EAAE,QAAQ,EAAE,MAAM,sBAAsB,CAAC;AAChD,OAAO,EAAE,cAAc,EAAE,MAAM,sBAAsB,CAAC;AACtD,OAAO,EAAE,mBAAmB,EAAE,MAAM,4BAA4B,CAAC;AACjE,OAAO,EAAE,oBAAoB,EAAE,MAAM,6BAA6B,CAAC;AACnE,OAAO,EACL,oBAAoB,EACpB,iBAAiB,EACjB,kBAAkB,EAClB,oBAAoB,GACrB,MAAM,uBAAuB,CAAC;AAC/B,OAAO,EAAE,YAAY,EAAE,MAAM,2BAA2B,CAAC;AACzD,OAAO,EAAE,eAAe,EAAE,MAAM,uBAAuB,CAAC;AACxD,OAAO,EAAE,oBAAoB,EAAE,mBAAmB,EAAE,MAAM,oBAAoB,CAAC;AAC/E,gFAAgF;AAChF,OAAO,EAUL,YAAY,GACb,MAAM,kBAAkB,CAAC;AAc1B,OAAO,EACL,cAAc,EACd,WAAW,EACX,qBAAqB,EACrB,gBAAgB,GACjB,MAAM,SAAS,CAAC"}
|
|
@@ -12,16 +12,19 @@
|
|
|
12
12
|
* pending**. Without an explicit "Pending" section the summary
|
|
13
13
|
* degenerates into a recap; the model loses track of the actual task
|
|
14
14
|
* it should be working on next.
|
|
15
|
-
* - The headers
|
|
16
|
-
*
|
|
17
|
-
*
|
|
15
|
+
* - The headers are calibrated against many real compaction events on
|
|
16
|
+
* long sessions: they preserve the current task, durable constraints,
|
|
17
|
+
* decisions already made, pending work, next steps, relevant files, and
|
|
18
|
+
* recent tool context. v0.1.28 expands the schema from five to eight
|
|
19
|
+
* headers; expect one prompt-cache miss at upgrade time, then keep the
|
|
20
|
+
* new header order stable for future compactions.
|
|
18
21
|
*
|
|
19
22
|
* Upstream reference: `hermes-agent/agent/context_compressor.py`
|
|
20
23
|
* (look for the "ACTIVE_TASK_SUMMARY_TEMPLATE" / "compression_system_prompt"
|
|
21
24
|
* constants). We keep the schema verbatim so summaries between agents stay
|
|
22
25
|
* mutually intelligible if the topic later switches via set_agent.
|
|
23
26
|
*/
|
|
24
|
-
export declare const ACTIVE_TASK_TEMPLATE = "You are compressing a long agent conversation so the main agent can continue\nwithout losing context. Produce a single concise summary using EXACTLY these\nsection headers, in this order:\n\n## Active Task\nOne sentence: what is the agent currently working on?\n\n## Goal\nOne or two sentences: the user's overall objective in this session.\n\n## Pending\nBulleted list of unresolved items, decisions to make, or work explicitly\ndeferred. Use \"(blocked: <reason>)\" when applicable.\n\n## Files\nBulleted list of `absolute/paths` touched or referenced (read, written,\ninspected). Skip if none.\n\n## Recent context\n3\u20135 bullets capturing the most recent tool calls + their salient outputs.\nPrefer specifics (paths, line numbers, exit codes, key values) over generic\nrecaps. Skip details that have no bearing on the next step.\n\nRULES:\n- Output ONLY the
|
|
27
|
+
export declare const ACTIVE_TASK_TEMPLATE = "You are compressing a long agent conversation so the main agent can continue\nwithout losing context. Produce a single concise summary using EXACTLY these\nsection headers, in this order:\n\n## Active Task\nOne sentence: what is the agent currently working on?\n\n## Goal\nOne or two sentences: the user's overall objective in this session.\n\n## Constraints\nBulleted list of durable requirements, user preferences, technical limits, or\nprocess rules that should continue to govern the work. Skip if none.\n\n## Key Decisions\nBulleted list of decisions already made that should not be reopened unless the\nuser asks. Include the rationale when it is short and important. Skip if none.\n\n## Pending\nBulleted list of unresolved items, decisions to make, or work explicitly\ndeferred. Use \"(blocked: <reason>)\" when applicable.\n\n## Next Steps\nBulleted list of the concrete next actions the main agent should take after\ncompaction, in likely execution order. Skip if none.\n\n## Files\nBulleted list of `absolute/paths` touched or referenced (read, written,\ninspected). Skip if none.\n\n## Recent context\n3\u20135 bullets capturing the most recent tool calls + their salient outputs.\nPrefer specifics (paths, line numbers, exit codes, key values) over generic\nrecaps. Skip details that have no bearing on the next step.\n\nRULES:\n- Output ONLY the eight sections above, with no preamble or postscript.\n- Do NOT echo the user's words verbatim \u2014 paraphrase tightly.\n- Do NOT invent file paths or facts not present in the transcript.\n- Keep the entire summary under 1500 words.";
|
|
25
28
|
/** Header line for the summary user message that the main loop sees in
|
|
26
29
|
* place of the compressed history. Surrounded by visible markers so the
|
|
27
30
|
* main model recognizes this as a system-injected compaction, not normal
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"active-task-template.d.ts","sourceRoot":"","sources":["../../src/memory/active-task-template.ts"],"names":[],"mappings":"AAAA
|
|
1
|
+
{"version":3,"file":"active-task-template.d.ts","sourceRoot":"","sources":["../../src/memory/active-task-template.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;GAyBG;AAEH,eAAO,MAAM,oBAAoB,+jDAuCW,CAAC;AAE7C;;;;mEAImE;AACnE,eAAO,MAAM,qBAAqB,wBAAwB,CAAC;AAC3D,eAAO,MAAM,sBAAsB,yBAAyB,CAAC;AAE7D,8EAA8E;AAC9E,wBAAgB,oBAAoB,CAAC,OAAO,EAAE,MAAM,GAAG,MAAM,CAE5D"}
|
|
@@ -12,9 +12,12 @@
|
|
|
12
12
|
* pending**. Without an explicit "Pending" section the summary
|
|
13
13
|
* degenerates into a recap; the model loses track of the actual task
|
|
14
14
|
* it should be working on next.
|
|
15
|
-
* - The headers
|
|
16
|
-
*
|
|
17
|
-
*
|
|
15
|
+
* - The headers are calibrated against many real compaction events on
|
|
16
|
+
* long sessions: they preserve the current task, durable constraints,
|
|
17
|
+
* decisions already made, pending work, next steps, relevant files, and
|
|
18
|
+
* recent tool context. v0.1.28 expands the schema from five to eight
|
|
19
|
+
* headers; expect one prompt-cache miss at upgrade time, then keep the
|
|
20
|
+
* new header order stable for future compactions.
|
|
18
21
|
*
|
|
19
22
|
* Upstream reference: `hermes-agent/agent/context_compressor.py`
|
|
20
23
|
* (look for the "ACTIVE_TASK_SUMMARY_TEMPLATE" / "compression_system_prompt"
|
|
@@ -31,10 +34,22 @@ One sentence: what is the agent currently working on?
|
|
|
31
34
|
## Goal
|
|
32
35
|
One or two sentences: the user's overall objective in this session.
|
|
33
36
|
|
|
37
|
+
## Constraints
|
|
38
|
+
Bulleted list of durable requirements, user preferences, technical limits, or
|
|
39
|
+
process rules that should continue to govern the work. Skip if none.
|
|
40
|
+
|
|
41
|
+
## Key Decisions
|
|
42
|
+
Bulleted list of decisions already made that should not be reopened unless the
|
|
43
|
+
user asks. Include the rationale when it is short and important. Skip if none.
|
|
44
|
+
|
|
34
45
|
## Pending
|
|
35
46
|
Bulleted list of unresolved items, decisions to make, or work explicitly
|
|
36
47
|
deferred. Use "(blocked: <reason>)" when applicable.
|
|
37
48
|
|
|
49
|
+
## Next Steps
|
|
50
|
+
Bulleted list of the concrete next actions the main agent should take after
|
|
51
|
+
compaction, in likely execution order. Skip if none.
|
|
52
|
+
|
|
38
53
|
## Files
|
|
39
54
|
Bulleted list of \`absolute/paths\` touched or referenced (read, written,
|
|
40
55
|
inspected). Skip if none.
|
|
@@ -45,7 +60,7 @@ Prefer specifics (paths, line numbers, exit codes, key values) over generic
|
|
|
45
60
|
recaps. Skip details that have no bearing on the next step.
|
|
46
61
|
|
|
47
62
|
RULES:
|
|
48
|
-
- Output ONLY the
|
|
63
|
+
- Output ONLY the eight sections above, with no preamble or postscript.
|
|
49
64
|
- Do NOT echo the user's words verbatim — paraphrase tightly.
|
|
50
65
|
- Do NOT invent file paths or facts not present in the transcript.
|
|
51
66
|
- Keep the entire summary under 1500 words.`;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"active-task-template.js","sourceRoot":"","sources":["../../src/memory/active-task-template.ts"],"names":[],"mappings":"AAAA
|
|
1
|
+
{"version":3,"file":"active-task-template.js","sourceRoot":"","sources":["../../src/memory/active-task-template.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;GAyBG;AAEH,MAAM,CAAC,MAAM,oBAAoB,GAAG;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;4CAuCQ,CAAC;AAE7C;;;;mEAImE;AACnE,MAAM,CAAC,MAAM,qBAAqB,GAAG,qBAAqB,CAAC;AAC3D,MAAM,CAAC,MAAM,sBAAsB,GAAG,sBAAsB,CAAC;AAE7D,8EAA8E;AAC9E,MAAM,UAAU,oBAAoB,CAAC,OAAe;IAClD,OAAO,GAAG,qBAAqB,KAAK,OAAO,CAAC,IAAI,EAAE,KAAK,sBAAsB,EAAE,CAAC;AAClF,CAAC"}
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Map a main model id to the cheapest sibling on the same provider for
|
|
3
|
+
* background context compaction.
|
|
4
|
+
*
|
|
5
|
+
* Why this exists
|
|
6
|
+
* ---------------
|
|
7
|
+
* `compressIfNeeded` (memory/compressor.ts) reuses the agent's main model
|
|
8
|
+
* for its summary call by default. That's fine when the main model is
|
|
9
|
+
* already a cheap/fast tier (Haiku, deepseek-flash, gpt-5.4-mini), but
|
|
10
|
+
* pathological for the heavy tiers:
|
|
11
|
+
*
|
|
12
|
+
* - gpt-5.5: a 330K-token compaction call balloons the request body to
|
|
13
|
+
* ~1.3 MB and routinely hits undici's 5-minute headersTimeout. The
|
|
14
|
+
* v0.1.28 diagnostic logs caught this in production — both the main
|
|
15
|
+
* turn *and* the aux LLM compaction call timed out on the same
|
|
16
|
+
* response cycle, leaving the loop with a prune-only fallback.
|
|
17
|
+
* - claude-opus: each compaction costs 4-8× a Haiku call for the same
|
|
18
|
+
* summary quality.
|
|
19
|
+
* - deepseek-v4-pro: same shape as gpt-5.5 — the heavy reasoning model
|
|
20
|
+
* wasted on a structural summary pass.
|
|
21
|
+
*
|
|
22
|
+
* The fix is one indirection: pick the cheapest sibling on the *same
|
|
23
|
+
* provider* (so we don't have to swap clients) and route compaction
|
|
24
|
+
* through it. Cross-provider swaps (e.g. gpt-5.5 main + Haiku aux) would
|
|
25
|
+
* require the host to wire a second provider client, so we stay
|
|
26
|
+
* intra-provider here and let hosts override via `AIAgentConfig.auxModel`
|
|
27
|
+
* if they want something fancier.
|
|
28
|
+
*
|
|
29
|
+
* Returns the input unchanged when the main model is already the aux
|
|
30
|
+
* target for its family, or when we don't recognize it (foreign slugs
|
|
31
|
+
* pass through so a host's custom model id doesn't get silently rewritten).
|
|
32
|
+
*/
|
|
33
|
+
/**
|
|
34
|
+
* Resolve the aux (compaction) model id for a given main model.
|
|
35
|
+
*
|
|
36
|
+
* - Known main model with a cheaper sibling → that sibling.
|
|
37
|
+
* - Known main model that *is* its family's aux target → itself (no-op,
|
|
38
|
+
* callers can still route compaction through it without a second
|
|
39
|
+
* network identity).
|
|
40
|
+
* - Unknown slug → returns the input unchanged. Lets hosts pin custom
|
|
41
|
+
* model ids (proxies, alternate gateways) without us guessing wrong.
|
|
42
|
+
*
|
|
43
|
+
* Prefix fallbacks catch model ids that look like a known family but
|
|
44
|
+
* differ in version suffix (e.g. `claude-sonnet-4-7`, `deepseek-v5-pro`).
|
|
45
|
+
* They are conservative — only match when the slug clearly belongs to
|
|
46
|
+
* one of the three supported providers.
|
|
47
|
+
*/
|
|
48
|
+
export declare function resolveAuxModel(mainModel: string): string;
|
|
49
|
+
//# sourceMappingURL=aux-model-map.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"aux-model-map.d.ts","sourceRoot":"","sources":["../../src/memory/aux-model-map.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA+BG;AAsDH;;;;;;;;;;;;;;GAcG;AACH,wBAAgB,eAAe,CAAC,SAAS,EAAE,MAAM,GAAG,MAAM,CAkBzD"}
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Map a main model id to the cheapest sibling on the same provider for
|
|
3
|
+
* background context compaction.
|
|
4
|
+
*
|
|
5
|
+
* Why this exists
|
|
6
|
+
* ---------------
|
|
7
|
+
* `compressIfNeeded` (memory/compressor.ts) reuses the agent's main model
|
|
8
|
+
* for its summary call by default. That's fine when the main model is
|
|
9
|
+
* already a cheap/fast tier (Haiku, deepseek-flash, gpt-5.4-mini), but
|
|
10
|
+
* pathological for the heavy tiers:
|
|
11
|
+
*
|
|
12
|
+
* - gpt-5.5: a 330K-token compaction call balloons the request body to
|
|
13
|
+
* ~1.3 MB and routinely hits undici's 5-minute headersTimeout. The
|
|
14
|
+
* v0.1.28 diagnostic logs caught this in production — both the main
|
|
15
|
+
* turn *and* the aux LLM compaction call timed out on the same
|
|
16
|
+
* response cycle, leaving the loop with a prune-only fallback.
|
|
17
|
+
* - claude-opus: each compaction costs 4-8× a Haiku call for the same
|
|
18
|
+
* summary quality.
|
|
19
|
+
* - deepseek-v4-pro: same shape as gpt-5.5 — the heavy reasoning model
|
|
20
|
+
* wasted on a structural summary pass.
|
|
21
|
+
*
|
|
22
|
+
* The fix is one indirection: pick the cheapest sibling on the *same
|
|
23
|
+
* provider* (so we don't have to swap clients) and route compaction
|
|
24
|
+
* through it. Cross-provider swaps (e.g. gpt-5.5 main + Haiku aux) would
|
|
25
|
+
* require the host to wire a second provider client, so we stay
|
|
26
|
+
* intra-provider here and let hosts override via `AIAgentConfig.auxModel`
|
|
27
|
+
* if they want something fancier.
|
|
28
|
+
*
|
|
29
|
+
* Returns the input unchanged when the main model is already the aux
|
|
30
|
+
* target for its family, or when we don't recognize it (foreign slugs
|
|
31
|
+
* pass through so a host's custom model id doesn't get silently rewritten).
|
|
32
|
+
*/
|
|
33
|
+
import { MODEL_CODEX_GPT5_2, MODEL_CODEX_GPT5_3_CODEX, MODEL_CODEX_GPT5_4, MODEL_CODEX_GPT5_4_MINI, MODEL_CODEX_GPT5_5, MODEL_DEEPSEEK_V4_FLASH, MODEL_DEEPSEEK_V4_PRO, MODEL_OPUS, MODEL_SONNET, } from "../platform/config.js";
|
|
34
|
+
const CLAUDE_HAIKU = "claude-haiku-4-5";
|
|
35
|
+
/**
|
|
36
|
+
* Per-family aux model. Pinned by family, not cheapest-strictly: the host
|
|
37
|
+
* picks the model it considers the right balance of summary quality and
|
|
38
|
+
* latency for each provider line.
|
|
39
|
+
*
|
|
40
|
+
* - Claude family → sonnet. Haiku turned out too lossy for production
|
|
41
|
+
* summary work; sonnet is the chosen aux tier even when main is opus
|
|
42
|
+
* (compaction is bounded so the cost differential is small) or when
|
|
43
|
+
* main is haiku (the floor is sonnet either way).
|
|
44
|
+
* - DeepSeek family → flash.
|
|
45
|
+
* - Codex (gpt-5.x) family → gpt-5.4-mini.
|
|
46
|
+
*
|
|
47
|
+
* The function stays total — callers always get a usable model id back —
|
|
48
|
+
* and the same Provider client handles both main and aux, so no extra
|
|
49
|
+
* client wiring is required at the host layer.
|
|
50
|
+
*/
|
|
51
|
+
const AUX_MODEL_BY_MAIN = {
|
|
52
|
+
// Anthropic (Claude). Sonnet on every tier — host preference, not
|
|
53
|
+
// strictly cheapest. Haiku is intentionally avoided as an aux target
|
|
54
|
+
// because the summaries it produces dropped too much structural info
|
|
55
|
+
// for downstream resume turns.
|
|
56
|
+
[MODEL_OPUS]: MODEL_SONNET,
|
|
57
|
+
[MODEL_SONNET]: MODEL_SONNET,
|
|
58
|
+
[CLAUDE_HAIKU]: MODEL_SONNET,
|
|
59
|
+
// DeepSeek V4
|
|
60
|
+
[MODEL_DEEPSEEK_V4_PRO]: MODEL_DEEPSEEK_V4_FLASH,
|
|
61
|
+
[MODEL_DEEPSEEK_V4_FLASH]: MODEL_DEEPSEEK_V4_FLASH,
|
|
62
|
+
// Codex (gpt-5.x). gpt-5.4-mini is the cheapest tier accepted by the
|
|
63
|
+
// Responses endpoint, so every heavier slug routes to it.
|
|
64
|
+
[MODEL_CODEX_GPT5_5]: MODEL_CODEX_GPT5_4_MINI,
|
|
65
|
+
[MODEL_CODEX_GPT5_4]: MODEL_CODEX_GPT5_4_MINI,
|
|
66
|
+
[MODEL_CODEX_GPT5_4_MINI]: MODEL_CODEX_GPT5_4_MINI,
|
|
67
|
+
[MODEL_CODEX_GPT5_3_CODEX]: MODEL_CODEX_GPT5_4_MINI,
|
|
68
|
+
[MODEL_CODEX_GPT5_2]: MODEL_CODEX_GPT5_4_MINI,
|
|
69
|
+
};
|
|
70
|
+
/**
 * Resolve the aux (compaction) model id for a given main model.
 *
 * Resolution order:
 *   1. Exact match in `AUX_MODEL_BY_MAIN` → the mapped aux model. A model
 *      that is its own family's aux target maps to itself.
 *   2. Prefix fallbacks for ids that look like a known family but differ
 *      in version suffix (e.g. `claude-sonnet-4-7`, `deepseek-v5-pro`):
 *        - any `claude-*` slug → `MODEL_SONNET`
 *        - `deepseek-pro`, or a `deepseek-v*` slug containing `pro`
 *          → `MODEL_DEEPSEEK_V4_FLASH`
 *        - any `gpt-5*` slug that does not contain `mini`
 *          → `MODEL_CODEX_GPT5_4_MINI`
 *   3. Anything else → the input unchanged, so a host's custom model id
 *      (proxies, alternate gateways) is never silently rewritten.
 *
 * @param mainModel Model id configured for the main turn.
 * @returns The model id to use for the compaction call.
 */
export function resolveAuxModel(mainModel) {
    // Own-property check: a plain object literal inherits from
    // Object.prototype, so a bare `AUX_MODEL_BY_MAIN[mainModel]` lookup would
    // hand back an inherited member (e.g. the `constructor` function) for
    // slugs such as "constructor" or "toString" instead of passing them through.
    if (Object.prototype.hasOwnProperty.call(AUX_MODEL_BY_MAIN, mainModel)) {
        return AUX_MODEL_BY_MAIN[mainModel];
    }
    // Prefix-based fallback for forward-compat with new minor versions.
    // Any claude-* slug routes to sonnet (host preference).
    if (mainModel.startsWith("claude-")) {
        return MODEL_SONNET;
    }
    // Any deepseek pro variant routes to flash.
    const isDeepseekPro = mainModel === "deepseek-pro" ||
        (mainModel.startsWith("deepseek-v") && mainModel.includes("pro"));
    if (isDeepseekPro) {
        return MODEL_DEEPSEEK_V4_FLASH;
    }
    // Any gpt-5* slug that is not already a mini tier routes to mini.
    if (mainModel.startsWith("gpt-5") && !mainModel.includes("mini")) {
        return MODEL_CODEX_GPT5_4_MINI;
    }
    // Unrecognized slug: pass through untouched.
    return mainModel;
}
|
|
103
|
+
//# sourceMappingURL=aux-model-map.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"aux-model-map.js","sourceRoot":"","sources":["../../src/memory/aux-model-map.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA+BG;AAEH,OAAO,EACL,kBAAkB,EAClB,wBAAwB,EACxB,kBAAkB,EAClB,uBAAuB,EACvB,kBAAkB,EAClB,uBAAuB,EACvB,qBAAqB,EACrB,UAAU,EACV,YAAY,GACb,MAAM,mBAAmB,CAAC;AAE3B,MAAM,YAAY,GAAG,kBAAkB,CAAC;AAExC;;;;;;;;;;;;;;;GAeG;AACH,MAAM,iBAAiB,GAA2B;IAChD,kEAAkE;IAClE,qEAAqE;IACrE,qEAAqE;IACrE,+BAA+B;IAC/B,CAAC,UAAU,CAAC,EAAE,YAAY;IAC1B,CAAC,YAAY,CAAC,EAAE,YAAY;IAC5B,CAAC,YAAY,CAAC,EAAE,YAAY;IAE5B,cAAc;IACd,CAAC,qBAAqB,CAAC,EAAE,uBAAuB;IAChD,CAAC,uBAAuB,CAAC,EAAE,uBAAuB;IAElD,qEAAqE;IACrE,0DAA0D;IAC1D,CAAC,kBAAkB,CAAC,EAAE,uBAAuB;IAC7C,CAAC,kBAAkB,CAAC,EAAE,uBAAuB;IAC7C,CAAC,uBAAuB,CAAC,EAAE,uBAAuB;IAClD,CAAC,wBAAwB,CAAC,EAAE,uBAAuB;IACnD,CAAC,kBAAkB,CAAC,EAAE,uBAAuB;CAC9C,CAAC;AAEF;;;;;;;;;;;;;;GAcG;AACH,MAAM,UAAU,eAAe,CAAC,SAAiB;IAC/C,MAAM,KAAK,GAAG,iBAAiB,CAAC,SAAS,CAAC,CAAC;IAC3C,IAAI,KAAK,KAAK,SAAS;QAAE,OAAO,KAAK,CAAC;IAEtC,oEAAoE;IACpE,0EAA0E;IAC1E,4DAA4D;IAC5D,IAAI,SAAS,CAAC,UAAU,CAAC,SAAS,CAAC,EAAE,CAAC;QACpC,OAAO,YAAY,CAAC;IACtB,CAAC;IACD,IAAI,SAAS,KAAK,cAAc,IAAI,CAAC,SAAS,CAAC,UAAU,CAAC,YAAY,CAAC,IAAI,SAAS,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC;QACtG,OAAO,uBAAuB,CAAC;IACjC,CAAC;IACD,IAAI,SAAS,CAAC,UAAU,CAAC,OAAO,CAAC,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;QACjE,OAAO,uBAAuB,CAAC;IACjC,CAAC;IAED,OAAO,SAAS,CAAC;AACnB,CAAC"}
|
|
@@ -1,90 +1,65 @@
|
|
|
1
1
|
import type { Provider, ProviderMessage } from "../providers/base.js";
|
|
2
2
|
/**
|
|
3
|
-
* Maestro context auto-compaction.
|
|
3
|
+
* Maestro context auto-compaction (OpenCode-style incremental).
|
|
4
4
|
*
|
|
5
5
|
* When estimated tokens exceed `triggerRatio` × `contextWindow`, dispatch an
|
|
6
|
-
* aux LLM call
|
|
7
|
-
*
|
|
8
|
-
*
|
|
6
|
+
* aux LLM call to summarize the middle slice of the conversation. The
|
|
7
|
+
* resulting summary is appended to the canonical `messages` array as a
|
|
8
|
+
* compaction user + summary assistant pair so it survives persist → resume.
|
|
9
|
+
*
|
|
10
|
+
* Wire structure (returned to loop.ts):
|
|
9
11
|
*
|
|
10
12
|
* [ ...head_protected, { role: "user", content: "<compacted-history>..." },
|
|
11
13
|
* ...tail_protected ]
|
|
12
14
|
*
|
|
13
|
-
*
|
|
14
|
-
* - Head: the first user prompt + first assistant turn anchor the user's
|
|
15
|
-
* actual ask. Losing them to summarization makes the next compaction
|
|
16
|
-
* produce a recap with no goal, which cascades into hallucination.
|
|
17
|
-
* - Tail: the last few turns are the model's working memory; folding them
|
|
18
|
-
* into a summary destroys the in-progress reasoning.
|
|
19
|
-
*
|
|
20
|
-
* The middle is what gets compressed. We feed the aux LLM the raw
|
|
21
|
-
* ProviderMessage[] slice (with the Active Task system prompt) and replace
|
|
22
|
-
* it in the returned array with a single user message containing the fenced
|
|
23
|
-
* summary. Anthropic's tool_use/tool_result pairing requirement is
|
|
24
|
-
* preserved by snapping head/tail boundaries to message boundaries (we
|
|
25
|
-
* never split a user/assistant pair).
|
|
15
|
+
* Compaction blocks in messages (persisted):
|
|
26
16
|
*
|
|
27
|
-
*
|
|
28
|
-
*
|
|
29
|
-
* - Aux LLM call fails → fall back to pruneMessages only and log a
|
|
30
|
-
* warning. The turn proceeds; we never throw because that would break
|
|
31
|
-
* the user's in-flight conversation over an optimization.
|
|
32
|
-
* - Compacted result is *larger* than the input (degenerate aux output)
|
|
33
|
-
* → discard compaction and return pruned-only.
|
|
17
|
+
* { role: "user", content: "\x00maestro-compaction\x00" }
|
|
18
|
+
* { role: "assistant", content: summaryText }
|
|
34
19
|
*
|
|
35
|
-
*
|
|
36
|
-
*
|
|
37
|
-
*
|
|
38
|
-
*
|
|
20
|
+
* On re-compaction (same session or resumed), previous compaction blocks
|
|
21
|
+
* are detected, the last summary extracted, and the aux LLM receives an
|
|
22
|
+
* *incremental* prompt that asks it to update the anchored summary using
|
|
23
|
+
* ONLY the conversation delta after the last compaction. This avoids
|
|
24
|
+
* re-summarizing the entire history from scratch every time.
|
|
39
25
|
*
|
|
40
|
-
*
|
|
41
|
-
*
|
|
42
|
-
*
|
|
26
|
+
* Head + tail protection:
|
|
27
|
+
* - Head: first user prompt + first assistant turn.
|
|
28
|
+
* - Tail: last 3 turns (6 messages) for working memory.
|
|
29
|
+
* - Middle: everything else gets summarized.
|
|
43
30
|
*/
|
|
44
31
|
export interface CompressOptions {
|
|
45
|
-
/** Model context window in tokens. Default reads `MAESTRO_CONTEXT_WINDOW`
|
|
46
|
-
* env (Sonnet 4.6 default = 200_000). */
|
|
47
32
|
contextWindow?: number;
|
|
48
|
-
/** Compaction triggers when estimated tokens / window ≥ this ratio.
|
|
49
|
-
* Default 0.8 — matches upstream and leaves enough headroom for the
|
|
50
|
-
* current turn's prompt + response to fit inside the cap. */
|
|
51
33
|
triggerRatio?: number;
|
|
52
|
-
/** Number of HEAD messages preserved verbatim. Default 2 (first user
|
|
53
|
-
* prompt + first assistant turn). */
|
|
54
34
|
headProtect?: number;
|
|
55
|
-
/** Number of TAIL messages preserved verbatim. Default 6 (~ last 3 turns
|
|
56
|
-
* of user/assistant alternation). */
|
|
57
35
|
tailProtect?: number;
|
|
58
|
-
/** Aux model id for the summarization call. The agent loop wires the
|
|
59
|
-
* agent's own configured model in by default, so callers usually don't
|
|
60
|
-
* set this unless they want compaction to run on a different model than
|
|
61
|
-
* the main turn. */
|
|
62
36
|
auxModel?: string;
|
|
63
|
-
/** Inject a different provider for tests. Defaults to a fresh
|
|
64
|
-
* `AnthropicProvider.fromEnv()` reuse-of-the-main-provider via DI. */
|
|
65
37
|
auxProvider?: Provider;
|
|
66
|
-
/** Disable pruning fallback when aux LLM fails. Tests use this to verify
|
|
67
|
-
* the fallback path exits cleanly without re-pruning. */
|
|
68
38
|
disablePruneFallback?: boolean;
|
|
69
|
-
/** Abort signal for the aux summarization request. */
|
|
70
39
|
abortSignal?: AbortSignal;
|
|
40
|
+
emergencyTargetTokens?: number;
|
|
41
|
+
onEmergencyTrim?: (notice: string) => void;
|
|
71
42
|
}
|
|
72
43
|
/**
|
|
73
44
|
* Run the auto-compaction pipeline.
|
|
74
45
|
*
|
|
75
46
|
* Steps:
|
|
76
|
-
* 1.
|
|
77
|
-
*
|
|
78
|
-
*
|
|
79
|
-
*
|
|
80
|
-
*
|
|
81
|
-
*
|
|
82
|
-
*
|
|
83
|
-
*
|
|
47
|
+
* 1. Prune (cheap: dedup, age-summary, truncate tool output).
|
|
48
|
+
* 2. Re-estimate tokens. If below threshold, return pruned.
|
|
49
|
+
* 3. Snap head/tail boundaries. If no middle, return pruned.
|
|
50
|
+
* 4. Find previous compaction blocks → extract last summary for
|
|
51
|
+
* incremental aux-LLM prompt.
|
|
52
|
+
* 5. Call aux LLM to summarize the middle. When a previous summary
|
|
53
|
+
* exists, only the *delta* after the last compaction
|
|
54
|
+
* (messages[assistantIdx+1 .. tailStart]) is sent to the aux LLM.
|
|
55
|
+
* 6. After the degenerate-savings guard, persist compaction user +
|
|
56
|
+
* summary assistant pair in the canonical `messages` array.
|
|
57
|
+
* 7. Return wire array: [head, wrapped-summary, tail].
|
|
84
58
|
*
|
|
85
|
-
*
|
|
86
|
-
*
|
|
59
|
+
* All wire head/tail boundaries are built from a compaction-stripped
|
|
60
|
+
* view of messages so the wire never leaks internal sentinels.
|
|
87
61
|
*/
|
|
88
62
|
export declare function compressIfNeeded(messages: ProviderMessage[], opts?: CompressOptions): Promise<ProviderMessage[]>;
|
|
63
|
+
/** Test-only: reset the compactor anti-thrash WeakMap entry for an array. */
|
|
89
64
|
export declare function __resetCompactorState(messages: ProviderMessage[]): void;
|
|
90
65
|
//# sourceMappingURL=compressor.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"compressor.d.ts","sourceRoot":"","sources":["../../src/memory/compressor.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EAAE,QAAQ,EAAwB,eAAe,EAAE,MAAM,kBAAkB,CAAC;AAExF
|
|
1
|
+
{"version":3,"file":"compressor.d.ts","sourceRoot":"","sources":["../../src/memory/compressor.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EAAE,QAAQ,EAAwB,eAAe,EAAE,MAAM,kBAAkB,CAAC;AAExF;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA4BG;AAEH,MAAM,WAAW,eAAe;IAC9B,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,WAAW,CAAC,EAAE,QAAQ,CAAC;IACvB,oBAAoB,CAAC,EAAE,OAAO,CAAC;IAC/B,WAAW,CAAC,EAAE,WAAW,CAAC;IAC1B,qBAAqB,CAAC,EAAE,MAAM,CAAC;IAC/B,eAAe,CAAC,EAAE,CAAC,MAAM,EAAE,MAAM,KAAK,IAAI,CAAC;CAC5C;AAiHD;;;;;;;;;;;;;;;;;;GAkBG;AACH,wBAAsB,gBAAgB,CACpC,QAAQ,EAAE,eAAe,EAAE,EAC3B,IAAI,GAAE,eAAoB,GACzB,OAAO,CAAC,eAAe,EAAE,CAAC,CAuK5B;AAwED,6EAA6E;AAC7E,wBAAgB,qBAAqB,CAAC,QAAQ,EAAE,eAAe,EAAE,GAAG,IAAI,CAEvE"}
|