@dexto/core 1.5.3 → 1.5.4
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- package/dist/agent/DextoAgent.cjs +284 -1
- package/dist/agent/DextoAgent.d.ts +114 -0
- package/dist/agent/DextoAgent.d.ts.map +1 -1
- package/dist/agent/DextoAgent.js +275 -1
- package/dist/agent/schemas.d.ts +51 -21
- package/dist/agent/schemas.d.ts.map +1 -1
- package/dist/context/compaction/overflow.cjs +6 -10
- package/dist/context/compaction/overflow.d.ts +14 -11
- package/dist/context/compaction/overflow.d.ts.map +1 -1
- package/dist/context/compaction/overflow.js +6 -10
- package/dist/context/compaction/providers/reactive-overflow-provider.cjs +15 -0
- package/dist/context/compaction/providers/reactive-overflow-provider.d.ts +15 -0
- package/dist/context/compaction/providers/reactive-overflow-provider.d.ts.map +1 -1
- package/dist/context/compaction/providers/reactive-overflow-provider.js +15 -0
- package/dist/context/compaction/schemas.cjs +22 -2
- package/dist/context/compaction/schemas.d.ts +45 -0
- package/dist/context/compaction/schemas.d.ts.map +1 -1
- package/dist/context/compaction/schemas.js +22 -2
- package/dist/context/compaction/strategies/reactive-overflow.cjs +166 -26
- package/dist/context/compaction/strategies/reactive-overflow.d.ts +21 -0
- package/dist/context/compaction/strategies/reactive-overflow.d.ts.map +1 -1
- package/dist/context/compaction/strategies/reactive-overflow.js +166 -26
- package/dist/context/manager.cjs +278 -31
- package/dist/context/manager.d.ts +192 -5
- package/dist/context/manager.d.ts.map +1 -1
- package/dist/context/manager.js +285 -32
- package/dist/context/types.d.ts +6 -0
- package/dist/context/types.d.ts.map +1 -1
- package/dist/context/utils.cjs +77 -11
- package/dist/context/utils.d.ts +86 -8
- package/dist/context/utils.d.ts.map +1 -1
- package/dist/context/utils.js +71 -11
- package/dist/events/index.cjs +4 -0
- package/dist/events/index.d.ts +41 -7
- package/dist/events/index.d.ts.map +1 -1
- package/dist/events/index.js +4 -0
- package/dist/llm/executor/stream-processor.cjs +19 -1
- package/dist/llm/executor/stream-processor.d.ts +3 -0
- package/dist/llm/executor/stream-processor.d.ts.map +1 -1
- package/dist/llm/executor/stream-processor.js +19 -1
- package/dist/llm/executor/turn-executor.cjs +219 -30
- package/dist/llm/executor/turn-executor.d.ts +62 -10
- package/dist/llm/executor/turn-executor.d.ts.map +1 -1
- package/dist/llm/executor/turn-executor.js +219 -30
- package/dist/llm/executor/types.d.ts +28 -0
- package/dist/llm/executor/types.d.ts.map +1 -1
- package/dist/llm/formatters/vercel.cjs +36 -28
- package/dist/llm/formatters/vercel.d.ts.map +1 -1
- package/dist/llm/formatters/vercel.js +36 -28
- package/dist/llm/services/factory.cjs +3 -2
- package/dist/llm/services/factory.d.ts +3 -1
- package/dist/llm/services/factory.d.ts.map +1 -1
- package/dist/llm/services/factory.js +3 -2
- package/dist/llm/services/vercel.cjs +34 -6
- package/dist/llm/services/vercel.d.ts +23 -3
- package/dist/llm/services/vercel.d.ts.map +1 -1
- package/dist/llm/services/vercel.js +34 -6
- package/dist/session/chat-session.cjs +20 -11
- package/dist/session/chat-session.d.ts +9 -4
- package/dist/session/chat-session.d.ts.map +1 -1
- package/dist/session/chat-session.js +20 -11
- package/dist/session/compaction-service.cjs +139 -0
- package/dist/session/compaction-service.d.ts +81 -0
- package/dist/session/compaction-service.d.ts.map +1 -0
- package/dist/session/compaction-service.js +106 -0
- package/dist/session/session-manager.cjs +146 -0
- package/dist/session/session-manager.d.ts +50 -0
- package/dist/session/session-manager.d.ts.map +1 -1
- package/dist/session/session-manager.js +146 -0
- package/dist/session/title-generator.cjs +2 -2
- package/dist/session/title-generator.js +2 -2
- package/dist/systemPrompt/in-built-prompts.cjs +36 -0
- package/dist/systemPrompt/in-built-prompts.d.ts +18 -1
- package/dist/systemPrompt/in-built-prompts.d.ts.map +1 -1
- package/dist/systemPrompt/in-built-prompts.js +25 -0
- package/dist/systemPrompt/manager.cjs +22 -0
- package/dist/systemPrompt/manager.d.ts +10 -0
- package/dist/systemPrompt/manager.d.ts.map +1 -1
- package/dist/systemPrompt/manager.js +22 -0
- package/dist/systemPrompt/registry.cjs +2 -1
- package/dist/systemPrompt/registry.d.ts +1 -1
- package/dist/systemPrompt/registry.d.ts.map +1 -1
- package/dist/systemPrompt/registry.js +2 -1
- package/dist/systemPrompt/schemas.cjs +7 -0
- package/dist/systemPrompt/schemas.d.ts +13 -13
- package/dist/systemPrompt/schemas.d.ts.map +1 -1
- package/dist/systemPrompt/schemas.js +7 -0
- package/dist/utils/index.cjs +3 -1
- package/dist/utils/index.d.ts +1 -0
- package/dist/utils/index.d.ts.map +1 -1
- package/dist/utils/index.js +1 -0
- package/package.json +1 -1
|
@@ -4,29 +4,32 @@ import type { TokenUsage } from '../../llm/types.js';
|
|
|
4
4
|
* These limits define the context window boundaries.
|
|
5
5
|
*/
|
|
6
6
|
export interface ModelLimits {
|
|
7
|
-
/** Maximum context window size in tokens */
|
|
7
|
+
/** Maximum context window size in tokens (the model's input limit) */
|
|
8
8
|
contextWindow: number;
|
|
9
|
-
/** Maximum output tokens the model can generate */
|
|
10
|
-
maxOutput: number;
|
|
11
9
|
}
|
|
12
10
|
/**
|
|
13
|
-
* Determines if the context has overflowed based on
|
|
11
|
+
* Determines if the context has overflowed based on token usage.
|
|
14
12
|
*
|
|
15
13
|
* Overflow is detected when:
|
|
16
|
-
*
|
|
14
|
+
* inputTokens > contextWindow * thresholdPercent
|
|
17
15
|
*
|
|
18
|
-
* The
|
|
16
|
+
* The thresholdPercent allows triggering compaction before hitting 100% (e.g., at 90%).
|
|
17
|
+
* This provides a safety margin for estimation errors and prevents hitting hard limits.
|
|
19
18
|
*
|
|
20
|
-
*
|
|
21
|
-
*
|
|
19
|
+
* Note: We don't reserve space for "output" because input and output have separate limits
|
|
20
|
+
* in LLM APIs. The model's output doesn't consume from the input context window.
|
|
21
|
+
*
|
|
22
|
+
* @param tokens The token usage (actual from API or estimated)
|
|
23
|
+
* @param modelLimits The model's context window limit
|
|
24
|
+
* @param thresholdPercent Percentage of context window at which to trigger (default 0.9 = 90%)
|
|
22
25
|
* @returns true if context has overflowed and compaction is needed
|
|
23
26
|
*/
|
|
24
|
-
export declare function isOverflow(tokens: TokenUsage, modelLimits: ModelLimits): boolean;
|
|
27
|
+
export declare function isOverflow(tokens: TokenUsage, modelLimits: ModelLimits, thresholdPercent?: number): boolean;
|
|
25
28
|
/**
|
|
26
29
|
* Calculate the compaction target - how many tokens we need to reduce to.
|
|
27
30
|
*
|
|
28
|
-
* @param modelLimits The model's context window
|
|
29
|
-
* @param targetPercentage What percentage of
|
|
31
|
+
* @param modelLimits The model's context window limit
|
|
32
|
+
* @param targetPercentage What percentage of context to target (default 70%)
|
|
30
33
|
* @returns The target token count after compaction
|
|
31
34
|
*/
|
|
32
35
|
export declare function getCompactionTarget(modelLimits: ModelLimits, targetPercentage?: number): number;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"overflow.d.ts","sourceRoot":"","sources":["../../../src/context/compaction/overflow.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,oBAAoB,CAAC;AAErD;;;GAGG;AACH,MAAM,WAAW,WAAW;IACxB,
|
|
1
|
+
{"version":3,"file":"overflow.d.ts","sourceRoot":"","sources":["../../../src/context/compaction/overflow.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,oBAAoB,CAAC;AAErD;;;GAGG;AACH,MAAM,WAAW,WAAW;IACxB,sEAAsE;IACtE,aAAa,EAAE,MAAM,CAAC;CACzB;AAED;;;;;;;;;;;;;;;;GAgBG;AACH,wBAAgB,UAAU,CACtB,MAAM,EAAE,UAAU,EAClB,WAAW,EAAE,WAAW,EACxB,gBAAgB,GAAE,MAAY,GAC/B,OAAO,CAWT;AAED;;;;;;GAMG;AACH,wBAAgB,mBAAmB,CAC/B,WAAW,EAAE,WAAW,EACxB,gBAAgB,GAAE,MAAY,GAC/B,MAAM,CAGR"}
|
|
@@ -1,17 +1,13 @@
|
|
|
1
1
|
import "../../chunk-PTJYTZNU.js";
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
const
|
|
5
|
-
const outputBuffer = Math.min(maxOutput, DEFAULT_OUTPUT_BUFFER);
|
|
6
|
-
const usableTokens = contextWindow - outputBuffer;
|
|
2
|
+
function isOverflow(tokens, modelLimits, thresholdPercent = 0.9) {
|
|
3
|
+
const { contextWindow } = modelLimits;
|
|
4
|
+
const effectiveLimit = Math.floor(contextWindow * thresholdPercent);
|
|
7
5
|
const inputTokens = tokens.inputTokens ?? 0;
|
|
8
|
-
return inputTokens >
|
|
6
|
+
return inputTokens > effectiveLimit;
|
|
9
7
|
}
|
|
10
8
|
function getCompactionTarget(modelLimits, targetPercentage = 0.7) {
|
|
11
|
-
const { contextWindow
|
|
12
|
-
|
|
13
|
-
const usableTokens = contextWindow - outputBuffer;
|
|
14
|
-
return Math.floor(usableTokens * targetPercentage);
|
|
9
|
+
const { contextWindow } = modelLimits;
|
|
10
|
+
return Math.floor(contextWindow * targetPercentage);
|
|
15
11
|
}
|
|
16
12
|
export {
|
|
17
13
|
getCompactionTarget,
|
|
@@ -27,6 +27,21 @@ var import_reactive_overflow = require("../strategies/reactive-overflow.js");
|
|
|
27
27
|
const ReactiveOverflowConfigSchema = import_zod.z.object({
|
|
28
28
|
type: import_zod.z.literal("reactive-overflow"),
|
|
29
29
|
enabled: import_zod.z.boolean().default(true).describe("Enable or disable compaction"),
|
|
30
|
+
/**
|
|
31
|
+
* Maximum context tokens before compaction triggers.
|
|
32
|
+
* When set, overrides the model's context window for compaction threshold.
|
|
33
|
+
* Useful for capping context size below the model's maximum limit.
|
|
34
|
+
*/
|
|
35
|
+
maxContextTokens: import_zod.z.number().positive().optional().describe(
|
|
36
|
+
"Maximum context tokens before compaction triggers. Overrides model context window when set."
|
|
37
|
+
),
|
|
38
|
+
/**
|
|
39
|
+
* Percentage of context window that triggers compaction (0.1 to 1.0).
|
|
40
|
+
* Default is 1.0 (100%), meaning compaction triggers when context is full.
|
|
41
|
+
*/
|
|
42
|
+
thresholdPercent: import_zod.z.number().min(0.1).max(1).default(1).describe(
|
|
43
|
+
"Percentage of context window that triggers compaction (0.1 to 1.0, default 1.0)"
|
|
44
|
+
),
|
|
30
45
|
preserveLastNTurns: import_zod.z.number().int().positive().default(2).describe("Number of recent turns (user+assistant pairs) to preserve"),
|
|
31
46
|
maxSummaryTokens: import_zod.z.number().int().positive().default(2e3).describe("Maximum tokens for the summary output"),
|
|
32
47
|
summaryPrompt: import_zod.z.string().optional().describe("Custom summary prompt template. Use {conversation} as placeholder")
|
|
@@ -6,18 +6,33 @@ import type { CompactionProvider } from '../provider.js';
|
|
|
6
6
|
export declare const ReactiveOverflowConfigSchema: z.ZodObject<{
|
|
7
7
|
type: z.ZodLiteral<"reactive-overflow">;
|
|
8
8
|
enabled: z.ZodDefault<z.ZodBoolean>;
|
|
9
|
+
/**
|
|
10
|
+
* Maximum context tokens before compaction triggers.
|
|
11
|
+
* When set, overrides the model's context window for compaction threshold.
|
|
12
|
+
* Useful for capping context size below the model's maximum limit.
|
|
13
|
+
*/
|
|
14
|
+
maxContextTokens: z.ZodOptional<z.ZodNumber>;
|
|
15
|
+
/**
|
|
16
|
+
* Percentage of context window that triggers compaction (0.1 to 1.0).
|
|
17
|
+
* Default is 1.0 (100%), meaning compaction triggers when context is full.
|
|
18
|
+
*/
|
|
19
|
+
thresholdPercent: z.ZodDefault<z.ZodNumber>;
|
|
9
20
|
preserveLastNTurns: z.ZodDefault<z.ZodNumber>;
|
|
10
21
|
maxSummaryTokens: z.ZodDefault<z.ZodNumber>;
|
|
11
22
|
summaryPrompt: z.ZodOptional<z.ZodString>;
|
|
12
23
|
}, "strict", z.ZodTypeAny, {
|
|
13
24
|
type: "reactive-overflow";
|
|
14
25
|
enabled: boolean;
|
|
26
|
+
thresholdPercent: number;
|
|
15
27
|
preserveLastNTurns: number;
|
|
16
28
|
maxSummaryTokens: number;
|
|
29
|
+
maxContextTokens?: number | undefined;
|
|
17
30
|
summaryPrompt?: string | undefined;
|
|
18
31
|
}, {
|
|
19
32
|
type: "reactive-overflow";
|
|
20
33
|
enabled?: boolean | undefined;
|
|
34
|
+
maxContextTokens?: number | undefined;
|
|
35
|
+
thresholdPercent?: number | undefined;
|
|
21
36
|
preserveLastNTurns?: number | undefined;
|
|
22
37
|
maxSummaryTokens?: number | undefined;
|
|
23
38
|
summaryPrompt?: string | undefined;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"reactive-overflow-provider.d.ts","sourceRoot":"","sources":["../../../../src/context/compaction/providers/reactive-overflow-provider.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AACxB,OAAO,KAAK,EAAE,kBAAkB,EAAE,MAAM,gBAAgB,CAAC;AAGzD;;GAEG;AACH,eAAO,MAAM,4BAA4B
|
|
1
|
+
{"version":3,"file":"reactive-overflow-provider.d.ts","sourceRoot":"","sources":["../../../../src/context/compaction/providers/reactive-overflow-provider.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AACxB,OAAO,KAAK,EAAE,kBAAkB,EAAE,MAAM,gBAAgB,CAAC;AAGzD;;GAEG;AACH,eAAO,MAAM,4BAA4B;;;IAIjC;;;;OAIG;;IAQH;;;OAGG;;;;;;;;;;;;;;;;;;;;;EA0BE,CAAC;AAEd,MAAM,MAAM,sBAAsB,GAAG,CAAC,CAAC,MAAM,CAAC,OAAO,4BAA4B,CAAC,CAAC;AAEnF;;;;;;;;GAQG;AACH,eAAO,MAAM,wBAAwB,EAAE,kBAAkB,CACrD,mBAAmB,EACnB,sBAAsB,CA2BzB,CAAC"}
|
|
@@ -4,6 +4,21 @@ import { ReactiveOverflowStrategy } from "../strategies/reactive-overflow.js";
|
|
|
4
4
|
const ReactiveOverflowConfigSchema = z.object({
|
|
5
5
|
type: z.literal("reactive-overflow"),
|
|
6
6
|
enabled: z.boolean().default(true).describe("Enable or disable compaction"),
|
|
7
|
+
/**
|
|
8
|
+
* Maximum context tokens before compaction triggers.
|
|
9
|
+
* When set, overrides the model's context window for compaction threshold.
|
|
10
|
+
* Useful for capping context size below the model's maximum limit.
|
|
11
|
+
*/
|
|
12
|
+
maxContextTokens: z.number().positive().optional().describe(
|
|
13
|
+
"Maximum context tokens before compaction triggers. Overrides model context window when set."
|
|
14
|
+
),
|
|
15
|
+
/**
|
|
16
|
+
* Percentage of context window that triggers compaction (0.1 to 1.0).
|
|
17
|
+
* Default is 1.0 (100%), meaning compaction triggers when context is full.
|
|
18
|
+
*/
|
|
19
|
+
thresholdPercent: z.number().min(0.1).max(1).default(1).describe(
|
|
20
|
+
"Percentage of context window that triggers compaction (0.1 to 1.0, default 1.0)"
|
|
21
|
+
),
|
|
7
22
|
preserveLastNTurns: z.number().int().positive().default(2).describe("Number of recent turns (user+assistant pairs) to preserve"),
|
|
8
23
|
maxSummaryTokens: z.number().int().positive().default(2e3).describe("Maximum tokens for the summary output"),
|
|
9
24
|
summaryPrompt: z.string().optional().describe("Custom summary prompt template. Use {conversation} as placeholder")
|
|
@@ -25,11 +25,31 @@ module.exports = __toCommonJS(schemas_exports);
|
|
|
25
25
|
var import_zod = require("zod");
|
|
26
26
|
const CompactionConfigSchema = import_zod.z.object({
|
|
27
27
|
type: import_zod.z.string().describe("Compaction provider type"),
|
|
28
|
-
enabled: import_zod.z.boolean().default(true).describe("Enable or disable compaction")
|
|
28
|
+
enabled: import_zod.z.boolean().default(true).describe("Enable or disable compaction"),
|
|
29
|
+
/**
|
|
30
|
+
* Maximum context tokens before compaction triggers.
|
|
31
|
+
* When set, overrides the model's context window for compaction threshold.
|
|
32
|
+
* Useful for capping context size below the model's maximum limit.
|
|
33
|
+
* Example: Set to 50000 to trigger compaction at 50K tokens even if
|
|
34
|
+
* the model supports 200K tokens.
|
|
35
|
+
*/
|
|
36
|
+
maxContextTokens: import_zod.z.number().positive().optional().describe(
|
|
37
|
+
"Maximum context tokens before compaction triggers. Overrides model context window when set."
|
|
38
|
+
),
|
|
39
|
+
/**
|
|
40
|
+
* Percentage of context window that triggers compaction (0.1 to 1.0).
|
|
41
|
+
* Default is 0.9 (90%), leaving a 10% buffer to avoid context degradation.
|
|
42
|
+
* Set lower values to trigger compaction earlier.
|
|
43
|
+
* Example: 0.8 triggers compaction when 80% of context is used.
|
|
44
|
+
*/
|
|
45
|
+
thresholdPercent: import_zod.z.number().min(0.1).max(1).default(0.9).describe(
|
|
46
|
+
"Percentage of context window that triggers compaction (0.1 to 1.0, default 0.9)"
|
|
47
|
+
)
|
|
29
48
|
}).passthrough().describe("Context compaction configuration");
|
|
30
49
|
const DEFAULT_COMPACTION_CONFIG = {
|
|
31
50
|
type: "reactive-overflow",
|
|
32
|
-
enabled: true
|
|
51
|
+
enabled: true,
|
|
52
|
+
thresholdPercent: 0.9
|
|
33
53
|
};
|
|
34
54
|
// Annotate the CommonJS export names for ESM import in node:
|
|
35
55
|
0 && (module.exports = {
|
|
@@ -9,12 +9,57 @@ import { z } from 'zod';
|
|
|
9
9
|
export declare const CompactionConfigSchema: z.ZodObject<{
|
|
10
10
|
type: z.ZodString;
|
|
11
11
|
enabled: z.ZodDefault<z.ZodBoolean>;
|
|
12
|
+
/**
|
|
13
|
+
* Maximum context tokens before compaction triggers.
|
|
14
|
+
* When set, overrides the model's context window for compaction threshold.
|
|
15
|
+
* Useful for capping context size below the model's maximum limit.
|
|
16
|
+
* Example: Set to 50000 to trigger compaction at 50K tokens even if
|
|
17
|
+
* the model supports 200K tokens.
|
|
18
|
+
*/
|
|
19
|
+
maxContextTokens: z.ZodOptional<z.ZodNumber>;
|
|
20
|
+
/**
|
|
21
|
+
* Percentage of context window that triggers compaction (0.1 to 1.0).
|
|
22
|
+
* Default is 0.9 (90%), leaving a 10% buffer to avoid context degradation.
|
|
23
|
+
* Set lower values to trigger compaction earlier.
|
|
24
|
+
* Example: 0.8 triggers compaction when 80% of context is used.
|
|
25
|
+
*/
|
|
26
|
+
thresholdPercent: z.ZodDefault<z.ZodNumber>;
|
|
12
27
|
}, "passthrough", z.ZodTypeAny, z.objectOutputType<{
|
|
13
28
|
type: z.ZodString;
|
|
14
29
|
enabled: z.ZodDefault<z.ZodBoolean>;
|
|
30
|
+
/**
|
|
31
|
+
* Maximum context tokens before compaction triggers.
|
|
32
|
+
* When set, overrides the model's context window for compaction threshold.
|
|
33
|
+
* Useful for capping context size below the model's maximum limit.
|
|
34
|
+
* Example: Set to 50000 to trigger compaction at 50K tokens even if
|
|
35
|
+
* the model supports 200K tokens.
|
|
36
|
+
*/
|
|
37
|
+
maxContextTokens: z.ZodOptional<z.ZodNumber>;
|
|
38
|
+
/**
|
|
39
|
+
* Percentage of context window that triggers compaction (0.1 to 1.0).
|
|
40
|
+
* Default is 0.9 (90%), leaving a 10% buffer to avoid context degradation.
|
|
41
|
+
* Set lower values to trigger compaction earlier.
|
|
42
|
+
* Example: 0.8 triggers compaction when 80% of context is used.
|
|
43
|
+
*/
|
|
44
|
+
thresholdPercent: z.ZodDefault<z.ZodNumber>;
|
|
15
45
|
}, z.ZodTypeAny, "passthrough">, z.objectInputType<{
|
|
16
46
|
type: z.ZodString;
|
|
17
47
|
enabled: z.ZodDefault<z.ZodBoolean>;
|
|
48
|
+
/**
|
|
49
|
+
* Maximum context tokens before compaction triggers.
|
|
50
|
+
* When set, overrides the model's context window for compaction threshold.
|
|
51
|
+
* Useful for capping context size below the model's maximum limit.
|
|
52
|
+
* Example: Set to 50000 to trigger compaction at 50K tokens even if
|
|
53
|
+
* the model supports 200K tokens.
|
|
54
|
+
*/
|
|
55
|
+
maxContextTokens: z.ZodOptional<z.ZodNumber>;
|
|
56
|
+
/**
|
|
57
|
+
* Percentage of context window that triggers compaction (0.1 to 1.0).
|
|
58
|
+
* Default is 0.9 (90%), leaving a 10% buffer to avoid context degradation.
|
|
59
|
+
* Set lower values to trigger compaction earlier.
|
|
60
|
+
* Example: 0.8 triggers compaction when 80% of context is used.
|
|
61
|
+
*/
|
|
62
|
+
thresholdPercent: z.ZodDefault<z.ZodNumber>;
|
|
18
63
|
}, z.ZodTypeAny, "passthrough">>;
|
|
19
64
|
export type CompactionConfigInput = z.output<typeof CompactionConfigSchema>;
|
|
20
65
|
/**
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"schemas.d.ts","sourceRoot":"","sources":["../../../src/context/compaction/schemas.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAExB;;;;;;GAMG;AACH,eAAO,MAAM,sBAAsB
|
|
1
|
+
{"version":3,"file":"schemas.d.ts","sourceRoot":"","sources":["../../../src/context/compaction/schemas.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAExB;;;;;;GAMG;AACH,eAAO,MAAM,sBAAsB;;;IAI3B;;;;;;OAMG;;IAQH;;;;;OAKG;;;;;IAnBH;;;;;;OAMG;;IAQH;;;;;OAKG;;;;;IAnBH;;;;;;OAMG;;IAQH;;;;;OAKG;;gCAWsC,CAAC;AAElD,MAAM,MAAM,qBAAqB,GAAG,CAAC,CAAC,MAAM,CAAC,OAAO,sBAAsB,CAAC,CAAC;AAE5E;;GAEG;AACH,eAAO,MAAM,yBAAyB,EAAE,qBAIvC,CAAC"}
|
|
@@ -2,11 +2,31 @@ import "../../chunk-PTJYTZNU.js";
|
|
|
2
2
|
import { z } from "zod";
|
|
3
3
|
const CompactionConfigSchema = z.object({
|
|
4
4
|
type: z.string().describe("Compaction provider type"),
|
|
5
|
-
enabled: z.boolean().default(true).describe("Enable or disable compaction")
|
|
5
|
+
enabled: z.boolean().default(true).describe("Enable or disable compaction"),
|
|
6
|
+
/**
|
|
7
|
+
* Maximum context tokens before compaction triggers.
|
|
8
|
+
* When set, overrides the model's context window for compaction threshold.
|
|
9
|
+
* Useful for capping context size below the model's maximum limit.
|
|
10
|
+
* Example: Set to 50000 to trigger compaction at 50K tokens even if
|
|
11
|
+
* the model supports 200K tokens.
|
|
12
|
+
*/
|
|
13
|
+
maxContextTokens: z.number().positive().optional().describe(
|
|
14
|
+
"Maximum context tokens before compaction triggers. Overrides model context window when set."
|
|
15
|
+
),
|
|
16
|
+
/**
|
|
17
|
+
* Percentage of context window that triggers compaction (0.1 to 1.0).
|
|
18
|
+
* Default is 0.9 (90%), leaving a 10% buffer to avoid context degradation.
|
|
19
|
+
* Set lower values to trigger compaction earlier.
|
|
20
|
+
* Example: 0.8 triggers compaction when 80% of context is used.
|
|
21
|
+
*/
|
|
22
|
+
thresholdPercent: z.number().min(0.1).max(1).default(0.9).describe(
|
|
23
|
+
"Percentage of context window that triggers compaction (0.1 to 1.0, default 0.9)"
|
|
24
|
+
)
|
|
6
25
|
}).passthrough().describe("Context compaction configuration");
|
|
7
26
|
const DEFAULT_COMPACTION_CONFIG = {
|
|
8
27
|
type: "reactive-overflow",
|
|
9
|
-
enabled: true
|
|
28
|
+
enabled: true,
|
|
29
|
+
thresholdPercent: 0.9
|
|
10
30
|
};
|
|
11
31
|
export {
|
|
12
32
|
CompactionConfigSchema,
|
|
@@ -26,15 +26,36 @@ var import_types = require("../../types.js");
|
|
|
26
26
|
const DEFAULT_OPTIONS = {
|
|
27
27
|
preserveLastNTurns: 2,
|
|
28
28
|
maxSummaryTokens: 2e3,
|
|
29
|
-
summaryPrompt: `You are a conversation summarizer
|
|
30
|
-
- What tasks were attempted and their outcomes
|
|
31
|
-
- Current state and context the assistant needs to remember
|
|
32
|
-
- Any important decisions or information discovered
|
|
33
|
-
- What the user was trying to accomplish
|
|
29
|
+
summaryPrompt: `You are a conversation summarizer creating a structured summary for session continuation.
|
|
34
30
|
|
|
35
|
-
|
|
31
|
+
Analyze the conversation and produce a summary in the following XML format:
|
|
36
32
|
|
|
37
|
-
|
|
33
|
+
<session_compaction>
|
|
34
|
+
<conversation_history>
|
|
35
|
+
A concise summary of what happened in the conversation:
|
|
36
|
+
- Tasks attempted and their outcomes (success/failure/in-progress)
|
|
37
|
+
- Important decisions made
|
|
38
|
+
- Key information discovered (file paths, configurations, errors encountered)
|
|
39
|
+
- Tools used and their results
|
|
40
|
+
</conversation_history>
|
|
41
|
+
|
|
42
|
+
<current_task>
|
|
43
|
+
The most recent task or instruction the user requested that may still be in progress.
|
|
44
|
+
Be specific - include the exact request and current status.
|
|
45
|
+
</current_task>
|
|
46
|
+
|
|
47
|
+
<important_context>
|
|
48
|
+
Critical state that must be preserved:
|
|
49
|
+
- File paths being worked on
|
|
50
|
+
- Variable values or configurations
|
|
51
|
+
- Error messages that need addressing
|
|
52
|
+
- Any pending actions or next steps
|
|
53
|
+
</important_context>
|
|
54
|
+
</session_compaction>
|
|
55
|
+
|
|
56
|
+
IMPORTANT: The assistant will continue working based on this summary. Ensure the current_task section clearly states what needs to be done next.
|
|
57
|
+
|
|
58
|
+
Conversation to summarize:
|
|
38
59
|
{conversation}`
|
|
39
60
|
};
|
|
40
61
|
class ReactiveOverflowStrategy {
|
|
@@ -63,15 +84,71 @@ class ReactiveOverflowStrategy {
|
|
|
63
84
|
this.logger.debug("ReactiveOverflowStrategy: History too short, skipping compaction");
|
|
64
85
|
return [];
|
|
65
86
|
}
|
|
87
|
+
let existingSummaryIndex = -1;
|
|
88
|
+
for (let i = history.length - 1; i >= 0; i--) {
|
|
89
|
+
const msg = history[i];
|
|
90
|
+
if (msg?.metadata?.isSummary === true || msg?.metadata?.isSessionSummary === true) {
|
|
91
|
+
existingSummaryIndex = i;
|
|
92
|
+
break;
|
|
93
|
+
}
|
|
94
|
+
}
|
|
95
|
+
if (existingSummaryIndex !== -1) {
|
|
96
|
+
const messagesAfterSummary = history.slice(existingSummaryIndex + 1);
|
|
97
|
+
if (messagesAfterSummary.length <= 4) {
|
|
98
|
+
this.logger.debug(
|
|
99
|
+
`ReactiveOverflowStrategy: Only ${messagesAfterSummary.length} messages after existing summary, skipping re-compaction`
|
|
100
|
+
);
|
|
101
|
+
return [];
|
|
102
|
+
}
|
|
103
|
+
this.logger.info(
|
|
104
|
+
`ReactiveOverflowStrategy: Found existing summary at index ${existingSummaryIndex}, working with ${messagesAfterSummary.length} messages after it`
|
|
105
|
+
);
|
|
106
|
+
return this.compactSubset(messagesAfterSummary, history);
|
|
107
|
+
}
|
|
66
108
|
const { toSummarize, toKeep } = this.splitHistory(history);
|
|
67
109
|
if (toSummarize.length === 0) {
|
|
68
110
|
this.logger.debug("ReactiveOverflowStrategy: No messages to summarize");
|
|
69
111
|
return [];
|
|
70
112
|
}
|
|
113
|
+
const currentTaskMessage = this.findCurrentTaskMessage(history);
|
|
71
114
|
this.logger.info(
|
|
72
115
|
`ReactiveOverflowStrategy: Summarizing ${toSummarize.length} messages, keeping ${toKeep.length}`
|
|
73
116
|
);
|
|
74
|
-
const summary = await this.generateSummary(toSummarize);
|
|
117
|
+
const summary = await this.generateSummary(toSummarize, currentTaskMessage);
|
|
118
|
+
const summaryMessage = {
|
|
119
|
+
role: "assistant",
|
|
120
|
+
content: [{ type: "text", text: summary }],
|
|
121
|
+
timestamp: Date.now(),
|
|
122
|
+
metadata: {
|
|
123
|
+
isSummary: true,
|
|
124
|
+
summarizedAt: Date.now(),
|
|
125
|
+
originalMessageCount: toSummarize.length,
|
|
126
|
+
originalFirstTimestamp: toSummarize[0]?.timestamp,
|
|
127
|
+
originalLastTimestamp: toSummarize[toSummarize.length - 1]?.timestamp
|
|
128
|
+
}
|
|
129
|
+
};
|
|
130
|
+
return [summaryMessage];
|
|
131
|
+
}
|
|
132
|
+
/**
|
|
133
|
+
* Handle re-compaction when there's already a summary in history.
|
|
134
|
+
* Only summarizes messages AFTER the existing summary, preventing
|
|
135
|
+
* cascading summaries of summaries.
|
|
136
|
+
*
|
|
137
|
+
* @param messagesAfterSummary Messages after the existing summary
|
|
138
|
+
* @param fullHistory The complete history (for current task detection)
|
|
139
|
+
* @returns Array with single summary message, or empty if nothing to summarize
|
|
140
|
+
*/
|
|
141
|
+
async compactSubset(messagesAfterSummary, fullHistory) {
|
|
142
|
+
const { toSummarize, toKeep } = this.splitHistory(messagesAfterSummary);
|
|
143
|
+
if (toSummarize.length === 0) {
|
|
144
|
+
this.logger.debug("ReactiveOverflowStrategy: No messages to summarize in subset");
|
|
145
|
+
return [];
|
|
146
|
+
}
|
|
147
|
+
const currentTaskMessage = this.findCurrentTaskMessage(fullHistory);
|
|
148
|
+
this.logger.info(
|
|
149
|
+
`ReactiveOverflowStrategy (re-compact): Summarizing ${toSummarize.length} messages after existing summary, keeping ${toKeep.length}`
|
|
150
|
+
);
|
|
151
|
+
const summary = await this.generateSummary(toSummarize, currentTaskMessage);
|
|
75
152
|
const summaryMessage = {
|
|
76
153
|
role: "assistant",
|
|
77
154
|
content: [{ type: "text", text: summary }],
|
|
@@ -79,16 +156,43 @@ class ReactiveOverflowStrategy {
|
|
|
79
156
|
metadata: {
|
|
80
157
|
isSummary: true,
|
|
81
158
|
summarizedAt: Date.now(),
|
|
82
|
-
|
|
159
|
+
originalMessageCount: toSummarize.length,
|
|
160
|
+
isRecompaction: true,
|
|
161
|
+
// Mark that this is a re-compaction
|
|
83
162
|
originalFirstTimestamp: toSummarize[0]?.timestamp,
|
|
84
163
|
originalLastTimestamp: toSummarize[toSummarize.length - 1]?.timestamp
|
|
85
164
|
}
|
|
86
165
|
};
|
|
87
166
|
return [summaryMessage];
|
|
88
167
|
}
|
|
168
|
+
/**
|
|
169
|
+
* Find the most recent user message that represents the current task.
|
|
170
|
+
* This helps preserve context about what the user is currently asking for.
|
|
171
|
+
*/
|
|
172
|
+
findCurrentTaskMessage(history) {
|
|
173
|
+
for (let i = history.length - 1; i >= 0; i--) {
|
|
174
|
+
const msg = history[i];
|
|
175
|
+
if (msg?.role === "user") {
|
|
176
|
+
if (typeof msg.content === "string") {
|
|
177
|
+
return msg.content;
|
|
178
|
+
} else if (Array.isArray(msg.content)) {
|
|
179
|
+
const textParts = msg.content.filter(
|
|
180
|
+
(part) => part.type === "text"
|
|
181
|
+
).map((part) => part.text).join("\n");
|
|
182
|
+
if (textParts.length > 0) {
|
|
183
|
+
return textParts;
|
|
184
|
+
}
|
|
185
|
+
}
|
|
186
|
+
}
|
|
187
|
+
}
|
|
188
|
+
return null;
|
|
189
|
+
}
|
|
89
190
|
/**
|
|
90
191
|
* Split history into messages to summarize and messages to keep.
|
|
91
192
|
* Keeps the last N turns (user + assistant pairs) intact.
|
|
193
|
+
*
|
|
194
|
+
* For long agentic conversations with many tool calls, this also ensures
|
|
195
|
+
* we don't try to keep too many messages even within preserved turns.
|
|
92
196
|
*/
|
|
93
197
|
splitHistory(history) {
|
|
94
198
|
const turnsToKeep = this.options.preserveLastNTurns;
|
|
@@ -103,20 +207,25 @@ class ReactiveOverflowStrategy {
|
|
|
103
207
|
}
|
|
104
208
|
if (userMessageIndices.length > 0) {
|
|
105
209
|
const splitIndex = userMessageIndices[0];
|
|
106
|
-
if (splitIndex !== void 0) {
|
|
107
|
-
if (splitIndex === 0) {
|
|
108
|
-
return {
|
|
109
|
-
toSummarize: [],
|
|
110
|
-
toKeep: history
|
|
111
|
-
};
|
|
112
|
-
}
|
|
210
|
+
if (splitIndex !== void 0 && splitIndex > 0) {
|
|
113
211
|
return {
|
|
114
212
|
toSummarize: history.slice(0, splitIndex),
|
|
115
213
|
toKeep: history.slice(splitIndex)
|
|
116
214
|
};
|
|
117
215
|
}
|
|
118
216
|
}
|
|
119
|
-
const
|
|
217
|
+
const minKeep = 3;
|
|
218
|
+
const maxKeepPercent = 0.2;
|
|
219
|
+
const keepCount = Math.max(minKeep, Math.floor(history.length * maxKeepPercent));
|
|
220
|
+
if (keepCount >= history.length) {
|
|
221
|
+
return {
|
|
222
|
+
toSummarize: [],
|
|
223
|
+
toKeep: history
|
|
224
|
+
};
|
|
225
|
+
}
|
|
226
|
+
this.logger.debug(
|
|
227
|
+
`splitHistory: Using fallback - keeping last ${keepCount} of ${history.length} messages`
|
|
228
|
+
);
|
|
120
229
|
return {
|
|
121
230
|
toSummarize: history.slice(0, -keepCount),
|
|
122
231
|
toKeep: history.slice(-keepCount)
|
|
@@ -124,21 +233,36 @@ class ReactiveOverflowStrategy {
|
|
|
124
233
|
}
|
|
125
234
|
/**
|
|
126
235
|
* Generate an LLM summary of the messages.
|
|
236
|
+
*
|
|
237
|
+
* @param messages Messages to summarize
|
|
238
|
+
* @param currentTask The most recent user message (current task context)
|
|
127
239
|
*/
|
|
128
|
-
async generateSummary(messages) {
|
|
240
|
+
async generateSummary(messages, currentTask) {
|
|
129
241
|
const formattedConversation = this.formatMessagesForSummary(messages);
|
|
130
|
-
|
|
242
|
+
let conversationWithContext = formattedConversation;
|
|
243
|
+
if (currentTask) {
|
|
244
|
+
conversationWithContext += `
|
|
245
|
+
|
|
246
|
+
--- CURRENT TASK (most recent user request) ---
|
|
247
|
+
${currentTask}`;
|
|
248
|
+
}
|
|
249
|
+
const prompt = this.options.summaryPrompt.replace(
|
|
250
|
+
"{conversation}",
|
|
251
|
+
conversationWithContext
|
|
252
|
+
);
|
|
131
253
|
try {
|
|
132
254
|
const result = await (0, import_ai.generateText)({
|
|
133
255
|
model: this.model,
|
|
134
256
|
prompt,
|
|
135
257
|
maxOutputTokens: this.options.maxSummaryTokens
|
|
136
258
|
});
|
|
137
|
-
return `[
|
|
259
|
+
return `[Session Compaction Summary]
|
|
138
260
|
${result.text}`;
|
|
139
261
|
} catch (error) {
|
|
140
|
-
this.logger.error(
|
|
141
|
-
|
|
262
|
+
this.logger.error(
|
|
263
|
+
`ReactiveOverflowStrategy: Failed to generate summary - ${error instanceof Error ? error.message : String(error)}`
|
|
264
|
+
);
|
|
265
|
+
return this.createFallbackSummary(messages, currentTask);
|
|
142
266
|
}
|
|
143
267
|
}
|
|
144
268
|
/**
|
|
@@ -174,7 +298,7 @@ ${result.text}`;
|
|
|
174
298
|
/**
|
|
175
299
|
* Create a fallback summary if LLM call fails.
|
|
176
300
|
*/
|
|
177
|
-
createFallbackSummary(messages) {
|
|
301
|
+
createFallbackSummary(messages, currentTask) {
|
|
178
302
|
const userMessages = messages.filter((m) => m.role === "user");
|
|
179
303
|
const assistantWithTools = messages.filter(
|
|
180
304
|
(m) => (0, import_types.isAssistantMessage)(m) && !!m.toolCalls && m.toolCalls.length > 0
|
|
@@ -190,9 +314,25 @@ ${result.text}`;
|
|
|
190
314
|
assistantWithTools.flatMap((m) => m.toolCalls.map((tc) => tc.function.name))
|
|
191
315
|
)
|
|
192
316
|
].join(", ");
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
317
|
+
let fallback = `[Session Compaction Summary - Fallback]
|
|
318
|
+
<session_compaction>
|
|
319
|
+
<conversation_history>
|
|
320
|
+
User discussed: ${userTopics || "various topics"}
|
|
321
|
+
Tools used: ${toolsUsed || "none"}
|
|
322
|
+
Messages summarized: ${messages.length}
|
|
323
|
+
</conversation_history>`;
|
|
324
|
+
if (currentTask) {
|
|
325
|
+
fallback += `
|
|
326
|
+
<current_task>
|
|
327
|
+
${currentTask.slice(0, 500)}${currentTask.length > 500 ? "..." : ""}
|
|
328
|
+
</current_task>`;
|
|
329
|
+
}
|
|
330
|
+
fallback += `
|
|
331
|
+
<important_context>
|
|
332
|
+
Note: This is a fallback summary due to LLM error. Context may be incomplete.
|
|
333
|
+
</important_context>
|
|
334
|
+
</session_compaction>`;
|
|
335
|
+
return fallback;
|
|
196
336
|
}
|
|
197
337
|
}
|
|
198
338
|
// Annotate the CommonJS export names for ESM import in node:
|
|
@@ -60,13 +60,34 @@ export declare class ReactiveOverflowStrategy implements ICompactionStrategy {
|
|
|
60
60
|
* @returns Array with single summary message to add, or empty if nothing to summarize
|
|
61
61
|
*/
|
|
62
62
|
compact(history: readonly InternalMessage[]): Promise<InternalMessage[]>;
|
|
63
|
+
/**
|
|
64
|
+
* Handle re-compaction when there's already a summary in history.
|
|
65
|
+
* Only summarizes messages AFTER the existing summary, preventing
|
|
66
|
+
* cascading summaries of summaries.
|
|
67
|
+
*
|
|
68
|
+
* @param messagesAfterSummary Messages after the existing summary
|
|
69
|
+
* @param fullHistory The complete history (for current task detection)
|
|
70
|
+
* @returns Array with single summary message, or empty if nothing to summarize
|
|
71
|
+
*/
|
|
72
|
+
private compactSubset;
|
|
73
|
+
/**
|
|
74
|
+
* Find the most recent user message that represents the current task.
|
|
75
|
+
* This helps preserve context about what the user is currently asking for.
|
|
76
|
+
*/
|
|
77
|
+
private findCurrentTaskMessage;
|
|
63
78
|
/**
|
|
64
79
|
* Split history into messages to summarize and messages to keep.
|
|
65
80
|
* Keeps the last N turns (user + assistant pairs) intact.
|
|
81
|
+
*
|
|
82
|
+
* For long agentic conversations with many tool calls, this also ensures
|
|
83
|
+
* we don't try to keep too many messages even within preserved turns.
|
|
66
84
|
*/
|
|
67
85
|
private splitHistory;
|
|
68
86
|
/**
|
|
69
87
|
* Generate an LLM summary of the messages.
|
|
88
|
+
*
|
|
89
|
+
* @param messages Messages to summarize
|
|
90
|
+
* @param currentTask The most recent user message (current task context)
|
|
70
91
|
*/
|
|
71
92
|
private generateSummary;
|
|
72
93
|
/**
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"reactive-overflow.d.ts","sourceRoot":"","sources":["../../../../src/context/compaction/strategies/reactive-overflow.ts"],"names":[],"mappings":"AAAA,OAAO,EAAgB,KAAK,aAAa,EAAE,MAAM,IAAI,CAAC;AACtD,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,aAAa,CAAC;AACvD,OAAO,KAAK,EAAE,eAAe,EAAY,MAAM,gBAAgB,CAAC;AAEhE,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,6BAA6B,CAAC;AAEhE;;GAEG;AACH,MAAM,WAAW,uBAAuB;IACpC;;;;OAIG;IACH,kBAAkB,CAAC,EAAE,MAAM,CAAC;IAE5B;;;OAGG;IACH,gBAAgB,CAAC,EAAE,MAAM,CAAC;IAE1B;;;OAGG;IACH,aAAa,CAAC,EAAE,MAAM,CAAC;CAC1B;
|
|
1
|
+
{"version":3,"file":"reactive-overflow.d.ts","sourceRoot":"","sources":["../../../../src/context/compaction/strategies/reactive-overflow.ts"],"names":[],"mappings":"AAAA,OAAO,EAAgB,KAAK,aAAa,EAAE,MAAM,IAAI,CAAC;AACtD,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,aAAa,CAAC;AACvD,OAAO,KAAK,EAAE,eAAe,EAAY,MAAM,gBAAgB,CAAC;AAEhE,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,6BAA6B,CAAC;AAEhE;;GAEG;AACH,MAAM,WAAW,uBAAuB;IACpC;;;;OAIG;IACH,kBAAkB,CAAC,EAAE,MAAM,CAAC;IAE5B;;;OAGG;IACH,gBAAgB,CAAC,EAAE,MAAM,CAAC;IAE1B;;;OAGG;IACH,aAAa,CAAC,EAAE,MAAM,CAAC;CAC1B;AAsCD;;;;;;;;;;;;;;;;;;GAkBG;AACH,qBAAa,wBAAyB,YAAW,mBAAmB;IAChE,QAAQ,CAAC,IAAI,uBAAuB;IAEpC,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAgB;IACtC,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAoC;IAC5D,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAe;gBAE1B,KAAK,EAAE,aAAa,EAAE,OAAO,EAAE,uBAAuB,YAAK,EAAE,MAAM,EAAE,YAAY;IAM7F;;;;;;;;;;OAUG;IACG,OAAO,CAAC,OAAO,EAAE,SAAS,eAAe,EAAE,GAAG,OAAO,CAAC,eAAe,EAAE,CAAC;IAiF9E;;;;;;;;OAQG;YACW,aAAa;IA0C3B;;;OAGG;IACH,OAAO,CAAC,sBAAsB;IAuB9B;;;;;;OAMG;IACH,OAAO,CAAC,YAAY;IAwDpB;;;;;OAKG;YACW,eAAe;IAmC7B;;OAEG;IACH,OAAO,CAAC,wBAAwB;IA2ChC;;OAEG;IACH,OAAO,CAAC,qBAAqB;CA0DhC"}
|