@illuma-ai/agents 1.0.96 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/agents/AgentContext.cjs +6 -2
- package/dist/cjs/agents/AgentContext.cjs.map +1 -1
- package/dist/cjs/common/constants.cjs +78 -0
- package/dist/cjs/common/constants.cjs.map +1 -1
- package/dist/cjs/graphs/Graph.cjs +191 -165
- package/dist/cjs/graphs/Graph.cjs.map +1 -1
- package/dist/cjs/main.cjs +22 -0
- package/dist/cjs/main.cjs.map +1 -1
- package/dist/cjs/messages/dedup.cjs +95 -0
- package/dist/cjs/messages/dedup.cjs.map +1 -0
- package/dist/cjs/tools/CodeExecutor.cjs +22 -3
- package/dist/cjs/tools/CodeExecutor.cjs.map +1 -1
- package/dist/cjs/types/graph.cjs.map +1 -1
- package/dist/cjs/utils/contextPressure.cjs +154 -0
- package/dist/cjs/utils/contextPressure.cjs.map +1 -0
- package/dist/cjs/utils/pruneCalibration.cjs +78 -0
- package/dist/cjs/utils/pruneCalibration.cjs.map +1 -0
- package/dist/cjs/utils/run.cjs.map +1 -1
- package/dist/cjs/utils/tokens.cjs.map +1 -1
- package/dist/cjs/utils/toolDiscoveryCache.cjs +127 -0
- package/dist/cjs/utils/toolDiscoveryCache.cjs.map +1 -0
- package/dist/esm/agents/AgentContext.mjs +6 -2
- package/dist/esm/agents/AgentContext.mjs.map +1 -1
- package/dist/esm/common/constants.mjs +71 -1
- package/dist/esm/common/constants.mjs.map +1 -1
- package/dist/esm/graphs/Graph.mjs +192 -166
- package/dist/esm/graphs/Graph.mjs.map +1 -1
- package/dist/esm/main.mjs +5 -1
- package/dist/esm/main.mjs.map +1 -1
- package/dist/esm/messages/dedup.mjs +93 -0
- package/dist/esm/messages/dedup.mjs.map +1 -0
- package/dist/esm/tools/CodeExecutor.mjs +22 -3
- package/dist/esm/tools/CodeExecutor.mjs.map +1 -1
- package/dist/esm/types/graph.mjs.map +1 -1
- package/dist/esm/utils/contextPressure.mjs +148 -0
- package/dist/esm/utils/contextPressure.mjs.map +1 -0
- package/dist/esm/utils/pruneCalibration.mjs +74 -0
- package/dist/esm/utils/pruneCalibration.mjs.map +1 -0
- package/dist/esm/utils/run.mjs.map +1 -1
- package/dist/esm/utils/tokens.mjs.map +1 -1
- package/dist/esm/utils/toolDiscoveryCache.mjs +125 -0
- package/dist/esm/utils/toolDiscoveryCache.mjs.map +1 -0
- package/dist/types/agents/AgentContext.d.ts +4 -1
- package/dist/types/common/constants.d.ts +49 -0
- package/dist/types/graphs/Graph.d.ts +25 -0
- package/dist/types/messages/dedup.d.ts +25 -0
- package/dist/types/messages/index.d.ts +1 -0
- package/dist/types/types/graph.d.ts +63 -0
- package/dist/types/utils/contextPressure.d.ts +72 -0
- package/dist/types/utils/index.d.ts +3 -0
- package/dist/types/utils/pruneCalibration.d.ts +43 -0
- package/dist/types/utils/toolDiscoveryCache.d.ts +77 -0
- package/package.json +1 -1
- package/src/agents/AgentContext.ts +7 -0
- package/src/common/constants.ts +82 -0
- package/src/graphs/Graph.ts +254 -208
- package/src/graphs/contextManagement.e2e.test.ts +28 -20
- package/src/graphs/gapFeatures.test.ts +520 -0
- package/src/graphs/nonBlockingSummarization.test.ts +307 -0
- package/src/messages/__tests__/dedup.test.ts +166 -0
- package/src/messages/dedup.ts +104 -0
- package/src/messages/index.ts +1 -0
- package/src/specs/agent-handoffs-bedrock.integration.test.ts +7 -7
- package/src/specs/agent-handoffs.test.ts +36 -36
- package/src/specs/thinking-handoff.test.ts +10 -10
- package/src/tools/CodeExecutor.ts +22 -3
- package/src/types/graph.ts +73 -0
- package/src/utils/__tests__/pruneCalibration.test.ts +148 -0
- package/src/utils/__tests__/toolDiscoveryCache.test.ts +214 -0
- package/src/utils/contextPressure.test.ts +262 -0
- package/src/utils/contextPressure.ts +188 -0
- package/src/utils/index.ts +3 -0
- package/src/utils/pruneCalibration.ts +92 -0
- package/src/utils/run.ts +108 -108
- package/src/utils/tokens.ts +118 -118
- package/src/utils/toolDiscoveryCache.ts +150 -0
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
// src/utils/pruneCalibration.ts
|
|
2
|
+
import type { PruneCalibrationState } from '@/types/graph';
|
|
3
|
+
import {
|
|
4
|
+
PRUNING_EMA_ALPHA,
|
|
5
|
+
PRUNING_INITIAL_CALIBRATION,
|
|
6
|
+
} from '@/common/constants';
|
|
7
|
+
|
|
8
|
+
/**
 * Builds the starting state for pruning calibration.
 *
 * @param initialRatio - Ratio to start from. When omitted (or `null` /
 *   `undefined`), `PRUNING_INITIAL_CALIBRATION` is used instead.
 * @returns A new state object holding the chosen ratio and an iteration
 *   count of zero.
 */
export function createPruneCalibration(
  initialRatio?: number
): PruneCalibrationState {
  // `??` only falls back on null/undefined, so an explicit 0 is kept as given.
  const startingRatio = initialRatio ?? PRUNING_INITIAL_CALIBRATION;
  return { ratio: startingRatio, iterations: 0 };
}
|
|
22
|
+
|
|
23
|
+
/**
 * Updates the pruning calibration using an Exponential Moving Average (EMA).
 *
 * Problem: Without calibration, the pruner's token estimates can diverge from
 * reality across iterations, causing either:
 * - Over-pruning (context cliff): too many messages removed at once, losing
 *   critical tool results
 * - Under-pruning: not enough messages removed, hitting hard token limits
 *
 * Solution: Track the ratio between estimated token usage (from our token
 * counter) and actual token usage (from the API response). EMA smoothing makes
 * the calibration adjust gradually, preventing oscillation.
 *
 * The calibration ratio is applied to maxTokens in the pruner:
 *   effectiveMaxTokens = maxTokens * calibrationRatio
 *
 * If actual > estimated → observed ratio < 1 → prune more aggressively
 * If actual < estimated → observed ratio > 1 → prune less aggressively
 *
 * Readings that cannot produce a meaningful ratio (zero, negative, NaN or
 * infinite token counts, or a non-finite `alpha`) are ignored and the input
 * state is returned unchanged.
 *
 * @param state - Current calibration state
 * @param actualTokens - Actual token count from API response (UsageMetadata)
 * @param estimatedTokens - Estimated token count from token counter
 * @param alpha - EMA smoothing factor (default: PRUNING_EMA_ALPHA)
 * @returns Updated calibration state (new object, does not mutate input), or
 *   the same `state` reference when the reading is rejected
 */
export function updatePruneCalibration(
  state: PruneCalibrationState,
  actualTokens: number,
  estimatedTokens: number,
  alpha: number = PRUNING_EMA_ALPHA
): PruneCalibrationState {
  // A plain `<= 0` test lets NaN through (every comparison with NaN is false)
  // and Infinity / Infinity is NaN as well. Math.min/Math.max propagate NaN,
  // so one bad reading would turn the stored ratio into NaN for good.
  const isUsableCount = (value: number): boolean =>
    Number.isFinite(value) && value > 0;

  if (
    !isUsableCount(estimatedTokens) ||
    !isUsableCount(actualTokens) ||
    !Number.isFinite(alpha)
  ) {
    return state;
  }

  // Raw ratio: how much our estimate differs from reality
  const observedRatio = estimatedTokens / actualTokens;

  // Clamp each observation to [0.5, 2.0] so a single outlier reading can
  // never pull the budget below half or above double.
  const clampedRatio = Math.max(0.5, Math.min(2.0, observedRatio));

  // Apply EMA: new_ratio = α * observed + (1 - α) * previous
  const newRatio = alpha * clampedRatio + (1 - alpha) * state.ratio;

  return {
    ratio: newRatio,
    iterations: state.iterations + 1,
  };
}
|
|
73
|
+
|
|
74
|
+
/**
 * Applies the calibration ratio to a max token budget.
 *
 * The ratio scales the effective budget so pruning becomes more or less
 * aggressive based on the observed vs. estimated token divergence.
 *
 * The raw budget is returned unchanged when no calibration has been recorded
 * yet (`iterations === 0`) or when the stored ratio is unusable (NaN,
 * infinite, zero or negative), so a corrupted state can never produce a NaN,
 * empty or negative budget.
 *
 * @param maxTokens - Raw max token budget
 * @param state - Current calibration state
 * @returns Adjusted max token budget, rounded down to an integer when the
 *   ratio is applied
 */
export function applyCalibration(
  maxTokens: number,
  state: PruneCalibrationState
): number {
  if (state.iterations === 0) {
    // No calibration data yet — use raw budget
    return maxTokens;
  }

  const { ratio } = state;
  if (!Number.isFinite(ratio) || ratio <= 0) {
    // Unusable ratio: scaling by it would yield NaN, 0 or a negative budget.
    return maxTokens;
  }

  return Math.floor(maxTokens * ratio);
}
|
package/src/utils/run.ts
CHANGED
|
@@ -1,108 +1,108 @@
|
|
|
1
|
-
import { CallbackManagerForChainRun } from '@langchain/core/callbacks/manager';
|
|
2
|
-
import {
|
|
3
|
-
mergeConfigs,
|
|
4
|
-
patchConfig,
|
|
5
|
-
Runnable,
|
|
6
|
-
RunnableConfig,
|
|
7
|
-
} from '@langchain/core/runnables';
|
|
8
|
-
import { AsyncLocalStorageProviderSingleton } from '@langchain/core/singletons';
|
|
9
|
-
|
|
10
|
-
/**
|
|
11
|
-
* Delays the execution for a specified number of milliseconds.
|
|
12
|
-
*
|
|
13
|
-
* @param {number} ms - The number of milliseconds to delay.
|
|
14
|
-
* @return {Promise<void>} A promise that resolves after the specified delay.
|
|
15
|
-
*/
|
|
16
|
-
export function sleep(ms: number): Promise<void> {
|
|
17
|
-
return new Promise((resolve) => setTimeout(resolve, ms));
|
|
18
|
-
}
|
|
19
|
-
|
|
20
|
-
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
21
|
-
export interface RunnableCallableArgs extends Partial<any> {
|
|
22
|
-
name?: string;
|
|
23
|
-
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
24
|
-
func: (...args: any[]) => any;
|
|
25
|
-
tags?: string[];
|
|
26
|
-
trace?: boolean;
|
|
27
|
-
recurse?: boolean;
|
|
28
|
-
}
|
|
29
|
-
|
|
30
|
-
export class RunnableCallable<I = unknown, O = unknown> extends Runnable<I, O> {
|
|
31
|
-
lc_namespace: string[] = ['langgraph'];
|
|
32
|
-
|
|
33
|
-
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
34
|
-
func: (...args: any[]) => any;
|
|
35
|
-
|
|
36
|
-
tags?: string[];
|
|
37
|
-
|
|
38
|
-
config?: RunnableConfig;
|
|
39
|
-
|
|
40
|
-
trace: boolean = true;
|
|
41
|
-
|
|
42
|
-
recurse: boolean = true;
|
|
43
|
-
|
|
44
|
-
constructor(fields: RunnableCallableArgs) {
|
|
45
|
-
super();
|
|
46
|
-
this.name = fields.name ?? fields.func.name;
|
|
47
|
-
this.func = fields.func;
|
|
48
|
-
this.config = fields.tags ? { tags: fields.tags } : undefined;
|
|
49
|
-
this.trace = fields.trace ?? this.trace;
|
|
50
|
-
this.recurse = fields.recurse ?? this.recurse;
|
|
51
|
-
}
|
|
52
|
-
|
|
53
|
-
protected async _tracedInvoke(
|
|
54
|
-
input: I,
|
|
55
|
-
config?: Partial<RunnableConfig>,
|
|
56
|
-
runManager?: CallbackManagerForChainRun
|
|
57
|
-
): Promise<O> {
|
|
58
|
-
return new Promise<O>((resolve, reject) => {
|
|
59
|
-
// Defensive check: ensure runManager has getChild method before calling
|
|
60
|
-
const childCallbacks =
|
|
61
|
-
typeof runManager?.getChild === 'function'
|
|
62
|
-
? runManager.getChild()
|
|
63
|
-
: undefined;
|
|
64
|
-
let childConfig: Partial<RunnableConfig> | null = patchConfig(config, {
|
|
65
|
-
callbacks: childCallbacks,
|
|
66
|
-
});
|
|
67
|
-
void AsyncLocalStorageProviderSingleton.runWithConfig(
|
|
68
|
-
childConfig,
|
|
69
|
-
async () => {
|
|
70
|
-
try {
|
|
71
|
-
const output = await this.func(input, childConfig);
|
|
72
|
-
childConfig = null;
|
|
73
|
-
resolve(output);
|
|
74
|
-
} catch (e) {
|
|
75
|
-
childConfig = null;
|
|
76
|
-
reject(e);
|
|
77
|
-
}
|
|
78
|
-
}
|
|
79
|
-
);
|
|
80
|
-
});
|
|
81
|
-
}
|
|
82
|
-
|
|
83
|
-
async invoke(
|
|
84
|
-
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
85
|
-
input: any,
|
|
86
|
-
options?: Partial<RunnableConfig> | undefined
|
|
87
|
-
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
88
|
-
): Promise<any> {
|
|
89
|
-
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
90
|
-
let returnValue: any;
|
|
91
|
-
|
|
92
|
-
if (this.trace) {
|
|
93
|
-
returnValue = await this._callWithConfig(
|
|
94
|
-
this._tracedInvoke,
|
|
95
|
-
input,
|
|
96
|
-
mergeConfigs(this.config, options)
|
|
97
|
-
);
|
|
98
|
-
} else {
|
|
99
|
-
returnValue = await this.func(input, mergeConfigs(this.config, options));
|
|
100
|
-
}
|
|
101
|
-
|
|
102
|
-
if (Runnable.isRunnable(returnValue) && this.recurse) {
|
|
103
|
-
return await returnValue.invoke(input, options);
|
|
104
|
-
}
|
|
105
|
-
|
|
106
|
-
return returnValue;
|
|
107
|
-
}
|
|
108
|
-
}
|
|
1
|
+
import { CallbackManagerForChainRun } from '@langchain/core/callbacks/manager';
|
|
2
|
+
import {
|
|
3
|
+
mergeConfigs,
|
|
4
|
+
patchConfig,
|
|
5
|
+
Runnable,
|
|
6
|
+
RunnableConfig,
|
|
7
|
+
} from '@langchain/core/runnables';
|
|
8
|
+
import { AsyncLocalStorageProviderSingleton } from '@langchain/core/singletons';
|
|
9
|
+
|
|
10
|
+
/**
 * Pauses for a given amount of time.
 *
 * @param ms - Delay, in milliseconds, handed to `setTimeout`.
 * @returns A promise that resolves (with no value) once the timer fires.
 */
export function sleep(ms: number): Promise<void> {
  return new Promise<void>((wake) => {
    setTimeout(wake, ms);
  });
}
|
|
19
|
+
|
|
20
|
+
/**
 * Constructor options for `RunnableCallable`.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any
export interface RunnableCallableArgs extends Partial<any> {
  /** The function the runnable wraps and calls on `invoke`. */
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  func: (...args: any[]) => any;
  /** Runnable name; `RunnableCallable` falls back to `func.name` when absent. */
  name?: string;
  /** Tags that `RunnableCallable` stores as its base config (`{ tags }`). */
  tags?: string[];
  /** Whether `invoke` goes through the traced path; the class defaults to `true`. */
  trace?: boolean;
  /** Whether a returned runnable is invoked in turn; the class defaults to `true`. */
  recurse?: boolean;
}
|
|
29
|
+
|
|
30
|
+
/**
 * A `Runnable` that wraps a plain function.
 *
 * `invoke` calls the wrapped function either through `_callWithConfig` (when
 * `trace` is true) or directly (when it is false). If the function returns
 * another runnable and `recurse` is true, that runnable is invoked with the
 * same input and the caller's options.
 */
export class RunnableCallable<I = unknown, O = unknown> extends Runnable<I, O> {
  lc_namespace: string[] = ['langgraph'];

  /** The wrapped function. */
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  func: (...args: any[]) => any;

  tags?: string[];

  /** Base config merged with the per-call options; holds the tags, if any. */
  config?: RunnableConfig;

  trace: boolean = true;

  recurse: boolean = true;

  /**
   * @param fields - Wrapped function plus optional name, tags and flags.
   *   Omitted `trace` / `recurse` keep their `true` defaults.
   */
  constructor(fields: RunnableCallableArgs) {
    super();
    const { func, name, tags, trace, recurse } = fields;
    this.name = name ?? func.name;
    this.func = func;
    this.config = tags ? { tags } : undefined;
    this.trace = trace ?? this.trace;
    this.recurse = recurse ?? this.recurse;
  }

  /**
   * Runs the wrapped function inside `AsyncLocalStorageProviderSingleton`
   * with a config patched to carry the run manager's child callbacks.
   *
   * @param input - Value passed as the first argument to the wrapped function.
   * @param config - Config to patch; the patched copy is the second argument.
   * @param runManager - Optional run manager supplying child callbacks.
   * @returns Resolves with the function's result, rejects with what it throws.
   */
  protected async _tracedInvoke(
    input: I,
    config?: Partial<RunnableConfig>,
    runManager?: CallbackManagerForChainRun
  ): Promise<O> {
    return new Promise<O>((resolve, reject) => {
      let scopedConfig: Partial<RunnableConfig> | null = patchConfig(config, {
        // Defensive check: only call getChild when the run manager provides it
        callbacks:
          typeof runManager?.getChild === 'function'
            ? runManager.getChild()
            : undefined,
      });

      const runWrapped = async (): Promise<void> => {
        try {
          const result = await this.func(input, scopedConfig);
          // Drop the closure's reference to the config before settling.
          scopedConfig = null;
          resolve(result);
        } catch (err) {
          scopedConfig = null;
          reject(err);
        }
      };

      void AsyncLocalStorageProviderSingleton.runWithConfig(
        scopedConfig,
        runWrapped
      );
    });
  }

  /**
   * Calls the wrapped function with `input` and the merged config.
   *
   * @param input - Value forwarded to the wrapped function.
   * @param options - Per-call config, merged on top of `this.config`.
   * @returns The function's result, or the result of invoking the runnable it
   *   returned when `recurse` is enabled.
   */
  async invoke(
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    input: any,
    options?: Partial<RunnableConfig> | undefined
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
  ): Promise<any> {
    const mergedConfig = mergeConfigs(this.config, options);

    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    const result: any = this.trace
      ? await this._callWithConfig(this._tracedInvoke, input, mergedConfig)
      : await this.func(input, mergedConfig);

    if (Runnable.isRunnable(result) && this.recurse) {
      return await result.invoke(input, options);
    }

    return result;
  }
}
|
package/src/utils/tokens.ts
CHANGED
|
@@ -1,118 +1,118 @@
|
|
|
1
|
-
import { Tokenizer } from 'ai-tokenizer';
|
|
2
|
-
import type { BaseMessage } from '@langchain/core/messages';
|
|
3
|
-
import { ContentTypes } from '@/common/enum';
|
|
4
|
-
|
|
5
|
-
export type EncodingName = 'o200k_base' | 'claude';
|
|
6
|
-
|
|
7
|
-
const tokenizers: Partial<Record<EncodingName, Tokenizer>> = {};
|
|
8
|
-
|
|
9
|
-
async function getTokenizer(
|
|
10
|
-
encoding: EncodingName = 'o200k_base'
|
|
11
|
-
): Promise<Tokenizer> {
|
|
12
|
-
const cached = tokenizers[encoding];
|
|
13
|
-
if (cached) {
|
|
14
|
-
return cached;
|
|
15
|
-
}
|
|
16
|
-
const data =
|
|
17
|
-
encoding === 'claude'
|
|
18
|
-
? await import('ai-tokenizer/encoding/claude')
|
|
19
|
-
: await import('ai-tokenizer/encoding/o200k_base');
|
|
20
|
-
const instance = new Tokenizer(data);
|
|
21
|
-
tokenizers[encoding] = instance;
|
|
22
|
-
return instance;
|
|
23
|
-
}
|
|
24
|
-
|
|
25
|
-
export function encodingForModel(model: string): EncodingName {
|
|
26
|
-
if (model.toLowerCase().includes('claude')) {
|
|
27
|
-
return 'claude';
|
|
28
|
-
}
|
|
29
|
-
return 'o200k_base';
|
|
30
|
-
}
|
|
31
|
-
|
|
32
|
-
export function getTokenCountForMessage(
|
|
33
|
-
message: BaseMessage,
|
|
34
|
-
getTokenCount: (text: string) => number
|
|
35
|
-
): number {
|
|
36
|
-
const tokensPerMessage = 3;
|
|
37
|
-
|
|
38
|
-
const processValue = (value: unknown): void => {
|
|
39
|
-
if (Array.isArray(value)) {
|
|
40
|
-
for (const item of value) {
|
|
41
|
-
if (
|
|
42
|
-
!item ||
|
|
43
|
-
!item.type ||
|
|
44
|
-
item.type === ContentTypes.ERROR ||
|
|
45
|
-
item.type === ContentTypes.IMAGE_URL
|
|
46
|
-
) {
|
|
47
|
-
continue;
|
|
48
|
-
}
|
|
49
|
-
|
|
50
|
-
if (item.type === ContentTypes.TOOL_CALL && item.tool_call != null) {
|
|
51
|
-
const toolName = item.tool_call?.name || '';
|
|
52
|
-
if (toolName != null && toolName && typeof toolName === 'string') {
|
|
53
|
-
numTokens += getTokenCount(toolName);
|
|
54
|
-
}
|
|
55
|
-
|
|
56
|
-
const args = item.tool_call?.args || '';
|
|
57
|
-
if (args != null && args && typeof args === 'string') {
|
|
58
|
-
numTokens += getTokenCount(args);
|
|
59
|
-
}
|
|
60
|
-
|
|
61
|
-
const output = item.tool_call?.output || '';
|
|
62
|
-
if (output != null && output && typeof output === 'string') {
|
|
63
|
-
numTokens += getTokenCount(output);
|
|
64
|
-
}
|
|
65
|
-
continue;
|
|
66
|
-
}
|
|
67
|
-
|
|
68
|
-
const nestedValue = item[item.type];
|
|
69
|
-
|
|
70
|
-
if (!nestedValue) {
|
|
71
|
-
continue;
|
|
72
|
-
}
|
|
73
|
-
|
|
74
|
-
processValue(nestedValue);
|
|
75
|
-
}
|
|
76
|
-
} else if (typeof value === 'string') {
|
|
77
|
-
numTokens += getTokenCount(value);
|
|
78
|
-
} else if (typeof value === 'number') {
|
|
79
|
-
numTokens += getTokenCount(value.toString());
|
|
80
|
-
} else if (typeof value === 'boolean') {
|
|
81
|
-
numTokens += getTokenCount(value.toString());
|
|
82
|
-
}
|
|
83
|
-
};
|
|
84
|
-
|
|
85
|
-
let numTokens = tokensPerMessage;
|
|
86
|
-
processValue(message.content);
|
|
87
|
-
return numTokens;
|
|
88
|
-
}
|
|
89
|
-
|
|
90
|
-
/**
|
|
91
|
-
* Creates a token counter function using the specified encoding.
|
|
92
|
-
* Lazily loads the encoding data on first use via dynamic import.
|
|
93
|
-
*/
|
|
94
|
-
export const createTokenCounter = async (
|
|
95
|
-
encoding: EncodingName = 'o200k_base'
|
|
96
|
-
): Promise<(message: BaseMessage) => number> => {
|
|
97
|
-
const tok = await getTokenizer(encoding);
|
|
98
|
-
const countTokens = (text: string): number => tok.count(text);
|
|
99
|
-
return (message: BaseMessage): number =>
|
|
100
|
-
getTokenCountForMessage(message, countTokens);
|
|
101
|
-
};
|
|
102
|
-
|
|
103
|
-
/** Utility to manage the token encoder lifecycle explicitly. */
|
|
104
|
-
export const TokenEncoderManager = {
|
|
105
|
-
async initialize(): Promise<void> {
|
|
106
|
-
// No-op: ai-tokenizer is synchronously initialized from bundled data.
|
|
107
|
-
},
|
|
108
|
-
|
|
109
|
-
reset(): void {
|
|
110
|
-
for (const key of Object.keys(tokenizers)) {
|
|
111
|
-
delete tokenizers[key as EncodingName];
|
|
112
|
-
}
|
|
113
|
-
},
|
|
114
|
-
|
|
115
|
-
isInitialized(): boolean {
|
|
116
|
-
return Object.keys(tokenizers).length > 0;
|
|
117
|
-
},
|
|
118
|
-
};
|
|
1
|
+
import { Tokenizer } from 'ai-tokenizer';
|
|
2
|
+
import type { BaseMessage } from '@langchain/core/messages';
|
|
3
|
+
import { ContentTypes } from '@/common/enum';
|
|
4
|
+
|
|
5
|
+
export type EncodingName = 'o200k_base' | 'claude';
|
|
6
|
+
|
|
7
|
+
const tokenizers: Partial<Record<EncodingName, Tokenizer>> = {};
|
|
8
|
+
|
|
9
|
+
/**
 * Returns the tokenizer for an encoding, creating and caching it on first use.
 *
 * @param encoding - Which encoding to load (default: 'o200k_base').
 * @returns The cached tokenizer if one exists in `tokenizers`, otherwise a
 *   new one built from the dynamically imported encoding data.
 */
async function getTokenizer(
  encoding: EncodingName = 'o200k_base'
): Promise<Tokenizer> {
  const existing = tokenizers[encoding];
  if (existing) {
    return existing;
  }

  // Only the import for the selected encoding is evaluated.
  const encodingData = await (encoding === 'claude'
    ? import('ai-tokenizer/encoding/claude')
    : import('ai-tokenizer/encoding/o200k_base'));

  const created = new Tokenizer(encodingData);
  tokenizers[encoding] = created;
  return created;
}
|
|
24
|
+
|
|
25
|
+
/**
 * Picks the tokenizer encoding for a model name.
 *
 * @param model - Model identifier; matched case-insensitively.
 * @returns 'claude' when the name contains "claude", otherwise 'o200k_base'.
 */
export function encodingForModel(model: string): EncodingName {
  const mentionsClaude = model.toLowerCase().includes('claude');
  return mentionsClaude ? 'claude' : 'o200k_base';
}
|
|
31
|
+
|
|
32
|
+
/**
 * Counts the tokens of a message's content with the supplied text counter.
 *
 * Every message starts at a fixed overhead of 3 tokens. String, number and
 * boolean content is counted via `getTokenCount` (numbers and booleans are
 * stringified first). Array content is walked part by part:
 * - parts that are falsy, have no `type`, or whose type is
 *   `ContentTypes.ERROR` / `ContentTypes.IMAGE_URL` are skipped;
 * - `ContentTypes.TOOL_CALL` parts with a `tool_call` contribute the
 *   `name`, `args` and `output` fields that are non-empty strings;
 * - any other part contributes the value stored under the key equal to its
 *   `type`, processed recursively.
 *
 * @param message - Message whose `content` is measured.
 * @param getTokenCount - Returns the token count of a piece of text.
 * @returns Total token count including the per-message overhead.
 */
export function getTokenCountForMessage(
  message: BaseMessage,
  getTokenCount: (text: string) => number
): number {
  // Fixed per-message overhead.
  let total = 3;

  // Tool-call fields only count when they are non-empty strings.
  const addIfText = (field: unknown): void => {
    if (typeof field === 'string' && field !== '') {
      total += getTokenCount(field);
    }
  };

  const walk = (value: unknown): void => {
    switch (typeof value) {
      case 'string':
        total += getTokenCount(value);
        return;
      case 'number':
      case 'boolean':
        total += getTokenCount(value.toString());
        return;
    }

    if (!Array.isArray(value)) {
      return;
    }

    for (const part of value) {
      if (!part || !part.type) {
        continue;
      }
      if (
        part.type === ContentTypes.ERROR ||
        part.type === ContentTypes.IMAGE_URL
      ) {
        continue;
      }

      if (part.type === ContentTypes.TOOL_CALL && part.tool_call != null) {
        const call = part.tool_call;
        addIfText(call.name);
        addIfText(call.args);
        addIfText(call.output);
        continue;
      }

      // The payload of a part lives under the key named after its type.
      const nested = part[part.type];
      if (nested) {
        walk(nested);
      }
    }
  };

  walk(message.content);
  return total;
}
|
|
89
|
+
|
|
90
|
+
/**
 * Builds a per-message token counter for the given encoding.
 *
 * The tokenizer is obtained through `getTokenizer`, which loads the encoding
 * data lazily via dynamic import and caches the instance.
 *
 * @param encoding - Encoding to count with (default: 'o200k_base').
 * @returns A function that returns the token count of a message.
 */
export const createTokenCounter = async (
  encoding: EncodingName = 'o200k_base'
): Promise<(message: BaseMessage) => number> => {
  const tokenizer = await getTokenizer(encoding);

  // Created once and shared by every call of the returned counter.
  const measureText = (text: string): number => tokenizer.count(text);

  return (message: BaseMessage): number =>
    getTokenCountForMessage(message, measureText);
};
|
|
102
|
+
|
|
103
|
+
/** Explicit lifecycle controls for the module's cached tokenizers. */
export const TokenEncoderManager = {
  /** Intentionally does nothing; resolves immediately. */
  async initialize(): Promise<void> {
    // No-op: ai-tokenizer is synchronously initialized from bundled data.
  },

  /** Removes every cached tokenizer from `tokenizers`. */
  reset(): void {
    (Object.keys(tokenizers) as EncodingName[]).forEach((name) => {
      delete tokenizers[name];
    });
  },

  /** Reports whether at least one tokenizer is currently cached. */
  isInitialized(): boolean {
    return Object.keys(tokenizers).length !== 0;
  },
};
|