@riotprompt/riotprompt 0.0.8 → 0.0.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.kodrdriv-test-cache.json +6 -0
- package/README.md +2 -2
- package/dist/builder.js +3 -0
- package/dist/builder.js.map +1 -1
- package/dist/context-manager.d.ts +135 -0
- package/dist/context-manager.js +220 -0
- package/dist/context-manager.js.map +1 -0
- package/dist/conversation-logger.d.ts +283 -0
- package/dist/conversation-logger.js +454 -0
- package/dist/conversation-logger.js.map +1 -0
- package/dist/conversation.d.ts +271 -0
- package/dist/conversation.js +622 -0
- package/dist/conversation.js.map +1 -0
- package/dist/formatter.js.map +1 -1
- package/dist/iteration-strategy.d.ts +231 -0
- package/dist/iteration-strategy.js +486 -0
- package/dist/iteration-strategy.js.map +1 -0
- package/dist/loader.js +3 -0
- package/dist/loader.js.map +1 -1
- package/dist/message-builder.d.ts +156 -0
- package/dist/message-builder.js +254 -0
- package/dist/message-builder.js.map +1 -0
- package/dist/override.js +3 -0
- package/dist/override.js.map +1 -1
- package/dist/recipes.d.ts +42 -0
- package/dist/recipes.js +189 -4
- package/dist/recipes.js.map +1 -1
- package/dist/reflection.d.ts +250 -0
- package/dist/reflection.js +416 -0
- package/dist/reflection.js.map +1 -0
- package/dist/riotprompt.cjs +3549 -218
- package/dist/riotprompt.cjs.map +1 -1
- package/dist/riotprompt.d.ts +18 -2
- package/dist/riotprompt.js +9 -1
- package/dist/riotprompt.js.map +1 -1
- package/dist/token-budget.d.ts +177 -0
- package/dist/token-budget.js +404 -0
- package/dist/token-budget.js.map +1 -0
- package/dist/tools.d.ts +239 -0
- package/dist/tools.js +324 -0
- package/dist/tools.js.map +1 -0
- package/package.json +23 -20
package/dist/riotprompt.d.ts
CHANGED
|
@@ -13,7 +13,15 @@ export * as Loader from './loader';
|
|
|
13
13
|
export * as Override from './override';
|
|
14
14
|
export * as Builder from './builder';
|
|
15
15
|
export * as Recipes from './recipes';
|
|
16
|
-
export { cook, recipe, registerTemplates, getTemplates, clearTemplates } from './recipes';
|
|
16
|
+
export { cook, recipe, registerTemplates, getTemplates, clearTemplates, generateToolGuidance } from './recipes';
|
|
17
|
+
export { ConversationBuilder } from './conversation';
|
|
18
|
+
export { ContextManager } from './context-manager';
|
|
19
|
+
export { TokenCounter, TokenBudgetManager } from './token-budget';
|
|
20
|
+
export { MessageBuilder, MessageTemplates } from './message-builder';
|
|
21
|
+
export { ConversationLogger, ConversationReplayer } from './conversation-logger';
|
|
22
|
+
export { ToolRegistry } from './tools';
|
|
23
|
+
export { StrategyExecutor, IterationStrategyFactory } from './iteration-strategy';
|
|
24
|
+
export { MetricsCollector, ReflectionReportGenerator } from './reflection';
|
|
17
25
|
export type { Content } from './items/content';
|
|
18
26
|
export type { Context } from './items/context';
|
|
19
27
|
export type { Instruction } from './items/instruction';
|
|
@@ -25,4 +33,12 @@ export type { Prompt } from './prompt';
|
|
|
25
33
|
export type { FormatOptions, SectionSeparator, SectionTitleProperty } from './formatter';
|
|
26
34
|
export type { Model, Request } from './chat';
|
|
27
35
|
export type { Logger } from './logger';
|
|
28
|
-
export type { RecipeConfig, ContentItem, TemplateConfig } from './recipes';
|
|
36
|
+
export type { RecipeConfig, ContentItem, TemplateConfig, ToolGuidanceConfig } from './recipes';
|
|
37
|
+
export type { ConversationMessage, ConversationBuilderConfig, ConversationMetadata, ConversationState, InjectOptions, ToolCall } from './conversation';
|
|
38
|
+
export type { DynamicContentItem, TrackedContextItem, ContextStats } from './context-manager';
|
|
39
|
+
export type { TokenUsage, TokenBudgetConfig, CompressionStats, CompressionStrategy } from './token-budget';
|
|
40
|
+
export type { SemanticRole, MessageMetadata } from './message-builder';
|
|
41
|
+
export type { IterationStrategy, StrategyPhase, StrategyState, StrategyResult, StrategyContext, PhaseResult, ToolResult, LLMClient, ToolUsagePolicy, Insight } from './iteration-strategy';
|
|
42
|
+
export type { ReflectionReport, ReflectionConfig, AgenticExecutionMetrics, ToolExecutionMetric, ToolStats, Recommendation, ToolEffectivenessAnalysis, PerformanceInsights, QualityAssessment } from './reflection';
|
|
43
|
+
export type { LogConfig, LogFormat, LoggedConversation, ConversationLogMetadata, LoggedMessage, ToolCallLog, ConversationSummary, ReplayOptions, ReplayResult } from './conversation-logger';
|
|
44
|
+
export type { Tool, ToolParameter, ToolContext, ToolExample, ToolCost, OpenAITool, AnthropicTool, ToolDefinition, ToolUsageStats } from './tools';
|
package/dist/riotprompt.js
CHANGED
|
@@ -20,5 +20,13 @@ import * as builder from './builder.js';
|
|
|
20
20
|
export { builder as Builder };
|
|
21
21
|
import * as recipes from './recipes.js';
|
|
22
22
|
export { recipes as Recipes };
|
|
23
|
-
export { clearTemplates, cook, getTemplates, recipe, registerTemplates } from './recipes.js';
|
|
23
|
+
export { clearTemplates, cook, generateToolGuidance, getTemplates, recipe, registerTemplates } from './recipes.js';
|
|
24
|
+
export { ConversationBuilder } from './conversation.js';
|
|
25
|
+
export { ContextManager } from './context-manager.js';
|
|
26
|
+
export { TokenBudgetManager, TokenCounter } from './token-budget.js';
|
|
27
|
+
export { MessageBuilder, MessageTemplates } from './message-builder.js';
|
|
28
|
+
export { ConversationLogger, ConversationReplayer } from './conversation-logger.js';
|
|
29
|
+
export { ToolRegistry } from './tools.js';
|
|
30
|
+
export { IterationStrategyFactory, StrategyExecutor } from './iteration-strategy.js';
|
|
31
|
+
export { MetricsCollector, ReflectionReportGenerator } from './reflection.js';
|
|
24
32
|
//# sourceMappingURL=riotprompt.js.map
|
package/dist/riotprompt.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"riotprompt.js","sources":[],"sourcesContent":[],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"riotprompt.js","sources":[],"sourcesContent":[],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;"}
|
|
package/dist/token-budget.d.ts
@@ -0,0 +1,177 @@
|
|
|
1
|
+
import { ConversationMessage } from './conversation';
|
|
2
|
+
import { Model } from './chat';
|
|
3
|
+
/**
|
|
4
|
+
* Token usage information
|
|
5
|
+
*/
|
|
6
|
+
export interface TokenUsage {
|
|
7
|
+
used: number;
|
|
8
|
+
max: number;
|
|
9
|
+
remaining: number;
|
|
10
|
+
percentage: number;
|
|
11
|
+
}
|
|
12
|
+
/**
|
|
13
|
+
* Compression statistics
|
|
14
|
+
*/
|
|
15
|
+
export interface CompressionStats {
|
|
16
|
+
messagesBefore: number;
|
|
17
|
+
messagesAfter: number;
|
|
18
|
+
tokensBefore: number;
|
|
19
|
+
tokensAfter: number;
|
|
20
|
+
tokensSaved: number;
|
|
21
|
+
strategy: CompressionStrategy;
|
|
22
|
+
}
|
|
23
|
+
/**
|
|
24
|
+
* Compression strategy
|
|
25
|
+
*/
|
|
26
|
+
export type CompressionStrategy = 'priority-based' | 'fifo' | 'summarize' | 'adaptive';
|
|
27
|
+
/**
|
|
28
|
+
* Token budget configuration
|
|
29
|
+
*/
|
|
30
|
+
export interface TokenBudgetConfig {
|
|
31
|
+
max: number;
|
|
32
|
+
reserveForResponse: number;
|
|
33
|
+
warningThreshold?: number;
|
|
34
|
+
strategy: CompressionStrategy;
|
|
35
|
+
onBudgetExceeded: 'compress' | 'error' | 'warn' | 'truncate';
|
|
36
|
+
preserveRecent?: number;
|
|
37
|
+
preserveSystem?: boolean;
|
|
38
|
+
preserveHighPriority?: boolean;
|
|
39
|
+
onWarning?: (usage: TokenUsage) => void;
|
|
40
|
+
onCompression?: (stats: CompressionStats) => void;
|
|
41
|
+
}
|
|
42
|
+
/**
|
|
43
|
+
* TokenCounter counts tokens using tiktoken for accurate model-specific counting.
|
|
44
|
+
*
|
|
45
|
+
* Features:
|
|
46
|
+
* - Model-specific token counting
|
|
47
|
+
* - Message overhead calculation
|
|
48
|
+
* - Tool call token estimation
|
|
49
|
+
* - Response token estimation
|
|
50
|
+
*
|
|
51
|
+
* @example
|
|
52
|
+
* ```typescript
|
|
53
|
+
* const counter = new TokenCounter('gpt-4o');
|
|
54
|
+
*
|
|
55
|
+
* const tokens = counter.count('Hello, world!');
|
|
56
|
+
* console.log(`Text uses ${tokens} tokens`);
|
|
57
|
+
*
|
|
58
|
+
* const messageTokens = counter.countMessage({
|
|
59
|
+
* role: 'user',
|
|
60
|
+
* content: 'What is the weather?'
|
|
61
|
+
* });
|
|
62
|
+
* ```
|
|
63
|
+
*/
|
|
64
|
+
export declare class TokenCounter {
|
|
65
|
+
private encoder;
|
|
66
|
+
private model;
|
|
67
|
+
private logger;
|
|
68
|
+
constructor(model: Model, logger?: any);
|
|
69
|
+
/**
|
|
70
|
+
* Count tokens in text
|
|
71
|
+
*/
|
|
72
|
+
count(text: string): number;
|
|
73
|
+
/**
|
|
74
|
+
* Count tokens in a single message
|
|
75
|
+
*/
|
|
76
|
+
countMessage(message: ConversationMessage): number;
|
|
77
|
+
/**
|
|
78
|
+
* Count tokens in entire conversation
|
|
79
|
+
*/
|
|
80
|
+
countConversation(messages: ConversationMessage[]): number;
|
|
81
|
+
/**
|
|
82
|
+
* Count with additional overhead estimation
|
|
83
|
+
*/
|
|
84
|
+
countWithOverhead(messages: ConversationMessage[], includeToolOverhead?: boolean): number;
|
|
85
|
+
/**
|
|
86
|
+
* Estimate tokens needed for response
|
|
87
|
+
*/
|
|
88
|
+
estimateResponseTokens(messages: ConversationMessage[]): number;
|
|
89
|
+
/**
|
|
90
|
+
* Map RiotPrompt model to Tiktoken model
|
|
91
|
+
*/
|
|
92
|
+
private mapToTiktokenModel;
|
|
93
|
+
/**
|
|
94
|
+
* Free encoder resources
|
|
95
|
+
*/
|
|
96
|
+
dispose(): void;
|
|
97
|
+
}
|
|
98
|
+
/**
|
|
99
|
+
* TokenBudgetManager manages token budgets and compression strategies.
|
|
100
|
+
*
|
|
101
|
+
* Features:
|
|
102
|
+
* - Monitor token usage
|
|
103
|
+
* - Automatic compression when budget exceeded
|
|
104
|
+
* - Multiple compression strategies
|
|
105
|
+
* - Priority-based message retention
|
|
106
|
+
* - Usage statistics and callbacks
|
|
107
|
+
*
|
|
108
|
+
* @example
|
|
109
|
+
* ```typescript
|
|
110
|
+
* const manager = new TokenBudgetManager({
|
|
111
|
+
* max: 8000,
|
|
112
|
+
* reserveForResponse: 1000,
|
|
113
|
+
* strategy: 'priority-based',
|
|
114
|
+
* onBudgetExceeded: 'compress'
|
|
115
|
+
* }, 'gpt-4o');
|
|
116
|
+
*
|
|
117
|
+
* // Check if message can be added
|
|
118
|
+
* if (manager.canAddMessage(message, messages)) {
|
|
119
|
+
* messages.push(message);
|
|
120
|
+
* } else {
|
|
121
|
+
* // Compress conversation
|
|
122
|
+
* messages = manager.compress(messages);
|
|
123
|
+
* messages.push(message);
|
|
124
|
+
* }
|
|
125
|
+
* ```
|
|
126
|
+
*/
|
|
127
|
+
export declare class TokenBudgetManager {
|
|
128
|
+
private config;
|
|
129
|
+
private counter;
|
|
130
|
+
private logger;
|
|
131
|
+
constructor(config: TokenBudgetConfig, model: Model, logger?: any);
|
|
132
|
+
/**
|
|
133
|
+
* Get current token usage
|
|
134
|
+
*/
|
|
135
|
+
getCurrentUsage(messages: ConversationMessage[]): TokenUsage;
|
|
136
|
+
/**
|
|
137
|
+
* Get remaining tokens available
|
|
138
|
+
*/
|
|
139
|
+
getRemainingTokens(messages: ConversationMessage[]): number;
|
|
140
|
+
/**
|
|
141
|
+
* Check if near token limit
|
|
142
|
+
*/
|
|
143
|
+
isNearLimit(messages: ConversationMessage[], threshold?: number): boolean;
|
|
144
|
+
/**
|
|
145
|
+
* Check if a message can be added without exceeding budget
|
|
146
|
+
*/
|
|
147
|
+
canAddMessage(message: ConversationMessage, currentMessages: ConversationMessage[]): boolean;
|
|
148
|
+
/**
|
|
149
|
+
* Compress messages according to strategy
|
|
150
|
+
*/
|
|
151
|
+
compress(messages: ConversationMessage[]): ConversationMessage[];
|
|
152
|
+
/**
|
|
153
|
+
* Compress by priority (keep high-priority messages)
|
|
154
|
+
*/
|
|
155
|
+
private compressByPriority;
|
|
156
|
+
/**
|
|
157
|
+
* Compress using FIFO (remove oldest first)
|
|
158
|
+
*/
|
|
159
|
+
private compressFIFO;
|
|
160
|
+
/**
|
|
161
|
+
* Adaptive compression based on conversation phase
|
|
162
|
+
*/
|
|
163
|
+
private compressAdaptive;
|
|
164
|
+
/**
|
|
165
|
+
* Calculate message priority for compression
|
|
166
|
+
*/
|
|
167
|
+
private calculatePriority;
|
|
168
|
+
/**
|
|
169
|
+
* Truncate to exact number of messages
|
|
170
|
+
*/
|
|
171
|
+
truncate(messages: ConversationMessage[], maxMessages: number): ConversationMessage[];
|
|
172
|
+
/**
|
|
173
|
+
* Dispose resources
|
|
174
|
+
*/
|
|
175
|
+
dispose(): void;
|
|
176
|
+
}
|
|
177
|
+
export default TokenBudgetManager;
|
|
package/dist/token-budget.js
@@ -0,0 +1,404 @@
|
|
|
1
|
+
import { encoding_for_model } from 'tiktoken';
|
|
2
|
+
import { wrapLogger, DEFAULT_LOGGER } from './logger.js';
|
|
3
|
+
|
|
4
|
+
/**
 * Compiler helper that initialises a class field on `obj`.
 *
 * When `key` is already reachable on `obj` (own or inherited) the value is
 * installed with `Object.defineProperty`, producing an own, enumerable,
 * configurable, writable data property. Otherwise a plain assignment is used.
 *
 * @param obj   Target object (usually `this` inside a constructor).
 * @param key   Property name.
 * @param value Initial value.
 * @returns The same `obj`, to allow chaining.
 */
function _define_property(obj, key, value) {
    const alreadyVisible = key in obj;
    if (!alreadyVisible) {
        obj[key] = value;
        return obj;
    }
    const descriptor = {
        value: value,
        enumerable: true,
        configurable: true,
        writable: true
    };
    Object.defineProperty(obj, key, descriptor);
    return obj;
}
|
|
17
|
+
// ===== TOKEN COUNTER =====
|
|
18
|
+
/**
 * TokenCounter counts tokens using tiktoken.
 *
 * Features:
 * - Token counting for text, single messages and whole conversations
 * - Message overhead calculation
 * - Tool call token estimation
 * - Response token estimation
 *
 * Note: every RiotPrompt model is currently mapped to the `gpt-4o` tiktoken
 * encoding (see `mapToTiktokenModel`).
 *
 * @example
 * ```typescript
 * const counter = new TokenCounter('gpt-4o');
 *
 * const tokens = counter.count('Hello, world!');
 * console.log(`Text uses ${tokens} tokens`);
 *
 * const messageTokens = counter.countMessage({
 *     role: 'user',
 *     content: 'What is the weather?'
 * });
 *
 * counter.dispose(); // release the encoder when finished
 * ```
 */
class TokenCounter {
    /**
     * @param model  RiotPrompt model identifier; used to choose the tiktoken encoding.
     * @param logger Optional logger; `DEFAULT_LOGGER` is used when omitted.
     */
    constructor(model, logger) {
        _define_property(this, "encoder", void 0);
        _define_property(this, "model", void 0);
        _define_property(this, "logger", void 0);
        this.model = model;
        this.logger = wrapLogger(logger || DEFAULT_LOGGER, 'TokenCounter');
        // Map RiotPrompt models to Tiktoken models
        const tiktokenModel = this.mapToTiktokenModel(model);
        this.encoder = encoding_for_model(tiktokenModel);
        this.logger.debug('Created TokenCounter', {
            model
        });
    }

    /**
     * Count tokens in text.
     *
     * @param text Text to encode; any falsy value (empty string, null, undefined) counts as 0.
     * @returns Number of tokens produced by the encoder.
     * @throws Error when called with non-empty text after `dispose()`.
     */
    count(text) {
        if (!text) return 0;
        if (!this.encoder) {
            // Fail with a readable message instead of touching a released encoder.
            throw new Error('TokenCounter has been disposed');
        }
        return this.encoder.encode(text).length;
    }

    /**
     * Count tokens in a single message.
     *
     * The estimate is: 4 (base per-message overhead) + content tokens + 1 (role)
     * + for each tool call: tokens of its JSON serialisation + 3
     * + for a tool result: tokens of `tool_call_id` + 2.
     *
     * @param message Conversation message to measure.
     * @returns Estimated token count for the message.
     */
    countMessage(message) {
        const BASE_OVERHEAD = 4;
        const ROLE_TOKENS = 1;
        const TOOL_CALL_OVERHEAD = 3;
        const TOOL_RESULT_OVERHEAD = 2;

        let tokens = BASE_OVERHEAD + ROLE_TOKENS;
        if (message.content) {
            tokens += this.count(message.content);
        }
        if (message.tool_calls) {
            for (const toolCall of message.tool_calls) {
                tokens += this.count(JSON.stringify(toolCall)) + TOOL_CALL_OVERHEAD;
            }
        }
        if (message.tool_call_id) {
            tokens += this.count(message.tool_call_id) + TOOL_RESULT_OVERHEAD;
        }
        return tokens;
    }

    /**
     * Count tokens in an entire conversation.
     *
     * @param messages Messages to measure.
     * @returns 3 (conversation start overhead) plus the sum of `countMessage` for each message.
     */
    countConversation(messages) {
        let total = 3; // Conversation start overhead
        for (const message of messages) {
            total += this.countMessage(message);
        }
        return total;
    }

    /**
     * Count with additional overhead estimation.
     *
     * @param messages            Messages to measure.
     * @param includeToolOverhead When true and at least one message carries tool calls,
     *                            a flat 100-token estimate for tool definitions is added.
     * @returns Estimated token count.
     */
    countWithOverhead(messages, includeToolOverhead = false) {
        const total = this.countConversation(messages);
        if (!includeToolOverhead) {
            return total;
        }
        const hasTools = messages.some((m) => m.tool_calls && m.tool_calls.length > 0);
        return hasTools ? total + 100 : total; // Estimated tool definition overhead
    }

    /**
     * Estimate tokens needed for response.
     *
     * Heuristic: 20% of the input size, but never less than 500.
     *
     * @param messages Messages that will be sent as input.
     * @returns Estimated response token count.
     */
    estimateResponseTokens(messages) {
        const inputTokens = this.countConversation(messages);
        return Math.max(500, Math.floor(inputTokens * 0.2));
    }

    /**
     * Map RiotPrompt model to Tiktoken model.
     *
     * Every known model ('gpt-4o', 'gpt-4o-mini', 'o1-preview', 'o1-mini', 'o1',
     * 'o3-mini', 'o1-pro') and every unknown model maps to the 'gpt-4o' encoding.
     *
     * @param model RiotPrompt model identifier (currently not used to differentiate).
     * @returns Tiktoken model name.
     */
    mapToTiktokenModel(model) {
        return 'gpt-4o';
    }

    /**
     * Free encoder resources.
     *
     * Safe to call more than once: the encoder is released on the first call only,
     * so shared owners cannot trigger a second `free()` on the same encoder.
     */
    dispose() {
        const encoder = this.encoder;
        if (!encoder) {
            return;
        }
        this.encoder = void 0;
        encoder.free();
    }
}
|
|
136
|
+
// ===== TOKEN BUDGET MANAGER =====
|
|
137
|
+
/**
 * TokenBudgetManager manages token budgets and compression strategies.
 *
 * Features:
 * - Monitor token usage
 * - Compression of a conversation down to the configured budget
 * - Multiple compression strategies
 * - Priority-based message retention
 * - Usage statistics and callbacks
 *
 * @example
 * ```typescript
 * const manager = new TokenBudgetManager({
 *     max: 8000,
 *     reserveForResponse: 1000,
 *     strategy: 'priority-based',
 *     onBudgetExceeded: 'compress'
 * }, 'gpt-4o');
 *
 * // Check if message can be added
 * if (manager.canAddMessage(message, messages)) {
 *     messages.push(message);
 * } else {
 *     // Compress conversation
 *     messages = manager.compress(messages);
 *     messages.push(message);
 * }
 * ```
 */
class TokenBudgetManager {
    /**
     * @param config Budget configuration. Missing optional fields default to
     *               warningThreshold 0.8, preserveRecent 3, preserveSystem true,
     *               preserveHighPriority true and no-op callbacks.
     * @param model  Model identifier handed to the internal TokenCounter.
     * @param logger Optional logger; `DEFAULT_LOGGER` is used when omitted.
     */
    constructor(config, model, logger) {
        _define_property(this, "config", void 0);
        _define_property(this, "counter", void 0);
        _define_property(this, "logger", void 0);
        this.config = {
            warningThreshold: 0.8,
            preserveRecent: 3,
            preserveSystem: true,
            preserveHighPriority: true,
            onWarning: () => {},
            onCompression: () => {},
            ...config
        };
        this.counter = new TokenCounter(model, logger);
        this.logger = wrapLogger(logger || DEFAULT_LOGGER, 'TokenBudgetManager');
        this.logger.debug('Created TokenBudgetManager', {
            max: this.config.max,
            strategy: this.config.strategy
        });
    }

    /**
     * Get current token usage.
     *
     * @param messages Conversation to measure.
     * @returns `used` (conversation tokens), `max` (configured budget),
     *          `remaining` (max - used - reserveForResponse, floored at 0) and
     *          `percentage` (used / max * 100; the response reserve is not included).
     */
    getCurrentUsage(messages) {
        const used = this.counter.countConversation(messages);
        const max = this.config.max;
        return {
            used,
            max,
            remaining: Math.max(0, max - used - this.config.reserveForResponse),
            percentage: used / max * 100
        };
    }

    /**
     * Get remaining tokens available.
     *
     * @param messages Conversation to measure.
     * @returns The `remaining` field of `getCurrentUsage`.
     */
    getRemainingTokens(messages) {
        return this.getCurrentUsage(messages).remaining;
    }

    /**
     * Check if near token limit; invokes `config.onWarning(usage)` when it is.
     *
     * @param messages  Conversation to measure.
     * @param threshold Fraction of the budget (0..1). Falls back to
     *                  `config.warningThreshold`, then to 0.8.
     * @returns True when the usage percentage has reached the threshold.
     */
    isNearLimit(messages, threshold) {
        const usage = this.getCurrentUsage(messages);
        const configured = this.config.warningThreshold;
        // An explicit `warningThreshold: undefined` in the caller's config overrides the
        // constructor default, so fall back once more instead of comparing against NaN.
        const fallback = configured != null ? configured : 0.8;
        const checkThreshold = threshold != null ? threshold : fallback;
        const isNear = usage.percentage >= checkThreshold * 100;
        if (isNear && this.config.onWarning != null) {
            this.config.onWarning(usage);
        }
        return isNear;
    }

    /**
     * Check if a message can be added without exceeding budget.
     *
     * @param message         Candidate message.
     * @param currentMessages Messages already in the conversation.
     * @returns True when conversation + message + response reserve fits in `config.max`.
     */
    canAddMessage(message, currentMessages) {
        const currentTokens = this.counter.countConversation(currentMessages);
        const messageTokens = this.counter.countMessage(message);
        return currentTokens + messageTokens + this.config.reserveForResponse <= this.config.max;
    }

    /**
     * Compress messages according to strategy.
     *
     * The target is `config.max - config.reserveForResponse` tokens. When the
     * conversation already fits, the input array itself is returned and no callback
     * fires. Otherwise `config.onCompression(stats)` is invoked and the stats are logged.
     *
     * @param messages Conversation to compress.
     * @returns The retained messages in their original relative order.
     */
    compress(messages) {
        const before = messages.length;
        const tokensBefore = this.counter.countConversation(messages);
        const targetTokens = this.config.max - this.config.reserveForResponse;
        this.logger.debug('Compressing messages', {
            before,
            tokensBefore,
            targetTokens,
            strategy: this.config.strategy
        });
        // No compression needed
        if (tokensBefore <= targetTokens) {
            return messages;
        }
        let compressed;
        switch (this.config.strategy) {
            case 'priority-based':
                compressed = this.compressByPriority(messages, targetTokens);
                break;
            case 'adaptive':
                compressed = this.compressAdaptive(messages, targetTokens);
                break;
            case 'fifo':
            case 'summarize': // For now, fall back to FIFO (summarization would require LLM call)
            default:
                compressed = this.compressFIFO(messages, targetTokens);
        }
        const tokensAfter = this.counter.countConversation(compressed);
        const stats = {
            messagesBefore: before,
            messagesAfter: compressed.length,
            tokensBefore,
            tokensAfter,
            tokensSaved: tokensBefore - tokensAfter,
            strategy: this.config.strategy
        };
        if (this.config.onCompression != null) {
            this.config.onCompression(stats);
        }
        this.logger.info('Compressed conversation', stats);
        return compressed;
    }

    /**
     * Compress by priority (keep high-priority messages).
     *
     * Messages are considered from highest to lowest priority and kept greedily
     * while they fit; the survivors are returned in original order.
     *
     * @param messages     Conversation to compress.
     * @param targetTokens Token ceiling for the result, measured with `countConversation`.
     * @returns Retained messages in original order.
     */
    compressByPriority(messages, targetTokens) {
        const ranked = messages.map((message, index) => ({
            message,
            index,
            priority: this.calculatePriority(message, index, messages.length),
            tokens: this.counter.countMessage(message)
        }));
        ranked.sort((a, b) => b.priority - a.priority);

        const kept = [];
        // Start from the fixed per-conversation overhead so the result also fits when it
        // is re-measured with countConversation().
        let totalTokens = this.counter.countConversation([]);
        for (const item of ranked) {
            if (totalTokens + item.tokens <= targetTokens) {
                kept.push(item);
                totalTokens += item.tokens;
            }
        }
        kept.sort((a, b) => a.index - b.index);
        return kept.map((item) => item.message);
    }

    /**
     * Compress using FIFO (remove oldest first).
     *
     * Order of retention:
     * 1. all system messages, when `config.preserveSystem` is set (system messages are
     *    dropped entirely otherwise);
     * 2. the most recent non-system messages, newest first, while they fit;
     * 3. older non-system messages, newest first, stopping at the first that does not fit.
     *
     * @param messages       Conversation to compress.
     * @param targetTokens   Token ceiling for the result, measured with `countConversation`.
     * @param preserveRecent Optional override for the size of the "recent" window;
     *                       defaults to `config.preserveRecent`, then to 3.
     * @returns Retained messages in original order.
     */
    compressFIFO(messages, targetTokens, preserveRecent) {
        const configuredRecent = this.config.preserveRecent;
        const defaultRecent = configuredRecent != null ? configuredRecent : 3;
        const recentCount = preserveRecent != null ? preserveRecent : defaultRecent;

        // Set membership keeps the repeated "already kept?" checks O(1).
        const preserved = new Set();
        // Start from the fixed per-conversation overhead so the result also fits when it
        // is re-measured with countConversation().
        let totalTokens = this.counter.countConversation([]);

        // Always preserve system messages if configured
        if (this.config.preserveSystem) {
            for (const msg of messages) {
                if (msg.role === 'system') {
                    preserved.add(msg);
                    totalTokens += this.counter.countMessage(msg);
                }
            }
        }

        // Preserve recent messages. slice(-0) would return the whole array, so a
        // non-positive window is treated as "no recent messages".
        const recentMessages = recentCount > 0
            ? messages.slice(-recentCount).filter((m) => m.role !== 'system')
            : [];
        // Walk newest-first so that a tight budget drops the oldest of the recent
        // messages rather than the latest one.
        for (let i = recentMessages.length - 1; i >= 0; i--) {
            const msg = recentMessages[i];
            if (preserved.has(msg)) {
                continue;
            }
            const tokens = this.counter.countMessage(msg);
            if (totalTokens + tokens <= targetTokens) {
                preserved.add(msg);
                totalTokens += tokens;
            }
        }

        // Add older messages if space available
        const olderMessages = messages.filter((m) => m.role !== 'system' && !preserved.has(m));
        for (let i = olderMessages.length - 1; i >= 0; i--) {
            const msg = olderMessages[i];
            const tokens = this.counter.countMessage(msg);
            if (totalTokens + tokens > targetTokens) {
                break;
            }
            preserved.add(msg);
            totalTokens += tokens;
        }

        // Filter the original array to maintain conversation order
        return messages.filter((m) => preserved.has(m));
    }

    /**
     * Adaptive compression based on conversation phase.
     *
     * - up to 5 messages: FIFO with the configured recent window;
     * - up to 15 messages: FIFO with a recent window of 5;
     * - more: priority-based.
     *
     * @param messages     Conversation to compress.
     * @param targetTokens Token ceiling for the result.
     * @returns Retained messages in original order.
     */
    compressAdaptive(messages, targetTokens) {
        const messageCount = messages.length;
        if (messageCount <= 5) {
            return this.compressFIFO(messages, targetTokens);
        }
        if (messageCount <= 15) {
            // Reuse this manager's counter; building a temporary manager here would
            // allocate a second encoder that nobody disposes.
            return this.compressFIFO(messages, targetTokens, 5);
        }
        return this.compressByPriority(messages, targetTokens);
    }

    /**
     * Calculate message priority for compression.
     *
     * Base 1.0 (10.0 for system messages), plus up to 2.0 for recency
     * (`index / total * 2`), plus 0.5 for tool results and 0.8 for messages
     * carrying tool calls.
     *
     * @param message Message to score.
     * @param index   Position of the message in the conversation.
     * @param total   Number of messages in the conversation.
     * @returns Priority score; higher is kept first.
     */
    calculatePriority(message, index, total) {
        let priority = message.role === 'system' ? 10.0 : 1.0;
        priority += (index / total) * 2;
        if (message.role === 'tool') {
            priority += 0.5;
        }
        if (message.tool_calls && message.tool_calls.length > 0) {
            priority += 0.8;
        }
        return priority;
    }

    /**
     * Truncate to at most `maxMessages` messages, keeping system messages first.
     *
     * When the conversation already fits, the input array itself is returned.
     * Otherwise the result is all system messages followed by the most recent
     * non-system messages that fit in the remaining slots. System messages are never
     * dropped, so the result can exceed `maxMessages` when there are more system
     * messages than `maxMessages`.
     *
     * @param messages    Conversation to truncate.
     * @param maxMessages Desired maximum number of messages.
     * @returns Truncated conversation.
     */
    truncate(messages, maxMessages) {
        if (messages.length <= maxMessages) {
            return messages;
        }
        const systemMessages = messages.filter((m) => m.role === 'system');
        const otherMessages = messages.filter((m) => m.role !== 'system');
        const slots = maxMessages - systemMessages.length;
        // slice(-0) returns the whole array and a negative slot count would slice from
        // the front, so only slice when there is room for non-system messages.
        const recentOther = slots > 0 ? otherMessages.slice(-slots) : [];
        return [
            ...systemMessages,
            ...recentOther
        ];
    }

    /**
     * Dispose resources held by the internal token counter.
     */
    dispose() {
        this.counter.dispose();
    }
}
|
|
402
|
+
|
|
403
|
+
export { TokenBudgetManager, TokenCounter, TokenBudgetManager as default };
|
|
404
|
+
//# sourceMappingURL=token-budget.js.map
|