llmist 1.2.0 → 1.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +12 -75
- package/dist/{chunk-KORMY3CD.js → chunk-RZTAKIDE.js} +605 -4
- package/dist/chunk-RZTAKIDE.js.map +1 -0
- package/dist/{chunk-LELPPETT.js → chunk-TFIKR2RK.js} +459 -3
- package/dist/chunk-TFIKR2RK.js.map +1 -0
- package/dist/cli.cjs +628 -23
- package/dist/cli.cjs.map +1 -1
- package/dist/cli.js +49 -22
- package/dist/cli.js.map +1 -1
- package/dist/index.cjs +769 -3
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +354 -32
- package/dist/index.d.ts +354 -32
- package/dist/index.js +177 -2
- package/dist/index.js.map +1 -1
- package/dist/{mock-stream-DKF5yatf.d.cts → mock-stream-DNt-HBTn.d.cts} +525 -79
- package/dist/{mock-stream-DKF5yatf.d.ts → mock-stream-DNt-HBTn.d.ts} +525 -79
- package/dist/testing/index.cjs +1063 -4
- package/dist/testing/index.cjs.map +1 -1
- package/dist/testing/index.d.cts +437 -3
- package/dist/testing/index.d.ts +437 -3
- package/dist/testing/index.js +54 -4
- package/package.json +1 -1
- package/dist/chunk-KORMY3CD.js.map +0 -1
- package/dist/chunk-LELPPETT.js.map +0 -1
|
@@ -1,5 +1,319 @@
|
|
|
1
|
-
import { Logger, ILogObj } from 'tslog';
|
|
2
1
|
import { ZodTypeAny } from 'zod';
|
|
2
|
+
import { Logger, ILogObj } from 'tslog';
|
|
3
|
+
|
|
4
|
+
/**
|
|
5
|
+
* Model Catalog Types
|
|
6
|
+
*
|
|
7
|
+
* Type definitions for LLM model specifications including
|
|
8
|
+
* context windows, pricing, features, and capabilities.
|
|
9
|
+
*/
|
|
10
|
+
interface ModelPricing {
|
|
11
|
+
/** Price per 1 million input tokens in USD */
|
|
12
|
+
input: number;
|
|
13
|
+
/** Price per 1 million output tokens in USD */
|
|
14
|
+
output: number;
|
|
15
|
+
/** Price per 1 million cached input tokens in USD (if supported) */
|
|
16
|
+
cachedInput?: number;
|
|
17
|
+
/** Price per 1 million cache write tokens in USD (Anthropic: 1.25x input price) */
|
|
18
|
+
cacheWriteInput?: number;
|
|
19
|
+
}
|
|
20
|
+
interface ModelFeatures {
|
|
21
|
+
/** Supports streaming responses */
|
|
22
|
+
streaming: boolean;
|
|
23
|
+
/** Supports function/tool calling */
|
|
24
|
+
functionCalling: boolean;
|
|
25
|
+
/** Supports vision/image input */
|
|
26
|
+
vision: boolean;
|
|
27
|
+
/** Supports extended thinking/reasoning */
|
|
28
|
+
reasoning?: boolean;
|
|
29
|
+
/** Supports structured outputs */
|
|
30
|
+
structuredOutputs?: boolean;
|
|
31
|
+
/** Supports fine-tuning */
|
|
32
|
+
fineTuning?: boolean;
|
|
33
|
+
}
|
|
34
|
+
interface ModelSpec {
|
|
35
|
+
/** Provider identifier (e.g., 'openai', 'anthropic', 'gemini') */
|
|
36
|
+
provider: string;
|
|
37
|
+
/** Full model identifier used in API calls */
|
|
38
|
+
modelId: string;
|
|
39
|
+
/** Human-readable display name */
|
|
40
|
+
displayName: string;
|
|
41
|
+
/** Maximum context window size in tokens */
|
|
42
|
+
contextWindow: number;
|
|
43
|
+
/** Maximum output tokens per request */
|
|
44
|
+
maxOutputTokens: number;
|
|
45
|
+
/** Pricing per 1M tokens */
|
|
46
|
+
pricing: ModelPricing;
|
|
47
|
+
/** Training data knowledge cutoff date (YYYY-MM-DD or description) */
|
|
48
|
+
knowledgeCutoff: string;
|
|
49
|
+
/** Supported features and capabilities */
|
|
50
|
+
features: ModelFeatures;
|
|
51
|
+
/** Additional metadata */
|
|
52
|
+
metadata?: {
|
|
53
|
+
/** Model family/series */
|
|
54
|
+
family?: string;
|
|
55
|
+
/** Release date */
|
|
56
|
+
releaseDate?: string;
|
|
57
|
+
/** Deprecation date if applicable */
|
|
58
|
+
deprecationDate?: string;
|
|
59
|
+
/** Notes or special information */
|
|
60
|
+
notes?: string;
|
|
61
|
+
/** Whether manual temperature configuration is supported (defaults to true) */
|
|
62
|
+
supportsTemperature?: boolean;
|
|
63
|
+
};
|
|
64
|
+
}
|
|
65
|
+
interface ModelLimits {
|
|
66
|
+
contextWindow: number;
|
|
67
|
+
maxOutputTokens: number;
|
|
68
|
+
}
|
|
69
|
+
interface CostEstimate {
|
|
70
|
+
inputCost: number;
|
|
71
|
+
/** Cost for cached input tokens (already included in inputCost calculation) */
|
|
72
|
+
cachedInputCost: number;
|
|
73
|
+
/** Cost for cache creation tokens (already included in inputCost calculation, Anthropic only) */
|
|
74
|
+
cacheCreationCost: number;
|
|
75
|
+
outputCost: number;
|
|
76
|
+
totalCost: number;
|
|
77
|
+
currency: "USD";
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
/**
|
|
81
|
+
* Strategy interface for context compaction.
|
|
82
|
+
*
|
|
83
|
+
* Strategies define how conversation history is compressed to fit within
|
|
84
|
+
* context window limits. Different strategies trade off between:
|
|
85
|
+
* - Speed (LLM calls vs local processing)
|
|
86
|
+
* - Context preservation (summary quality vs simple truncation)
|
|
87
|
+
* - Cost (summarization model usage)
|
|
88
|
+
*/
|
|
89
|
+
|
|
90
|
+
/**
|
|
91
|
+
* Context provided to compaction strategies.
|
|
92
|
+
*/
|
|
93
|
+
interface CompactionContext {
|
|
94
|
+
/** Current token count of the conversation */
|
|
95
|
+
currentTokens: number;
|
|
96
|
+
/** Target token count after compaction */
|
|
97
|
+
targetTokens: number;
|
|
98
|
+
/** Model's context window limits */
|
|
99
|
+
modelLimits: ModelLimits;
|
|
100
|
+
/** LLMist client for summarization calls */
|
|
101
|
+
client: LLMist;
|
|
102
|
+
/** Model identifier for token counting and summarization */
|
|
103
|
+
model: string;
|
|
104
|
+
}
|
|
105
|
+
/**
|
|
106
|
+
* Result of a compaction operation.
|
|
107
|
+
*/
|
|
108
|
+
interface CompactionResult {
|
|
109
|
+
/** Compacted messages to replace history with */
|
|
110
|
+
messages: LLMMessage[];
|
|
111
|
+
/** Summary text if summarization was used */
|
|
112
|
+
summary?: string;
|
|
113
|
+
/** The name of the strategy that was ultimately executed */
|
|
114
|
+
strategyName: string;
|
|
115
|
+
/** Metadata about the compaction */
|
|
116
|
+
metadata: {
|
|
117
|
+
/** Number of messages before compaction */
|
|
118
|
+
originalCount: number;
|
|
119
|
+
/** Number of messages after compaction */
|
|
120
|
+
compactedCount: number;
|
|
121
|
+
/** Estimated tokens before compaction */
|
|
122
|
+
tokensBefore: number;
|
|
123
|
+
/** Estimated tokens after compaction */
|
|
124
|
+
tokensAfter: number;
|
|
125
|
+
};
|
|
126
|
+
}
|
|
127
|
+
/**
|
|
128
|
+
* Interface for compaction strategy implementations.
|
|
129
|
+
*
|
|
130
|
+
* Strategies receive the conversation history (excluding base messages like
|
|
131
|
+
* system prompt and gadget instructions) and must return a compacted version.
|
|
132
|
+
*
|
|
133
|
+
* @example
|
|
134
|
+
* ```typescript
|
|
135
|
+
* class MyCustomStrategy implements CompactionStrategy {
|
|
136
|
+
* readonly name = 'my-custom';
|
|
137
|
+
*
|
|
138
|
+
* async compact(
|
|
139
|
+
* messages: LLMMessage[],
|
|
140
|
+
* config: ResolvedCompactionConfig,
|
|
141
|
+
* context: CompactionContext
|
|
142
|
+
* ): Promise<CompactionResult> {
|
|
143
|
+
* // Custom compaction logic
|
|
144
|
+
* return {
|
|
145
|
+
* messages: compactedMessages,
|
|
146
|
+
* metadata: { ... }
|
|
147
|
+
* };
|
|
148
|
+
* }
|
|
149
|
+
* }
|
|
150
|
+
* ```
|
|
151
|
+
*/
|
|
152
|
+
interface CompactionStrategy {
|
|
153
|
+
/** Human-readable name of the strategy */
|
|
154
|
+
readonly name: string;
|
|
155
|
+
/**
|
|
156
|
+
* Compact the given messages to fit within target token count.
|
|
157
|
+
*
|
|
158
|
+
* @param messages - Conversation history messages (excludes system/gadget base)
|
|
159
|
+
* @param config - Resolved compaction configuration
|
|
160
|
+
* @param context - Context including token counts and LLM client
|
|
161
|
+
* @returns Compacted messages with metadata
|
|
162
|
+
*/
|
|
163
|
+
compact(messages: LLMMessage[], config: ResolvedCompactionConfig, context: CompactionContext): Promise<CompactionResult>;
|
|
164
|
+
}
|
|
165
|
+
/**
|
|
166
|
+
* Utility to group messages into logical conversation turns.
|
|
167
|
+
*
|
|
168
|
+
* A "turn" is typically a user message followed by an assistant response.
|
|
169
|
+
* Gadget calls are grouped with the preceding assistant message.
|
|
170
|
+
*/
|
|
171
|
+
interface MessageTurn {
|
|
172
|
+
/** Messages in this turn (user + assistant + any gadget results) */
|
|
173
|
+
messages: LLMMessage[];
|
|
174
|
+
/** Estimated token count for this turn */
|
|
175
|
+
tokenEstimate: number;
|
|
176
|
+
}
|
|
177
|
+
|
|
178
|
+
/**
|
|
179
|
+
* Configuration types for the context compaction system.
|
|
180
|
+
*
|
|
181
|
+
* Context compaction automatically manages conversation history to prevent
|
|
182
|
+
* context window overflow in long-running agent conversations.
|
|
183
|
+
*/
|
|
184
|
+
|
|
185
|
+
/**
|
|
186
|
+
* Event emitted when compaction occurs.
|
|
187
|
+
* This is included in StreamEvent for UI visibility.
|
|
188
|
+
*/
|
|
189
|
+
interface CompactionEvent {
|
|
190
|
+
/** The strategy that performed the compaction */
|
|
191
|
+
strategy: string;
|
|
192
|
+
/** Token count before compaction */
|
|
193
|
+
tokensBefore: number;
|
|
194
|
+
/** Token count after compaction */
|
|
195
|
+
tokensAfter: number;
|
|
196
|
+
/** Number of messages before compaction */
|
|
197
|
+
messagesBefore: number;
|
|
198
|
+
/** Number of messages after compaction */
|
|
199
|
+
messagesAfter: number;
|
|
200
|
+
/** Summary text if summarization was used */
|
|
201
|
+
summary?: string;
|
|
202
|
+
/** Agent iteration when compaction occurred */
|
|
203
|
+
iteration: number;
|
|
204
|
+
}
|
|
205
|
+
/**
|
|
206
|
+
* Statistics about compaction activity.
|
|
207
|
+
*/
|
|
208
|
+
interface CompactionStats {
|
|
209
|
+
/** Total number of compactions performed */
|
|
210
|
+
totalCompactions: number;
|
|
211
|
+
/** Total tokens saved across all compactions */
|
|
212
|
+
totalTokensSaved: number;
|
|
213
|
+
/** Current context usage */
|
|
214
|
+
currentUsage: {
|
|
215
|
+
tokens: number;
|
|
216
|
+
percent: number;
|
|
217
|
+
};
|
|
218
|
+
/** Model's context window size */
|
|
219
|
+
contextWindow: number;
|
|
220
|
+
}
|
|
221
|
+
/**
|
|
222
|
+
* Configuration for the context compaction system.
|
|
223
|
+
*
|
|
224
|
+
* @example
|
|
225
|
+
* ```typescript
|
|
226
|
+
* // Custom configuration
|
|
227
|
+
* const agent = await LLMist.createAgent()
|
|
228
|
+
* .withModel('sonnet')
|
|
229
|
+
* .withCompaction({
|
|
230
|
+
* triggerThresholdPercent: 70,
|
|
231
|
+
* targetPercent: 40,
|
|
232
|
+
* preserveRecentTurns: 10,
|
|
233
|
+
* })
|
|
234
|
+
* .ask('...');
|
|
235
|
+
*
|
|
236
|
+
* // Disable compaction
|
|
237
|
+
* const agent = await LLMist.createAgent()
|
|
238
|
+
* .withModel('sonnet')
|
|
239
|
+
* .withoutCompaction()
|
|
240
|
+
* .ask('...');
|
|
241
|
+
* ```
|
|
242
|
+
*/
|
|
243
|
+
interface CompactionConfig {
|
|
244
|
+
/**
|
|
245
|
+
* Enable or disable compaction.
|
|
246
|
+
* @default true
|
|
247
|
+
*/
|
|
248
|
+
enabled?: boolean;
|
|
249
|
+
/**
|
|
250
|
+
* The compaction strategy to use.
|
|
251
|
+
* - 'sliding-window': Fast, drops oldest turns (no LLM call)
|
|
252
|
+
* - 'summarization': LLM-based compression of old messages
|
|
253
|
+
* - 'hybrid': Summarizes old messages + keeps recent turns (recommended)
|
|
254
|
+
* - Or provide a custom CompactionStrategy instance
|
|
255
|
+
* @default 'hybrid'
|
|
256
|
+
*/
|
|
257
|
+
strategy?: "sliding-window" | "summarization" | "hybrid" | CompactionStrategy;
|
|
258
|
+
/**
|
|
259
|
+
* Context usage percentage that triggers compaction.
|
|
260
|
+
* When token count exceeds this percentage of the context window,
|
|
261
|
+
* compaction is performed before the next LLM call.
|
|
262
|
+
* @default 80
|
|
263
|
+
*/
|
|
264
|
+
triggerThresholdPercent?: number;
|
|
265
|
+
/**
|
|
266
|
+
* Target context usage percentage after compaction.
|
|
267
|
+
* The compaction will aim to reduce tokens to this percentage.
|
|
268
|
+
* @default 50
|
|
269
|
+
*/
|
|
270
|
+
targetPercent?: number;
|
|
271
|
+
/**
|
|
272
|
+
* Number of recent turns to preserve during compaction.
|
|
273
|
+
* A "turn" is a user message + assistant response pair.
|
|
274
|
+
* Recent turns are kept verbatim while older ones are summarized/dropped.
|
|
275
|
+
* @default 5
|
|
276
|
+
*/
|
|
277
|
+
preserveRecentTurns?: number;
|
|
278
|
+
/**
|
|
279
|
+
* Model to use for summarization.
|
|
280
|
+
* If not specified, uses the agent's model.
|
|
281
|
+
* @default undefined (uses agent's model)
|
|
282
|
+
*/
|
|
283
|
+
summarizationModel?: string;
|
|
284
|
+
/**
|
|
285
|
+
* Custom system prompt for summarization.
|
|
286
|
+
* If not specified, uses a default prompt optimized for context preservation.
|
|
287
|
+
*/
|
|
288
|
+
summarizationPrompt?: string;
|
|
289
|
+
/**
|
|
290
|
+
* Callback invoked when compaction occurs.
|
|
291
|
+
* Useful for logging or analytics.
|
|
292
|
+
*/
|
|
293
|
+
onCompaction?: (event: CompactionEvent) => void;
|
|
294
|
+
}
|
|
295
|
+
/**
|
|
296
|
+
* Default configuration values for compaction.
|
|
297
|
+
* Compaction is enabled by default with the hybrid strategy.
|
|
298
|
+
*/
|
|
299
|
+
declare const DEFAULT_COMPACTION_CONFIG: Required<Omit<CompactionConfig, "summarizationModel" | "summarizationPrompt" | "onCompaction">>;
|
|
300
|
+
/**
|
|
301
|
+
* Default prompt used for summarization strategy.
|
|
302
|
+
*/
|
|
303
|
+
declare const DEFAULT_SUMMARIZATION_PROMPT = "Summarize this conversation history concisely, preserving:\n1. Key decisions made and their rationale\n2. Important facts and data discovered\n3. Errors encountered and how they were resolved\n4. Current task context and goals\n\nFormat as a brief narrative paragraph, not bullet points.\nPrevious conversation:";
|
|
304
|
+
/**
|
|
305
|
+
* Resolved configuration with all defaults applied.
|
|
306
|
+
*/
|
|
307
|
+
interface ResolvedCompactionConfig {
|
|
308
|
+
enabled: boolean;
|
|
309
|
+
strategy: "sliding-window" | "summarization" | "hybrid";
|
|
310
|
+
triggerThresholdPercent: number;
|
|
311
|
+
targetPercent: number;
|
|
312
|
+
preserveRecentTurns: number;
|
|
313
|
+
summarizationModel?: string;
|
|
314
|
+
summarizationPrompt: string;
|
|
315
|
+
onCompaction?: (event: CompactionEvent) => void;
|
|
316
|
+
}
|
|
3
317
|
|
|
4
318
|
/**
|
|
5
319
|
* Example of gadget usage to help LLMs understand proper invocation.
|
|
@@ -44,6 +358,7 @@ interface ParsedGadgetCall {
|
|
|
44
358
|
parameters?: Record<string, unknown>;
|
|
45
359
|
parseError?: string;
|
|
46
360
|
}
|
|
361
|
+
|
|
47
362
|
type StreamEvent = {
|
|
48
363
|
type: "text";
|
|
49
364
|
content: string;
|
|
@@ -58,6 +373,9 @@ type StreamEvent = {
|
|
|
58
373
|
question: string;
|
|
59
374
|
gadgetName: string;
|
|
60
375
|
invocationId: string;
|
|
376
|
+
} | {
|
|
377
|
+
type: "compaction";
|
|
378
|
+
event: CompactionEvent;
|
|
61
379
|
};
|
|
62
380
|
|
|
63
381
|
type TextOnlyHandler = TextOnlyStrategy | TextOnlyGadgetConfig | TextOnlyCustomHandler;
|
|
@@ -193,10 +511,27 @@ interface PromptContext {
|
|
|
193
511
|
/** Names of all gadgets */
|
|
194
512
|
gadgetNames: string[];
|
|
195
513
|
}
|
|
514
|
+
/**
|
|
515
|
+
* Context provided to hint template functions for rendering dynamic hints.
|
|
516
|
+
*/
|
|
517
|
+
interface HintContext {
|
|
518
|
+
/** Current iteration (1-based for readability) */
|
|
519
|
+
iteration: number;
|
|
520
|
+
/** Maximum iterations allowed */
|
|
521
|
+
maxIterations: number;
|
|
522
|
+
/** Iterations remaining (maxIterations - iteration) */
|
|
523
|
+
remaining: number;
|
|
524
|
+
/** Number of gadget calls in the current response */
|
|
525
|
+
gadgetCallCount?: number;
|
|
526
|
+
}
|
|
196
527
|
/**
|
|
197
528
|
* Template that can be either a static string or a function that renders based on context.
|
|
198
529
|
*/
|
|
199
530
|
type PromptTemplate = string | ((context: PromptContext) => string);
|
|
531
|
+
/**
|
|
532
|
+
* Template for hints that can be either a static string or a function that renders based on hint context.
|
|
533
|
+
*/
|
|
534
|
+
type HintTemplate = string | ((context: HintContext) => string);
|
|
200
535
|
/**
|
|
201
536
|
* Configuration for customizing all prompts used internally by llmist.
|
|
202
537
|
*
|
|
@@ -244,11 +579,33 @@ interface PromptConfig {
|
|
|
244
579
|
* Should be a function that returns formatted example strings.
|
|
245
580
|
*/
|
|
246
581
|
customExamples?: (context: PromptContext) => string;
|
|
582
|
+
/**
|
|
583
|
+
* Hint shown when LLM uses only one gadget per response.
|
|
584
|
+
* Encourages parallel gadget usage for efficiency.
|
|
585
|
+
*/
|
|
586
|
+
parallelGadgetsHint?: HintTemplate;
|
|
587
|
+
/**
|
|
588
|
+
* Template for iteration progress hint.
|
|
589
|
+
* Informs the LLM about remaining iterations to help plan work.
|
|
590
|
+
*
|
|
591
|
+
* When using a string template, supports placeholders:
|
|
592
|
+
* - {iteration}: Current iteration (1-based)
|
|
593
|
+
* - {maxIterations}: Maximum iterations allowed
|
|
594
|
+
* - {remaining}: Iterations remaining
|
|
595
|
+
*/
|
|
596
|
+
iterationProgressHint?: HintTemplate;
|
|
247
597
|
}
|
|
598
|
+
/**
|
|
599
|
+
* Default hint templates used by llmist.
|
|
600
|
+
*/
|
|
601
|
+
declare const DEFAULT_HINTS: {
|
|
602
|
+
readonly parallelGadgetsHint: "Tip: You can call multiple gadgets in a single response for efficiency.";
|
|
603
|
+
readonly iterationProgressHint: "[Iteration {iteration}/{maxIterations}] Plan your actions accordingly.";
|
|
604
|
+
};
|
|
248
605
|
/**
|
|
249
606
|
* Default prompt templates used by llmist.
|
|
250
607
|
*/
|
|
251
|
-
declare const DEFAULT_PROMPTS: Required<Omit<PromptConfig, "rules" | "customExamples"> & {
|
|
608
|
+
declare const DEFAULT_PROMPTS: Required<Omit<PromptConfig, "rules" | "customExamples" | "parallelGadgetsHint" | "iterationProgressHint"> & {
|
|
252
609
|
rules: (context: PromptContext) => string[];
|
|
253
610
|
customExamples: null;
|
|
254
611
|
}>;
|
|
@@ -260,6 +617,16 @@ declare function resolvePromptTemplate(template: PromptTemplate | undefined, def
|
|
|
260
617
|
* Resolve rules template to an array of strings.
|
|
261
618
|
*/
|
|
262
619
|
declare function resolveRulesTemplate(rules: PromptConfig["rules"] | undefined, context: PromptContext): string[];
|
|
620
|
+
/**
|
|
621
|
+
* Resolve a hint template to a string using the given context.
|
|
622
|
+
* Supports both function templates and string templates with placeholders.
|
|
623
|
+
*
|
|
624
|
+
* @param template - The hint template to resolve
|
|
625
|
+
* @param defaultValue - Default value if template is undefined
|
|
626
|
+
* @param context - Context for rendering the template
|
|
627
|
+
* @returns The resolved hint string
|
|
628
|
+
*/
|
|
629
|
+
declare function resolveHintTemplate(template: HintTemplate | undefined, defaultValue: string, context: HintContext): string;
|
|
263
630
|
|
|
264
631
|
type LLMRole = "system" | "user" | "assistant";
|
|
265
632
|
interface LLMMessage {
|
|
@@ -301,82 +668,6 @@ declare class LLMMessageBuilder {
|
|
|
301
668
|
build(): LLMMessage[];
|
|
302
669
|
}
|
|
303
670
|
|
|
304
|
-
/**
|
|
305
|
-
* Model Catalog Types
|
|
306
|
-
*
|
|
307
|
-
* Type definitions for LLM model specifications including
|
|
308
|
-
* context windows, pricing, features, and capabilities.
|
|
309
|
-
*/
|
|
310
|
-
interface ModelPricing {
|
|
311
|
-
/** Price per 1 million input tokens in USD */
|
|
312
|
-
input: number;
|
|
313
|
-
/** Price per 1 million output tokens in USD */
|
|
314
|
-
output: number;
|
|
315
|
-
/** Price per 1 million cached input tokens in USD (if supported) */
|
|
316
|
-
cachedInput?: number;
|
|
317
|
-
/** Price per 1 million cache write tokens in USD (Anthropic: 1.25x input price) */
|
|
318
|
-
cacheWriteInput?: number;
|
|
319
|
-
}
|
|
320
|
-
interface ModelFeatures {
|
|
321
|
-
/** Supports streaming responses */
|
|
322
|
-
streaming: boolean;
|
|
323
|
-
/** Supports function/tool calling */
|
|
324
|
-
functionCalling: boolean;
|
|
325
|
-
/** Supports vision/image input */
|
|
326
|
-
vision: boolean;
|
|
327
|
-
/** Supports extended thinking/reasoning */
|
|
328
|
-
reasoning?: boolean;
|
|
329
|
-
/** Supports structured outputs */
|
|
330
|
-
structuredOutputs?: boolean;
|
|
331
|
-
/** Supports fine-tuning */
|
|
332
|
-
fineTuning?: boolean;
|
|
333
|
-
}
|
|
334
|
-
interface ModelSpec {
|
|
335
|
-
/** Provider identifier (e.g., 'openai', 'anthropic', 'gemini') */
|
|
336
|
-
provider: string;
|
|
337
|
-
/** Full model identifier used in API calls */
|
|
338
|
-
modelId: string;
|
|
339
|
-
/** Human-readable display name */
|
|
340
|
-
displayName: string;
|
|
341
|
-
/** Maximum context window size in tokens */
|
|
342
|
-
contextWindow: number;
|
|
343
|
-
/** Maximum output tokens per request */
|
|
344
|
-
maxOutputTokens: number;
|
|
345
|
-
/** Pricing per 1M tokens */
|
|
346
|
-
pricing: ModelPricing;
|
|
347
|
-
/** Training data knowledge cutoff date (YYYY-MM-DD or description) */
|
|
348
|
-
knowledgeCutoff: string;
|
|
349
|
-
/** Supported features and capabilities */
|
|
350
|
-
features: ModelFeatures;
|
|
351
|
-
/** Additional metadata */
|
|
352
|
-
metadata?: {
|
|
353
|
-
/** Model family/series */
|
|
354
|
-
family?: string;
|
|
355
|
-
/** Release date */
|
|
356
|
-
releaseDate?: string;
|
|
357
|
-
/** Deprecation date if applicable */
|
|
358
|
-
deprecationDate?: string;
|
|
359
|
-
/** Notes or special information */
|
|
360
|
-
notes?: string;
|
|
361
|
-
/** Whether manual temperature configuration is supported (defaults to true) */
|
|
362
|
-
supportsTemperature?: boolean;
|
|
363
|
-
};
|
|
364
|
-
}
|
|
365
|
-
interface ModelLimits {
|
|
366
|
-
contextWindow: number;
|
|
367
|
-
maxOutputTokens: number;
|
|
368
|
-
}
|
|
369
|
-
interface CostEstimate {
|
|
370
|
-
inputCost: number;
|
|
371
|
-
/** Cost for cached input tokens (already included in inputCost calculation) */
|
|
372
|
-
cachedInputCost: number;
|
|
373
|
-
/** Cost for cache creation tokens (already included in inputCost calculation, Anthropic only) */
|
|
374
|
-
cacheCreationCost: number;
|
|
375
|
-
outputCost: number;
|
|
376
|
-
totalCost: number;
|
|
377
|
-
currency: "USD";
|
|
378
|
-
}
|
|
379
|
-
|
|
380
671
|
interface LLMGenerationOptions {
|
|
381
672
|
model: string;
|
|
382
673
|
messages: LLMMessage[];
|
|
@@ -1213,6 +1504,22 @@ interface Observers {
|
|
|
1213
1504
|
onGadgetExecutionComplete?: (context: ObserveGadgetCompleteContext) => void | Promise<void>;
|
|
1214
1505
|
/** Called for each stream chunk */
|
|
1215
1506
|
onStreamChunk?: (context: ObserveChunkContext) => void | Promise<void>;
|
|
1507
|
+
/** Called when context compaction occurs */
|
|
1508
|
+
onCompaction?: (context: ObserveCompactionContext) => void | Promise<void>;
|
|
1509
|
+
}
|
|
1510
|
+
/**
|
|
1511
|
+
* Context provided when context compaction occurs.
|
|
1512
|
+
* Read-only observation point.
|
|
1513
|
+
*/
|
|
1514
|
+
interface ObserveCompactionContext {
|
|
1515
|
+
/** Agent iteration when compaction occurred */
|
|
1516
|
+
iteration: number;
|
|
1517
|
+
/** Details of the compaction event */
|
|
1518
|
+
event: CompactionEvent;
|
|
1519
|
+
/** Cumulative compaction statistics */
|
|
1520
|
+
stats: CompactionStats;
|
|
1521
|
+
/** Logger instance */
|
|
1522
|
+
logger: Logger<ILogObj>;
|
|
1216
1523
|
}
|
|
1217
1524
|
/**
|
|
1218
1525
|
* Context for chunk interception.
|
|
@@ -1311,6 +1618,8 @@ interface Interceptors {
|
|
|
1311
1618
|
*/
|
|
1312
1619
|
interface LLMCallControllerContext {
|
|
1313
1620
|
iteration: number;
|
|
1621
|
+
/** Maximum iterations configured for the agent */
|
|
1622
|
+
maxIterations: number;
|
|
1314
1623
|
options: LLMGenerationOptions;
|
|
1315
1624
|
logger: Logger<ILogObj>;
|
|
1316
1625
|
}
|
|
@@ -1329,12 +1638,16 @@ type BeforeLLMCallAction = {
|
|
|
1329
1638
|
*/
|
|
1330
1639
|
interface AfterLLMCallControllerContext {
|
|
1331
1640
|
iteration: number;
|
|
1641
|
+
/** Maximum iterations configured for the agent */
|
|
1642
|
+
maxIterations: number;
|
|
1332
1643
|
options: Readonly<LLMGenerationOptions>;
|
|
1333
1644
|
finishReason: string | null;
|
|
1334
1645
|
/** Token usage including cached token counts when available */
|
|
1335
1646
|
usage?: TokenUsage;
|
|
1336
1647
|
/** The final message (after interceptors) that will be added to history */
|
|
1337
1648
|
finalMessage: string;
|
|
1649
|
+
/** Number of gadget calls in the current response */
|
|
1650
|
+
gadgetCallCount: number;
|
|
1338
1651
|
logger: Logger<ILogObj>;
|
|
1339
1652
|
}
|
|
1340
1653
|
/**
|
|
@@ -1535,6 +1848,8 @@ interface AgentOptions {
|
|
|
1535
1848
|
gadgetOutputLimit?: boolean;
|
|
1536
1849
|
/** Max gadget output as % of model context window (default: 15) */
|
|
1537
1850
|
gadgetOutputLimitPercent?: number;
|
|
1851
|
+
/** Context compaction configuration (enabled by default) */
|
|
1852
|
+
compactionConfig?: CompactionConfig;
|
|
1538
1853
|
}
|
|
1539
1854
|
/**
|
|
1540
1855
|
* Agent: Lean orchestrator that delegates to StreamProcessor.
|
|
@@ -1574,6 +1889,7 @@ declare class Agent {
|
|
|
1574
1889
|
private readonly outputStore;
|
|
1575
1890
|
private readonly outputLimitEnabled;
|
|
1576
1891
|
private readonly outputLimitCharLimit;
|
|
1892
|
+
private readonly compactionManager?;
|
|
1577
1893
|
/**
|
|
1578
1894
|
* Creates a new Agent instance.
|
|
1579
1895
|
* @internal This constructor is private. Use LLMist.createAgent() or AgentBuilder instead.
|
|
@@ -1598,6 +1914,46 @@ declare class Agent {
|
|
|
1598
1914
|
* ```
|
|
1599
1915
|
*/
|
|
1600
1916
|
getRegistry(): GadgetRegistry;
|
|
1917
|
+
/**
|
|
1918
|
+
* Manually trigger context compaction.
|
|
1919
|
+
*
|
|
1920
|
+
* Forces compaction regardless of threshold. Useful for:
|
|
1921
|
+
* - Pre-emptive context management before expected long operations
|
|
1922
|
+
* - Testing compaction behavior
|
|
1923
|
+
*
|
|
1924
|
+
* @returns CompactionEvent if compaction was performed, null if not configured or no history
|
|
1925
|
+
*
|
|
1926
|
+
* @example
|
|
1927
|
+
* ```typescript
|
|
1928
|
+
* const agent = await LLMist.createAgent()
|
|
1929
|
+
* .withModel('sonnet')
|
|
1930
|
+
* .withCompaction()
|
|
1931
|
+
* .ask('...');
|
|
1932
|
+
*
|
|
1933
|
+
* // Manually compact before a long operation
|
|
1934
|
+
* const event = await agent.compact();
|
|
1935
|
+
* if (event) {
|
|
1936
|
+
* console.log(`Saved ${event.tokensBefore - event.tokensAfter} tokens`);
|
|
1937
|
+
* }
|
|
1938
|
+
* ```
|
|
1939
|
+
*/
|
|
1940
|
+
compact(): Promise<CompactionEvent | null>;
|
|
1941
|
+
/**
|
|
1942
|
+
* Get compaction statistics.
|
|
1943
|
+
*
|
|
1944
|
+
* @returns CompactionStats if compaction is enabled, null otherwise
|
|
1945
|
+
*
|
|
1946
|
+
* @example
|
|
1947
|
+
* ```typescript
|
|
1948
|
+
* const stats = agent.getCompactionStats();
|
|
1949
|
+
* if (stats) {
|
|
1950
|
+
* console.log(`Total compactions: ${stats.totalCompactions}`);
|
|
1951
|
+
* console.log(`Tokens saved: ${stats.totalTokensSaved}`);
|
|
1952
|
+
* console.log(`Current usage: ${stats.currentUsage.percent.toFixed(1)}%`);
|
|
1953
|
+
* }
|
|
1954
|
+
* ```
|
|
1955
|
+
*/
|
|
1956
|
+
getCompactionStats(): CompactionStats | null;
|
|
1601
1957
|
/**
|
|
1602
1958
|
* Run the agent loop.
|
|
1603
1959
|
* Clean, simple orchestration - all complexity is in StreamProcessor.
|
|
@@ -1701,6 +2057,7 @@ declare class AgentBuilder {
|
|
|
1701
2057
|
private defaultGadgetTimeoutMs?;
|
|
1702
2058
|
private gadgetOutputLimit?;
|
|
1703
2059
|
private gadgetOutputLimitPercent?;
|
|
2060
|
+
private compactionConfig?;
|
|
1704
2061
|
constructor(client?: LLMist);
|
|
1705
2062
|
/**
|
|
1706
2063
|
* Set the model to use.
|
|
@@ -2027,6 +2384,51 @@ declare class AgentBuilder {
|
|
|
2027
2384
|
* ```
|
|
2028
2385
|
*/
|
|
2029
2386
|
withGadgetOutputLimitPercent(percent: number): this;
|
|
2387
|
+
/**
|
|
2388
|
+
* Configure context compaction.
|
|
2389
|
+
*
|
|
2390
|
+
* Context compaction automatically manages conversation history to prevent
|
|
2391
|
+
* context window overflow in long-running agent conversations.
|
|
2392
|
+
*
|
|
2393
|
+
* @param config - Compaction configuration options
|
|
2394
|
+
* @returns This builder for chaining
|
|
2395
|
+
*
|
|
2396
|
+
* @example
|
|
2397
|
+
* ```typescript
|
|
2398
|
+
* // Custom thresholds
|
|
2399
|
+
* .withCompaction({
|
|
2400
|
+
* triggerThresholdPercent: 70,
|
|
2401
|
+
* targetPercent: 40,
|
|
2402
|
+
* preserveRecentTurns: 10,
|
|
2403
|
+
* })
|
|
2404
|
+
*
|
|
2405
|
+
* // Different strategy
|
|
2406
|
+
* .withCompaction({
|
|
2407
|
+
* strategy: 'sliding-window',
|
|
2408
|
+
* })
|
|
2409
|
+
*
|
|
2410
|
+
* // With callback
|
|
2411
|
+
* .withCompaction({
|
|
2412
|
+
* onCompaction: (event) => {
|
|
2413
|
+
* console.log(`Saved ${event.tokensBefore - event.tokensAfter} tokens`);
|
|
2414
|
+
* }
|
|
2415
|
+
* })
|
|
2416
|
+
* ```
|
|
2417
|
+
*/
|
|
2418
|
+
withCompaction(config: CompactionConfig): this;
|
|
2419
|
+
/**
|
|
2420
|
+
* Disable context compaction.
|
|
2421
|
+
*
|
|
2422
|
+
* By default, compaction is enabled. Use this method to explicitly disable it.
|
|
2423
|
+
*
|
|
2424
|
+
* @returns This builder for chaining
|
|
2425
|
+
*
|
|
2426
|
+
* @example
|
|
2427
|
+
* ```typescript
|
|
2428
|
+
* .withoutCompaction() // Disable automatic compaction
|
|
2429
|
+
* ```
|
|
2430
|
+
*/
|
|
2431
|
+
withoutCompaction(): this;
|
|
2030
2432
|
/**
|
|
2031
2433
|
* Add a synthetic gadget call to the conversation history.
|
|
2032
2434
|
*
|
|
@@ -2144,6 +2546,50 @@ declare class AgentBuilder {
|
|
|
2144
2546
|
build(): Agent;
|
|
2145
2547
|
}
|
|
2146
2548
|
|
|
2549
|
+
/**
|
|
2550
|
+
* Core interfaces for the Agent architecture.
|
|
2551
|
+
* These interfaces define the contracts for the composable services that make up the agent system.
|
|
2552
|
+
*/
|
|
2553
|
+
|
|
2554
|
+
/**
|
|
2555
|
+
* Manages the conversation history and message building.
|
|
2556
|
+
* This interface abstracts conversation state management from the orchestration logic.
|
|
2557
|
+
*/
|
|
2558
|
+
interface IConversationManager {
|
|
2559
|
+
/**
|
|
2560
|
+
* Adds a user message to the conversation.
|
|
2561
|
+
*/
|
|
2562
|
+
addUserMessage(content: string): void;
|
|
2563
|
+
/**
|
|
2564
|
+
* Adds an assistant message to the conversation.
|
|
2565
|
+
*/
|
|
2566
|
+
addAssistantMessage(content: string): void;
|
|
2567
|
+
/**
|
|
2568
|
+
* Adds a gadget call and its result to the conversation.
|
|
2569
|
+
*/
|
|
2570
|
+
addGadgetCall(gadgetName: string, parameters: Record<string, unknown>, result: string): void;
|
|
2571
|
+
/**
|
|
2572
|
+
* Gets the complete conversation history including base messages (system prompts, gadget instructions).
|
|
2573
|
+
*/
|
|
2574
|
+
getMessages(): LLMMessage[];
|
|
2575
|
+
/**
|
|
2576
|
+
* Gets only the conversation history messages (excludes base messages).
|
|
2577
|
+
* Used by compaction to determine what can be compressed.
|
|
2578
|
+
*/
|
|
2579
|
+
getHistoryMessages(): LLMMessage[];
|
|
2580
|
+
/**
|
|
2581
|
+
* Gets the base messages (system prompts, gadget instructions).
|
|
2582
|
+
* These are never compacted and always included at the start.
|
|
2583
|
+
*/
|
|
2584
|
+
getBaseMessages(): LLMMessage[];
|
|
2585
|
+
/**
|
|
2586
|
+
* Replaces the conversation history with new messages.
|
|
2587
|
+
* Used by compaction to update history after compression.
|
|
2588
|
+
* @param newHistory - The compacted history messages to replace with
|
|
2589
|
+
*/
|
|
2590
|
+
replaceHistory(newHistory: LLMMessage[]): void;
|
|
2591
|
+
}
|
|
2592
|
+
|
|
2147
2593
|
/**
|
|
2148
2594
|
* Context provided to matcher functions to determine if a mock should be used.
|
|
2149
2595
|
*/
|
|
@@ -2712,4 +3158,4 @@ declare function createTextMockStream(text: string, options?: {
|
|
|
2712
3158
|
usage?: MockResponse["usage"];
|
|
2713
3159
|
}): LLMStream;
|
|
2714
3160
|
|
|
2715
|
-
export { type
|
|
3161
|
+
export { type BeforeGadgetExecutionAction as $, type AgentHooks as A, BaseGadget as B, type CompactionStrategy as C, type ModelDescriptor as D, type ModelSpec as E, type LLMGenerationOptions as F, GadgetRegistry as G, type HintTemplate as H, type IConversationManager as I, type HistoryMessage as J, AgentBuilder as K, type LLMStream as L, MockProviderAdapter as M, type EventHandlers as N, collectEvents as O, type ParsedGadgetCall as P, collectText as Q, type ResolvedCompactionConfig as R, type StreamEvent as S, type TokenUsage as T, runWithHandlers as U, type AfterGadgetExecutionAction as V, type AfterGadgetExecutionControllerContext as W, type AfterLLMCallAction as X, type AfterLLMCallControllerContext as Y, type AfterLLMErrorAction as Z, type AgentOptions as _, type LLMStreamChunk as a, type BeforeLLMCallAction as a0, type ChunkInterceptorContext as a1, type Controllers as a2, type GadgetExecutionControllerContext as a3, type GadgetParameterInterceptorContext as a4, type GadgetResultInterceptorContext as a5, type Interceptors as a6, type LLMCallControllerContext as a7, type LLMErrorControllerContext as a8, type MessageInterceptorContext as a9, resolveHintTemplate as aA, resolvePromptTemplate as aB, resolveRulesTemplate as aC, type QuickOptions as aD, complete as aE, stream as aF, type GadgetClass as aG, type GadgetOrClass as aH, type TextOnlyAction as aI, type TextOnlyContext as aJ, type TextOnlyCustomHandler as aK, type TextOnlyGadgetConfig as aL, type TextOnlyHandler as aM, type TextOnlyStrategy as aN, type ObserveChunkContext as aa, type ObserveGadgetCompleteContext as ab, type ObserveGadgetStartContext as ac, type ObserveLLMCallContext as ad, type ObserveLLMCompleteContext as ae, type ObserveLLMErrorContext as af, type Observers as ag, type MessageTurn as ah, type ObserveCompactionContext as ai, DEFAULT_COMPACTION_CONFIG as aj, DEFAULT_SUMMARIZATION_PROMPT as ak, type LLMistOptions as al, type LLMRole as am, LLMMessageBuilder as an, type CostEstimate as ao, type ModelFeatures as ap, type ModelLimits as aq, type ModelPricing as ar, type ProviderIdentifier as as, ModelIdentifierParser as at, type HintContext as au, type PromptConfig as av, type PromptContext as aw, type PromptTemplate as ax, DEFAULT_HINTS as ay, DEFAULT_PROMPTS as az, type LLMMessage as b, createMockAdapter as c, MockBuilder as d, createMockClient as e, MockManager as f, getMockManager as g, createMockStream as h, createTextMockStream as i, type MockMatcher as j, type MockMatcherContext as k, type MockOptions as l, mockLLM as m, type MockRegistration as n, type MockResponse as o, type MockStats as p, ModelRegistry as q, type CompactionContext as r, type CompactionResult as s, LLMist as t, type CompactionConfig as u, type CompactionEvent as v, type CompactionStats as w, type GadgetExample as x, type GadgetExecutionResult as y, type ProviderAdapter as z };
|