@prometheus-ai/agent-core 0.5.3 → 0.5.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/types/agent-loop.d.ts +7 -0
- package/dist/types/agent.d.ts +41 -13
- package/dist/types/compaction/branch-summarization.d.ts +3 -3
- package/dist/types/compaction/compaction.d.ts +11 -9
- package/dist/types/compaction/messages.d.ts +14 -2
- package/dist/types/compaction/openai.d.ts +18 -3
- package/dist/types/compaction/pruning.d.ts +55 -0
- package/dist/types/compaction/shake.d.ts +3 -1
- package/dist/types/compaction/utils.d.ts +18 -2
- package/dist/types/proxy.d.ts +4 -3
- package/dist/types/telemetry.d.ts +59 -57
- package/dist/types/types.d.ts +60 -16
- package/package.json +6 -4
- package/src/agent-loop.ts +660 -181
- package/src/agent.ts +103 -30
- package/src/compaction/branch-summarization.ts +8 -7
- package/src/compaction/compaction.ts +69 -34
- package/src/compaction/messages.ts +78 -64
- package/src/compaction/openai.ts +88 -74
- package/src/compaction/prompts/branch-summary.md +1 -1
- package/src/compaction/prompts/compaction-summary-context.md +1 -1
- package/src/compaction/prompts/compaction-summary.md +2 -2
- package/src/compaction/prompts/compaction-update-summary.md +3 -3
- package/src/compaction/prompts/file-operations.md +3 -8
- package/src/compaction/prompts/summarization-system.md +1 -1
- package/src/compaction/pruning.ts +240 -8
- package/src/compaction/shake.ts +7 -3
- package/src/compaction/utils.ts +97 -19
- package/src/proxy.ts +13 -7
- package/src/telemetry.ts +126 -113
- package/src/types.ts +65 -16
|
package/dist/types/agent-loop.d.ts
CHANGED
|
@@ -53,3 +53,10 @@ export declare function agentLoopContinueDetailed(context: AgentContext, config:
|
|
|
53
53
|
};
|
|
54
54
|
export declare const INTENT_FIELD = "_i";
|
|
55
55
|
export declare function normalizeTools(tools: AgentContext["tools"], injectIntent: boolean): Context["tools"];
|
|
56
|
+
/** Resolve the human-readable reason an abort carried. A caller that aborts via
|
|
57
|
+
* `AbortController.abort(reason)` with a string or a non-`AbortError` `Error`
|
|
58
|
+
* (e.g. the coding agent's user-interrupt label) gets that text surfaced on the
|
|
59
|
+
* synthesized assistant message's `errorMessage`; a bare `abort()` (whose
|
|
60
|
+
* `signal.reason` is the default `AbortError` `DOMException`) falls back to the
|
|
61
|
+
* generic sentinel that downstream renderers treat as "no specific reason". */
|
|
62
|
+
export declare function abortReasonText(signal: AbortSignal | undefined): string;
|
package/dist/types/agent.d.ts
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
|
-
import { type AssistantMessage, type AssistantMessageEvent, type CursorExecHandlers, type CursorToolResultHandler, type Effort, type ImageContent, type Message, type Model, type ProviderSessionState, type ServiceTier, type SimpleStreamOptions, type ThinkingBudgets, type ToolChoice } from "@prometheus-ai/ai";
|
|
1
|
+
import { type ApiKeyResolveContext, type AssistantMessage, type AssistantMessageEvent, type Context, type CursorExecHandlers, type CursorToolResultHandler, type Effort, type ImageContent, type Message, type Model, type ProviderSessionState, type ServiceTier, type SimpleStreamOptions, type ThinkingBudgets, type ToolChoice } from "@prometheus-ai/ai";
|
|
2
2
|
import type { AppendOnlyContextManager } from "./append-only-context";
|
|
3
3
|
import type { HarmonyAuditEvent } from "./harmony-leak";
|
|
4
|
-
import type { AgentEvent, AgentLoopConfig, AgentMessage, AgentState, AgentTool, AgentToolContext, StreamFn, ToolCallContext } from "./types";
|
|
4
|
+
import type { AgentEvent, AgentLoopConfig, AgentMessage, AgentState, AgentTool, AgentToolContext, AsideMessage, StreamFn, ToolCallContext } from "./types";
|
|
5
5
|
export declare class AgentBusyError extends Error {
|
|
6
6
|
constructor(message?: string);
|
|
7
7
|
}
|
|
@@ -17,6 +17,11 @@ export interface AgentOptions {
|
|
|
17
17
|
* Use for context pruning, injecting external context, etc.
|
|
18
18
|
*/
|
|
19
19
|
transformContext?: (messages: AgentMessage[], signal?: AbortSignal) => Promise<AgentMessage[]>;
|
|
20
|
+
/**
|
|
21
|
+
* Optional transform applied after provider context assembly and before
|
|
22
|
+
* telemetry capture/provider send.
|
|
23
|
+
*/
|
|
24
|
+
transformProviderContext?: (context: Context, model: Model) => Context;
|
|
20
25
|
/**
|
|
21
26
|
* Steering mode: "all" = send all steering messages at once, "one-at-a-time" = one per turn
|
|
22
27
|
*/
|
|
@@ -31,11 +36,6 @@ export interface AgentOptions {
|
|
|
31
36
|
* - "wait": defer steering until the current turn completes
|
|
32
37
|
*/
|
|
33
38
|
interruptMode?: "immediate" | "wait";
|
|
34
|
-
/**
|
|
35
|
-
* Maximum completed tool calls to accept from one streamed assistant turn before
|
|
36
|
-
* executing the batch. Undefined disables batching.
|
|
37
|
-
*/
|
|
38
|
-
maxToolCallsPerTurn?: number;
|
|
39
39
|
/**
|
|
40
40
|
* API format for Kimi Code provider: "openai" or "anthropic" (default: "anthropic")
|
|
41
41
|
*/
|
|
@@ -51,6 +51,11 @@ export interface AgentOptions {
|
|
|
51
51
|
* Used by providers that support session-based caching (e.g., OpenAI Codex).
|
|
52
52
|
*/
|
|
53
53
|
sessionId?: string;
|
|
54
|
+
/**
|
|
55
|
+
* Optional prompt cache key forwarded to LLM providers.
|
|
56
|
+
* When omitted, providers may fall back to sessionId.
|
|
57
|
+
*/
|
|
58
|
+
promptCacheKey?: string;
|
|
54
59
|
/**
|
|
55
60
|
* Shared provider state map for session-scoped transport/session caches.
|
|
56
61
|
*/
|
|
@@ -59,7 +64,7 @@ export interface AgentOptions {
|
|
|
59
64
|
* Resolves an API key dynamically for each LLM call.
|
|
60
65
|
* Useful for expiring tokens (e.g., GitHub Copilot OAuth).
|
|
61
66
|
*/
|
|
62
|
-
getApiKey?: (provider: string) => Promise<string | undefined> | string | undefined;
|
|
67
|
+
getApiKey?: (provider: string, ctx?: ApiKeyResolveContext) => Promise<string | undefined> | string | undefined;
|
|
63
68
|
/**
|
|
64
69
|
* Inspect or replace provider payloads before they are sent.
|
|
65
70
|
*/
|
|
@@ -159,7 +164,7 @@ export interface AgentPromptOptions {
|
|
|
159
164
|
export declare class Agent {
|
|
160
165
|
#private;
|
|
161
166
|
streamFn: StreamFn;
|
|
162
|
-
getApiKey?: (provider: string) => Promise<string | undefined> | string | undefined;
|
|
167
|
+
getApiKey?: (provider: string, ctx?: ApiKeyResolveContext) => Promise<string | undefined> | string | undefined;
|
|
163
168
|
/**
|
|
164
169
|
* Hook invoked after tool arguments are validated and before execution.
|
|
165
170
|
* Reassign at any time to swap the implementation (e.g. on extension reload).
|
|
@@ -180,6 +185,14 @@ export declare class Agent {
|
|
|
180
185
|
* Call this when switching sessions (new session, branch, resume).
|
|
181
186
|
*/
|
|
182
187
|
set sessionId(value: string | undefined);
|
|
188
|
+
/**
|
|
189
|
+
* Get the prompt cache key forwarded to providers.
|
|
190
|
+
*/
|
|
191
|
+
get promptCacheKey(): string | undefined;
|
|
192
|
+
/**
|
|
193
|
+
* Set the prompt cache key forwarded to providers.
|
|
194
|
+
*/
|
|
195
|
+
set promptCacheKey(value: string | undefined);
|
|
183
196
|
/**
|
|
184
197
|
* Static metadata forwarded to every API request when no resolver is installed
|
|
185
198
|
* (e.g. `metadata.user_id` for Anthropic session attribution). Setting this
|
|
@@ -268,8 +281,6 @@ export declare class Agent {
|
|
|
268
281
|
* Set to 0 to disable the cap.
|
|
269
282
|
*/
|
|
270
283
|
set maxRetryDelayMs(value: number | undefined);
|
|
271
|
-
get maxToolCallsPerTurn(): number | undefined;
|
|
272
|
-
set maxToolCallsPerTurn(value: number | undefined);
|
|
273
284
|
get state(): AgentState;
|
|
274
285
|
get appendOnlyContext(): AppendOnlyContextManager | undefined;
|
|
275
286
|
setAppendOnlyContext(manager?: AppendOnlyContextManager): void;
|
|
@@ -278,10 +289,17 @@ export declare class Agent {
|
|
|
278
289
|
setRawSseEventInterceptor(fn: SimpleStreamOptions["onSseEvent"] | undefined): void;
|
|
279
290
|
setAssistantMessageEventInterceptor(fn: ((message: AssistantMessage, event: AssistantMessageEvent) => void) | undefined): void;
|
|
280
291
|
setOnBeforeYield(fn: (() => Promise<void> | void) | undefined): void;
|
|
292
|
+
/**
|
|
293
|
+
* Provide a source of non-interrupting "aside" messages (e.g. background-job
|
|
294
|
+
* completions, late LSP diagnostics) drained at each step boundary. Never
|
|
295
|
+
* aborts in-flight tools. See `AgentLoopConfig.getAsideMessages`.
|
|
296
|
+
*/
|
|
297
|
+
setAsideMessageProvider(fn: (() => AsideMessage[] | Promise<AsideMessage[]>) | undefined): void;
|
|
281
298
|
emitExternalEvent(event: AgentEvent): void;
|
|
282
|
-
setSystemPrompt(v: string[]): void;
|
|
299
|
+
setSystemPrompt(v: string[] | string): void;
|
|
283
300
|
setModel(m: Model): void;
|
|
284
301
|
setThinkingLevel(l: Effort | undefined): void;
|
|
302
|
+
setDisableReasoning(disabled: boolean): void;
|
|
285
303
|
setSteeringMode(mode: "all" | "one-at-a-time"): void;
|
|
286
304
|
getSteeringMode(): "all" | "one-at-a-time";
|
|
287
305
|
setFollowUpMode(mode: "all" | "one-at-a-time"): void;
|
|
@@ -290,6 +308,7 @@ export declare class Agent {
|
|
|
290
308
|
getInterruptMode(): "immediate" | "wait";
|
|
291
309
|
setTools(t: AgentTool<any>[]): void;
|
|
292
310
|
replaceMessages(ms: AgentMessage[]): void;
|
|
311
|
+
replaceQueues(steering: AgentMessage[], followUp: AgentMessage[]): void;
|
|
293
312
|
appendMessage(m: AgentMessage): void;
|
|
294
313
|
popMessage(): AgentMessage | undefined;
|
|
295
314
|
/**
|
|
@@ -306,6 +325,15 @@ export declare class Agent {
|
|
|
306
325
|
clearFollowUpQueue(): void;
|
|
307
326
|
clearAllQueues(): void;
|
|
308
327
|
hasQueuedMessages(): boolean;
|
|
328
|
+
/** Non-consuming view of the pending steering queue (insertion order, newest
|
|
329
|
+
* last). The session layer derives its queued-message display/count from
|
|
330
|
+
* this live view instead of a mirror, so the agent-core queue stays the
|
|
331
|
+
* single source of truth. */
|
|
332
|
+
peekSteeringQueue(): readonly AgentMessage[];
|
|
333
|
+
/** Non-consuming view of the pending follow-up queue. See
|
|
334
|
+
* {@link peekSteeringQueue}. */
|
|
335
|
+
peekFollowUpQueue(): readonly AgentMessage[];
|
|
336
|
+
get isAborting(): boolean;
|
|
309
337
|
/**
|
|
310
338
|
* Remove and return the last steering message from the queue (LIFO).
|
|
311
339
|
* Used by dequeue keybinding.
|
|
@@ -317,7 +345,7 @@ export declare class Agent {
|
|
|
317
345
|
*/
|
|
318
346
|
popLastFollowUp(): AgentMessage | undefined;
|
|
319
347
|
clearMessages(): void;
|
|
320
|
-
abort(): void;
|
|
348
|
+
abort(reason?: unknown): void;
|
|
321
349
|
waitForIdle(): Promise<void>;
|
|
322
350
|
reset(): void;
|
|
323
351
|
/** Send a prompt with an AgentMessage */
|
|
package/dist/types/compaction/branch-summarization.d.ts
CHANGED
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
* When navigating to a different point in the session tree, this generates
|
|
5
5
|
* a summary of the branch being left so context isn't lost.
|
|
6
6
|
*/
|
|
7
|
-
import type { Model } from "@prometheus-ai/ai";
|
|
7
|
+
import type { ApiKey, Model } from "@prometheus-ai/ai";
|
|
8
8
|
import { type AgentTelemetry } from "../telemetry";
|
|
9
9
|
import type { AgentMessage } from "../types";
|
|
10
10
|
import type { ReadonlySessionManager, SessionEntry } from "./entries";
|
|
@@ -41,7 +41,7 @@ export interface GenerateBranchSummaryOptions {
|
|
|
41
41
|
/** Model to use for summarization */
|
|
42
42
|
model: Model;
|
|
43
43
|
/** API key for the model */
|
|
44
|
-
apiKey:
|
|
44
|
+
apiKey: ApiKey;
|
|
45
45
|
/** Abort signal for cancellation */
|
|
46
46
|
signal: AbortSignal;
|
|
47
47
|
/** Optional custom instructions for summarization */
|
|
@@ -54,7 +54,7 @@ export interface GenerateBranchSummaryOptions {
|
|
|
54
54
|
convertToLlm?: ConvertToLlm;
|
|
55
55
|
/**
|
|
56
56
|
* Optional telemetry handle. When provided, the branch summary LLM call is
|
|
57
|
-
* wrapped in an OTEL chat span tagged with `
|
|
57
|
+
* wrapped in an OTEL chat span tagged with `pi.gen_ai.oneshot.kind = "branch_summary"`.
|
|
58
58
|
*/
|
|
59
59
|
telemetry?: AgentTelemetry;
|
|
60
60
|
}
|
|
package/dist/types/compaction/compaction.d.ts
CHANGED
|
@@ -4,10 +4,10 @@
|
|
|
4
4
|
* Pure functions for compaction logic. The session manager handles I/O,
|
|
5
5
|
* and after compaction the session is reloaded.
|
|
6
6
|
*/
|
|
7
|
-
import { type MessageAttribution, type Model, type Usage } from "@prometheus-ai/ai";
|
|
7
|
+
import { type ApiKey, type FetchImpl, type MessageAttribution, type Model, type Tool, type Usage } from "@prometheus-ai/ai";
|
|
8
8
|
import { type AgentTelemetry } from "../telemetry";
|
|
9
9
|
import { ThinkingLevel } from "../thinking";
|
|
10
|
-
import type { AgentMessage
|
|
10
|
+
import type { AgentMessage } from "../types";
|
|
11
11
|
import type { SessionEntry } from "./entries";
|
|
12
12
|
import { type ConvertToLlm } from "./messages";
|
|
13
13
|
import { type FileOperations } from "./utils";
|
|
@@ -30,7 +30,7 @@ export interface CompactionResult<T = unknown> {
|
|
|
30
30
|
}
|
|
31
31
|
export interface CompactionSettings {
|
|
32
32
|
enabled: boolean;
|
|
33
|
-
strategy?: "context-full" | "handoff" | "shake" | "off";
|
|
33
|
+
strategy?: "context-full" | "handoff" | "shake" | "snapcompact" | "off";
|
|
34
34
|
thresholdPercent?: number;
|
|
35
35
|
thresholdTokens?: number;
|
|
36
36
|
reserveTokens: number;
|
|
@@ -112,7 +112,7 @@ export interface SummaryOptions {
|
|
|
112
112
|
/**
|
|
113
113
|
* Optional telemetry handle. When provided, every LLM call emitted during
|
|
114
114
|
* compaction is wrapped in an OTEL chat span tagged with
|
|
115
|
-
* `
|
|
115
|
+
* `pi.gen_ai.oneshot.kind` (`compaction_summary`, `compaction_short_summary`,
|
|
116
116
|
* or `compaction_turn_prefix`). `undefined` keeps the call paths zero-cost.
|
|
117
117
|
*/
|
|
118
118
|
telemetry?: AgentTelemetry;
|
|
@@ -125,20 +125,22 @@ export interface SummaryOptions {
|
|
|
125
125
|
* `resolveCompactionEffort` for the conversion contract.
|
|
126
126
|
*/
|
|
127
127
|
thinkingLevel?: ThinkingLevel;
|
|
128
|
+
/** Optional fetch implementation threaded into remote compaction calls. */
|
|
129
|
+
fetch?: FetchImpl;
|
|
128
130
|
}
|
|
129
|
-
export declare function generateSummary(currentMessages: AgentMessage[], model: Model, reserveTokens: number, apiKey:
|
|
131
|
+
export declare function generateSummary(currentMessages: AgentMessage[], model: Model, reserveTokens: number, apiKey: ApiKey, signal?: AbortSignal, customInstructions?: string, previousSummary?: string, options?: SummaryOptions): Promise<string>;
|
|
130
132
|
export interface HandoffOptions {
|
|
131
133
|
/** Live agent system prompt — passed verbatim so providers hit the cached prefix. */
|
|
132
134
|
systemPrompt: string[];
|
|
133
135
|
/** Live agent tool list — same purpose. Forced to `toolChoice: "none"`. */
|
|
134
|
-
tools?:
|
|
136
|
+
tools?: Tool[];
|
|
135
137
|
customInstructions?: string;
|
|
136
138
|
convertToLlm?: ConvertToLlm;
|
|
137
139
|
initiatorOverride?: MessageAttribution;
|
|
138
140
|
metadata?: Record<string, unknown>;
|
|
139
141
|
/**
|
|
140
142
|
* Optional telemetry handle. When provided, the handoff LLM call is
|
|
141
|
-
* wrapped in an OTEL chat span tagged with `
|
|
143
|
+
* wrapped in an OTEL chat span tagged with `pi.gen_ai.oneshot.kind = "handoff"`.
|
|
142
144
|
*/
|
|
143
145
|
telemetry?: AgentTelemetry;
|
|
144
146
|
/**
|
|
@@ -150,7 +152,7 @@ export interface HandoffOptions {
|
|
|
150
152
|
thinkingLevel?: ThinkingLevel;
|
|
151
153
|
}
|
|
152
154
|
export declare function renderHandoffPrompt(customInstructions?: string): string;
|
|
153
|
-
export declare function generateHandoff(messages: AgentMessage[], model: Model, apiKey:
|
|
155
|
+
export declare function generateHandoff(messages: AgentMessage[], model: Model, apiKey: ApiKey, options: HandoffOptions, signal?: AbortSignal): Promise<string>;
|
|
154
156
|
export interface CompactionPreparation {
|
|
155
157
|
/** UUID of first entry to keep */
|
|
156
158
|
firstKeptEntryId: string;
|
|
@@ -180,4 +182,4 @@ export declare function prepareCompaction(pathEntries: SessionEntry[], settings:
|
|
|
180
182
|
* @param preparation - Pre-calculated preparation from prepareCompaction()
|
|
181
183
|
* @param customInstructions - Optional custom focus for the summary
|
|
182
184
|
*/
|
|
183
|
-
export declare function compact(preparation: CompactionPreparation, model: Model, apiKey:
|
|
185
|
+
export declare function compact(preparation: CompactionPreparation, model: Model, apiKey: ApiKey, customInstructions?: string, signal?: AbortSignal, options?: SummaryOptions): Promise<CompactionResult>;
|
|
package/dist/types/compaction/messages.d.ts
CHANGED
|
@@ -33,6 +33,8 @@ export interface CompactionSummaryMessage {
|
|
|
33
33
|
shortSummary?: string;
|
|
34
34
|
tokensBefore: number;
|
|
35
35
|
providerPayload?: ProviderPayload;
|
|
36
|
+
/** Snapcompact frames archived by this compaction; appended as image blocks after the summary text. */
|
|
37
|
+
images?: ImageContent[];
|
|
36
38
|
timestamp: number;
|
|
37
39
|
}
|
|
38
40
|
export type CoreCompactionMessage = CustomMessage | HookMessage | BranchSummaryMessage | CompactionSummaryMessage;
|
|
@@ -48,8 +50,19 @@ export type ConvertToLlm = (messages: AgentMessage[]) => Message[];
|
|
|
48
50
|
export declare function renderBranchSummaryContext(summary: string): string;
|
|
49
51
|
export declare function renderCompactionSummaryContext(summary: string): string;
|
|
50
52
|
export declare function createBranchSummaryMessage(summary: string, fromId: string, timestamp: string): BranchSummaryMessage;
|
|
51
|
-
export declare function createCompactionSummaryMessage(summary: string, tokensBefore: number, timestamp: string, shortSummary?: string, providerPayload?: ProviderPayload): CompactionSummaryMessage;
|
|
53
|
+
export declare function createCompactionSummaryMessage(summary: string, tokensBefore: number, timestamp: string, shortSummary?: string, providerPayload?: ProviderPayload, images?: ImageContent[]): CompactionSummaryMessage;
|
|
52
54
|
export declare function createCustomMessage(customType: string, content: string | (TextContent | ImageContent)[], display: boolean, details: unknown | undefined, timestamp: string, attribution?: MessageAttribution): CustomMessage;
|
|
55
|
+
/**
|
|
56
|
+
* Transform a single core-domain agent message to its LLM form; `undefined`
|
|
57
|
+
* drops it from the provider request.
|
|
58
|
+
*
|
|
59
|
+
* Single source of truth for the core roles (user/developer/assistant/
|
|
60
|
+
* toolResult) and the compaction messages owned by this package. Embedders
|
|
61
|
+
* with their own app messages (e.g. the coding agent) handle their custom
|
|
62
|
+
* roles and delegate every core role here — duplicating these cases is how
|
|
63
|
+
* snapcompact frames once silently fell off the provider request.
|
|
64
|
+
*/
|
|
65
|
+
export declare function convertMessageToLlm(message: AgentMessage): Message | undefined;
|
|
53
66
|
/**
|
|
54
67
|
* Default compaction-domain transformer.
|
|
55
68
|
*
|
|
@@ -58,4 +71,3 @@ export declare function createCustomMessage(customType: string, content: string
|
|
|
58
71
|
* core LLM roles and the compaction messages owned by this package.
|
|
59
72
|
*/
|
|
60
73
|
export declare function defaultConvertToLlm(messages: AgentMessage[]): Message[];
|
|
61
|
-
export declare const convertToLlm: typeof defaultConvertToLlm;
|
|
package/dist/types/compaction/openai.d.ts
CHANGED
|
@@ -11,8 +11,17 @@
|
|
|
11
11
|
* summarization endpoints that accept `{ systemPrompt, prompt }` and reply
|
|
12
12
|
* with `{ summary, shortSummary? }`.
|
|
13
13
|
*/
|
|
14
|
-
import type { Message, Model } from "@prometheus-ai/ai/types";
|
|
14
|
+
import type { FetchImpl, Message, Model } from "@prometheus-ai/ai/types";
|
|
15
15
|
export declare const OPENAI_REMOTE_COMPACTION_PRESERVE_KEY = "openaiRemoteCompaction";
|
|
16
|
+
/**
|
|
17
|
+
* Hard ceiling on remote compaction HTTP requests. Unlike every provider
|
|
18
|
+
* stream (guarded by first-event/idle watchdogs in pi-ai), these are raw
|
|
19
|
+
* fetches awaiting one non-streamed JSON body — a connection silently dropped
|
|
20
|
+
* by a middlebox would otherwise hang the whole compaction pipeline forever
|
|
21
|
+
* (frozen "Auto context-full maintenance…" spinner, manual /compact queueing
|
|
22
|
+
* behind it). On timeout the caller falls back to local summarization.
|
|
23
|
+
*/
|
|
24
|
+
export declare const REMOTE_COMPACTION_TIMEOUT_MS = 180000;
|
|
16
25
|
export type OpenAiRemoteCompactionItem = {
|
|
17
26
|
type: "compaction" | "compaction_summary";
|
|
18
27
|
encrypted_content?: string;
|
|
@@ -54,5 +63,11 @@ export declare function withOpenAiRemoteCompactionPreserveData(preserveData: Rec
|
|
|
54
63
|
* encrypted reasoning we want to preserve.
|
|
55
64
|
*/
|
|
56
65
|
export declare function buildOpenAiNativeHistory(messages: Message[], model: Model, previousReplacementHistory?: Array<Record<string, unknown>>): Array<Record<string, unknown>>;
|
|
57
|
-
export declare function requestOpenAiRemoteCompaction(model: Model, apiKey: string, compactInput: Array<Record<string, unknown>>, instructions: string, signal?: AbortSignal
|
|
58
|
-
|
|
66
|
+
export declare function requestOpenAiRemoteCompaction(model: Model, apiKey: string, compactInput: Array<Record<string, unknown>>, instructions: string, signal?: AbortSignal, opts?: {
|
|
67
|
+
fetch?: FetchImpl;
|
|
68
|
+
timeoutMs?: number;
|
|
69
|
+
}): Promise<OpenAiRemoteCompactionResponse>;
|
|
70
|
+
export declare function requestRemoteCompaction(endpoint: string, request: RemoteCompactionRequest, signal?: AbortSignal, opts?: {
|
|
71
|
+
fetch?: FetchImpl;
|
|
72
|
+
timeoutMs?: number;
|
|
73
|
+
}): Promise<RemoteCompactionResponse>;
|
|
package/dist/types/compaction/pruning.d.ts
CHANGED
|
@@ -10,10 +10,65 @@ export interface PruneConfig {
|
|
|
10
10
|
minimumSavings: number;
|
|
11
11
|
/** Tool-result protection matchers. String entries protect every result from that tool; predicates may inspect the paired tool call. */
|
|
12
12
|
protectedTools: ProtectedToolMatcher[];
|
|
13
|
+
/**
|
|
14
|
+
* Optional supersede key function (see {@link SupersedePruneConfig.supersedeKey}).
|
|
15
|
+
* When provided, superseded tool results are pruned first — even inside the
|
|
16
|
+
* `protectTokens` window — before age-based victims. Absent, behavior is
|
|
17
|
+
* unchanged.
|
|
18
|
+
*/
|
|
19
|
+
supersedeKey?: SupersedeKeyFn;
|
|
20
|
+
/** Useless-flagged results bypass the protect window (see {@link USELESS_NOTICE}). Default true. */
|
|
21
|
+
pruneUseless?: boolean;
|
|
13
22
|
}
|
|
14
23
|
export declare const DEFAULT_PRUNE_CONFIG: PruneConfig;
|
|
15
24
|
export interface PruneResult {
|
|
16
25
|
prunedCount: number;
|
|
17
26
|
tokensSaved: number;
|
|
18
27
|
}
|
|
28
|
+
/** Exact placeholder written over a superseded tool result. */
|
|
29
|
+
export declare const SUPERSEDED_NOTICE = "[Superseded by a newer read of this file]";
|
|
30
|
+
/** Exact placeholder written over an elided useless tool result. */
|
|
31
|
+
export declare const USELESS_NOTICE = "[Uneventful result elided]";
|
|
32
|
+
/**
|
|
33
|
+
* Maps a tool call to a supersede key. Results sharing a key form a group in
|
|
34
|
+
* which every result except the newest is a supersede candidate. A key `K`
|
|
35
|
+
* additionally supersedes keys with prefix `K + "\u0000"` (selector-free read
|
|
36
|
+
* supersedes selector-carrying reads of the same base path). Return
|
|
37
|
+
* `undefined` to exempt a call from supersede grouping.
|
|
38
|
+
*/
|
|
39
|
+
export type SupersedeKeyFn = (toolName: string, args: Record<string, unknown>) => string | undefined;
|
|
40
|
+
export interface SupersedePruneConfig {
|
|
41
|
+
/** Supersede key function; results sharing a key supersede older ones. */
|
|
42
|
+
supersedeKey?: SupersedeKeyFn;
|
|
43
|
+
/** Also prune results flagged useless by their tool. Default false. */
|
|
44
|
+
pruneUseless?: boolean;
|
|
45
|
+
/** Prune a candidate now when all messages after it total at most this many estimated tokens. Default 8 000. */
|
|
46
|
+
suffixTokenLimit?: number;
|
|
47
|
+
/** Prune all candidates when the last message is at least this old (prompt cache is cold anyway). Default 30 min. */
|
|
48
|
+
idleFlushMs?: number;
|
|
49
|
+
/** Clock override for tests. */
|
|
50
|
+
now?: number;
|
|
51
|
+
/** Tool-result protection matchers (same contract as {@link PruneConfig.protectedTools}). */
|
|
52
|
+
protectedTools: ProtectedToolMatcher[];
|
|
53
|
+
}
|
|
54
|
+
/**
|
|
55
|
+
* Prune superseded tool results (e.g. stale `read` outputs replaced by a newer
|
|
56
|
+
* read of the same file) and, when `pruneUseless` is set, results their tool
|
|
57
|
+
* flagged contextually useless. Cheap, incremental, and prompt-cache-aware: a
|
|
58
|
+
* candidate is pruned now only when the suffix after it is small (tail case —
|
|
59
|
+
* the read→edit→read loop) or when the context has been idle long enough that
|
|
60
|
+
* the provider cache is cold anyway (then ALL candidates flush).
|
|
61
|
+
*/
|
|
62
|
+
export declare function pruneSupersededToolResults(entries: SessionEntry[], config: SupersedePruneConfig): PruneResult;
|
|
19
63
|
export declare function pruneToolOutputs(entries: SessionEntry[], config?: PruneConfig): PruneResult;
|
|
64
|
+
/**
|
|
65
|
+
* Supersede key for the `read` tool: the file path with the trailing line/raw
|
|
66
|
+
* selector stripped (the read tool's own splitter grammar via
|
|
67
|
+
* {@link splitReadSelector}, e.g. `src/foo.ts:50-200`, `:2-4:raw`).
|
|
68
|
+
* Internal/URL-scheme paths (`skill://…`, `https://…`) are exempt.
|
|
69
|
+
* Selector-free reads key on the bare path; selector-carrying reads key on
|
|
70
|
+
* `path + "\u0000" + selector`, so two reads collide only when the newer is
|
|
71
|
+
* selector-free or the selectors are identical (the pass's prefix rule lets a
|
|
72
|
+
* bare-path read supersede selector-carrying reads of the same file).
|
|
73
|
+
*/
|
|
74
|
+
export declare function readToolSupersedeKey(toolName: string, args: Record<string, unknown>): string | undefined;
|
|
package/dist/types/compaction/shake.d.ts
CHANGED
|
@@ -54,7 +54,9 @@ export type ShakeRegion = ToolResultShakeRegion | BlockShakeRegion;
|
|
|
54
54
|
* Walks the protect-recent window (most recent `protectTokens` of context is
|
|
55
55
|
* kept intact), collects whole tool-result messages (honoring `protectedTools`
|
|
56
56
|
* and skipping already-pruned results) and large fenced/XML blocks inside
|
|
57
|
-
* user/developer/assistant/custom messages.
|
|
57
|
+
* user/developer/assistant/custom messages. Tool results flagged contextually
|
|
58
|
+
* useless by their tool bypass the protect window — there is nothing recent
|
|
59
|
+
* worth keeping in them. Returns regions in document order.
|
|
58
60
|
*
|
|
59
61
|
* `toolCall` blocks are never touched (tool-call/result pairing is preserved)
|
|
60
62
|
* and regions never span a message boundary. When the combined estimated
|
|
package/dist/types/compaction/utils.d.ts
CHANGED
|
@@ -9,6 +9,22 @@ export interface FileOperations {
|
|
|
9
9
|
edited: Set<string>;
|
|
10
10
|
}
|
|
11
11
|
export declare function createFileOps(): FileOperations;
|
|
12
|
+
/**
|
|
13
|
+
* Split a read-tool path into its base path and trailing selector, mirroring the
|
|
14
|
+
* read tool's own splitter. Single source of the grammar in this package: the
|
|
15
|
+
* file-operations list strips selectors via {@link stripReadSelector}, and the
|
|
16
|
+
* supersede-prune pass keys on both parts via `readToolSupersedeKey`.
|
|
17
|
+
*/
|
|
18
|
+
export declare function splitReadSelector(path: string): {
|
|
19
|
+
path: string;
|
|
20
|
+
sel?: string;
|
|
21
|
+
};
|
|
22
|
+
/**
|
|
23
|
+
* Strip a trailing read-tool selector (`:50-200`, `:raw`, `:1-50:raw`, `:conflicts`, …)
|
|
24
|
+
* so the same file read with different line ranges dedupes to one `<files>` entry
|
|
25
|
+
* and matches its write/edit path when computing Read/Write/RW markers.
|
|
26
|
+
*/
|
|
27
|
+
export declare function stripReadSelector(path: string): string;
|
|
12
28
|
/**
|
|
13
29
|
* Extract file operations from tool calls in an assistant message.
|
|
14
30
|
*/
|
|
@@ -21,8 +37,8 @@ export declare function computeFileLists(fileOps: FileOperations): {
|
|
|
21
37
|
readFiles: string[];
|
|
22
38
|
modifiedFiles: string[];
|
|
23
39
|
};
|
|
24
|
-
export declare function formatFileOperations(readFiles: string[], modifiedFiles: string[]): string;
|
|
25
|
-
export declare function upsertFileOperations(summary: string, readFiles: string[], modifiedFiles: string[]): string;
|
|
40
|
+
export declare function formatFileOperations(readFiles: string[], modifiedFiles: string[], readSet?: ReadonlySet<string>): string;
|
|
41
|
+
export declare function upsertFileOperations(summary: string, readFiles: string[], modifiedFiles: string[], readSet?: ReadonlySet<string>): string;
|
|
26
42
|
/**
|
|
27
43
|
* Serialize LLM messages to text for summarization.
|
|
28
44
|
* This prevents the model from treating it as a conversation to continue.
|
package/dist/types/proxy.d.ts
CHANGED
|
@@ -2,8 +2,8 @@
|
|
|
2
2
|
* Proxy stream function for apps that route LLM calls through a server.
|
|
3
3
|
* The server manages auth and proxies requests to LLM providers.
|
|
4
4
|
*/
|
|
5
|
-
import { type AssistantMessage, type AssistantMessageEvent, type Context, EventStream, type Model, type SimpleStreamOptions, type StopReason } from "@prometheus-ai/ai";
|
|
6
|
-
declare class ProxyMessageEventStream extends EventStream<AssistantMessageEvent, AssistantMessage> {
|
|
5
|
+
import { type AssistantMessage, type AssistantMessageEvent, type Context, EventStream, type FetchImpl, type Model, type SimpleStreamOptions, type StopReason } from "@prometheus-ai/ai";
|
|
6
|
+
export declare class ProxyMessageEventStream extends EventStream<AssistantMessageEvent, AssistantMessage> {
|
|
7
7
|
constructor();
|
|
8
8
|
}
|
|
9
9
|
/**
|
|
@@ -60,6 +60,8 @@ export interface ProxyStreamOptions extends SimpleStreamOptions {
|
|
|
60
60
|
authToken: string;
|
|
61
61
|
/** Proxy server URL (e.g., "https://genai.example.com") */
|
|
62
62
|
proxyUrl: string;
|
|
63
|
+
/** Optional fetch implementation; defaults to global fetch. */
|
|
64
|
+
fetch?: FetchImpl;
|
|
63
65
|
}
|
|
64
66
|
/**
|
|
65
67
|
* Stream function that proxies through a server instead of calling LLM providers directly.
|
|
@@ -81,4 +83,3 @@ export interface ProxyStreamOptions extends SimpleStreamOptions {
|
|
|
81
83
|
* ```
|
|
82
84
|
*/
|
|
83
85
|
export declare function streamProxy(model: Model, context: Context, options: ProxyStreamOptions): ProxyMessageEventStream;
|
|
84
|
-
export {};
|
|
package/dist/types/telemetry.d.ts
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
* OpenTelemetry instrumentation for the agent loop.
|
|
3
3
|
*
|
|
4
4
|
* Implements the OpenTelemetry GenAI semantic conventions
|
|
5
|
-
* (https://opentelemetry.io/docs/specs/semconv/gen-ai/) plus `
|
|
5
|
+
* (https://opentelemetry.io/docs/specs/semconv/gen-ai/) plus `pi.gen_ai.*`
|
|
6
6
|
* extension attributes for run summaries, dashboard summaries, and cost hints
|
|
7
7
|
* that are useful to downstream observability UIs.
|
|
8
8
|
*
|
|
@@ -78,32 +78,33 @@ export declare const enum OpenAIAttr {
|
|
|
78
78
|
ResponseServiceTier = "openai.response.service_tier"
|
|
79
79
|
}
|
|
80
80
|
/** Project extension attributes. Kept out of the reserved `gen_ai.*` namespace. */
|
|
81
|
-
export declare const enum
|
|
82
|
-
AgentStepNumber = "
|
|
83
|
-
AgentStepCount = "
|
|
84
|
-
RequestReasoningEffort = "
|
|
85
|
-
RequestToolChoice = "
|
|
86
|
-
RequestAvailableTools = "
|
|
87
|
-
RequestMessages = "
|
|
88
|
-
ResponseText = "
|
|
89
|
-
ResponseToolCalls = "
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
81
|
+
export declare const enum PiGenAIAttr {
|
|
82
|
+
AgentStepNumber = "pi.gen_ai.agent.step.number",
|
|
83
|
+
AgentStepCount = "pi.gen_ai.agent.step.count",
|
|
84
|
+
RequestReasoningEffort = "pi.gen_ai.request.reasoning.effort",
|
|
85
|
+
RequestToolChoice = "pi.gen_ai.request.tool.choice",
|
|
86
|
+
RequestAvailableTools = "pi.gen_ai.request.available_tools",
|
|
87
|
+
RequestMessages = "pi.gen_ai.request.messages",
|
|
88
|
+
ResponseText = "pi.gen_ai.response.text",
|
|
89
|
+
ResponseToolCalls = "pi.gen_ai.response.tool_calls",
|
|
90
|
+
ResponseUpstreamProvider = "pi.gen_ai.response.upstream_provider",
|
|
91
|
+
UsageTotalTokens = "pi.gen_ai.usage.total_tokens",
|
|
92
|
+
UsageServerSideTools = "pi.gen_ai.usage.server_tool_requests",
|
|
93
|
+
CostEstimatedUsd = "pi.gen_ai.cost.estimated_usd",
|
|
94
|
+
CostInputUsd = "pi.gen_ai.cost.input_usd",
|
|
95
|
+
CostOutputUsd = "pi.gen_ai.cost.output_usd",
|
|
96
|
+
CostUnavailableReason = "pi.gen_ai.cost.unavailable_reason",
|
|
97
|
+
ToolStatus = "pi.gen_ai.tool.status",
|
|
98
|
+
ToolCallIntent = "pi.gen_ai.tool.call.intent",
|
|
99
|
+
HandoffFromAgentName = "pi.gen_ai.handoff.from_agent.name",
|
|
100
|
+
HandoffFromAgentId = "pi.gen_ai.handoff.from_agent.id",
|
|
101
|
+
HandoffToAgentName = "pi.gen_ai.handoff.to_agent.name",
|
|
102
|
+
HandoffToAgentId = "pi.gen_ai.handoff.to_agent.id",
|
|
103
|
+
OneshotKind = "pi.gen_ai.oneshot.kind",
|
|
104
|
+
GatewayName = "pi.gen_ai.gateway.name",
|
|
105
|
+
GatewayEndpoint = "pi.gen_ai.gateway.endpoint",
|
|
106
|
+
GatewayCallId = "pi.gen_ai.gateway.call_id",
|
|
107
|
+
GatewayRoutedTo = "pi.gen_ai.gateway.routed_to"
|
|
107
108
|
}
|
|
108
109
|
/** GenAI operation names — values for {@link GenAIAttr.OperationName}. */
|
|
109
110
|
export declare const GenAIOperation: {
|
|
@@ -141,9 +142,9 @@ export interface CostEstimatorContext {
|
|
|
141
142
|
}
|
|
142
143
|
/**
|
|
143
144
|
* Cost estimator result.
|
|
144
|
-
* { usd: number } — cost is known; emitted as
|
|
145
|
+
* { usd: number } — cost is known; emitted as pi.gen_ai.cost.estimated_usd
|
|
145
146
|
* { unavailable: string } — cost is intentionally unknown; emitted as
|
|
146
|
-
*
|
|
147
|
+
* pi.gen_ai.cost.unavailable_reason
|
|
147
148
|
* undefined — no opinion; nothing emitted
|
|
148
149
|
*/
|
|
149
150
|
export type CostEstimate = {
|
|
@@ -192,7 +193,7 @@ export interface ChatUsageEvent {
|
|
|
192
193
|
*
|
|
193
194
|
* Use this to reconcile gateway-issued ids (e.g. `x-litellm-call-id`) with
|
|
194
195
|
* downstream billing / spend dashboards. Known gateway patterns are also
|
|
195
|
-
* auto-stamped on the chat span as `
|
|
196
|
+
* auto-stamped on the chat span as `pi.gen_ai.gateway.*` attributes.
|
|
196
197
|
*/
|
|
197
198
|
readonly headers: Readonly<Record<string, string>> | undefined;
|
|
198
199
|
}
|
|
@@ -446,7 +447,7 @@ export interface InstrumentedChatSpanOptions {
|
|
|
446
447
|
/** Step index recorded on the span; defaults to `-1` for non-loop calls. */
|
|
447
448
|
readonly stepNumber?: number;
|
|
448
449
|
/**
|
|
449
|
-
* Tag stamped onto `
|
|
450
|
+
* Tag stamped onto `pi.gen_ai.oneshot.kind`. Values used by the agent:
|
|
450
451
|
* `compaction_summary`, `compaction_short_summary`, `compaction_turn_prefix`,
|
|
451
452
|
* `handoff`, `branch_summary`, `inspect_image`. Free-form to allow callers
|
|
452
453
|
* outside this package to add new kinds without bumping the helper.
|
|
@@ -499,7 +500,7 @@ export declare function finishExecuteToolSpan(telemetry: AgentTelemetry | undefi
|
|
|
499
500
|
readonly toolName: string;
|
|
500
501
|
}): void;
|
|
501
502
|
/** Span attribute carrying the terminal {@link ToolStatus}. */
|
|
502
|
-
export declare const EXECUTE_TOOL_STATUS_ATTR =
|
|
503
|
+
export declare const EXECUTE_TOOL_STATUS_ATTR = PiGenAIAttr.ToolStatus;
|
|
503
504
|
/**
|
|
504
505
|
* Record a tool that bypassed the span lifecycle entirely (pre-run
|
|
505
506
|
* interrupt, post-execution tail sweep for calls that never produced a
|
|
@@ -527,36 +528,37 @@ export declare function finishInvokeAgentSpan(telemetry: AgentTelemetry | undefi
|
|
|
527
528
|
} | undefined;
|
|
528
529
|
/**
|
|
529
530
|
* Invoke {@link AgentTelemetryConfig.onRunEnd} on `telemetry` if set. Throws
|
|
530
|
-
are caught and
|
|
531
|
+
* are caught and surfaced via the `onTelemetryWarning` hook (falling back to `console.warn`
|
|
532
|
+
* when no hook is set) — telemetry callbacks NEVER turn a
|
|
531
533
|
* successful agent run into a failed one. Idempotent at the call site via
|
|
532
534
|
* {@link AgentRunCollector.markRunEnded}; callers must check that before
|
|
533
535
|
* calling this helper.
|
|
534
536
|
*/
|
|
535
537
|
export declare function fireOnRunEnd(telemetry: AgentTelemetry, summary: AgentRunSummary, coverage: AgentRunCoverage): void;
|
|
536
|
-
/** Aggregate `
|
|
537
|
-
export declare const enum
|
|
538
|
-
ChatsCount = "
|
|
539
|
-
ChatsTotalLatencyMs = "
|
|
540
|
-
ChatsStopReasonPrefix = "
|
|
541
|
-
ToolsCount = "
|
|
542
|
-
ToolsOkCount = "
|
|
543
|
-
ToolsErrorCount = "
|
|
544
|
-
ToolsSkippedCount = "
|
|
545
|
-
ToolsBlockedCount = "
|
|
546
|
-
ToolsTimeoutCount = "
|
|
547
|
-
ToolsAbortedCount = "
|
|
548
|
-
ToolsTotalLatencyMs = "
|
|
549
|
-
ToolsInvoked = "
|
|
550
|
-
ToolsAvailable = "
|
|
551
|
-
ToolsUnused = "
|
|
552
|
-
UsageInputTokensTotal = "
|
|
553
|
-
UsageOutputTokensTotal = "
|
|
554
|
-
UsageCacheReadInputTokensTotal = "
|
|
555
|
-
UsageCacheCreationInputTokensTotal = "
|
|
556
|
-
UsageReasoningOutputTokensTotal = "
|
|
557
|
-
UsageTotalTokensTotal = "
|
|
558
|
-
CostEstimatedUsdTotal = "
|
|
559
|
-
ErrorsCount = "
|
|
538
|
+
/** Aggregate `pi.gen_ai.agent.*` attributes stamped on the `invoke_agent` span. */
|
|
539
|
+
export declare const enum PiGenAIAggregateAttr {
|
|
540
|
+
ChatsCount = "pi.gen_ai.agent.chats.count",
|
|
541
|
+
ChatsTotalLatencyMs = "pi.gen_ai.agent.chats.total_latency_ms",
|
|
542
|
+
ChatsStopReasonPrefix = "pi.gen_ai.agent.chats.stop_reason.",
|
|
543
|
+
ToolsCount = "pi.gen_ai.agent.tools.count",
|
|
544
|
+
ToolsOkCount = "pi.gen_ai.agent.tools.ok.count",
|
|
545
|
+
ToolsErrorCount = "pi.gen_ai.agent.tools.error.count",
|
|
546
|
+
ToolsSkippedCount = "pi.gen_ai.agent.tools.skipped.count",
|
|
547
|
+
ToolsBlockedCount = "pi.gen_ai.agent.tools.blocked.count",
|
|
548
|
+
ToolsTimeoutCount = "pi.gen_ai.agent.tools.timeout.count",
|
|
549
|
+
ToolsAbortedCount = "pi.gen_ai.agent.tools.aborted.count",
|
|
550
|
+
ToolsTotalLatencyMs = "pi.gen_ai.agent.tools.total_latency_ms",
|
|
551
|
+
ToolsInvoked = "pi.gen_ai.agent.tools.invoked",
|
|
552
|
+
ToolsAvailable = "pi.gen_ai.agent.tools.available",
|
|
553
|
+
ToolsUnused = "pi.gen_ai.agent.tools.unused",
|
|
554
|
+
UsageInputTokensTotal = "pi.gen_ai.agent.usage.input_tokens.total",
|
|
555
|
+
UsageOutputTokensTotal = "pi.gen_ai.agent.usage.output_tokens.total",
|
|
556
|
+
UsageCacheReadInputTokensTotal = "pi.gen_ai.agent.usage.cache_read.input_tokens.total",
|
|
557
|
+
UsageCacheCreationInputTokensTotal = "pi.gen_ai.agent.usage.cache_creation.input_tokens.total",
|
|
558
|
+
UsageReasoningOutputTokensTotal = "pi.gen_ai.agent.usage.reasoning.output_tokens.total",
|
|
559
|
+
UsageTotalTokensTotal = "pi.gen_ai.agent.usage.total_tokens.total",
|
|
560
|
+
CostEstimatedUsdTotal = "pi.gen_ai.agent.cost.estimated_usd.total",
|
|
561
|
+
ErrorsCount = "pi.gen_ai.agent.errors.count"
|
|
560
562
|
}
|
|
561
563
|
/**
|
|
562
564
|
* Run `fn` with `span` activated on the OTEL context. Spans created
|