@prometheus-ai/agent-core 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. package/CHANGELOG.md +7 -0
  2. package/README.md +473 -0
  3. package/dist/types/agent-loop.d.ts +55 -0
  4. package/dist/types/agent.d.ts +331 -0
  5. package/dist/types/append-only-context.d.ts +113 -0
  6. package/dist/types/compaction/branch-summarization.d.ts +94 -0
  7. package/dist/types/compaction/compaction.d.ts +183 -0
  8. package/dist/types/compaction/entries.d.ts +103 -0
  9. package/dist/types/compaction/errors.d.ts +26 -0
  10. package/dist/types/compaction/index.d.ts +12 -0
  11. package/dist/types/compaction/messages.d.ts +61 -0
  12. package/dist/types/compaction/openai.d.ts +58 -0
  13. package/dist/types/compaction/pruning.d.ts +19 -0
  14. package/dist/types/compaction/shake.d.ts +82 -0
  15. package/dist/types/compaction/tool-protection.d.ts +17 -0
  16. package/dist/types/compaction/utils.d.ts +32 -0
  17. package/dist/types/compaction.d.ts +1 -0
  18. package/dist/types/harmony-leak.d.ts +118 -0
  19. package/dist/types/index.d.ts +11 -0
  20. package/dist/types/proxy.d.ts +84 -0
  21. package/dist/types/run-collector.d.ts +196 -0
  22. package/dist/types/telemetry.d.ts +588 -0
  23. package/dist/types/thinking.d.ts +17 -0
  24. package/dist/types/types.d.ts +443 -0
  25. package/dist/types/utils/yield.d.ts +52 -0
  26. package/package.json +75 -0
  27. package/src/agent-loop.ts +1418 -0
  28. package/src/agent.ts +1236 -0
  29. package/src/append-only-context.ts +297 -0
  30. package/src/compaction/branch-summarization.ts +339 -0
  31. package/src/compaction/compaction.ts +1155 -0
  32. package/src/compaction/entries.ts +133 -0
  33. package/src/compaction/errors.ts +31 -0
  34. package/src/compaction/index.ts +13 -0
  35. package/src/compaction/messages.ts +212 -0
  36. package/src/compaction/openai.ts +552 -0
  37. package/src/compaction/prompts/auto-handoff-threshold-focus.md +1 -0
  38. package/src/compaction/prompts/branch-summary-context.md +5 -0
  39. package/src/compaction/prompts/branch-summary-preamble.md +2 -0
  40. package/src/compaction/prompts/branch-summary.md +30 -0
  41. package/src/compaction/prompts/compaction-short-summary.md +9 -0
  42. package/src/compaction/prompts/compaction-summary-context.md +5 -0
  43. package/src/compaction/prompts/compaction-summary.md +38 -0
  44. package/src/compaction/prompts/compaction-turn-prefix.md +17 -0
  45. package/src/compaction/prompts/compaction-update-summary.md +45 -0
  46. package/src/compaction/prompts/file-operations.md +10 -0
  47. package/src/compaction/prompts/handoff-document.md +49 -0
  48. package/src/compaction/prompts/summarization-system.md +3 -0
  49. package/src/compaction/pruning.ts +99 -0
  50. package/src/compaction/shake.ts +406 -0
  51. package/src/compaction/tool-protection.ts +55 -0
  52. package/src/compaction/utils.ts +185 -0
  53. package/src/compaction.ts +1 -0
  54. package/src/harmony-leak.ts +456 -0
  55. package/src/index.ts +21 -0
  56. package/src/proxy.ts +326 -0
  57. package/src/run-collector.ts +631 -0
  58. package/src/telemetry.ts +2020 -0
  59. package/src/thinking.ts +19 -0
  60. package/src/types.ts +505 -0
  61. package/src/utils/yield.ts +146 -0
@@ -0,0 +1,118 @@
1
+ /**
2
+ * GPT-5 Harmony-header leakage detection and recovery.
3
+ *
4
+ * Background and policy: see `docs/ERRATA-GPT5-HARMONY.md`. This module
5
+ * implements §3 of that document: detection by signal fusion, plus a
6
+ * truncate-and-resume primitive for the `edit` tool when its input is in
7
+ * hashline DSL form. Other tools and surfaces fall through to
8
+ * abort-and-retry handled by the agent loop.
9
+ */
10
+ import type { AssistantMessage, Model, ToolCall } from "@prometheus-ai/ai";
11
+ declare const SIGNAL_ORDER: readonly ["M", "C", "G", "S", "B", "R", "T"];
12
+ export type HarmonySignalClass = "H" | (typeof SIGNAL_ORDER)[number];
13
+ export type HarmonySurface = "assistant_text" | "assistant_thinking" | "tool_arg";
14
+ export interface HarmonySignal {
15
+ classes: HarmonySignalClass[];
16
+ start: number;
17
+ end: number;
18
+ text: string;
19
+ }
20
+ export interface HarmonyDetection {
21
+ surface: HarmonySurface;
22
+ contentIndex?: number;
23
+ toolName?: string;
24
+ toolCallId?: string;
25
+ signals: HarmonySignal[];
26
+ }
27
+ export interface HarmonyAuditEvent {
28
+ action: "truncate_resume" | "abort_retry" | "escalated";
29
+ surface: HarmonySurface;
30
+ signal: string;
31
+ retryN: number;
32
+ model: string;
33
+ provider: string;
34
+ toolName?: string;
35
+ removedLen: number;
36
+ removedSha8: string;
37
+ removedPreview: string;
38
+ removedBlob?: string;
39
+ }
40
+ export interface HarmonyRecoveredToolCall {
41
+ message: AssistantMessage;
42
+ removed: string;
43
+ }
44
+ /**
45
+ * Whether to run leak detection on responses from this model. We default-on
46
+ * for every openai-codex model rather than enumerating ids, so a future
47
+ * gpt-5.6 (or whatever) doesn't silently bypass the mitigation. Detection
48
+ * itself is cheap; the cost of missing a leak on a new model is not.
49
+ */
50
+ export declare function isHarmonyLeakMitigationTarget(model: Model): boolean;
51
+ export declare function signalListLabel(signals: readonly HarmonySignal[]): string;
52
+ /**
53
+ * Detect harmony-protocol leakage in `text`. Returns undefined if clean.
54
+ *
55
+ * Trip rule: `H` alone, or `M` paired with at least one co-signal
56
+ * (`C`/`G`/`S`/`B`/`R`/`T`). Bare `M` does not trip — this document, its
57
+ * tests, and bug reports legitimately carry the marker.
58
+ *
59
+ * The `tool_arg` surface is held to a stricter rule. A tool argument is
60
+ * arbitrary file/data content that can legitimately carry the marker, a
61
+ * channel word, harmony control tokens, or a non-Latin script run (editing
62
+ * these very fixtures does exactly that). The only robust leak signal there
63
+ * is content trailing the structurally-valid parse, so a `tool_arg` detection
64
+ * additionally requires the `T` co-signal. Absent a `parsedEnd` boundary `T`
65
+ * is never set, so `tool_arg` scanning stays inert and a legitimate codex tool
66
+ * call is never hard-aborted. `assistant_text`/`assistant_thinking` keep the
67
+ * base rule.
68
+ *
69
+ * `parsedEnd`, when supplied, marks the byte at which a structurally valid
70
+ * tool-argument parse ends; markers at or past it set the `T` co-signal.
71
+ * `contentIndex`/`toolName`/`toolCallId` flow through to the returned
72
+ * detection for downstream auditing.
73
+ */
74
+ export declare function detectHarmonyLeak(text: string, surface: HarmonySurface, options?: {
75
+ parsedEnd?: number;
76
+ contentIndex?: number;
77
+ toolName?: string;
78
+ toolCallId?: string;
79
+ }): HarmonyDetection | undefined;
80
+ /**
81
+ * Scan an assistant message's content blocks; return the first detection.
82
+ *
83
+ * `toolArgParseEnd`, when supplied, resolves the byte offset at which a tool
84
+ * call's structurally-valid argument parse ends (the `T` co-signal in
85
+ * {@link detectHarmonyLeak}). Callers that can parse a tool's argument DSL pass
86
+ * it to enable `tool_arg` leak detection; omitting it keeps that surface inert
87
+ * — the safe default the agent loop relies on, since it cannot bound a streamed
88
+ * tool DSL and must never hard-abort a legitimate tool call.
89
+ */
90
+ export declare function detectHarmonyLeakInAssistantMessage(message: AssistantMessage, toolArgParseEnd?: (toolCall: ToolCall) => number | undefined): HarmonyDetection | undefined;
91
+ /**
92
+ * Truncate a contaminated tool call at the start of the contaminated line and
93
+ * append the tool's recovery sentinel. Returns a recovered AssistantMessage
94
+ * (containing only the cleaned tool call), a synthetic continuation user
95
+ * message asking the model to re-issue the rest, and the removed substring
96
+ * for auditing. Returns undefined when the tool is not recovery-eligible or
97
+ * the truncation would leave nothing meaningful to dispatch.
98
+ *
99
+ * `providerPayload` is dropped from the recovered message: for Codex the
100
+ * encrypted reasoning blob is opaque/signed and we cannot validate that it is
101
+ * uncontaminated. The model re-reasons on the next turn.
102
+ */
103
+ export declare function recoverHarmonyToolCall(message: AssistantMessage, detection: HarmonyDetection): HarmonyRecoveredToolCall | undefined;
104
+ /**
105
+ * Return the contaminated substring from `message` for audit purposes when
106
+ * recovery is not applicable (abort path). Walks from the first detected
107
+ * signal to end-of-content within the relevant block. Returns "" if the
108
+ * detection cannot be resolved against the message.
109
+ */
110
+ export declare function extractHarmonyRemoved(message: AssistantMessage, detection: HarmonyDetection): string;
111
+ export declare function createHarmonyAuditEvent(params: {
112
+ action: HarmonyAuditEvent["action"];
113
+ detection: HarmonyDetection;
114
+ model: Model;
115
+ retryN: number;
116
+ removed: string;
117
+ }): HarmonyAuditEvent;
118
+ export {};
@@ -0,0 +1,11 @@
1
+ export * from "./agent";
2
+ export * from "./agent-loop";
3
+ export * from "./append-only-context";
4
+ export * from "./compaction";
5
+ export * from "./harmony-leak";
6
+ export * from "./proxy";
7
+ export * from "./run-collector";
8
+ export * from "./telemetry";
9
+ export * from "./thinking";
10
+ export * from "./types";
11
+ export * from "./utils/yield";
@@ -0,0 +1,84 @@
1
+ /**
2
+ * Proxy stream function for apps that route LLM calls through a server.
3
+ * The server manages auth and proxies requests to LLM providers.
4
+ */
5
+ import { type AssistantMessage, type AssistantMessageEvent, type Context, EventStream, type Model, type SimpleStreamOptions, type StopReason } from "@prometheus-ai/ai";
6
+ declare class ProxyMessageEventStream extends EventStream<AssistantMessageEvent, AssistantMessage> {
7
+ constructor();
8
+ }
9
+ /**
10
+ * Proxy event types — the server sends these with the `partial` field stripped to reduce bandwidth.
11
+ */
12
+ export type ProxyAssistantMessageEvent = {
13
+ type: "start";
14
+ } | {
15
+ type: "text_start";
16
+ contentIndex: number;
17
+ } | {
18
+ type: "text_delta";
19
+ contentIndex: number;
20
+ delta: string;
21
+ } | {
22
+ type: "text_end";
23
+ contentIndex: number;
24
+ contentSignature?: string;
25
+ } | {
26
+ type: "thinking_start";
27
+ contentIndex: number;
28
+ } | {
29
+ type: "thinking_delta";
30
+ contentIndex: number;
31
+ delta: string;
32
+ } | {
33
+ type: "thinking_end";
34
+ contentIndex: number;
35
+ contentSignature?: string;
36
+ } | {
37
+ type: "toolcall_start";
38
+ contentIndex: number;
39
+ id: string;
40
+ toolName: string;
41
+ } | {
42
+ type: "toolcall_delta";
43
+ contentIndex: number;
44
+ delta: string;
45
+ } | {
46
+ type: "toolcall_end";
47
+ contentIndex: number;
48
+ } | {
49
+ type: "done";
50
+ reason: Extract<StopReason, "stop" | "length" | "toolUse">;
51
+ usage: AssistantMessage["usage"];
52
+ } | {
53
+ type: "error";
54
+ reason: Extract<StopReason, "aborted" | "error">;
55
+ errorMessage?: string;
56
+ usage: AssistantMessage["usage"];
57
+ };
58
+ export interface ProxyStreamOptions extends SimpleStreamOptions {
59
+ /** Auth token for the proxy server */
60
+ authToken: string;
61
+ /** Proxy server URL (e.g., "https://genai.example.com") */
62
+ proxyUrl: string;
63
+ }
64
+ /**
65
+ * Stream function that proxies through a server instead of calling LLM providers directly.
66
+ * The server strips the partial field from delta events to reduce bandwidth.
67
+ * We reconstruct the partial message client-side.
68
+ *
69
+ * Use this as the `streamFn` option when creating an Agent that needs to go through a proxy.
70
+ *
71
+ * @example
72
+ * ```typescript
73
+ * const agent = new Agent({
74
+ * streamFn: async (model, context, options) =>
75
+ * streamProxy(model, context, {
76
+ * ...options,
77
+ * authToken: await getAuthToken(),
78
+ * proxyUrl: "https://genai.example.com",
79
+ * }),
80
+ * });
81
+ * ```
82
+ */
83
+ export declare function streamProxy(model: Model, context: Context, options: ProxyStreamOptions): ProxyMessageEventStream;
84
+ export {};
@@ -0,0 +1,196 @@
1
+ /**
2
+ * Per-invocation run aggregator. Buffers per-chat and per-tool records as the
3
+ * loop executes and folds them into a single {@link AgentRunSummary} +
4
+ * {@link AgentRunCoverage} value at the end.
5
+ *
6
+ * One collector lives on each {@link AgentTelemetry} handle, which is
7
+ * constructed once per `agentLoop` invocation in {@link resolveTelemetry}.
8
+ * Collector lookups use the live `Span` as a `WeakMap` key — bounded memory,
9
+ * no cross-invoke leakage.
10
+ *
11
+ * The collector is fed exclusively by helpers in `./telemetry.ts`. Loop
12
+ * authors do not interact with it directly except via the public
13
+ * `recordSkippedTool` helper used for the two skip paths that bypass spans
14
+ * entirely (pre-run interrupt and the tail-sweep for tool calls that never
15
+ * produced a result message).
16
+ */
17
+ import type { Span } from "@opentelemetry/api";
18
+ import type { AssistantMessage, Model, StopReason } from "@prometheus-ai/ai";
19
+ /** Terminal status reported by an `execute_tool` span. */
20
+ export type ToolStatus = "ok" | "error" | "skipped" | "blocked" | "timeout" | "aborted";
21
+ /** Raw record for a single `chat` step, finalized by `finishChatSpan`. */
22
+ export interface ChatRecord {
23
+ readonly stepNumber: number;
24
+ readonly model: string;
25
+ readonly provider: string;
26
+ readonly stopReason: StopReason | undefined;
27
+ readonly latencyMs: number;
28
+ readonly inputTokens: number;
29
+ readonly outputTokens: number;
30
+ readonly cachedInputTokens: number;
31
+ readonly cacheWriteTokens: number;
32
+ readonly reasoningOutputTokens: number;
33
+ readonly totalTokens: number;
34
+ readonly costUsd: number | undefined;
35
+ readonly costUnavailableReason: string | undefined;
36
+ readonly errorType: string | undefined;
37
+ }
38
+ /** Raw record for a single `execute_tool` invocation. */
39
+ export interface ToolRecord {
40
+ readonly toolCallId: string;
41
+ readonly toolName: string;
42
+ readonly status: ToolStatus;
43
+ readonly latencyMs: number;
44
+ readonly errorType: string | undefined;
45
+ }
46
+ /** Per-tool counters surfaced under {@link AgentRunSummary.tools.byName}. */
47
+ export interface ToolCounters {
48
+ readonly total: number;
49
+ readonly ok: number;
50
+ readonly error: number;
51
+ readonly skipped: number;
52
+ readonly blocked: number;
53
+ readonly timeout: number;
54
+ readonly aborted: number;
55
+ readonly totalLatencyMs: number;
56
+ }
57
+ /**
58
+ * Run-level rollup returned in the `agent_end` event and passed to
59
+ * {@link AgentTelemetryConfig.onRunEnd}. Pure aggregation — no references to
60
+ * spans, no callbacks, no live state. Safe to persist / diff / assert.
61
+ */
62
+ export interface AgentRunSummary {
63
+ readonly chats: {
64
+ readonly total: number;
65
+ /** Bucketed by raw {@link StopReason}; absent reasons omitted. */
66
+ readonly byStopReason: Readonly<Record<string, number>>;
67
+ readonly totalLatencyMs: number;
68
+ };
69
+ readonly tools: {
70
+ readonly total: number;
71
+ readonly ok: number;
72
+ readonly error: number;
73
+ readonly skipped: number;
74
+ readonly blocked: number;
75
+ readonly timeout: number;
76
+ readonly aborted: number;
77
+ readonly totalLatencyMs: number;
78
+ /** Per-tool-name counters; keys sorted by name on snapshot. */
79
+ readonly byName: Readonly<Record<string, ToolCounters>>;
80
+ };
81
+ readonly usage: {
82
+ readonly inputTokens: number;
83
+ readonly outputTokens: number;
84
+ readonly cachedInputTokens: number;
85
+ readonly cacheWriteTokens: number;
86
+ readonly reasoningOutputTokens: number;
87
+ readonly totalTokens: number;
88
+ };
89
+ readonly cost: {
90
+ readonly estimatedUsd: number;
91
+ /** Sorted, deduped. */
92
+ readonly unavailableReasons: readonly string[];
93
+ };
94
+ readonly errors: {
95
+ readonly total: number;
96
+ readonly byType: Readonly<Record<string, number>>;
97
+ };
98
+ readonly stepCount: number;
99
+ }
100
+ /**
101
+ * Coverage rollup: registered-vs-invoked across the run. All arrays are
102
+ * sorted ascending and deduped so the value is stable for diffing.
103
+ */
104
+ export interface AgentRunCoverage {
105
+ readonly toolsAvailable: readonly string[];
106
+ readonly toolsInvoked: readonly string[];
107
+ readonly toolsUnused: readonly string[];
108
+ readonly modelsUsed: readonly string[];
109
+ readonly providersUsed: readonly string[];
110
+ }
111
+ export declare class AgentRunCollector {
112
+ #private;
113
+ /** True once `markRunEnded()` has been called for this invocation. */
114
+ get runEnded(): boolean;
115
+ /**
116
+ * Mark this run as logically ended. Callers use this to coordinate the
117
+ * `onRunEnd` hook between the success path (fires inside
118
+ * `buildAgentEndEvent`, before `stream.end()`) and the error path (fires
119
+ * inside `finishInvokeAgentSpan`'s finally). Idempotent — returns `true`
120
+ * the first time, `false` on subsequent calls.
121
+ */
122
+ markRunEnded(): boolean;
123
+ /** Record the tool names exposed on a single chat step. */
124
+ noteAvailableTools(tools: readonly {
125
+ readonly name: string;
126
+ }[] | undefined): void;
127
+ beginChat(span: Span, init: {
128
+ readonly stepNumber: number;
129
+ readonly model: Model;
130
+ readonly provider?: string;
131
+ }): void;
132
+ endChat(span: Span, message: AssistantMessage, fields: {
133
+ readonly costUsd: number | undefined;
134
+ readonly costUnavailableReason: string | undefined;
135
+ }): void;
136
+ /**
137
+ * Stamp the chat span as failed without a finalized AssistantMessage. Used
138
+ * by the `catch` arm of `streamAssistantResponse` so error chats still
139
+ * appear in the run summary.
140
+ */
141
+ failChat(span: Span, fields: {
142
+ readonly errorType: string;
143
+ }): void;
144
+ beginTool(span: Span, init: {
145
+ readonly toolCallId: string;
146
+ readonly toolName: string;
147
+ }): void;
148
+ endTool(span: Span, fields: {
149
+ readonly status: ToolStatus;
150
+ readonly errorType: string | undefined;
151
+ }): void;
152
+ /**
153
+ * Record a tool that never produced a span — pre-run interrupt or tail
154
+ * sweep. The LLM still asked for it, so it counts toward
155
+ * {@link AgentRunCoverage.toolsInvoked}.
156
+ */
157
+ recordOrphanTool(record: {
158
+ readonly toolCallId: string;
159
+ readonly toolName: string;
160
+ readonly status: ToolStatus;
161
+ }): void;
162
+ /** Build the immutable summary value from buffered records. */
163
+ snapshot(opts: {
164
+ readonly stepCount: number;
165
+ }): {
166
+ readonly summary: AgentRunSummary;
167
+ readonly coverage: AgentRunCoverage;
168
+ };
169
+ }
170
+ /**
171
+ * Fold multiple per-run summaries into one. Pure aggregation — useful when a
172
+ * caller (verify pass, benchmark harness) drives the agent loop N times and
173
+ * needs a single rollup across all invocations.
174
+ *
175
+ * Counters sum element-wise. Sets (cost reasons, error types, per-tool
176
+ * counters) merge by key. Numeric totals sum. The output is in the same
177
+ * shape as a single `AgentRunSummary`, so all dashboards and persistence
178
+ * layers handle it uniformly.
179
+ */
180
+ export declare function aggregateAgentRunSummaries(summaries: readonly AgentRunSummary[]): AgentRunSummary;
181
+ /** Union-merge multiple coverage values, preserving the sorted+deduped invariant. */
182
+ export declare function aggregateAgentRunCoverage(coverages: readonly AgentRunCoverage[]): AgentRunCoverage;
183
+ /** Empty `AgentRunSummary` constant. Exported for tests and default-initializers. */
184
+ export declare function emptyAgentRunSummary(): AgentRunSummary;
185
+ /** Empty `AgentRunCoverage` constant. Exported for tests and default-initializers. */
186
+ export declare function emptyAgentRunCoverage(): AgentRunCoverage;
187
+ /**
188
+ * Distinguishable error class thrown when `beforeToolCall` returns
189
+ * `{ block: true }`. Lets the catch arm of `runTool` set the terminal status
190
+ * on the execute_tool span to `"blocked"` instead of conflating with a real
191
+ * tool exception.
192
+ */
193
+ export declare class ToolCallBlockedError extends Error {
194
+ readonly name = "ToolCallBlockedError";
195
+ constructor(reason?: string);
196
+ }