@kodax-ai/kodax 0.7.40 → 0.7.41
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +57 -0
- package/dist/chunks/{chunk-NDNILSTR.js → chunk-5TFLMGER.js} +1 -1
- package/dist/chunks/{chunk-FAVPT4P7.js → chunk-6OB4AJOM.js} +1 -1
- package/dist/chunks/chunk-HYWVRTFA.js +1233 -0
- package/dist/chunks/chunk-SX2IS5JP.js +16 -0
- package/dist/chunks/chunk-ZPJPNLBK.js +462 -0
- package/dist/chunks/{compaction-config-A7XZ6H5Y.js → compaction-config-LT5PEXPT.js} +1 -1
- package/dist/chunks/{construction-bootstrap-OFPUZTXQ.js → construction-bootstrap-HBCWJFHC.js} +1 -1
- package/dist/chunks/dist-V3BS2NKB.js +2 -0
- package/dist/chunks/{utils-DFMYJUTE.js → utils-FAFUQJ2A.js} +1 -1
- package/dist/index.d.ts +232 -7
- package/dist/index.js +2 -2
- package/dist/kodax_cli.js +922 -912
- package/dist/sdk-agent.d.ts +1459 -10
- package/dist/sdk-agent.js +1 -1
- package/dist/sdk-coding.d.ts +4543 -14
- package/dist/sdk-coding.js +1 -1
- package/dist/sdk-llm.d.ts +209 -10
- package/dist/sdk-repl.d.ts +2694 -13
- package/dist/sdk-repl.js +1 -1
- package/dist/sdk-skills.d.ts +487 -11
- package/dist/types-chunks/bash-prefix-extractor.d-B2iliwdi.d.ts +2432 -0
- package/dist/types-chunks/capability.d-BxNgd1-c.d.ts +368 -0
- package/dist/types-chunks/cost-tracker.d-C4dMlQuV.d.ts +342 -0
- package/dist/types-chunks/history-cleanup.d-q1vAvCss.d.ts +1266 -0
- package/dist/types-chunks/instance-discovery.d-DZhp77vb.d.ts +1217 -0
- package/dist/types-chunks/resolver.d-BwD6TKz7.d.ts +262 -0
- package/dist/types-chunks/storage.d-Bv9T99Qu.d.ts +584 -0
- package/dist/types-chunks/types.d-C5mHR87z.d.ts +119 -0
- package/package.json +8 -3
- package/dist/acp_events.d.ts +0 -109
- package/dist/acp_logger.d.ts +0 -20
- package/dist/acp_server.d.ts +0 -92
- package/dist/chunks/chunk-CLS57NPX.js +0 -460
- package/dist/chunks/chunk-QZEDWITG.js +0 -1226
- package/dist/chunks/chunk-Z5EBDA6R.js +0 -15
- package/dist/chunks/dist-OTUF22DA.js +0 -2
- package/dist/cli_commands.d.ts +0 -17
- package/dist/cli_option_helpers.d.ts +0 -49
- package/dist/cli_option_helpers.test.d.ts +0 -1
- package/dist/constructed_cli.d.ts +0 -82
- package/dist/constructed_cli.test.d.ts +0 -1
- package/dist/kodax_cli.d.ts +0 -7
- package/dist/self_modify_cli.d.ts +0 -81
- package/dist/self_modify_cli.test.d.ts +0 -9
- package/dist/skill_cli.d.ts +0 -15
- package/dist/skill_cli.test.d.ts +0 -1
|
@@ -0,0 +1,342 @@
|
|
|
1
|
+
import { t as KodaXProviderConfig, o as KodaXMessage, _ as KodaXToolDefinition, I as KodaXReasoningRequest, B as KodaXProviderStreamOptions, N as KodaXStreamResult, p as KodaXModelDescriptor, s as KodaXProviderCapabilityProfile, F as KodaXReasoningCapability, H as KodaXReasoningOverride } from './capability.d-BxNgd1-c.js';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Retry-After header parsing — FEATURE_130 (v0.7.36).
|
|
5
|
+
*
|
|
6
|
+
* Handles the four forms KodaX's 12 provider adapters encounter when a
|
|
7
|
+
* model returns 429 (rate limit) or 503/529 (overloaded):
|
|
8
|
+
*
|
|
9
|
+
* 1. `Retry-After: 120` — integer seconds (RFC 7231 delay-seconds form)
|
|
10
|
+
* 2. `Retry-After: <HTTP-date>` — RFC 7231 IMF-fixdate
|
|
11
|
+
* e.g. "Wed, 21 Oct 2026 07:28:00 GMT"
|
|
12
|
+
* 3. `retry-after-ms: 45000` — Anthropic millisecond extension
|
|
13
|
+
* 4. (no Retry-After header present) — falls back to exponential backoff
|
|
14
|
+
* capped at `maxBackoffMs`, with optional jitter for the
|
|
15
|
+
* "thundering herd" protection.
|
|
16
|
+
*
|
|
17
|
+
* All return values are normalized to whole milliseconds and clamped to
|
|
18
|
+
* a sensible upper bound — never block the user for more than 120s, and
|
|
19
|
+
* never honor a header advertising a wait longer than `maxHeaderWaitMs`
|
|
20
|
+
* (default 120s). Beyond that limit we still extract the header but cap
|
|
21
|
+
* it; the calling provider can check `cappedFromHeader` to decide
|
|
22
|
+
* whether to surface a "rate limit exceeded — please wait" error to the
|
|
23
|
+
* user instead of silently sleeping for two minutes.
|
|
24
|
+
*
|
|
25
|
+
* Pattern-B (FEATURE_119) interaction: the helper is referentially
|
|
26
|
+
* transparent and stateless — it can be invoked concurrently by N
|
|
27
|
+
* parallel children without coordination. The retry loop in each
|
|
28
|
+
* provider holds its own attempt counter; this helper only translates
|
|
29
|
+
* headers/attempts into wait durations.
|
|
30
|
+
*
|
|
31
|
+
* Reference: opencode session/retry.ts:14-123 (4-form coverage).
|
|
32
|
+
*/
|
|
33
|
+
/** Identifies which retry-after header form, or the exponential-backoff fallback, produced a wait duration. */
type RetryAfterSource = 'retry-after-seconds' | 'retry-after-date' | 'retry-after-ms' | 'exponential-backoff';
|
|
34
|
+
type RetryAfterResult = {
|
|
35
|
+
readonly type: 'header';
|
|
36
|
+
readonly waitMs: number;
|
|
37
|
+
readonly source: 'retry-after-seconds' | 'retry-after-date' | 'retry-after-ms';
|
|
38
|
+
/** True when the header value exceeded `maxHeaderWaitMs` and was clamped. */
|
|
39
|
+
readonly cappedFromHeader: boolean;
|
|
40
|
+
} | {
|
|
41
|
+
readonly type: 'backoff';
|
|
42
|
+
readonly waitMs: number;
|
|
43
|
+
readonly source: 'exponential-backoff';
|
|
44
|
+
readonly attempt: number;
|
|
45
|
+
};
|
|
46
|
+
interface ParseRetryAfterOptions {
|
|
47
|
+
/** Zero-based attempt index used by the backoff branch (0 = first retry). */
|
|
48
|
+
readonly attempt: number;
|
|
49
|
+
/** Base delay for exponential backoff. Default 1000ms. */
|
|
50
|
+
readonly baseBackoffMs?: number;
|
|
51
|
+
/** Maximum exponential backoff cap. Default 30000ms. */
|
|
52
|
+
readonly maxBackoffMs?: number;
|
|
53
|
+
/** Maximum wait honored from a header. Default 120000ms. */
|
|
54
|
+
readonly maxHeaderWaitMs?: number;
|
|
55
|
+
/**
|
|
56
|
+
* Override the "now" reference used by the HTTP-date branch.
|
|
57
|
+
* Test-only escape hatch — production code should leave this undefined.
|
|
58
|
+
*/
|
|
59
|
+
readonly now?: () => number;
|
|
60
|
+
/**
|
|
61
|
+
* Whether the backoff branch adds 0-25% jitter on top of the base
|
|
62
|
+
* exponential. Default true (matches the legacy `withRateLimit`
|
|
63
|
+
* jitter contract). Tests can pass `false` for deterministic output.
|
|
64
|
+
*/
|
|
65
|
+
readonly withJitter?: boolean;
|
|
66
|
+
}
|
|
67
|
+
/**
 * Header input accepted by `parseRetryAfter` and returned by
 * `extractHeadersFromError`: a `Headers` object, a plain record whose values
 * may be a string, a string array, or undefined, or `undefined` itself.
 */
type HeadersLike = Headers | Record<string, string | string[] | undefined> | undefined;
|
|
68
|
+
/**
|
|
69
|
+
* Parse rate-limit/overload retry-after headers (4 forms) and decide
|
|
70
|
+
* how long the caller should sleep before retrying. Returns either:
|
|
71
|
+
*
|
|
72
|
+
* - `{type: 'header', ...}` when one of the supported headers was found
|
|
73
|
+
* and converted into a wait duration; OR
|
|
74
|
+
* - `{type: 'backoff', ...}` falling back to exponential backoff for
|
|
75
|
+
* the given `attempt` index when no header is present.
|
|
76
|
+
*/
|
|
77
|
+
declare function parseRetryAfter(headers: HeadersLike, options: ParseRetryAfterOptions): RetryAfterResult;
|
|
78
|
+
/**
|
|
79
|
+
* Pull headers off a thrown error in the various shapes produced across
|
|
80
|
+
* provider SDKs (Anthropic, OpenAI, fetch-based custom providers).
|
|
81
|
+
* Returns `undefined` when no headers can be located — the helper then
|
|
82
|
+
* falls through to exponential backoff.
|
|
83
|
+
*/
|
|
84
|
+
declare function extractHeadersFromError(error: unknown): HeadersLike;
|
|
85
|
+
|
|
86
|
+
/**
|
|
87
|
+
* KodaX Base Provider
|
|
88
|
+
*
|
|
89
|
+
* Abstract provider base class — the shared foundation for all providers
|
|
90
|
+
*/
|
|
91
|
+
|
|
92
|
+
/**
|
|
93
|
+
* FEATURE_130 (v0.7.36): structured payload fired through
|
|
94
|
+
* `KodaXEvents.onRetryAfter` whenever a provider's `withRateLimit`
|
|
95
|
+
* loop catches a 429 / 503 / 529 response and decides to wait. The
|
|
96
|
+
* `source` field carries which retry-after header form (or fallback)
|
|
97
|
+
* produced the wait duration so UI surfaces can show "provider asked
|
|
98
|
+
* us to wait 45s" vs "no header, exp-backoff guess of 4s".
|
|
99
|
+
*/
|
|
100
|
+
interface KodaXRetryAfterEvent {
|
|
101
|
+
readonly provider: string;
|
|
102
|
+
readonly waitMs: number;
|
|
103
|
+
readonly reason: 'rate-limit' | 'overloaded';
|
|
104
|
+
readonly source: RetryAfterSource;
|
|
105
|
+
readonly attempt: number;
|
|
106
|
+
readonly maxAttempts: number;
|
|
107
|
+
}
|
|
108
|
+
/** Callback that receives a `KodaXRetryAfterEvent`; typed `void`, so any return value is ignored. */
type KodaXOnRetryAfterCallback = (event: KodaXRetryAfterEvent) => void;
|
|
109
|
+
declare abstract class KodaXBaseProvider {
|
|
110
|
+
abstract readonly name: string;
|
|
111
|
+
abstract readonly supportsThinking: boolean;
|
|
112
|
+
protected abstract readonly config: KodaXProviderConfig;
|
|
113
|
+
/**
|
|
114
|
+
* Per-request override for `max_tokens` in the next provider call. Consumed
|
|
115
|
+
* once and cleared in `withRateLimit` after the next successful response.
|
|
116
|
+
* Two callers set this:
|
|
117
|
+
* 1. Context-overflow recovery inside `withRateLimit` (reduces budget
|
|
118
|
+
* when the model reports "prompt too long").
|
|
119
|
+
* 2. The agent loop's max_tokens escalation path, which flips this to
|
|
120
|
+
* `KODAX_ESCALATED_MAX_OUTPUT_TOKENS` when a capped-budget turn
|
|
121
|
+
* returns `stop_reason: max_tokens`. See `coding/src/agent.ts`.
|
|
122
|
+
*/
|
|
123
|
+
protected maxOutputTokensOverride?: number;
|
|
124
|
+
/**
|
|
125
|
+
* Public setter for the one-shot override above. Callers outside the
|
|
126
|
+
* provider package (notably the agent loop's escalation branch) use this
|
|
127
|
+
* to stage a larger budget for the next stream call in the same logical
|
|
128
|
+
* turn. Pass `undefined` to clear a stale override explicitly.
|
|
129
|
+
*/
|
|
130
|
+
setMaxOutputTokensOverride(value: number | undefined): void;
|
|
131
|
+
/**
|
|
132
|
+
* Returns the max_tokens value the provider will currently use on its
|
|
133
|
+
* next request. Precedence (highest to lowest):
|
|
134
|
+
* 1. One-shot override (agent escalation, context-overflow recovery)
|
|
135
|
+
* 2. User env var `KODAX_MAX_OUTPUT_TOKENS` (explicit user intent)
|
|
136
|
+
* 3. Active model descriptor's `maxOutputTokens` (FEATURE_098)
|
|
137
|
+
* 4. Provider config default
|
|
138
|
+
* 5. Global `KODAX_MAX_TOKENS` fallback
|
|
139
|
+
* Used by provider stream() paths and by the agent loop to decide
|
|
140
|
+
* whether escalation is applicable (see `coding/src/agent.ts`).
|
|
141
|
+
*/
|
|
142
|
+
getEffectiveMaxOutputTokens(model?: string): number;
|
|
143
|
+
/**
|
|
144
|
+
* Hard cap on a single streaming request's wall-clock duration (ms).
|
|
145
|
+
* Returns undefined when no cap is configured. Consumed by the
|
|
146
|
+
* resilience layer to abort a doomed stream before the server-side
|
|
147
|
+
* kill window fires; routed through `non_streaming_fallback`.
|
|
148
|
+
*/
|
|
149
|
+
getStreamMaxDurationMs(): number | undefined;
|
|
150
|
+
abstract stream(messages: KodaXMessage[], tools: KodaXToolDefinition[], system: string, reasoning?: boolean | KodaXReasoningRequest, streamOptions?: KodaXProviderStreamOptions, signal?: AbortSignal): Promise<KodaXStreamResult>;
|
|
151
|
+
supportsNonStreamingFallback(): boolean;
|
|
152
|
+
complete(_messages: KodaXMessage[], _tools: KodaXToolDefinition[], _system: string, _reasoning?: boolean | KodaXReasoningRequest, _streamOptions?: KodaXProviderStreamOptions, _signal?: AbortSignal): Promise<KodaXStreamResult>;
|
|
153
|
+
isConfigured(): boolean;
|
|
154
|
+
getModel(): string;
|
|
155
|
+
getAvailableModels(): string[];
|
|
156
|
+
getModelDescriptor(modelId?: string): KodaXModelDescriptor | undefined;
|
|
157
|
+
getBaseUrl(): string | undefined;
|
|
158
|
+
getApiKeyEnv(): string;
|
|
159
|
+
getCapabilityProfile(): KodaXProviderCapabilityProfile;
|
|
160
|
+
getConfiguredReasoningCapability(modelOverride?: string): KodaXReasoningCapability;
|
|
161
|
+
getReasoningCapability(modelOverride?: string): KodaXReasoningCapability;
|
|
162
|
+
getReasoningOverride(modelOverride?: string): KodaXReasoningOverride | undefined;
|
|
163
|
+
getReasoningOverrideKey(modelOverride?: string): string;
|
|
164
|
+
protected persistReasoningCapabilityOverride(capability: KodaXReasoningCapability, modelOverride?: string): void;
|
|
165
|
+
protected shouldFallbackForReasoningError(error: unknown, ...terms: string[]): boolean;
|
|
166
|
+
protected shouldFallbackForSpecificReasoningError(error: unknown, ...terms: string[]): boolean;
|
|
167
|
+
protected getReasoningFallbackChain(capability: KodaXReasoningCapability): KodaXReasoningCapability[];
|
|
168
|
+
/**
|
|
169
|
+
* Get the model's context window size.
|
|
170
|
+
*
|
|
171
|
+
* Backwards-compatible no-arg form: resolves against the provider's
|
|
172
|
+
* default model descriptor. New call sites that know the active
|
|
173
|
+
* model should use `getEffectiveContextWindow(model)` directly.
|
|
174
|
+
* @returns Context window size (tokens)
|
|
175
|
+
*/
|
|
176
|
+
getContextWindow(): number;
|
|
177
|
+
/**
|
|
178
|
+
* Resolves the context window for a specific model.
|
|
179
|
+
* Precedence (highest to lowest):
|
|
180
|
+
* 1. Active model descriptor's `contextWindow` (FEATURE_098)
|
|
181
|
+
* 2. Provider config default
|
|
182
|
+
* 3. 200_000 fallback
|
|
183
|
+
* The user-level `compaction.contextWindow` is layered on top of
|
|
184
|
+
* this at the call site, so it remains the highest-priority manual
|
|
185
|
+
* override.
|
|
186
|
+
*/
|
|
187
|
+
getEffectiveContextWindow(model?: string): number;
|
|
188
|
+
protected getApiKey(): string;
|
|
189
|
+
protected shouldLogStreamDiagnostics(): boolean;
|
|
190
|
+
protected logStreamDiagnostic(...args: unknown[]): void;
|
|
191
|
+
protected normalizeReasoning(reasoning?: boolean | KodaXReasoningRequest): Required<KodaXReasoningRequest>;
|
|
192
|
+
/**
|
|
193
|
+
* Called when ECONNRESET/EPIPE is detected, indicating a stale keep-alive
|
|
194
|
+
* socket. Subclasses should override to rebuild their HTTP client with a
|
|
195
|
+
* fresh connection pool so the next retry uses a new TCP connection.
|
|
196
|
+
*/
|
|
197
|
+
protected onStaleConnection(): void;
|
|
198
|
+
protected isRateLimitError(error: unknown): boolean;
|
|
199
|
+
/**
|
|
200
|
+
* FEATURE_130: classify a rate-limit error as either a 429-style
|
|
201
|
+
* "rate-limit" or a 503/529-style "overloaded" condition. The
|
|
202
|
+
* distinction matters for UI: "rate-limit" usually surfaces a
|
|
203
|
+
* provider-supplied retry-after window; "overloaded" tends to fall
|
|
204
|
+
* through to exponential backoff with no header. Both flow through
|
|
205
|
+
* the same retry path; this only labels the event.
|
|
206
|
+
*/
|
|
207
|
+
protected classifyRateLimitReason(error: unknown): 'rate-limit' | 'overloaded';
|
|
208
|
+
/**
|
|
209
|
+
* Extract Retry-After delay from error headers (429/529 responses).
|
|
210
|
+
* Returns milliseconds, or undefined when no usable header is present.
|
|
211
|
+
*
|
|
212
|
+
* FEATURE_130 (v0.7.36): now delegates to the shared `parseRetryAfter`
|
|
213
|
+
* helper so all 12 provider adapters get 4-form coverage without each
|
|
214
|
+
* adapter rolling its own parser. The 4 forms supported are:
|
|
215
|
+
* - `Retry-After: <integer-seconds>`
|
|
216
|
+
* - `Retry-After: <HTTP-date>`
|
|
217
|
+
* - `retry-after-ms: <milliseconds>` (Anthropic extension)
|
|
218
|
+
* - exponential-backoff fallback (returned via `withRateLimit`,
|
|
219
|
+
* not through this helper — it is `undefined` here when no
|
|
220
|
+
* header is present, which the caller then resolves to backoff)
|
|
221
|
+
*/
|
|
222
|
+
protected extractRetryAfterMs(error: unknown): number | undefined;
|
|
223
|
+
/**
|
|
224
|
+
* Detect "prompt too long / context window exceeded" errors and compute
|
|
225
|
+
* a reduced max_tokens for retry. Returns undefined if not a context
|
|
226
|
+
* overflow error.
|
|
227
|
+
*/
|
|
228
|
+
protected parseContextOverflow(error: unknown): number | undefined;
|
|
229
|
+
protected isContextOverflowError(error: unknown): boolean;
|
|
230
|
+
protected withRateLimit<T>(fn: () => Promise<T>, signal?: AbortSignal, retries?: number, onRateLimit?: (attempt: number, maxRetries: number, delayMs: number) => void, onRetryAfter?: KodaXOnRetryAfterCallback): Promise<T>;
|
|
231
|
+
}
|
|
232
|
+
|
|
233
|
+
/**
|
|
234
|
+
* KodaX Cost Rates - Multi-Provider pricing table
|
|
235
|
+
*
|
|
236
|
+
* Cost rate table — billing rates for all providers
|
|
237
|
+
* Supports cost tracking for 11 built-in providers; users can override the default rates
|
|
238
|
+
*/
|
|
239
|
+
/**
 * Pricing entry for a single model.
 * NOTE(review): the `Per1M` suffix presumably means "price per one million
 * tokens", and the currency is presumably USD (cf. `formatCost(usd)`) — confirm.
 */
interface CostRate {
|
|
240
|
+
/** Rate for input tokens. */
readonly inputPer1M: number;
|
|
241
|
+
/** Rate for output tokens. */
readonly outputPer1M: number;
|
|
242
|
+
/** Optional rate for cache tokens. NOTE(review): how cost is computed when this is omitted is not visible here. */
readonly cachePer1M?: number;
|
|
243
|
+
}
|
|
244
|
+
/**
 * Built-in cost rates, exposed as a two-level read-only record of `CostRate`.
 * NOTE(review): keys are presumably provider name, then model name (mirroring
 * the `getCostRate(provider, model, ...)` parameters) — confirm.
 */
declare const DEFAULT_COST_RATES: Readonly<Record<string, Readonly<Record<string, CostRate>>>>;
|
|
245
|
+
/**
 * Declared to return the `CostRate` for a provider/model pair, or `undefined`.
 * `userOverrides` has the same nested shape as `DEFAULT_COST_RATES`.
 * NOTE(review): overrides presumably take precedence over the defaults, and
 * `undefined` presumably means "no rate known" — implementation not visible here.
 */
declare function getCostRate(provider: string, model: string, userOverrides?: Readonly<Record<string, Readonly<Record<string, CostRate>>>>): CostRate | undefined;
|
|
246
|
+
/**
 * Declared to produce a numeric cost from a `CostRate` and token counts;
 * `cacheTokens` is optional.
 * NOTE(review): presumably tokens multiplied by the per-1M rate and divided by
 * 1,000,000 — implementation not visible here.
 */
declare function calculateCost(rate: CostRate, inputTokens: number, outputTokens: number, cacheTokens?: number): number;
|
|
247
|
+
|
|
248
|
+
/**
|
|
249
|
+
* KodaX Cost Tracker - Immutable session cost tracking
|
|
250
|
+
*
|
|
251
|
+
* Cost tracker — immutable session cost tracking
|
|
252
|
+
* Uses an immutable pattern: every operation returns a new object instead of modifying the existing one
|
|
253
|
+
*/
|
|
254
|
+
|
|
255
|
+
interface TokenUsageRecord {
|
|
256
|
+
readonly timestamp: number;
|
|
257
|
+
readonly provider: string;
|
|
258
|
+
readonly model: string;
|
|
259
|
+
readonly inputTokens: number;
|
|
260
|
+
readonly outputTokens: number;
|
|
261
|
+
readonly cacheReadTokens: number;
|
|
262
|
+
readonly cacheWriteTokens: number;
|
|
263
|
+
readonly cost: number;
|
|
264
|
+
readonly role?: string;
|
|
265
|
+
}
|
|
266
|
+
/**
 * Aggregated totals for one bucket of a session summary. Used as the value
 * type of both `SessionCostSummary.byProvider` and `SessionCostSummary.byRole`.
 */
interface ProviderCostSummary {
|
|
267
|
+
readonly cost: number;
|
|
268
|
+
readonly calls: number;
|
|
269
|
+
readonly inputTokens: number;
|
|
270
|
+
readonly outputTokens: number;
|
|
271
|
+
}
|
|
272
|
+
/**
|
|
273
|
+
* FEATURE_130 (v0.7.36) — per-retry record. Captures the wait the
|
|
274
|
+
* provider asked us to take so `/cost` can report "X retries, Ys total
|
|
275
|
+
* wait" alongside the token cost. Lives in the same tracker as token
|
|
276
|
+
* records to keep one source of truth for the session.
|
|
277
|
+
*/
|
|
278
|
+
interface RetryRecord {
|
|
279
|
+
readonly timestamp: number;
|
|
280
|
+
readonly provider: string;
|
|
281
|
+
readonly waitMs: number;
|
|
282
|
+
readonly reason: 'rate-limit' | 'overloaded';
|
|
283
|
+
readonly source: 'retry-after-seconds' | 'retry-after-date' | 'retry-after-ms' | 'exponential-backoff';
|
|
284
|
+
}
|
|
285
|
+
interface SessionCostSummary {
|
|
286
|
+
readonly totalCost: number;
|
|
287
|
+
readonly totalInputTokens: number;
|
|
288
|
+
readonly totalOutputTokens: number;
|
|
289
|
+
readonly totalCacheTokens: number;
|
|
290
|
+
/** FEATURE_116 (v0.7.37): cumulative cache-read input tokens across the
|
|
291
|
+
* session. Splits `totalCacheTokens` into the read half so the hit rate
|
|
292
|
+
* (`cacheHitRate = totalCacheReadTokens / totalCacheTokens`) is
|
|
293
|
+
* derivable for `/cost` reporting. */
|
|
294
|
+
readonly totalCacheReadTokens: number;
|
|
295
|
+
/** FEATURE_116 (v0.7.37): cumulative cache-write (creation) input tokens. */
|
|
296
|
+
readonly totalCacheWriteTokens: number;
|
|
297
|
+
/** FEATURE_116 (v0.7.37): cache-read share of all cache tokens this
|
|
298
|
+
* session, in [0, 1]. Computed as `totalCacheReadTokens /
|
|
299
|
+
* (totalCacheReadTokens + totalCacheWriteTokens)`. Is 0 when no
|
|
300
|
+
* cache activity has been recorded; treat that case as "untracked"
|
|
301
|
+
* rather than as a genuine 0% hit rate. */
|
|
302
|
+
readonly cacheHitRate: number;
|
|
303
|
+
readonly callCount: number;
|
|
304
|
+
/** FEATURE_130: total retries triggered across the session. */
|
|
305
|
+
readonly retryCount: number;
|
|
306
|
+
/** FEATURE_130: cumulative milliseconds spent in retry-after sleeps. */
|
|
307
|
+
readonly retryWaitMs: number;
|
|
308
|
+
readonly byProvider: Readonly<Record<string, ProviderCostSummary>>;
|
|
309
|
+
readonly byRole: Readonly<Record<string, ProviderCostSummary>>;
|
|
310
|
+
}
|
|
311
|
+
/**
 * Session tracker state: token-usage records plus retry-wait records.
 * Both arrays are typed read-only; `recordUsage` and `recordRetry` take a
 * tracker and return a `CostTracker` rather than returning `void`.
 */
interface CostTracker {
|
|
312
|
+
/** Token-usage records, one `TokenUsageRecord` per entry. */
readonly records: readonly TokenUsageRecord[];
|
|
313
|
+
/** FEATURE_130 (v0.7.36): retry-wait records, append-only and immutable. */
|
|
314
|
+
readonly retries: readonly RetryRecord[];
|
|
315
|
+
}
|
|
316
|
+
/**
 * Creates a `CostTracker`.
 * NOTE(review): presumably starts with empty `records` and `retries` —
 * implementation not visible here.
 */
declare function createCostTracker(): CostTracker;
|
|
317
|
+
/**
|
|
318
|
+
* FEATURE_130 (v0.7.36): record a retry-after wait. The InkREPL spinner
|
|
319
|
+
* (or any other consumer of `KodaXEvents.onRetryAfter`) calls this so
|
|
320
|
+
* `/cost` can surface accurate session-wide retry telemetry.
|
|
321
|
+
*/
|
|
322
|
+
declare function recordRetry(tracker: CostTracker, entry: {
|
|
323
|
+
readonly provider: string;
|
|
324
|
+
readonly waitMs: number;
|
|
325
|
+
readonly reason: 'rate-limit' | 'overloaded';
|
|
326
|
+
readonly source: 'retry-after-seconds' | 'retry-after-date' | 'retry-after-ms' | 'exponential-backoff';
|
|
327
|
+
}): CostTracker;
|
|
328
|
+
declare function recordUsage(tracker: CostTracker, entry: {
|
|
329
|
+
readonly provider: string;
|
|
330
|
+
readonly model: string;
|
|
331
|
+
readonly inputTokens: number;
|
|
332
|
+
readonly outputTokens: number;
|
|
333
|
+
readonly cacheReadTokens?: number;
|
|
334
|
+
readonly cacheWriteTokens?: number;
|
|
335
|
+
readonly role?: string;
|
|
336
|
+
}, userCostOverrides?: Readonly<Record<string, Readonly<Record<string, CostRate>>>>): CostTracker;
|
|
337
|
+
/** Derives a `SessionCostSummary` from the given tracker. */
declare function getSummary(tracker: CostTracker): SessionCostSummary;
|
|
338
|
+
/** Formats a cost amount as a string; the parameter name indicates the amount is in USD. */
declare function formatCost(usd: number): string;
|
|
339
|
+
/**
 * Renders a `SessionCostSummary` as a string.
 * NOTE(review): presumably the text surfaced by the `/cost` command — confirm.
 */
declare function formatCostReport(summary: SessionCostSummary): string;
|
|
340
|
+
|
|
341
|
+
// Value exports under single-letter aliases.
// NOTE(review): the aliases look bundler-generated; consumers presumably import
// these through the package entry points rather than from this chunk — confirm.
export { DEFAULT_COST_RATES as D, KodaXBaseProvider as K, calculateCost as g, createCostTracker as h, extractHeadersFromError as i, formatCost as j, formatCostReport as k, getCostRate as l, getSummary as m, recordUsage as n, parseRetryAfter as p, recordRetry as r };
|
|
342
|
+
// Type-only exports under single-letter aliases.
// NOTE(review): the aliases look bundler-generated and may change between
// releases — confirm before depending on them directly.
export type { CostRate as C, ParseRetryAfterOptions as P, RetryAfterResult as R, SessionCostSummary as S, TokenUsageRecord as T, CostTracker as a, KodaXOnRetryAfterCallback as b, KodaXRetryAfterEvent as c, ProviderCostSummary as d, RetryAfterSource as e, RetryRecord as f };
|