@kaelio/ktx 0.9.0 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (143) hide show
  1. package/assets/python/{kaelio_ktx-0.9.0-py3-none-any.whl → kaelio_ktx-0.11.0-py3-none-any.whl} +0 -0
  2. package/assets/python/manifest.json +4 -4
  3. package/dist/.tsbuildinfo +1 -1
  4. package/dist/clack.d.ts +6 -0
  5. package/dist/clack.js +17 -2
  6. package/dist/cli-program.d.ts +3 -0
  7. package/dist/cli-program.js +46 -2
  8. package/dist/cli-runtime.d.ts +5 -0
  9. package/dist/cli-runtime.js +50 -0
  10. package/dist/commands/setup-commands.js +2 -3
  11. package/dist/community-cta.d.ts +11 -0
  12. package/dist/community-cta.js +19 -0
  13. package/dist/connection.js +23 -1
  14. package/dist/connectors/bigquery/connector.d.ts +2 -5
  15. package/dist/connectors/bigquery/connector.js +2 -2
  16. package/dist/connectors/clickhouse/connector.d.ts +2 -5
  17. package/dist/connectors/clickhouse/connector.js +2 -2
  18. package/dist/connectors/mysql/connector.d.ts +7 -6
  19. package/dist/connectors/mysql/connector.js +25 -5
  20. package/dist/connectors/mysql/dialect.d.ts +1 -1
  21. package/dist/connectors/mysql/dialect.js +12 -2
  22. package/dist/connectors/postgres/connector.d.ts +2 -5
  23. package/dist/connectors/postgres/connector.js +2 -2
  24. package/dist/connectors/snowflake/connector.d.ts +2 -5
  25. package/dist/connectors/snowflake/connector.js +2 -2
  26. package/dist/connectors/sqlite/connector.d.ts +2 -5
  27. package/dist/connectors/sqlite/connector.js +2 -2
  28. package/dist/connectors/sqlserver/connector.d.ts +2 -5
  29. package/dist/connectors/sqlserver/connector.js +2 -2
  30. package/dist/context/connections/drivers.d.ts +0 -1
  31. package/dist/context/connections/drivers.js +0 -7
  32. package/dist/context/connections/query-executor.d.ts +2 -1
  33. package/dist/context/core/abort.d.ts +9 -0
  34. package/dist/context/core/abort.js +36 -0
  35. package/dist/context/core/git-env.d.ts +12 -1
  36. package/dist/context/core/git-env.js +17 -2
  37. package/dist/context/core/git.service.js +15 -7
  38. package/dist/context/ingest/adapters/historic-sql/query-history-filter-picker.d.ts +1 -0
  39. package/dist/context/ingest/adapters/historic-sql/query-history-filter-picker.js +6 -2
  40. package/dist/context/ingest/context-candidates/curator-pagination.service.d.ts +1 -5
  41. package/dist/context/ingest/context-candidates/curator-pagination.service.js +1 -3
  42. package/dist/context/ingest/context-evidence/sqlite-context-evidence-store.d.ts +1 -1
  43. package/dist/context/ingest/final-gate-repair.d.ts +1 -0
  44. package/dist/context/ingest/final-gate-repair.js +1 -0
  45. package/dist/context/ingest/ingest-bundle.runner.d.ts +3 -0
  46. package/dist/context/ingest/ingest-bundle.runner.js +127 -53
  47. package/dist/context/ingest/isolated-diff/textual-conflict-resolver.d.ts +1 -0
  48. package/dist/context/ingest/isolated-diff/textual-conflict-resolver.js +1 -0
  49. package/dist/context/ingest/isolated-diff/work-unit-executor.d.ts +1 -0
  50. package/dist/context/ingest/local-bundle-runtime.js +11 -4
  51. package/dist/context/ingest/local-ingest.d.ts +1 -0
  52. package/dist/context/ingest/local-ingest.js +13 -3
  53. package/dist/context/ingest/memory-flow/events.js +1 -1
  54. package/dist/context/ingest/memory-flow/schema.js +8 -3
  55. package/dist/context/ingest/memory-flow/types.d.ts +7 -3
  56. package/dist/context/ingest/ports.d.ts +3 -5
  57. package/dist/context/ingest/stages/stage-3-work-units.d.ts +1 -4
  58. package/dist/context/ingest/stages/stage-3-work-units.js +5 -1
  59. package/dist/context/ingest/stages/stage-4-reconciliation.d.ts +1 -4
  60. package/dist/context/ingest/stages/stage-4-reconciliation.js +1 -1
  61. package/dist/context/ingest/types.d.ts +1 -0
  62. package/dist/context/llm/ai-sdk-runtime.d.ts +3 -0
  63. package/dist/context/llm/ai-sdk-runtime.js +152 -16
  64. package/dist/context/llm/claude-code-runtime.d.ts +6 -4
  65. package/dist/context/llm/claude-code-runtime.js +127 -48
  66. package/dist/context/llm/codex-runtime.d.ts +3 -3
  67. package/dist/context/llm/codex-runtime.js +90 -47
  68. package/dist/context/llm/local-config.d.ts +15 -5
  69. package/dist/context/llm/local-config.js +6 -1
  70. package/dist/context/llm/rate-limit-governor.d.ts +103 -0
  71. package/dist/context/llm/rate-limit-governor.js +285 -0
  72. package/dist/context/llm/runtime-port.d.ts +3 -6
  73. package/dist/context/mcp/context-tools.js +43 -13
  74. package/dist/context/project/config.d.ts +12 -0
  75. package/dist/context/project/config.js +35 -0
  76. package/dist/context/scan/types.d.ts +15 -2
  77. package/dist/context/scan/types.js +12 -0
  78. package/dist/context/sl/description-normalization.js +4 -14
  79. package/dist/context/tools/context-candidate-mark.tool.d.ts +2 -2
  80. package/dist/context-build-view.d.ts +13 -0
  81. package/dist/context-build-view.js +60 -1
  82. package/dist/demo-metrics.d.ts +0 -2
  83. package/dist/demo-metrics.js +1 -11
  84. package/dist/ingest.d.ts +1 -0
  85. package/dist/ingest.js +32 -3
  86. package/dist/io/symbols.d.ts +2 -0
  87. package/dist/io/symbols.js +2 -0
  88. package/dist/io/tty.d.ts +9 -0
  89. package/dist/io/tty.js +5 -0
  90. package/dist/links.d.ts +1 -0
  91. package/dist/links.js +1 -0
  92. package/dist/memory-flow-hud.js +8 -16
  93. package/dist/public-ingest.js +50 -15
  94. package/dist/reveal-password-prompt.d.ts +24 -0
  95. package/dist/reveal-password-prompt.js +78 -0
  96. package/dist/scan.js +18 -2
  97. package/dist/setup-agents.js +1 -5
  98. package/dist/setup-databases.d.ts +1 -0
  99. package/dist/setup-databases.js +23 -3
  100. package/dist/setup-demo-tour.js +1 -0
  101. package/dist/setup-embeddings.js +1 -1
  102. package/dist/setup-models.d.ts +1 -14
  103. package/dist/setup-models.js +116 -340
  104. package/dist/setup-prompts.js +4 -7
  105. package/dist/setup-sources.js +7 -7
  106. package/dist/setup.d.ts +26 -1
  107. package/dist/setup.js +78 -7
  108. package/dist/sl.d.ts +2 -2
  109. package/dist/sl.js +20 -4
  110. package/dist/sql.js +18 -2
  111. package/dist/star-prompt/cache.d.ts +16 -0
  112. package/dist/star-prompt/cache.js +45 -0
  113. package/dist/star-prompt/star-count.d.ts +7 -0
  114. package/dist/star-prompt/star-count.js +66 -0
  115. package/dist/star-prompt/star-line.d.ts +12 -0
  116. package/dist/star-prompt/star-line.js +26 -0
  117. package/dist/telemetry/command-hook.d.ts +24 -0
  118. package/dist/telemetry/command-hook.js +37 -3
  119. package/dist/telemetry/emitter.d.ts +10 -0
  120. package/dist/telemetry/emitter.js +31 -0
  121. package/dist/telemetry/events.d.ts +24 -0
  122. package/dist/telemetry/events.js +15 -0
  123. package/dist/telemetry/exception.d.ts +18 -0
  124. package/dist/telemetry/exception.js +162 -0
  125. package/dist/telemetry/index.d.ts +4 -3
  126. package/dist/telemetry/index.js +3 -2
  127. package/dist/telemetry/redaction-secrets.d.ts +11 -0
  128. package/dist/telemetry/redaction-secrets.js +92 -0
  129. package/dist/update-check/cache.d.ts +21 -0
  130. package/dist/update-check/cache.js +38 -0
  131. package/dist/update-check/channel.d.ts +15 -0
  132. package/dist/update-check/channel.js +30 -0
  133. package/dist/update-check/registry.d.ts +1 -0
  134. package/dist/update-check/registry.js +45 -0
  135. package/dist/update-check/update-check.d.ts +43 -0
  136. package/dist/update-check/update-check.js +116 -0
  137. package/package.json +8 -1
  138. package/dist/context/connections/local-query-executor.d.ts +0 -6
  139. package/dist/context/connections/local-query-executor.js +0 -39
  140. package/dist/context/connections/postgres-query-executor.d.ts +0 -25
  141. package/dist/context/connections/postgres-query-executor.js +0 -53
  142. package/dist/context/connections/sqlite-query-executor.d.ts +0 -4
  143. package/dist/context/connections/sqlite-query-executor.js +0 -74
@@ -1,5 +1,5 @@
1
1
  import { z } from 'zod';
2
- import { noopLogger } from '../core/config.js';
2
+ import { isAbortError, linkAbortSignal } from '../core/abort.js';
3
3
  import { isCompletedAgentStep, summarizeCodexExecEvents } from './codex-exec-events.js';
4
4
  import { startCodexRuntimeMcpServer, } from './codex-mcp-runtime-server.js';
5
5
  import { resolveCodexModel } from './codex-models.js';
@@ -18,8 +18,8 @@ function isTurnCompleted(event) {
18
18
  return eventRecord(event)?.type === 'turn.completed';
19
19
  }
20
20
  /**
21
- * Drains the Codex stream once, emitting a step as each agent action completes
22
- * so callers see live progress and the step budget is enforced mid-run. Every
21
+ * Drains the Codex stream once, counting each completed agent action so the
22
+ * step budget is enforced mid-run. Every
23
23
  * completed agent-action item counts (see {@link isCompletedAgentStep}), so
24
24
  * built-in `command_execution` steps decrement the budget the same as
25
25
  * `mcp_tool_call`s. A turn that produced no actions still counts as one step,
@@ -48,7 +48,6 @@ async function collectEvents(events, options = {}) {
48
48
  continue;
49
49
  }
50
50
  completedSteps += 1;
51
- await options.onStep?.(completedSteps);
52
51
  if (isActionStep && options.stepBudget !== undefined && completedSteps >= options.stepBudget) {
53
52
  budgetExceeded = true;
54
53
  options.abortController?.abort();
@@ -107,14 +106,43 @@ async function mcpForTools(input) {
107
106
  function runtimeToolNames(toolSet) {
108
107
  return Object.values(toolSet ?? {}).map((descriptor) => descriptor.name);
109
108
  }
109
/**
 * Phrases that identify a Codex failure as a provider rate limit. Codex does
 * not expose a structured rate-limit signal here, so detection is by message
 * text only (case-insensitive; `429` must stand alone as a word).
 */
const CODEX_RATE_LIMIT_MARKERS = /\b429\b|rate limit|too many requests|quota exceeded|temporarily overloaded/i;
/**
 * Reports whether a failure looks like a provider rate limit.
 *
 * @param {unknown} error - An Error-like value with a `message`, a plain
 *   string message, or a nullish/falsy value when there was no failure.
 * @returns {boolean} `true` when the failure text contains a rate-limit marker.
 */
function isCodexRateLimitError(error) {
    if (!error) {
        return false;
    }
    // Accept bare string failures as well as Error-like objects; a string has
    // no `.message`, so it would otherwise never match.
    const text = typeof error === 'string' ? error : error.message;
    // A missing message can never match any marker, so answer directly.
    return text != null && CODEX_RATE_LIMIT_MARKERS.test(String(text));
}
110
113
  export class CodexKtxLlmRuntime {
111
114
  deps;
112
115
  runner;
113
- logger;
114
116
  constructor(deps) {
115
117
  this.deps = deps;
116
118
  this.runner = deps.runner ?? new CodexSdkCliRunner();
117
- this.logger = deps.logger ?? noopLogger;
119
+ }
120
+ async runWithRateLimitRetry(abortSignal, run, getError) {
121
+ // maxRetryAttempts() returns 1 when no governor is present or pacing is
122
+ // disabled, so an opaque rate-limit failure surfaces on the first attempt
123
+ // instead of being retried with no backoff.
124
+ const maxAttempts = this.deps.rateLimitGovernor?.maxRetryAttempts() ?? 1;
125
+ for (let attempt = 0;; attempt += 1) {
126
+ await this.deps.rateLimitGovernor?.waitForReady(abortSignal);
127
+ const lastAttempt = attempt >= maxAttempts - 1;
128
+ try {
129
+ const result = await run();
130
+ const error = getError(result);
131
+ if (!isCodexRateLimitError(error) || lastAttempt) {
132
+ return result;
133
+ }
134
+ }
135
+ catch (error) {
136
+ if (isAbortError(error)) {
137
+ throw error;
138
+ }
139
+ const err = error instanceof Error ? error : new Error(String(error));
140
+ if (!isCodexRateLimitError(err) || lastAttempt) {
141
+ throw error;
142
+ }
143
+ }
144
+ this.deps.rateLimitGovernor?.report({ provider: 'codex', status: 'rejected', rateLimitType: 'opaque' });
145
+ }
118
146
  }
119
147
  async generateText(input) {
120
148
  const startedAt = Date.now();
@@ -138,16 +166,20 @@ export class CodexKtxLlmRuntime {
138
166
  }
139
167
  : {}),
140
168
  });
141
- const collected = await collectEvents(await this.runner.runStreamed({
142
- projectDir: this.deps.projectDir,
143
- model,
144
- prompt: promptWithSystem(input.system, input.prompt),
145
- configOverrides: config.configOverrides,
146
- env: config.env,
147
- }));
148
- const summary = summarizeCodexExecEvents(collected.events, { startedAt });
149
- input.onMetrics?.(metrics(summary, startedAt));
150
- return assertSuccessfulText(summary, collected.streamError);
169
+ const result = await this.runWithRateLimitRetry(input.abortSignal, async () => {
170
+ const collected = await collectEvents(await this.runner.runStreamed({
171
+ projectDir: this.deps.projectDir,
172
+ model,
173
+ prompt: promptWithSystem(input.system, input.prompt),
174
+ configOverrides: config.configOverrides,
175
+ env: config.env,
176
+ ...(input.abortSignal ? { signal: input.abortSignal } : {}),
177
+ }));
178
+ const summary = summarizeCodexExecEvents(collected.events, { startedAt });
179
+ return { collected, summary };
180
+ }, ({ collected, summary }) => summaryError(summary, collected.streamError));
181
+ input.onMetrics?.(metrics(result.summary, startedAt));
182
+ return assertSuccessfulText(result.summary, result.collected.streamError);
151
183
  }
152
184
  finally {
153
185
  await mcp?.close();
@@ -175,17 +207,21 @@ export class CodexKtxLlmRuntime {
175
207
  }
176
208
  : {}),
177
209
  });
178
- const collected = await collectEvents(await this.runner.runStreamed({
179
- projectDir: this.deps.projectDir,
180
- model,
181
- prompt: promptWithSystem(input.system, input.prompt),
182
- configOverrides: config.configOverrides,
183
- env: config.env,
184
- outputSchema: z.toJSONSchema(input.schema, { target: 'draft-7' }),
185
- }));
186
- const summary = summarizeCodexExecEvents(collected.events, { startedAt });
187
- input.onMetrics?.(metrics(summary, startedAt));
188
- return parseStructuredOutput(input.schema, assertSuccessfulText(summary, collected.streamError));
210
+ const result = await this.runWithRateLimitRetry(input.abortSignal, async () => {
211
+ const collected = await collectEvents(await this.runner.runStreamed({
212
+ projectDir: this.deps.projectDir,
213
+ model,
214
+ prompt: promptWithSystem(input.system, input.prompt),
215
+ configOverrides: config.configOverrides,
216
+ env: config.env,
217
+ outputSchema: z.toJSONSchema(input.schema, { target: 'draft-7' }),
218
+ ...(input.abortSignal ? { signal: input.abortSignal } : {}),
219
+ }));
220
+ const summary = summarizeCodexExecEvents(collected.events, { startedAt });
221
+ return { collected, summary };
222
+ }, ({ collected, summary }) => summaryError(summary, collected.streamError));
223
+ input.onMetrics?.(metrics(result.summary, startedAt));
224
+ return parseStructuredOutput(input.schema, assertSuccessfulText(result.summary, result.collected.streamError));
189
225
  }
190
226
  finally {
191
227
  await mcp?.close();
@@ -214,38 +250,45 @@ export class CodexKtxLlmRuntime {
214
250
  }
215
251
  : {}),
216
252
  });
217
- const abortController = new AbortController();
218
- const onStep = async (stepIndex) => {
253
+ const result = await this.runWithRateLimitRetry(params.abortSignal, async () => {
254
+ const linked = linkAbortSignal(params.abortSignal);
255
+ const abortController = linked.controller;
219
256
  try {
220
- await params.onStepFinish?.({ stepIndex, stepBudget: params.stepBudget });
257
+ const collected = await collectEvents(await this.runner.runStreamed({
258
+ projectDir: this.deps.projectDir,
259
+ model,
260
+ prompt: promptWithSystem(params.systemPrompt, params.userPrompt),
261
+ configOverrides: config.configOverrides,
262
+ env: config.env,
263
+ signal: abortController.signal,
264
+ }), { stepBudget: params.stepBudget, abortController });
265
+ const summary = summarizeCodexExecEvents(collected.events, { startedAt });
266
+ return { collected, summary };
221
267
  }
222
- catch (error) {
223
- this.logger.warn(`[codex-runner] onStepFinish callback threw; ignoring: ${error instanceof Error ? error.message : String(error)}`);
268
+ finally {
269
+ linked.dispose();
224
270
  }
225
- };
226
- const collected = await collectEvents(await this.runner.runStreamed({
227
- projectDir: this.deps.projectDir,
228
- model,
229
- prompt: promptWithSystem(params.systemPrompt, params.userPrompt),
230
- configOverrides: config.configOverrides,
231
- env: config.env,
232
- signal: abortController.signal,
233
- }), { stepBudget: params.stepBudget, abortController, onStep });
234
- const summary = summarizeCodexExecEvents(collected.events, { startedAt });
235
- const error = summaryError(summary, collected.streamError);
236
- const stopReason = collected.budgetExceeded ? 'budget' : error ? 'error' : summary.stopReason;
271
+ }, ({ collected, summary }) => summaryError(summary, collected.streamError));
272
+ const error = summaryError(result.summary, result.collected.streamError);
273
+ if (isAbortError(error)) {
274
+ throw error;
275
+ }
276
+ const stopReason = result.collected.budgetExceeded ? 'budget' : error ? 'error' : result.summary.stopReason;
237
277
  return {
238
278
  stopReason,
239
279
  ...(stopReason === 'error' && error ? { error } : {}),
240
280
  metrics: {
241
281
  totalMs: Date.now() - startedAt,
242
- usage: summary.usage,
243
- stepCount: summary.stepCount,
244
- stepBoundariesMs: summary.stepBoundariesMs,
282
+ usage: result.summary.usage,
283
+ stepCount: result.summary.stepCount,
284
+ stepBoundariesMs: result.summary.stepBoundariesMs,
245
285
  },
246
286
  };
247
287
  }
248
288
  catch (error) {
289
+ if (isAbortError(error)) {
290
+ throw error;
291
+ }
249
292
  const err = error instanceof Error ? error : new Error(String(error));
250
293
  return {
251
294
  stopReason: 'error',
@@ -2,19 +2,29 @@ import { createKtxEmbeddingProvider } from '../../llm/embedding-provider.js';
2
2
  import { createKtxLlmProvider } from '../../llm/model-provider.js';
3
3
  import type { KtxEmbeddingConfig, KtxEmbeddingProvider, KtxLlmConfig, KtxLlmProvider } from '../../llm/types.js';
4
4
  import type { KtxProjectEmbeddingConfig, KtxProjectLlmConfig } from '../project/config.js';
5
+ import { AiSdkKtxLlmRuntime } from './ai-sdk-runtime.js';
5
6
  import { ClaudeCodeKtxLlmRuntime } from './claude-code-runtime.js';
6
7
  import { CodexKtxLlmRuntime } from './codex-runtime.js';
8
+ import type { RateLimitGovernor } from './rate-limit-governor.js';
7
9
  import type { KtxLlmRuntimePort } from './runtime-port.js';
10
+ type ClaudeCodeRuntimeDeps = ConstructorParameters<typeof ClaudeCodeKtxLlmRuntime>[0] & {
11
+ rateLimitGovernor?: RateLimitGovernor;
12
+ };
13
+ type CodexRuntimeDeps = ConstructorParameters<typeof CodexKtxLlmRuntime>[0] & {
14
+ rateLimitGovernor?: RateLimitGovernor;
15
+ };
16
+ type AiSdkRuntimeDeps = ConstructorParameters<typeof AiSdkKtxLlmRuntime>[0] & {
17
+ rateLimitGovernor?: RateLimitGovernor;
18
+ };
8
19
  interface LocalConfigDeps {
9
20
  env?: NodeJS.ProcessEnv;
10
21
  projectDir?: string;
22
+ rateLimitGovernor?: RateLimitGovernor;
11
23
  createKtxLlmProvider?: typeof createKtxLlmProvider;
12
24
  createKtxEmbeddingProvider?: typeof createKtxEmbeddingProvider;
13
- createClaudeCodeRuntime?: (deps: ConstructorParameters<typeof ClaudeCodeKtxLlmRuntime>[0]) => KtxLlmRuntimePort;
14
- createCodexRuntime?: (deps: ConstructorParameters<typeof CodexKtxLlmRuntime>[0]) => KtxLlmRuntimePort;
15
- createAiSdkRuntime?: (deps: {
16
- llmProvider: KtxLlmProvider;
17
- }) => KtxLlmRuntimePort;
25
+ createClaudeCodeRuntime?: (deps: ClaudeCodeRuntimeDeps) => KtxLlmRuntimePort;
26
+ createCodexRuntime?: (deps: CodexRuntimeDeps) => KtxLlmRuntimePort;
27
+ createAiSdkRuntime?: (deps: AiSdkRuntimeDeps) => KtxLlmRuntimePort;
18
28
  }
19
29
  export declare function resolveLocalKtxLlmConfig(config: KtxProjectLlmConfig, env: NodeJS.ProcessEnv): KtxLlmConfig | null;
20
30
  /** @internal */
@@ -90,6 +90,7 @@ export function createLocalKtxLlmRuntimeFromConfig(config, deps = {}) {
90
90
  projectDir,
91
91
  modelSlots: resolved.modelSlots,
92
92
  env: deps.env,
93
+ rateLimitGovernor: deps.rateLimitGovernor,
93
94
  });
94
95
  }
95
96
  if (resolved.backend === 'codex') {
@@ -100,10 +101,14 @@ export function createLocalKtxLlmRuntimeFromConfig(config, deps = {}) {
100
101
  return (deps.createCodexRuntime ?? ((runtimeDeps) => new CodexKtxLlmRuntime(runtimeDeps)))({
101
102
  projectDir,
102
103
  modelSlots: resolved.modelSlots,
104
+ rateLimitGovernor: deps.rateLimitGovernor,
103
105
  });
104
106
  }
105
107
  const llmProvider = (deps.createKtxLlmProvider ?? createKtxLlmProvider)(resolved);
106
- return (deps.createAiSdkRuntime ?? ((runtimeDeps) => new AiSdkKtxLlmRuntime(runtimeDeps)))({ llmProvider });
108
+ return (deps.createAiSdkRuntime ?? ((runtimeDeps) => new AiSdkKtxLlmRuntime(runtimeDeps)))({
109
+ llmProvider,
110
+ rateLimitGovernor: deps.rateLimitGovernor,
111
+ });
107
112
  }
108
113
  export function resolveLocalKtxEmbeddingConfig(config, env) {
109
114
  if (config.backend === 'none') {
@@ -0,0 +1,103 @@
1
+ export type RateLimitProvider = 'claude-subscription' | 'anthropic-api' | 'vertex' | 'codex';
2
+ type RateLimitSignalStatus = 'allowed' | 'warning' | 'rejected';
3
+ export interface RateLimitSignal {
4
+ provider: RateLimitProvider;
5
+ status: RateLimitSignalStatus;
6
+ resetAtMs?: number;
7
+ retryAfterMs?: number;
8
+ utilization?: number;
9
+ rateLimitType?: string;
10
+ }
11
+ export interface RateLimitRetryConfig {
12
+ maxAttempts: number;
13
+ baseDelayMs: number;
14
+ maxDelayMs: number;
15
+ jitter: boolean;
16
+ }
17
+ export interface RateLimitGovernorConfig {
18
+ enabled: boolean;
19
+ maxConcurrency: number;
20
+ throttleThreshold: number;
21
+ minConcurrencyUnderPressure: number;
22
+ maxWaitMs?: number;
23
+ waitStateTickMs: number;
24
+ retry: RateLimitRetryConfig;
25
+ }
26
+ export type RateLimitWaitState = {
27
+ kind: 'rate_limit_observed';
28
+ provider: RateLimitProvider;
29
+ status: RateLimitSignalStatus;
30
+ rateLimitType?: string;
31
+ resetAtMs?: number;
32
+ retryAfterMs?: number;
33
+ utilization?: number;
34
+ } | {
35
+ kind: 'concurrency_adjusted';
36
+ provider: RateLimitProvider;
37
+ from: number;
38
+ to: number;
39
+ reason: string;
40
+ rateLimitType?: string;
41
+ utilization?: number;
42
+ } | {
43
+ kind: 'wait_started' | 'wait_tick' | 'wait_finished';
44
+ provider: RateLimitProvider;
45
+ rateLimitType?: string;
46
+ resumeAtMs: number;
47
+ remainingMs: number;
48
+ };
49
+ export interface RateLimitGovernorDeps {
50
+ now?: () => number;
51
+ sleep?: (ms: number, signal?: AbortSignal) => Promise<void>;
52
+ random?: () => number;
53
+ }
54
+ export type RateLimitRelease = () => void;
55
+ type Subscriber = (state: RateLimitWaitState) => void;
56
+ export declare function createRateLimitGovernorConfig(input?: Partial<RateLimitGovernorConfig> & {
57
+ retry?: Partial<RateLimitRetryConfig>;
58
+ }): RateLimitGovernorConfig;
59
+ export declare class RateLimitGovernor {
60
+ private readonly config;
61
+ private readonly now;
62
+ private readonly sleep;
63
+ private readonly random;
64
+ private readonly subscribers;
65
+ private waiters;
66
+ private active;
67
+ private effectiveLimit;
68
+ private pausedUntilMs;
69
+ private pausedProvider;
70
+ private pausedRateLimitType;
71
+ private pausedTickMs;
72
+ private opaqueAttempts;
73
+ private pauseGeneration;
74
+ private visibleWaitAbort;
75
+ constructor(config: RateLimitGovernorConfig, deps?: RateLimitGovernorDeps);
76
+ currentLimit(): number;
77
+ /**
78
+ * Total attempts a runtime should make for a single rate-limited LLM call,
79
+ * including the first try. Returns 1 (no outer retry) when pacing is disabled:
80
+ * the outer retry loop only exists to cooperate with this governor's pause, so
81
+ * without active pacing there is no backoff to apply and the backend's own
82
+ * retry handles transient rejections.
83
+ */
84
+ maxRetryAttempts(): number;
85
+ activeSlots(): number;
86
+ subscribe(cb: Subscriber): () => void;
87
+ report(signal: RateLimitSignal): void;
88
+ waitForReady(signal?: AbortSignal): Promise<void>;
89
+ acquireWorkSlot(signal?: AbortSignal): Promise<RateLimitRelease>;
90
+ private applyPause;
91
+ private resumeAtMsFor;
92
+ private adjustLimit;
93
+ private startVisibleWaitTicker;
94
+ private stopVisibleWaitTicker;
95
+ private runVisibleWaitTicker;
96
+ private finishPause;
97
+ private waitForPause;
98
+ private waitForSlot;
99
+ private wakeWaiters;
100
+ private emitWait;
101
+ private emit;
102
+ }
103
+ export {};
@@ -0,0 +1,285 @@
1
+ import { createAbortError, throwIfAborted } from '../core/abort.js';
2
/**
 * Default sleep: resolves after `ms` milliseconds, or rejects with an abort
 * error if `signal` is already aborted or aborts before the timer fires.
 *
 * @param {number} ms - Delay in milliseconds.
 * @param {AbortSignal} [signal] - Optional cancellation signal.
 * @returns {Promise<void>}
 */
const defaultSleep = (ms, signal) => new Promise((resolve, reject) => {
    if (signal?.aborted) {
        reject(createAbortError());
        return;
    }
    const onAbort = () => {
        clearTimeout(timeout);
        reject(createAbortError());
    };
    const timeout = setTimeout(() => {
        // Detach on normal completion: the same signal is reused for every
        // tick of a long wait, so leftover listeners would pile up on it.
        signal?.removeEventListener('abort', onAbort);
        resolve();
    }, ms);
    signal?.addEventListener('abort', onAbort, { once: true });
});
13
/**
 * Builds a complete governor configuration from a partial one.
 *
 * Every field that is missing (null or undefined) takes its default; nested
 * `retry` fields are defaulted individually. `maxWaitMs` has no default and
 * is only present on the result when the input defines it.
 *
 * @param input - Partial configuration; defaults to an empty object.
 * @returns The fully populated configuration object.
 */
export function createRateLimitGovernorConfig(input = {}) {
    const resolved = {
        enabled: input.enabled ?? true,
        maxConcurrency: input.maxConcurrency ?? 1,
        throttleThreshold: input.throttleThreshold ?? 0.8,
        minConcurrencyUnderPressure: input.minConcurrencyUnderPressure ?? 1,
    };
    if (input.maxWaitMs !== undefined) {
        resolved.maxWaitMs = input.maxWaitMs;
    }
    resolved.waitStateTickMs = input.waitStateTickMs ?? 1_000;
    const retryInput = input.retry;
    resolved.retry = {
        maxAttempts: retryInput?.maxAttempts ?? 6,
        baseDelayMs: retryInput?.baseDelayMs ?? 1_000,
        maxDelayMs: retryInput?.maxDelayMs ?? 60_000,
        jitter: retryInput?.jitter ?? true,
    };
    return resolved;
}
29
/**
 * Shared pacing state for LLM calls.
 *
 * Runtimes feed provider rate-limit observations in through `report()`. The
 * governor turns them into a pause (`waitForReady` / `acquireWorkSlot` block
 * until it ends) and into a concurrency ceiling enforced by `acquireWorkSlot`.
 * Subscribers receive every state change plus periodic wait ticks.
 */
export class RateLimitGovernor {
    /** Configuration supplied at construction. */
    config;
    /** Clock returning epoch milliseconds (injectable). */
    now;
    /** Abortable sleep (injectable). */
    sleep;
    /** Random source in [0, 1) used for backoff jitter (injectable). */
    random;
    /** Observers notified of every wait-state change. */
    subscribers = new Set();
    /** Wake callbacks of callers parked in `waitForSlot`. */
    waiters = [];
    /** Number of work slots currently held. */
    active = 0;
    /** Current concurrency ceiling, always between 1 and `maxConcurrency`. */
    effectiveLimit;
    /** Epoch ms at which the current pause ends, or null when not paused. */
    pausedUntilMs = null;
    /** Provider whose rejection caused the current pause. */
    pausedProvider = null;
    /** `rateLimitType` of the signal that caused the current pause. */
    pausedRateLimitType;
    /** Tick length override for opaque pauses; null uses `waitStateTickMs`. */
    pausedTickMs = null;
    /** Per-provider count of consecutive opaque rejections (drives backoff). */
    opaqueAttempts = new Map();
    /** Incremented per ticker start so a stale ticker can recognise itself. */
    pauseGeneration = 0;
    /** Abort handle of the running visible-wait ticker, or null when none. */
    visibleWaitAbort = null;
    /**
     * @param config - Governor configuration.
     * @param deps - Optional `now`, `sleep` and `random` overrides.
     */
    constructor(config, deps = {}) {
        this.config = config;
        this.now = deps.now ?? Date.now;
        this.sleep = deps.sleep ?? defaultSleep;
        this.random = deps.random ?? Math.random;
        this.effectiveLimit = Math.max(1, config.maxConcurrency);
    }
    /** Concurrency ceiling in force: the adaptive one when enabled, else the configured maximum. */
    currentLimit() {
        return this.config.enabled ? this.effectiveLimit : this.config.maxConcurrency;
    }
    /**
     * Total attempts a runtime should make for a single rate-limited LLM call,
     * including the first try. Returns 1 (no outer retry) when pacing is disabled:
     * the outer retry loop only exists to cooperate with this governor's pause, so
     * without active pacing there is no backoff to apply and the backend's own
     * retry handles transient rejections.
     */
    maxRetryAttempts() {
        return this.config.enabled ? Math.max(1, this.config.retry.maxAttempts) : 1;
    }
    /** Number of work slots currently held. */
    activeSlots() {
        return this.active;
    }
    /**
     * Registers an observer of wait-state changes.
     *
     * @param cb - Called synchronously with each state object.
     * @returns A function that removes the observer again.
     */
    subscribe(cb) {
        this.subscribers.add(cb);
        if (this.pausedUntilMs !== null) {
            // A pause is already running: start ticking for the new observer.
            this.startVisibleWaitTicker();
        }
        return () => {
            this.subscribers.delete(cb);
            if (this.subscribers.size === 0) {
                // Nobody is watching: stop ticking and wake parked callers so
                // they switch over to timing the pause themselves.
                this.stopVisibleWaitTicker();
                this.wakeWaiters();
            }
        };
    }
    /**
     * Records a provider rate-limit observation. Does nothing when pacing is
     * disabled. A `rejected` status starts or extends a pause; a `warning`
     * status or utilization at/above the threshold lowers the concurrency
     * ceiling; anything else clears the provider's opaque backoff counter and
     * restores the configured maximum.
     *
     * @param signal - The observation to apply.
     */
    report(signal) {
        if (!this.config.enabled) {
            return;
        }
        this.emit({
            kind: 'rate_limit_observed',
            provider: signal.provider,
            status: signal.status,
            ...(signal.rateLimitType ? { rateLimitType: signal.rateLimitType } : {}),
            ...(signal.resetAtMs !== undefined ? { resetAtMs: signal.resetAtMs } : {}),
            ...(signal.retryAfterMs !== undefined ? { retryAfterMs: signal.retryAfterMs } : {}),
            ...(signal.utilization !== undefined ? { utilization: signal.utilization } : {}),
        });
        if (signal.status === 'rejected') {
            this.applyPause(signal);
            return;
        }
        if (signal.status === 'warning' || (signal.utilization ?? 0) >= this.config.throttleThreshold) {
            this.adjustLimit(Math.max(1, this.config.minConcurrencyUnderPressure), signal, 'provider pressure');
            return;
        }
        this.opaqueAttempts.delete(signal.provider);
        if ((signal.utilization ?? 0) < this.config.throttleThreshold) {
            this.adjustLimit(Math.max(1, this.config.maxConcurrency), signal, 'provider recovered');
        }
    }
    /**
     * Resolves once no pause is in force (immediately when pacing is disabled).
     *
     * @param signal - Optional abort signal; aborting rejects the wait.
     */
    async waitForReady(signal) {
        throwIfAborted(signal);
        if (!this.config.enabled) {
            return;
        }
        await this.waitForPause(signal);
        throwIfAborted(signal);
    }
    /**
     * Waits for any pause to end and for a free slot under the current
     * ceiling, then claims the slot.
     *
     * @param signal - Optional abort signal; aborting rejects the wait.
     * @returns A release function. Calling it more than once has no further effect.
     */
    async acquireWorkSlot(signal) {
        throwIfAborted(signal);
        if (!this.config.enabled) {
            this.active += 1;
            // Guarded like the enabled path so a double release cannot push
            // the active count below the number of slots actually held.
            let released = false;
            return () => {
                if (released)
                    return;
                released = true;
                this.active -= 1;
            };
        }
        while (true) {
            throwIfAborted(signal);
            await this.waitForPause(signal);
            throwIfAborted(signal);
            if (this.active < this.effectiveLimit) {
                this.active += 1;
                let released = false;
                return () => {
                    if (released)
                        return;
                    released = true;
                    this.active -= 1;
                    this.wakeWaiters();
                };
            }
            await this.waitForSlot(signal);
        }
    }
    /** Starts a pause for a rejected signal, or extends the current one if the new end is later. */
    applyPause(signal) {
        const resumeAtMs = this.resumeAtMsFor(signal);
        const boundedResumeAtMs = this.config.maxWaitMs === undefined ? resumeAtMs : Math.min(resumeAtMs, this.now() + this.config.maxWaitMs);
        if (this.pausedUntilMs === null || boundedResumeAtMs > this.pausedUntilMs) {
            this.pausedUntilMs = boundedResumeAtMs;
            this.pausedProvider = signal.provider;
            this.pausedRateLimitType = signal.rateLimitType;
            // Opaque pauses tick once for their whole length instead of every waitStateTickMs.
            this.pausedTickMs = signal.rateLimitType === 'opaque' ? Math.max(1, boundedResumeAtMs - this.now()) : null;
            this.emitWait('wait_started');
            this.startVisibleWaitTicker();
            this.wakeWaiters();
        }
        this.adjustLimit(Math.max(1, this.config.minConcurrencyUnderPressure), signal, 'provider rejected');
    }
    /**
     * Computes when work may resume after a rejection: the provider's reset
     * time if given, else now + retry-after, else an exponential backoff keyed
     * on the provider's consecutive opaque rejections.
     */
    resumeAtMsFor(signal) {
        if (signal.resetAtMs !== undefined) {
            return signal.resetAtMs;
        }
        if (signal.retryAfterMs !== undefined) {
            return this.now() + signal.retryAfterMs;
        }
        // Clamp exactly as maxRetryAttempts() does; an unclamped value below 1
        // would make the exponent negative and shrink the delay under baseDelayMs.
        const attemptCap = Math.max(1, this.config.retry.maxAttempts);
        const attempts = this.opaqueAttempts.get(signal.provider) ?? 0;
        this.opaqueAttempts.set(signal.provider, Math.min(attempts + 1, attemptCap));
        const base = Math.min(this.config.retry.maxDelayMs, this.config.retry.baseDelayMs * 2 ** Math.min(attempts, attemptCap - 1));
        // Jitter scales the delay by a factor in [0.75, 1.25).
        const jitterMultiplier = this.config.retry.jitter ? 0.75 + this.random() * 0.5 : 1;
        return this.now() + Math.round(base * jitterMultiplier);
    }
    /** Moves the concurrency ceiling to `to` (bounded to 1..maxConcurrency), announcing any change. */
    adjustLimit(to, signal, reason) {
        const bounded = Math.max(1, Math.min(this.config.maxConcurrency, to));
        if (bounded === this.effectiveLimit) {
            return;
        }
        const from = this.effectiveLimit;
        this.effectiveLimit = bounded;
        this.emit({
            kind: 'concurrency_adjusted',
            provider: signal.provider,
            from,
            to: bounded,
            reason,
            ...(signal.rateLimitType ? { rateLimitType: signal.rateLimitType } : {}),
            ...(signal.utilization !== undefined ? { utilization: signal.utilization } : {}),
        });
        this.wakeWaiters();
    }
    /** (Re)starts the ticker that emits wait ticks; a no-op without subscribers or without a pause. */
    startVisibleWaitTicker() {
        if (this.subscribers.size === 0 || this.pausedUntilMs === null) {
            return;
        }
        this.stopVisibleWaitTicker();
        const generation = (this.pauseGeneration += 1);
        const controller = new AbortController();
        this.visibleWaitAbort = controller;
        void this.runVisibleWaitTicker(generation, controller.signal)
            .catch(() => undefined)
            .then(() => {
            // Every orderly exit (finished, stopped, superseded) has already
            // cleared or replaced the handle. If it is still ours, the ticker
            // died mid-pause. Callers parked in waitForPause rely on it to
            // wake them, so drop the handle and wake them to poll instead.
            if (this.visibleWaitAbort === controller) {
                this.visibleWaitAbort = null;
                this.wakeWaiters();
            }
        });
    }
    /** Aborts the running ticker, if any. */
    stopVisibleWaitTicker() {
        this.visibleWaitAbort?.abort();
        this.visibleWaitAbort = null;
    }
    /** Ticker loop: emits `wait_tick` and sleeps until the pause ends, is aborted, or is superseded. */
    async runVisibleWaitTicker(generation, signal) {
        while (!signal.aborted && generation === this.pauseGeneration && this.pausedUntilMs !== null) {
            const remainingMs = this.pausedUntilMs - this.now();
            if (remainingMs <= 0) {
                this.finishPause(generation);
                return;
            }
            this.emitWait('wait_tick');
            await this.sleep(Math.min(this.pausedTickMs ?? this.config.waitStateTickMs, remainingMs), signal);
        }
    }
    /**
     * Ends the current pause and wakes parked callers. When `generation` is
     * given and is no longer the current one, the call is ignored.
     */
    finishPause(generation) {
        if (generation !== undefined && generation !== this.pauseGeneration) {
            return;
        }
        // Emit before clearing: emitWait reads the pause fields.
        this.emitWait('wait_finished');
        this.pausedUntilMs = null;
        this.pausedProvider = null;
        this.pausedRateLimitType = undefined;
        this.pausedTickMs = null;
        this.stopVisibleWaitTicker();
        this.wakeWaiters();
    }
    /** Blocks until no pause is in force, re-checking after every wake or sleep. */
    async waitForPause(signal) {
        throwIfAborted(signal);
        while (this.pausedUntilMs !== null) {
            const remainingMs = this.pausedUntilMs - this.now();
            if (remainingMs <= 0) {
                this.finishPause();
                return;
            }
            if (this.visibleWaitAbort !== null) {
                // A ticker is timing the pause; park until it (or any other
                // state change) wakes us.
                await this.waitForSlot(signal);
            }
            else {
                await this.sleep(Math.min(this.pausedTickMs ?? this.config.waitStateTickMs, remainingMs), signal);
            }
            throwIfAborted(signal);
        }
    }
    /** Parks the caller until the next `wakeWaiters()`; rejects if `signal` aborts first. */
    waitForSlot(signal) {
        if (signal?.aborted) {
            return Promise.reject(createAbortError());
        }
        return new Promise((resolve, reject) => {
            const wake = () => {
                cleanup();
                resolve();
            };
            const onAbort = () => {
                cleanup();
                reject(createAbortError());
            };
            const cleanup = () => {
                this.waiters = this.waiters.filter((candidate) => candidate !== wake);
                signal?.removeEventListener('abort', onAbort);
            };
            this.waiters.push(wake);
            signal?.addEventListener('abort', onAbort, { once: true });
        });
    }
    /** Wakes every parked caller; each one re-evaluates its own condition. */
    wakeWaiters() {
        const waiters = this.waiters;
        // Swap the list first so callers that park again during the wake are
        // queued for the next round rather than this one.
        this.waiters = [];
        for (const waiter of waiters) {
            waiter();
        }
    }
    /** Emits a wait-state event describing the current pause; a no-op when not paused. */
    emitWait(kind) {
        if (this.pausedUntilMs === null || this.pausedProvider === null) {
            return;
        }
        this.emit({
            kind,
            provider: this.pausedProvider,
            ...(this.pausedRateLimitType ? { rateLimitType: this.pausedRateLimitType } : {}),
            resumeAtMs: this.pausedUntilMs,
            remainingMs: Math.max(0, this.pausedUntilMs - this.now()),
        });
    }
    /** Delivers `state` synchronously to every subscriber. */
    emit(state) {
        for (const subscriber of this.subscribers) {
            subscriber(state);
        }
    }
}