@kaelio/ktx 0.9.0 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/assets/python/{kaelio_ktx-0.9.0-py3-none-any.whl → kaelio_ktx-0.10.0-py3-none-any.whl} +0 -0
- package/assets/python/manifest.json +4 -4
- package/dist/.tsbuildinfo +1 -1
- package/dist/clack.d.ts +6 -0
- package/dist/clack.js +17 -2
- package/dist/cli-program.d.ts +3 -0
- package/dist/cli-program.js +42 -2
- package/dist/cli-runtime.d.ts +3 -0
- package/dist/cli-runtime.js +44 -0
- package/dist/commands/setup-commands.js +2 -3
- package/dist/connection.js +23 -1
- package/dist/connectors/bigquery/connector.d.ts +2 -5
- package/dist/connectors/bigquery/connector.js +2 -2
- package/dist/connectors/clickhouse/connector.d.ts +2 -5
- package/dist/connectors/clickhouse/connector.js +2 -2
- package/dist/connectors/mysql/connector.d.ts +7 -6
- package/dist/connectors/mysql/connector.js +25 -5
- package/dist/connectors/mysql/dialect.d.ts +1 -1
- package/dist/connectors/mysql/dialect.js +12 -2
- package/dist/connectors/postgres/connector.d.ts +2 -5
- package/dist/connectors/postgres/connector.js +2 -2
- package/dist/connectors/snowflake/connector.d.ts +2 -5
- package/dist/connectors/snowflake/connector.js +2 -2
- package/dist/connectors/sqlite/connector.d.ts +2 -5
- package/dist/connectors/sqlite/connector.js +2 -2
- package/dist/connectors/sqlserver/connector.d.ts +2 -5
- package/dist/connectors/sqlserver/connector.js +2 -2
- package/dist/context/connections/drivers.d.ts +0 -1
- package/dist/context/connections/drivers.js +0 -7
- package/dist/context/connections/query-executor.d.ts +2 -1
- package/dist/context/core/abort.d.ts +9 -0
- package/dist/context/core/abort.js +36 -0
- package/dist/context/ingest/adapters/historic-sql/query-history-filter-picker.d.ts +1 -0
- package/dist/context/ingest/adapters/historic-sql/query-history-filter-picker.js +6 -2
- package/dist/context/ingest/context-candidates/curator-pagination.service.d.ts +1 -5
- package/dist/context/ingest/context-candidates/curator-pagination.service.js +1 -3
- package/dist/context/ingest/context-evidence/sqlite-context-evidence-store.d.ts +1 -1
- package/dist/context/ingest/final-gate-repair.d.ts +1 -0
- package/dist/context/ingest/final-gate-repair.js +1 -0
- package/dist/context/ingest/ingest-bundle.runner.d.ts +3 -0
- package/dist/context/ingest/ingest-bundle.runner.js +127 -53
- package/dist/context/ingest/isolated-diff/textual-conflict-resolver.d.ts +1 -0
- package/dist/context/ingest/isolated-diff/textual-conflict-resolver.js +1 -0
- package/dist/context/ingest/isolated-diff/work-unit-executor.d.ts +1 -0
- package/dist/context/ingest/local-bundle-runtime.js +11 -4
- package/dist/context/ingest/local-ingest.d.ts +1 -0
- package/dist/context/ingest/local-ingest.js +13 -3
- package/dist/context/ingest/memory-flow/events.js +1 -1
- package/dist/context/ingest/memory-flow/schema.js +8 -3
- package/dist/context/ingest/memory-flow/types.d.ts +7 -3
- package/dist/context/ingest/ports.d.ts +3 -5
- package/dist/context/ingest/stages/stage-3-work-units.d.ts +1 -4
- package/dist/context/ingest/stages/stage-3-work-units.js +5 -1
- package/dist/context/ingest/stages/stage-4-reconciliation.d.ts +1 -4
- package/dist/context/ingest/stages/stage-4-reconciliation.js +1 -1
- package/dist/context/ingest/types.d.ts +1 -0
- package/dist/context/llm/ai-sdk-runtime.d.ts +3 -0
- package/dist/context/llm/ai-sdk-runtime.js +152 -16
- package/dist/context/llm/claude-code-runtime.d.ts +6 -4
- package/dist/context/llm/claude-code-runtime.js +127 -48
- package/dist/context/llm/codex-runtime.d.ts +3 -3
- package/dist/context/llm/codex-runtime.js +90 -47
- package/dist/context/llm/local-config.d.ts +15 -5
- package/dist/context/llm/local-config.js +6 -1
- package/dist/context/llm/rate-limit-governor.d.ts +103 -0
- package/dist/context/llm/rate-limit-governor.js +285 -0
- package/dist/context/llm/runtime-port.d.ts +3 -6
- package/dist/context/mcp/context-tools.js +43 -13
- package/dist/context/project/config.d.ts +12 -0
- package/dist/context/project/config.js +35 -0
- package/dist/context/scan/types.d.ts +15 -2
- package/dist/context/scan/types.js +12 -0
- package/dist/context/sl/description-normalization.js +4 -14
- package/dist/context/tools/context-candidate-mark.tool.d.ts +2 -2
- package/dist/context-build-view.d.ts +13 -0
- package/dist/context-build-view.js +60 -1
- package/dist/demo-metrics.d.ts +0 -2
- package/dist/demo-metrics.js +1 -11
- package/dist/ingest.d.ts +1 -0
- package/dist/ingest.js +32 -3
- package/dist/io/symbols.d.ts +2 -0
- package/dist/io/symbols.js +2 -0
- package/dist/memory-flow-hud.js +8 -16
- package/dist/public-ingest.js +50 -15
- package/dist/reveal-password-prompt.d.ts +24 -0
- package/dist/reveal-password-prompt.js +78 -0
- package/dist/scan.js +18 -2
- package/dist/setup-databases.d.ts +1 -0
- package/dist/setup-databases.js +23 -3
- package/dist/setup-demo-tour.js +1 -0
- package/dist/setup-embeddings.js +1 -1
- package/dist/setup-models.d.ts +1 -14
- package/dist/setup-models.js +116 -340
- package/dist/setup-prompts.js +3 -2
- package/dist/setup-sources.js +7 -7
- package/dist/setup.d.ts +1 -1
- package/dist/setup.js +1 -1
- package/dist/sl.d.ts +2 -2
- package/dist/sl.js +20 -4
- package/dist/sql.js +18 -2
- package/dist/star-prompt/cache.d.ts +16 -0
- package/dist/star-prompt/cache.js +45 -0
- package/dist/star-prompt/star-count.d.ts +7 -0
- package/dist/star-prompt/star-count.js +66 -0
- package/dist/star-prompt/star-line.d.ts +12 -0
- package/dist/star-prompt/star-line.js +26 -0
- package/dist/telemetry/emitter.d.ts +10 -0
- package/dist/telemetry/emitter.js +31 -0
- package/dist/telemetry/events.d.ts +24 -0
- package/dist/telemetry/events.js +15 -0
- package/dist/telemetry/exception.d.ts +18 -0
- package/dist/telemetry/exception.js +162 -0
- package/dist/telemetry/index.d.ts +3 -2
- package/dist/telemetry/index.js +2 -1
- package/dist/telemetry/redaction-secrets.d.ts +11 -0
- package/dist/telemetry/redaction-secrets.js +92 -0
- package/dist/update-check/cache.d.ts +21 -0
- package/dist/update-check/cache.js +38 -0
- package/dist/update-check/channel.d.ts +15 -0
- package/dist/update-check/channel.js +30 -0
- package/dist/update-check/registry.d.ts +1 -0
- package/dist/update-check/registry.js +45 -0
- package/dist/update-check/update-check.d.ts +43 -0
- package/dist/update-check/update-check.js +116 -0
- package/package.json +8 -1
- package/dist/context/connections/local-query-executor.d.ts +0 -6
- package/dist/context/connections/local-query-executor.js +0 -39
- package/dist/context/connections/postgres-query-executor.d.ts +0 -25
- package/dist/context/connections/postgres-query-executor.js +0 -53
- package/dist/context/connections/sqlite-query-executor.d.ts +0 -4
- package/dist/context/connections/sqlite-query-executor.js +0 -74
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { createSdkMcpServer, query as defaultQuery, } from '@anthropic-ai/claude-agent-sdk';
|
|
2
2
|
import { z } from 'zod';
|
|
3
|
-
import {
|
|
3
|
+
import { createAbortError, isAbortError, throwIfAborted } from '../core/abort.js';
|
|
4
4
|
import { createKtxClaudeCodeEnv } from './claude-code-env.js';
|
|
5
5
|
import { resolveClaudeCodeModel } from './claude-code-models.js';
|
|
6
6
|
import { createClaudeSdkTools, mcpToolIds } from './runtime-tools.js';
|
|
@@ -41,21 +41,6 @@ const STRUCTURED_OUTPUT_TOOL_NAME = 'StructuredOutput';
|
|
|
41
41
|
function isResult(message) {
|
|
42
42
|
return message.type === 'result';
|
|
43
43
|
}
|
|
44
|
-
// Skip emissions the SDK does not count toward `num_turns`: `pause_turn` continuations and
|
|
45
|
-
// errored partials (e.g. `max_output_tokens`) it retries internally. Without this, the
|
|
46
|
-
// runtime's step counter outruns `maxTurns` and the HUD renders e.g. `step 69/40`.
|
|
47
|
-
function countsAsAssistantTurn(message) {
|
|
48
|
-
if (message.type !== 'assistant' || message.parent_tool_use_id !== null) {
|
|
49
|
-
return false;
|
|
50
|
-
}
|
|
51
|
-
if (message.error !== undefined) {
|
|
52
|
-
return false;
|
|
53
|
-
}
|
|
54
|
-
if (message.message.stop_reason === 'pause_turn') {
|
|
55
|
-
return false;
|
|
56
|
-
}
|
|
57
|
-
return true;
|
|
58
|
-
}
|
|
59
44
|
function resultError(result) {
|
|
60
45
|
if (result.subtype === 'success') {
|
|
61
46
|
return undefined;
|
|
@@ -104,6 +89,67 @@ function assertInitIsolation(message, allowedToolIds, expectedMcpServerNames) {
|
|
|
104
89
|
function expectedMcpServerNames(tools) {
|
|
105
90
|
return tools && Object.keys(tools).length > 0 ? new Set([KTX_MCP_SERVER_NAME]) : new Set();
|
|
106
91
|
}
|
|
92
|
+
const CLAUDE_RATE_LIMIT_ERROR_MARKERS = /\b429\b|rate limit|too many requests|quota exceeded|overloaded|max_retries/i;
|
|
93
|
+
function normalizeClaudeResetAtMs(value) {
|
|
94
|
+
if (typeof value === 'number' && Number.isFinite(value) && value > 0) {
|
|
95
|
+
return Math.round(value < 10_000_000_000 ? value * 1_000 : value);
|
|
96
|
+
}
|
|
97
|
+
if (typeof value === 'string') {
|
|
98
|
+
const numeric = Number(value);
|
|
99
|
+
if (Number.isFinite(numeric) && numeric > 0) {
|
|
100
|
+
return normalizeClaudeResetAtMs(numeric);
|
|
101
|
+
}
|
|
102
|
+
const parsed = Date.parse(value);
|
|
103
|
+
return Number.isFinite(parsed) ? parsed : undefined;
|
|
104
|
+
}
|
|
105
|
+
return undefined;
|
|
106
|
+
}
|
|
107
|
+
function isClaudeRateLimitResult(result, rejectedSignal) {
|
|
108
|
+
const error = resultError(result);
|
|
109
|
+
if (!error) {
|
|
110
|
+
return false;
|
|
111
|
+
}
|
|
112
|
+
if (rejectedSignal?.status === 'rejected') {
|
|
113
|
+
return true;
|
|
114
|
+
}
|
|
115
|
+
const resultDetails = result;
|
|
116
|
+
const details = [
|
|
117
|
+
error.message,
|
|
118
|
+
resultDetails.stop_reason,
|
|
119
|
+
resultDetails.terminal_reason,
|
|
120
|
+
...(resultDetails.errors ?? []),
|
|
121
|
+
]
|
|
122
|
+
.filter((value) => typeof value === 'string' && value.length > 0)
|
|
123
|
+
.join('\n');
|
|
124
|
+
return CLAUDE_RATE_LIMIT_ERROR_MARKERS.test(details);
|
|
125
|
+
}
|
|
126
|
+
function claudeRateLimitSignal(message) {
|
|
127
|
+
const record = message;
|
|
128
|
+
if (record.type === 'rate_limit_event') {
|
|
129
|
+
const info = record.rate_limit_info;
|
|
130
|
+
if (!info)
|
|
131
|
+
return null;
|
|
132
|
+
const rawStatus = typeof info.status === 'string' ? info.status : 'allowed';
|
|
133
|
+
const resetAtMs = normalizeClaudeResetAtMs(info.resetsAt);
|
|
134
|
+
return {
|
|
135
|
+
provider: 'claude-subscription',
|
|
136
|
+
status: rawStatus === 'rejected' ? 'rejected' : rawStatus === 'allowed_warning' ? 'warning' : 'allowed',
|
|
137
|
+
...(resetAtMs !== undefined ? { resetAtMs } : {}),
|
|
138
|
+
...(typeof info.rateLimitType === 'string' ? { rateLimitType: info.rateLimitType } : {}),
|
|
139
|
+
...(typeof info.utilization === 'number' ? { utilization: info.utilization } : {}),
|
|
140
|
+
};
|
|
141
|
+
}
|
|
142
|
+
if (record.subtype === 'api_retry' || record.type === 'api_retry') {
|
|
143
|
+
const retryDelayMs = typeof record.retry_delay_ms === 'number' ? record.retry_delay_ms : undefined;
|
|
144
|
+
return {
|
|
145
|
+
provider: 'claude-subscription',
|
|
146
|
+
status: 'warning',
|
|
147
|
+
...(retryDelayMs !== undefined ? { retryAfterMs: retryDelayMs } : {}),
|
|
148
|
+
rateLimitType: 'api_retry',
|
|
149
|
+
};
|
|
150
|
+
}
|
|
151
|
+
return null;
|
|
152
|
+
}
|
|
107
153
|
function managedMcpSettings(serverNames) {
|
|
108
154
|
return {
|
|
109
155
|
allowManagedMcpServersOnly: true,
|
|
@@ -150,28 +196,63 @@ function baseOptions(input) {
|
|
|
150
196
|
}
|
|
151
197
|
async function collectResult(params) {
|
|
152
198
|
let result;
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
199
|
+
let rejectedRateLimitSignal;
|
|
200
|
+
throwIfAborted(params.abortSignal);
|
|
201
|
+
await params.rateLimitGovernor?.waitForReady(params.abortSignal);
|
|
202
|
+
throwIfAborted(params.abortSignal);
|
|
203
|
+
const queryResult = params.query({ prompt: params.prompt, options: params.options });
|
|
204
|
+
const onAbort = () => {
|
|
205
|
+
void Promise.resolve(queryResult.interrupt?.()).catch(() => undefined);
|
|
206
|
+
};
|
|
207
|
+
params.abortSignal?.addEventListener('abort', onAbort, { once: true });
|
|
208
|
+
try {
|
|
209
|
+
for await (const message of queryResult) {
|
|
210
|
+
throwIfAborted(params.abortSignal);
|
|
211
|
+
const rateLimitSignal = claudeRateLimitSignal(message);
|
|
212
|
+
if (rateLimitSignal) {
|
|
213
|
+
if (rateLimitSignal.status === 'rejected') {
|
|
214
|
+
rejectedRateLimitSignal = rateLimitSignal;
|
|
215
|
+
}
|
|
216
|
+
params.rateLimitGovernor?.report(rateLimitSignal);
|
|
217
|
+
}
|
|
218
|
+
assertInitIsolation(message, params.allowedToolIds, params.expectedMcpServerNames);
|
|
219
|
+
if (isResult(message)) {
|
|
220
|
+
result = message;
|
|
221
|
+
}
|
|
160
222
|
}
|
|
161
223
|
}
|
|
224
|
+
finally {
|
|
225
|
+
params.abortSignal?.removeEventListener('abort', onAbort);
|
|
226
|
+
}
|
|
227
|
+
if (params.abortSignal?.aborted) {
|
|
228
|
+
throw createAbortError();
|
|
229
|
+
}
|
|
162
230
|
if (!result) {
|
|
163
231
|
throw new Error('Claude Code query returned no result message');
|
|
164
232
|
}
|
|
165
|
-
return
|
|
233
|
+
return {
|
|
234
|
+
result,
|
|
235
|
+
...(rejectedRateLimitSignal ? { rejectedRateLimitSignal } : {}),
|
|
236
|
+
};
|
|
237
|
+
}
|
|
238
|
+
async function collectResultWithRateLimitRetry(params) {
|
|
239
|
+
// maxRetryAttempts() returns 1 when no governor is present or pacing is
|
|
240
|
+
// disabled, so a rate-limited result surfaces without an extra query; the
|
|
241
|
+
// Claude Code SDK applies its own backoff for transient rejections.
|
|
242
|
+
const maxAttempts = params.rateLimitGovernor?.maxRetryAttempts() ?? 1;
|
|
243
|
+
for (let attempt = 0;; attempt += 1) {
|
|
244
|
+
const outcome = await collectResult(params);
|
|
245
|
+
if (!isClaudeRateLimitResult(outcome.result, outcome.rejectedRateLimitSignal) || attempt >= maxAttempts - 1) {
|
|
246
|
+
return outcome.result;
|
|
247
|
+
}
|
|
248
|
+
}
|
|
166
249
|
}
|
|
167
250
|
export class ClaudeCodeKtxLlmRuntime {
|
|
168
251
|
deps;
|
|
169
252
|
runQuery;
|
|
170
|
-
logger;
|
|
171
253
|
constructor(deps) {
|
|
172
254
|
this.deps = deps;
|
|
173
255
|
this.runQuery = deps.query ?? defaultQuery;
|
|
174
|
-
this.logger = deps.logger ?? noopLogger;
|
|
175
256
|
}
|
|
176
257
|
async generateText(input) {
|
|
177
258
|
const options = baseOptions({
|
|
@@ -182,12 +263,14 @@ export class ClaudeCodeKtxLlmRuntime {
|
|
|
182
263
|
tools: input.tools,
|
|
183
264
|
});
|
|
184
265
|
const startedAt = Date.now();
|
|
185
|
-
const result = await
|
|
266
|
+
const result = await collectResultWithRateLimitRetry({
|
|
186
267
|
query: this.runQuery,
|
|
187
268
|
prompt: [input.system, input.prompt].filter(Boolean).join('\n\n'),
|
|
188
269
|
options,
|
|
189
270
|
allowedToolIds: new Set(mcpToolIds(input.tools ?? {})),
|
|
190
271
|
expectedMcpServerNames: expectedMcpServerNames(input.tools),
|
|
272
|
+
rateLimitGovernor: this.deps.rateLimitGovernor,
|
|
273
|
+
abortSignal: input.abortSignal,
|
|
191
274
|
});
|
|
192
275
|
input.onMetrics?.({ totalMs: Date.now() - startedAt, usage: claudeTokenUsage(result) });
|
|
193
276
|
const error = resultError(result);
|
|
@@ -216,12 +299,14 @@ export class ClaudeCodeKtxLlmRuntime {
|
|
|
216
299
|
outputFormat: { type: 'json_schema', schema: jsonSchema(input.schema) },
|
|
217
300
|
};
|
|
218
301
|
const startedAt = Date.now();
|
|
219
|
-
const result = await
|
|
302
|
+
const result = await collectResultWithRateLimitRetry({
|
|
220
303
|
query: this.runQuery,
|
|
221
304
|
prompt: [input.system, input.prompt].filter(Boolean).join('\n\n'),
|
|
222
305
|
options,
|
|
223
306
|
allowedToolIds: new Set([...mcpToolIds(input.tools ?? {}), STRUCTURED_OUTPUT_TOOL_NAME]),
|
|
224
307
|
expectedMcpServerNames: expectedMcpServerNames(input.tools),
|
|
308
|
+
rateLimitGovernor: this.deps.rateLimitGovernor,
|
|
309
|
+
abortSignal: input.abortSignal,
|
|
225
310
|
});
|
|
226
311
|
input.onMetrics?.({ totalMs: Date.now() - startedAt, usage: claudeTokenUsage(result) });
|
|
227
312
|
const error = resultError(result);
|
|
@@ -234,9 +319,7 @@ export class ClaudeCodeKtxLlmRuntime {
|
|
|
234
319
|
return input.schema.parse(result.structured_output);
|
|
235
320
|
}
|
|
236
321
|
async runAgentLoop(params) {
|
|
237
|
-
let stepIndex = 0;
|
|
238
322
|
const startedAt = Date.now();
|
|
239
|
-
const stepBoundariesMs = [];
|
|
240
323
|
try {
|
|
241
324
|
const options = baseOptions({
|
|
242
325
|
projectDir: this.deps.projectDir,
|
|
@@ -245,25 +328,14 @@ export class ClaudeCodeKtxLlmRuntime {
|
|
|
245
328
|
maxTurns: params.stepBudget,
|
|
246
329
|
tools: params.toolSet,
|
|
247
330
|
});
|
|
248
|
-
const result = await
|
|
331
|
+
const result = await collectResultWithRateLimitRetry({
|
|
249
332
|
query: this.runQuery,
|
|
250
333
|
prompt: params.userPrompt,
|
|
251
334
|
options: { ...options, systemPrompt: params.systemPrompt },
|
|
252
335
|
allowedToolIds: new Set(mcpToolIds(params.toolSet)),
|
|
253
336
|
expectedMcpServerNames: expectedMcpServerNames(params.toolSet),
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
stepBoundariesMs.push(Date.now() - startedAt);
|
|
257
|
-
if (!params.onStepFinish) {
|
|
258
|
-
return;
|
|
259
|
-
}
|
|
260
|
-
try {
|
|
261
|
-
await params.onStepFinish({ stepIndex, stepBudget: params.stepBudget });
|
|
262
|
-
}
|
|
263
|
-
catch (error) {
|
|
264
|
-
this.logger.warn(`[claude-code-runner] onStepFinish callback threw; ignoring: ${error instanceof Error ? error.message : String(error)}`);
|
|
265
|
-
}
|
|
266
|
-
},
|
|
337
|
+
rateLimitGovernor: this.deps.rateLimitGovernor,
|
|
338
|
+
abortSignal: params.abortSignal,
|
|
267
339
|
});
|
|
268
340
|
const stopReason = mapClaudeCodeStopReason(result);
|
|
269
341
|
const error = resultError(result);
|
|
@@ -272,18 +344,25 @@ export class ClaudeCodeKtxLlmRuntime {
|
|
|
272
344
|
...(stopReason === 'error' && error ? { error } : {}),
|
|
273
345
|
metrics: {
|
|
274
346
|
totalMs: Date.now() - startedAt,
|
|
275
|
-
|
|
276
|
-
|
|
347
|
+
// Authoritative turn count from the SDK result. The runtime no longer
|
|
348
|
+
// re-derives a per-turn counter: it could not match the SDK's `num_turns`
|
|
349
|
+
// and overshot `maxTurns` (the source of the misleading `step 70/40`).
|
|
350
|
+
// Per-step boundaries require that counter and are not consumed anywhere.
|
|
351
|
+
stepCount: result.num_turns,
|
|
352
|
+
stepBoundariesMs: [],
|
|
277
353
|
usage: claudeTokenUsage(result),
|
|
278
354
|
},
|
|
279
355
|
};
|
|
280
356
|
}
|
|
281
357
|
catch (error) {
|
|
358
|
+
if (isAbortError(error)) {
|
|
359
|
+
throw error;
|
|
360
|
+
}
|
|
282
361
|
const err = error instanceof Error ? error : new Error(String(error));
|
|
283
362
|
return {
|
|
284
363
|
stopReason: 'error',
|
|
285
364
|
error: err,
|
|
286
|
-
metrics: { totalMs: Date.now() - startedAt, stepCount:
|
|
365
|
+
metrics: { totalMs: Date.now() - startedAt, stepCount: 0, stepBoundariesMs: [], usage: {} },
|
|
287
366
|
};
|
|
288
367
|
}
|
|
289
368
|
}
|
|
@@ -306,7 +385,7 @@ export async function runClaudeCodeAuthProbe(input) {
|
|
|
306
385
|
env: input.env,
|
|
307
386
|
maxTurns: 1,
|
|
308
387
|
});
|
|
309
|
-
const result = await
|
|
388
|
+
const result = await collectResultWithRateLimitRetry({
|
|
310
389
|
query: input.query ?? defaultQuery,
|
|
311
390
|
prompt: 'Reply with exactly: ok',
|
|
312
391
|
options,
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import { z } from 'zod';
|
|
2
|
-
import { type KtxLogger } from '../core/config.js';
|
|
3
2
|
import { type CodexRuntimeMcpServerHandle } from './codex-mcp-runtime-server.js';
|
|
4
3
|
import { type CodexSdkRunner } from './codex-sdk-runner.js';
|
|
4
|
+
import type { RateLimitGovernor } from './rate-limit-governor.js';
|
|
5
5
|
import type { KtxGenerateObjectInput, KtxGenerateTextInput, KtxLlmRuntimePort, KtxRuntimeToolSet, RunLoopParams, RunLoopResult } from './runtime-port.js';
|
|
6
6
|
export interface CodexKtxLlmRuntimeDeps {
|
|
7
7
|
projectDir: string;
|
|
@@ -13,13 +13,13 @@ export interface CodexKtxLlmRuntimeDeps {
|
|
|
13
13
|
projectDir: string;
|
|
14
14
|
toolSet: KtxRuntimeToolSet;
|
|
15
15
|
}) => Promise<CodexRuntimeMcpServerHandle>;
|
|
16
|
-
|
|
16
|
+
rateLimitGovernor?: Pick<RateLimitGovernor, 'waitForReady' | 'report' | 'maxRetryAttempts'>;
|
|
17
17
|
}
|
|
18
18
|
export declare class CodexKtxLlmRuntime implements KtxLlmRuntimePort {
|
|
19
19
|
private readonly deps;
|
|
20
20
|
private readonly runner;
|
|
21
|
-
private readonly logger;
|
|
22
21
|
constructor(deps: CodexKtxLlmRuntimeDeps);
|
|
22
|
+
private runWithRateLimitRetry;
|
|
23
23
|
generateText(input: KtxGenerateTextInput): Promise<string>;
|
|
24
24
|
generateObject<TOutput, TSchema extends z.ZodType<TOutput>>(input: KtxGenerateObjectInput<TOutput, TSchema>): Promise<TOutput>;
|
|
25
25
|
runAgentLoop(params: RunLoopParams): Promise<RunLoopResult>;
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { z } from 'zod';
|
|
2
|
-
import {
|
|
2
|
+
import { isAbortError, linkAbortSignal } from '../core/abort.js';
|
|
3
3
|
import { isCompletedAgentStep, summarizeCodexExecEvents } from './codex-exec-events.js';
|
|
4
4
|
import { startCodexRuntimeMcpServer, } from './codex-mcp-runtime-server.js';
|
|
5
5
|
import { resolveCodexModel } from './codex-models.js';
|
|
@@ -18,8 +18,8 @@ function isTurnCompleted(event) {
|
|
|
18
18
|
return eventRecord(event)?.type === 'turn.completed';
|
|
19
19
|
}
|
|
20
20
|
/**
|
|
21
|
-
* Drains the Codex stream once,
|
|
22
|
-
*
|
|
21
|
+
* Drains the Codex stream once, counting each completed agent action so the
|
|
22
|
+
* step budget is enforced mid-run. Every
|
|
23
23
|
* completed agent-action item counts (see {@link isCompletedAgentStep}), so
|
|
24
24
|
* built-in `command_execution` steps decrement the budget the same as
|
|
25
25
|
* `mcp_tool_call`s. A turn that produced no actions still counts as one step,
|
|
@@ -48,7 +48,6 @@ async function collectEvents(events, options = {}) {
|
|
|
48
48
|
continue;
|
|
49
49
|
}
|
|
50
50
|
completedSteps += 1;
|
|
51
|
-
await options.onStep?.(completedSteps);
|
|
52
51
|
if (isActionStep && options.stepBudget !== undefined && completedSteps >= options.stepBudget) {
|
|
53
52
|
budgetExceeded = true;
|
|
54
53
|
options.abortController?.abort();
|
|
@@ -107,14 +106,43 @@ async function mcpForTools(input) {
|
|
|
107
106
|
function runtimeToolNames(toolSet) {
|
|
108
107
|
return Object.values(toolSet ?? {}).map((descriptor) => descriptor.name);
|
|
109
108
|
}
|
|
109
|
+
const CODEX_RATE_LIMIT_MARKERS = /\b429\b|rate limit|too many requests|quota exceeded|temporarily overloaded/i;
|
|
110
|
+
function isCodexRateLimitError(error) {
|
|
111
|
+
return !!error && CODEX_RATE_LIMIT_MARKERS.test(error.message);
|
|
112
|
+
}
|
|
110
113
|
export class CodexKtxLlmRuntime {
|
|
111
114
|
deps;
|
|
112
115
|
runner;
|
|
113
|
-
logger;
|
|
114
116
|
constructor(deps) {
|
|
115
117
|
this.deps = deps;
|
|
116
118
|
this.runner = deps.runner ?? new CodexSdkCliRunner();
|
|
117
|
-
|
|
119
|
+
}
|
|
120
|
+
async runWithRateLimitRetry(abortSignal, run, getError) {
|
|
121
|
+
// maxRetryAttempts() returns 1 when no governor is present or pacing is
|
|
122
|
+
// disabled, so an opaque rate-limit failure surfaces on the first attempt
|
|
123
|
+
// instead of being retried with no backoff.
|
|
124
|
+
const maxAttempts = this.deps.rateLimitGovernor?.maxRetryAttempts() ?? 1;
|
|
125
|
+
for (let attempt = 0;; attempt += 1) {
|
|
126
|
+
await this.deps.rateLimitGovernor?.waitForReady(abortSignal);
|
|
127
|
+
const lastAttempt = attempt >= maxAttempts - 1;
|
|
128
|
+
try {
|
|
129
|
+
const result = await run();
|
|
130
|
+
const error = getError(result);
|
|
131
|
+
if (!isCodexRateLimitError(error) || lastAttempt) {
|
|
132
|
+
return result;
|
|
133
|
+
}
|
|
134
|
+
}
|
|
135
|
+
catch (error) {
|
|
136
|
+
if (isAbortError(error)) {
|
|
137
|
+
throw error;
|
|
138
|
+
}
|
|
139
|
+
const err = error instanceof Error ? error : new Error(String(error));
|
|
140
|
+
if (!isCodexRateLimitError(err) || lastAttempt) {
|
|
141
|
+
throw error;
|
|
142
|
+
}
|
|
143
|
+
}
|
|
144
|
+
this.deps.rateLimitGovernor?.report({ provider: 'codex', status: 'rejected', rateLimitType: 'opaque' });
|
|
145
|
+
}
|
|
118
146
|
}
|
|
119
147
|
async generateText(input) {
|
|
120
148
|
const startedAt = Date.now();
|
|
@@ -138,16 +166,20 @@ export class CodexKtxLlmRuntime {
|
|
|
138
166
|
}
|
|
139
167
|
: {}),
|
|
140
168
|
});
|
|
141
|
-
const
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
169
|
+
const result = await this.runWithRateLimitRetry(input.abortSignal, async () => {
|
|
170
|
+
const collected = await collectEvents(await this.runner.runStreamed({
|
|
171
|
+
projectDir: this.deps.projectDir,
|
|
172
|
+
model,
|
|
173
|
+
prompt: promptWithSystem(input.system, input.prompt),
|
|
174
|
+
configOverrides: config.configOverrides,
|
|
175
|
+
env: config.env,
|
|
176
|
+
...(input.abortSignal ? { signal: input.abortSignal } : {}),
|
|
177
|
+
}));
|
|
178
|
+
const summary = summarizeCodexExecEvents(collected.events, { startedAt });
|
|
179
|
+
return { collected, summary };
|
|
180
|
+
}, ({ collected, summary }) => summaryError(summary, collected.streamError));
|
|
181
|
+
input.onMetrics?.(metrics(result.summary, startedAt));
|
|
182
|
+
return assertSuccessfulText(result.summary, result.collected.streamError);
|
|
151
183
|
}
|
|
152
184
|
finally {
|
|
153
185
|
await mcp?.close();
|
|
@@ -175,17 +207,21 @@ export class CodexKtxLlmRuntime {
|
|
|
175
207
|
}
|
|
176
208
|
: {}),
|
|
177
209
|
});
|
|
178
|
-
const
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
210
|
+
const result = await this.runWithRateLimitRetry(input.abortSignal, async () => {
|
|
211
|
+
const collected = await collectEvents(await this.runner.runStreamed({
|
|
212
|
+
projectDir: this.deps.projectDir,
|
|
213
|
+
model,
|
|
214
|
+
prompt: promptWithSystem(input.system, input.prompt),
|
|
215
|
+
configOverrides: config.configOverrides,
|
|
216
|
+
env: config.env,
|
|
217
|
+
outputSchema: z.toJSONSchema(input.schema, { target: 'draft-7' }),
|
|
218
|
+
...(input.abortSignal ? { signal: input.abortSignal } : {}),
|
|
219
|
+
}));
|
|
220
|
+
const summary = summarizeCodexExecEvents(collected.events, { startedAt });
|
|
221
|
+
return { collected, summary };
|
|
222
|
+
}, ({ collected, summary }) => summaryError(summary, collected.streamError));
|
|
223
|
+
input.onMetrics?.(metrics(result.summary, startedAt));
|
|
224
|
+
return parseStructuredOutput(input.schema, assertSuccessfulText(result.summary, result.collected.streamError));
|
|
189
225
|
}
|
|
190
226
|
finally {
|
|
191
227
|
await mcp?.close();
|
|
@@ -214,38 +250,45 @@ export class CodexKtxLlmRuntime {
|
|
|
214
250
|
}
|
|
215
251
|
: {}),
|
|
216
252
|
});
|
|
217
|
-
const
|
|
218
|
-
|
|
253
|
+
const result = await this.runWithRateLimitRetry(params.abortSignal, async () => {
|
|
254
|
+
const linked = linkAbortSignal(params.abortSignal);
|
|
255
|
+
const abortController = linked.controller;
|
|
219
256
|
try {
|
|
220
|
-
await
|
|
257
|
+
const collected = await collectEvents(await this.runner.runStreamed({
|
|
258
|
+
projectDir: this.deps.projectDir,
|
|
259
|
+
model,
|
|
260
|
+
prompt: promptWithSystem(params.systemPrompt, params.userPrompt),
|
|
261
|
+
configOverrides: config.configOverrides,
|
|
262
|
+
env: config.env,
|
|
263
|
+
signal: abortController.signal,
|
|
264
|
+
}), { stepBudget: params.stepBudget, abortController });
|
|
265
|
+
const summary = summarizeCodexExecEvents(collected.events, { startedAt });
|
|
266
|
+
return { collected, summary };
|
|
221
267
|
}
|
|
222
|
-
|
|
223
|
-
|
|
268
|
+
finally {
|
|
269
|
+
linked.dispose();
|
|
224
270
|
}
|
|
225
|
-
};
|
|
226
|
-
const
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
env: config.env,
|
|
232
|
-
signal: abortController.signal,
|
|
233
|
-
}), { stepBudget: params.stepBudget, abortController, onStep });
|
|
234
|
-
const summary = summarizeCodexExecEvents(collected.events, { startedAt });
|
|
235
|
-
const error = summaryError(summary, collected.streamError);
|
|
236
|
-
const stopReason = collected.budgetExceeded ? 'budget' : error ? 'error' : summary.stopReason;
|
|
271
|
+
}, ({ collected, summary }) => summaryError(summary, collected.streamError));
|
|
272
|
+
const error = summaryError(result.summary, result.collected.streamError);
|
|
273
|
+
if (isAbortError(error)) {
|
|
274
|
+
throw error;
|
|
275
|
+
}
|
|
276
|
+
const stopReason = result.collected.budgetExceeded ? 'budget' : error ? 'error' : result.summary.stopReason;
|
|
237
277
|
return {
|
|
238
278
|
stopReason,
|
|
239
279
|
...(stopReason === 'error' && error ? { error } : {}),
|
|
240
280
|
metrics: {
|
|
241
281
|
totalMs: Date.now() - startedAt,
|
|
242
|
-
usage: summary.usage,
|
|
243
|
-
stepCount: summary.stepCount,
|
|
244
|
-
stepBoundariesMs: summary.stepBoundariesMs,
|
|
282
|
+
usage: result.summary.usage,
|
|
283
|
+
stepCount: result.summary.stepCount,
|
|
284
|
+
stepBoundariesMs: result.summary.stepBoundariesMs,
|
|
245
285
|
},
|
|
246
286
|
};
|
|
247
287
|
}
|
|
248
288
|
catch (error) {
|
|
289
|
+
if (isAbortError(error)) {
|
|
290
|
+
throw error;
|
|
291
|
+
}
|
|
249
292
|
const err = error instanceof Error ? error : new Error(String(error));
|
|
250
293
|
return {
|
|
251
294
|
stopReason: 'error',
|
|
@@ -2,19 +2,29 @@ import { createKtxEmbeddingProvider } from '../../llm/embedding-provider.js';
|
|
|
2
2
|
import { createKtxLlmProvider } from '../../llm/model-provider.js';
|
|
3
3
|
import type { KtxEmbeddingConfig, KtxEmbeddingProvider, KtxLlmConfig, KtxLlmProvider } from '../../llm/types.js';
|
|
4
4
|
import type { KtxProjectEmbeddingConfig, KtxProjectLlmConfig } from '../project/config.js';
|
|
5
|
+
import { AiSdkKtxLlmRuntime } from './ai-sdk-runtime.js';
|
|
5
6
|
import { ClaudeCodeKtxLlmRuntime } from './claude-code-runtime.js';
|
|
6
7
|
import { CodexKtxLlmRuntime } from './codex-runtime.js';
|
|
8
|
+
import type { RateLimitGovernor } from './rate-limit-governor.js';
|
|
7
9
|
import type { KtxLlmRuntimePort } from './runtime-port.js';
|
|
10
|
+
type ClaudeCodeRuntimeDeps = ConstructorParameters<typeof ClaudeCodeKtxLlmRuntime>[0] & {
|
|
11
|
+
rateLimitGovernor?: RateLimitGovernor;
|
|
12
|
+
};
|
|
13
|
+
type CodexRuntimeDeps = ConstructorParameters<typeof CodexKtxLlmRuntime>[0] & {
|
|
14
|
+
rateLimitGovernor?: RateLimitGovernor;
|
|
15
|
+
};
|
|
16
|
+
type AiSdkRuntimeDeps = ConstructorParameters<typeof AiSdkKtxLlmRuntime>[0] & {
|
|
17
|
+
rateLimitGovernor?: RateLimitGovernor;
|
|
18
|
+
};
|
|
8
19
|
interface LocalConfigDeps {
|
|
9
20
|
env?: NodeJS.ProcessEnv;
|
|
10
21
|
projectDir?: string;
|
|
22
|
+
rateLimitGovernor?: RateLimitGovernor;
|
|
11
23
|
createKtxLlmProvider?: typeof createKtxLlmProvider;
|
|
12
24
|
createKtxEmbeddingProvider?: typeof createKtxEmbeddingProvider;
|
|
13
|
-
createClaudeCodeRuntime?: (deps:
|
|
14
|
-
createCodexRuntime?: (deps:
|
|
15
|
-
createAiSdkRuntime?: (deps:
|
|
16
|
-
llmProvider: KtxLlmProvider;
|
|
17
|
-
}) => KtxLlmRuntimePort;
|
|
25
|
+
createClaudeCodeRuntime?: (deps: ClaudeCodeRuntimeDeps) => KtxLlmRuntimePort;
|
|
26
|
+
createCodexRuntime?: (deps: CodexRuntimeDeps) => KtxLlmRuntimePort;
|
|
27
|
+
createAiSdkRuntime?: (deps: AiSdkRuntimeDeps) => KtxLlmRuntimePort;
|
|
18
28
|
}
|
|
19
29
|
export declare function resolveLocalKtxLlmConfig(config: KtxProjectLlmConfig, env: NodeJS.ProcessEnv): KtxLlmConfig | null;
|
|
20
30
|
/** @internal */
|
|
@@ -90,6 +90,7 @@ export function createLocalKtxLlmRuntimeFromConfig(config, deps = {}) {
|
|
|
90
90
|
projectDir,
|
|
91
91
|
modelSlots: resolved.modelSlots,
|
|
92
92
|
env: deps.env,
|
|
93
|
+
rateLimitGovernor: deps.rateLimitGovernor,
|
|
93
94
|
});
|
|
94
95
|
}
|
|
95
96
|
if (resolved.backend === 'codex') {
|
|
@@ -100,10 +101,14 @@ export function createLocalKtxLlmRuntimeFromConfig(config, deps = {}) {
|
|
|
100
101
|
return (deps.createCodexRuntime ?? ((runtimeDeps) => new CodexKtxLlmRuntime(runtimeDeps)))({
|
|
101
102
|
projectDir,
|
|
102
103
|
modelSlots: resolved.modelSlots,
|
|
104
|
+
rateLimitGovernor: deps.rateLimitGovernor,
|
|
103
105
|
});
|
|
104
106
|
}
|
|
105
107
|
const llmProvider = (deps.createKtxLlmProvider ?? createKtxLlmProvider)(resolved);
|
|
106
|
-
return (deps.createAiSdkRuntime ?? ((runtimeDeps) => new AiSdkKtxLlmRuntime(runtimeDeps)))({
|
|
108
|
+
return (deps.createAiSdkRuntime ?? ((runtimeDeps) => new AiSdkKtxLlmRuntime(runtimeDeps)))({
|
|
109
|
+
llmProvider,
|
|
110
|
+
rateLimitGovernor: deps.rateLimitGovernor,
|
|
111
|
+
});
|
|
107
112
|
}
|
|
108
113
|
export function resolveLocalKtxEmbeddingConfig(config, env) {
|
|
109
114
|
if (config.backend === 'none') {
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
export type RateLimitProvider = 'claude-subscription' | 'anthropic-api' | 'vertex' | 'codex';
|
|
2
|
+
type RateLimitSignalStatus = 'allowed' | 'warning' | 'rejected';
|
|
3
|
+
export interface RateLimitSignal {
|
|
4
|
+
provider: RateLimitProvider;
|
|
5
|
+
status: RateLimitSignalStatus;
|
|
6
|
+
resetAtMs?: number;
|
|
7
|
+
retryAfterMs?: number;
|
|
8
|
+
utilization?: number;
|
|
9
|
+
rateLimitType?: string;
|
|
10
|
+
}
|
|
11
|
+
export interface RateLimitRetryConfig {
|
|
12
|
+
maxAttempts: number;
|
|
13
|
+
baseDelayMs: number;
|
|
14
|
+
maxDelayMs: number;
|
|
15
|
+
jitter: boolean;
|
|
16
|
+
}
|
|
17
|
+
export interface RateLimitGovernorConfig {
|
|
18
|
+
enabled: boolean;
|
|
19
|
+
maxConcurrency: number;
|
|
20
|
+
throttleThreshold: number;
|
|
21
|
+
minConcurrencyUnderPressure: number;
|
|
22
|
+
maxWaitMs?: number;
|
|
23
|
+
waitStateTickMs: number;
|
|
24
|
+
retry: RateLimitRetryConfig;
|
|
25
|
+
}
|
|
26
|
+
export type RateLimitWaitState = {
|
|
27
|
+
kind: 'rate_limit_observed';
|
|
28
|
+
provider: RateLimitProvider;
|
|
29
|
+
status: RateLimitSignalStatus;
|
|
30
|
+
rateLimitType?: string;
|
|
31
|
+
resetAtMs?: number;
|
|
32
|
+
retryAfterMs?: number;
|
|
33
|
+
utilization?: number;
|
|
34
|
+
} | {
|
|
35
|
+
kind: 'concurrency_adjusted';
|
|
36
|
+
provider: RateLimitProvider;
|
|
37
|
+
from: number;
|
|
38
|
+
to: number;
|
|
39
|
+
reason: string;
|
|
40
|
+
rateLimitType?: string;
|
|
41
|
+
utilization?: number;
|
|
42
|
+
} | {
|
|
43
|
+
kind: 'wait_started' | 'wait_tick' | 'wait_finished';
|
|
44
|
+
provider: RateLimitProvider;
|
|
45
|
+
rateLimitType?: string;
|
|
46
|
+
resumeAtMs: number;
|
|
47
|
+
remainingMs: number;
|
|
48
|
+
};
|
|
49
|
+
export interface RateLimitGovernorDeps {
|
|
50
|
+
now?: () => number;
|
|
51
|
+
sleep?: (ms: number, signal?: AbortSignal) => Promise<void>;
|
|
52
|
+
random?: () => number;
|
|
53
|
+
}
|
|
54
|
+
export type RateLimitRelease = () => void;
|
|
55
|
+
type Subscriber = (state: RateLimitWaitState) => void;
|
|
56
|
+
export declare function createRateLimitGovernorConfig(input?: Partial<RateLimitGovernorConfig> & {
|
|
57
|
+
retry?: Partial<RateLimitRetryConfig>;
|
|
58
|
+
}): RateLimitGovernorConfig;
|
|
59
|
+
export declare class RateLimitGovernor {
|
|
60
|
+
private readonly config;
|
|
61
|
+
private readonly now;
|
|
62
|
+
private readonly sleep;
|
|
63
|
+
private readonly random;
|
|
64
|
+
private readonly subscribers;
|
|
65
|
+
private waiters;
|
|
66
|
+
private active;
|
|
67
|
+
private effectiveLimit;
|
|
68
|
+
private pausedUntilMs;
|
|
69
|
+
private pausedProvider;
|
|
70
|
+
private pausedRateLimitType;
|
|
71
|
+
private pausedTickMs;
|
|
72
|
+
private opaqueAttempts;
|
|
73
|
+
private pauseGeneration;
|
|
74
|
+
private visibleWaitAbort;
|
|
75
|
+
constructor(config: RateLimitGovernorConfig, deps?: RateLimitGovernorDeps);
|
|
76
|
+
currentLimit(): number;
|
|
77
|
+
/**
|
|
78
|
+
* Total attempts a runtime should make for a single rate-limited LLM call,
|
|
79
|
+
* including the first try. Returns 1 (no outer retry) when pacing is disabled:
|
|
80
|
+
* the outer retry loop only exists to cooperate with this governor's pause, so
|
|
81
|
+
* without active pacing there is no backoff to apply and the backend's own
|
|
82
|
+
* retry handles transient rejections.
|
|
83
|
+
*/
|
|
84
|
+
maxRetryAttempts(): number;
|
|
85
|
+
activeSlots(): number;
|
|
86
|
+
subscribe(cb: Subscriber): () => void;
|
|
87
|
+
report(signal: RateLimitSignal): void;
|
|
88
|
+
waitForReady(signal?: AbortSignal): Promise<void>;
|
|
89
|
+
acquireWorkSlot(signal?: AbortSignal): Promise<RateLimitRelease>;
|
|
90
|
+
private applyPause;
|
|
91
|
+
private resumeAtMsFor;
|
|
92
|
+
private adjustLimit;
|
|
93
|
+
private startVisibleWaitTicker;
|
|
94
|
+
private stopVisibleWaitTicker;
|
|
95
|
+
private runVisibleWaitTicker;
|
|
96
|
+
private finishPause;
|
|
97
|
+
private waitForPause;
|
|
98
|
+
private waitForSlot;
|
|
99
|
+
private wakeWaiters;
|
|
100
|
+
private emitWait;
|
|
101
|
+
private emit;
|
|
102
|
+
}
|
|
103
|
+
export {};
|