@kaelio/ktx 0.9.0 → 0.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/assets/python/{kaelio_ktx-0.9.0-py3-none-any.whl → kaelio_ktx-0.11.0-py3-none-any.whl} +0 -0
- package/assets/python/manifest.json +4 -4
- package/dist/.tsbuildinfo +1 -1
- package/dist/clack.d.ts +6 -0
- package/dist/clack.js +17 -2
- package/dist/cli-program.d.ts +3 -0
- package/dist/cli-program.js +46 -2
- package/dist/cli-runtime.d.ts +5 -0
- package/dist/cli-runtime.js +50 -0
- package/dist/commands/setup-commands.js +2 -3
- package/dist/community-cta.d.ts +11 -0
- package/dist/community-cta.js +19 -0
- package/dist/connection.js +23 -1
- package/dist/connectors/bigquery/connector.d.ts +2 -5
- package/dist/connectors/bigquery/connector.js +2 -2
- package/dist/connectors/clickhouse/connector.d.ts +2 -5
- package/dist/connectors/clickhouse/connector.js +2 -2
- package/dist/connectors/mysql/connector.d.ts +7 -6
- package/dist/connectors/mysql/connector.js +25 -5
- package/dist/connectors/mysql/dialect.d.ts +1 -1
- package/dist/connectors/mysql/dialect.js +12 -2
- package/dist/connectors/postgres/connector.d.ts +2 -5
- package/dist/connectors/postgres/connector.js +2 -2
- package/dist/connectors/snowflake/connector.d.ts +2 -5
- package/dist/connectors/snowflake/connector.js +2 -2
- package/dist/connectors/sqlite/connector.d.ts +2 -5
- package/dist/connectors/sqlite/connector.js +2 -2
- package/dist/connectors/sqlserver/connector.d.ts +2 -5
- package/dist/connectors/sqlserver/connector.js +2 -2
- package/dist/context/connections/drivers.d.ts +0 -1
- package/dist/context/connections/drivers.js +0 -7
- package/dist/context/connections/query-executor.d.ts +2 -1
- package/dist/context/core/abort.d.ts +9 -0
- package/dist/context/core/abort.js +36 -0
- package/dist/context/core/git-env.d.ts +12 -1
- package/dist/context/core/git-env.js +17 -2
- package/dist/context/core/git.service.js +15 -7
- package/dist/context/ingest/adapters/historic-sql/query-history-filter-picker.d.ts +1 -0
- package/dist/context/ingest/adapters/historic-sql/query-history-filter-picker.js +6 -2
- package/dist/context/ingest/context-candidates/curator-pagination.service.d.ts +1 -5
- package/dist/context/ingest/context-candidates/curator-pagination.service.js +1 -3
- package/dist/context/ingest/context-evidence/sqlite-context-evidence-store.d.ts +1 -1
- package/dist/context/ingest/final-gate-repair.d.ts +1 -0
- package/dist/context/ingest/final-gate-repair.js +1 -0
- package/dist/context/ingest/ingest-bundle.runner.d.ts +3 -0
- package/dist/context/ingest/ingest-bundle.runner.js +127 -53
- package/dist/context/ingest/isolated-diff/textual-conflict-resolver.d.ts +1 -0
- package/dist/context/ingest/isolated-diff/textual-conflict-resolver.js +1 -0
- package/dist/context/ingest/isolated-diff/work-unit-executor.d.ts +1 -0
- package/dist/context/ingest/local-bundle-runtime.js +11 -4
- package/dist/context/ingest/local-ingest.d.ts +1 -0
- package/dist/context/ingest/local-ingest.js +13 -3
- package/dist/context/ingest/memory-flow/events.js +1 -1
- package/dist/context/ingest/memory-flow/schema.js +8 -3
- package/dist/context/ingest/memory-flow/types.d.ts +7 -3
- package/dist/context/ingest/ports.d.ts +3 -5
- package/dist/context/ingest/stages/stage-3-work-units.d.ts +1 -4
- package/dist/context/ingest/stages/stage-3-work-units.js +5 -1
- package/dist/context/ingest/stages/stage-4-reconciliation.d.ts +1 -4
- package/dist/context/ingest/stages/stage-4-reconciliation.js +1 -1
- package/dist/context/ingest/types.d.ts +1 -0
- package/dist/context/llm/ai-sdk-runtime.d.ts +3 -0
- package/dist/context/llm/ai-sdk-runtime.js +152 -16
- package/dist/context/llm/claude-code-runtime.d.ts +6 -4
- package/dist/context/llm/claude-code-runtime.js +127 -48
- package/dist/context/llm/codex-runtime.d.ts +3 -3
- package/dist/context/llm/codex-runtime.js +90 -47
- package/dist/context/llm/local-config.d.ts +15 -5
- package/dist/context/llm/local-config.js +6 -1
- package/dist/context/llm/rate-limit-governor.d.ts +103 -0
- package/dist/context/llm/rate-limit-governor.js +285 -0
- package/dist/context/llm/runtime-port.d.ts +3 -6
- package/dist/context/mcp/context-tools.js +43 -13
- package/dist/context/project/config.d.ts +12 -0
- package/dist/context/project/config.js +35 -0
- package/dist/context/scan/types.d.ts +15 -2
- package/dist/context/scan/types.js +12 -0
- package/dist/context/sl/description-normalization.js +4 -14
- package/dist/context/tools/context-candidate-mark.tool.d.ts +2 -2
- package/dist/context-build-view.d.ts +13 -0
- package/dist/context-build-view.js +60 -1
- package/dist/demo-metrics.d.ts +0 -2
- package/dist/demo-metrics.js +1 -11
- package/dist/ingest.d.ts +1 -0
- package/dist/ingest.js +32 -3
- package/dist/io/symbols.d.ts +2 -0
- package/dist/io/symbols.js +2 -0
- package/dist/io/tty.d.ts +9 -0
- package/dist/io/tty.js +5 -0
- package/dist/links.d.ts +1 -0
- package/dist/links.js +1 -0
- package/dist/memory-flow-hud.js +8 -16
- package/dist/public-ingest.js +50 -15
- package/dist/reveal-password-prompt.d.ts +24 -0
- package/dist/reveal-password-prompt.js +78 -0
- package/dist/scan.js +18 -2
- package/dist/setup-agents.js +1 -5
- package/dist/setup-databases.d.ts +1 -0
- package/dist/setup-databases.js +23 -3
- package/dist/setup-demo-tour.js +1 -0
- package/dist/setup-embeddings.js +1 -1
- package/dist/setup-models.d.ts +1 -14
- package/dist/setup-models.js +116 -340
- package/dist/setup-prompts.js +4 -7
- package/dist/setup-sources.js +7 -7
- package/dist/setup.d.ts +26 -1
- package/dist/setup.js +78 -7
- package/dist/sl.d.ts +2 -2
- package/dist/sl.js +20 -4
- package/dist/sql.js +18 -2
- package/dist/star-prompt/cache.d.ts +16 -0
- package/dist/star-prompt/cache.js +45 -0
- package/dist/star-prompt/star-count.d.ts +7 -0
- package/dist/star-prompt/star-count.js +66 -0
- package/dist/star-prompt/star-line.d.ts +12 -0
- package/dist/star-prompt/star-line.js +26 -0
- package/dist/telemetry/command-hook.d.ts +24 -0
- package/dist/telemetry/command-hook.js +37 -3
- package/dist/telemetry/emitter.d.ts +10 -0
- package/dist/telemetry/emitter.js +31 -0
- package/dist/telemetry/events.d.ts +24 -0
- package/dist/telemetry/events.js +15 -0
- package/dist/telemetry/exception.d.ts +18 -0
- package/dist/telemetry/exception.js +162 -0
- package/dist/telemetry/index.d.ts +4 -3
- package/dist/telemetry/index.js +3 -2
- package/dist/telemetry/redaction-secrets.d.ts +11 -0
- package/dist/telemetry/redaction-secrets.js +92 -0
- package/dist/update-check/cache.d.ts +21 -0
- package/dist/update-check/cache.js +38 -0
- package/dist/update-check/channel.d.ts +15 -0
- package/dist/update-check/channel.js +30 -0
- package/dist/update-check/registry.d.ts +1 -0
- package/dist/update-check/registry.js +45 -0
- package/dist/update-check/update-check.d.ts +43 -0
- package/dist/update-check/update-check.js +116 -0
- package/package.json +8 -1
- package/dist/context/connections/local-query-executor.d.ts +0 -6
- package/dist/context/connections/local-query-executor.js +0 -39
- package/dist/context/connections/postgres-query-executor.d.ts +0 -25
- package/dist/context/connections/postgres-query-executor.js +0 -53
- package/dist/context/connections/sqlite-query-executor.d.ts +0 -4
- package/dist/context/connections/sqlite-query-executor.js +0 -74
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import { KtxMessageBuilder, splitKtxSystemMessages } from '../../llm/message-builder.js';
|
|
2
2
|
import { generateText, Output, stepCountIs } from 'ai';
|
|
3
3
|
import { noopLogger } from '../../context/core/config.js';
|
|
4
|
+
import { isAbortError } from '../core/abort.js';
|
|
4
5
|
import { summarizeKtxLlmDebugRequest } from './debug-request-recorder.js';
|
|
5
6
|
import { createAiSdkToolSet } from './runtime-tools.js';
|
|
6
7
|
function toLlmTokenUsage(usage) {
|
|
@@ -16,6 +17,108 @@ function toLlmTokenUsage(usage) {
|
|
|
16
17
|
function hasTools(tools) {
|
|
17
18
|
return Object.keys(tools).length > 0;
|
|
18
19
|
}
|
|
20
|
+
function modelProviderName(model) {
|
|
21
|
+
const provider = model.provider ?? '';
|
|
22
|
+
return provider.includes('vertex') || provider.includes('google') ? 'vertex' : 'anthropic-api';
|
|
23
|
+
}
|
|
24
|
+
const RATE_LIMIT_HEADER_PAIRS = [
|
|
25
|
+
{
|
|
26
|
+
limit: 'anthropic-ratelimit-requests-limit',
|
|
27
|
+
remaining: 'anthropic-ratelimit-requests-remaining',
|
|
28
|
+
rateLimitType: 'rpm',
|
|
29
|
+
},
|
|
30
|
+
{
|
|
31
|
+
limit: 'anthropic-ratelimit-tokens-limit',
|
|
32
|
+
remaining: 'anthropic-ratelimit-tokens-remaining',
|
|
33
|
+
rateLimitType: 'tpm',
|
|
34
|
+
},
|
|
35
|
+
{
|
|
36
|
+
limit: 'anthropic-ratelimit-input-tokens-limit',
|
|
37
|
+
remaining: 'anthropic-ratelimit-input-tokens-remaining',
|
|
38
|
+
rateLimitType: 'itpm',
|
|
39
|
+
},
|
|
40
|
+
{
|
|
41
|
+
limit: 'anthropic-ratelimit-output-tokens-limit',
|
|
42
|
+
remaining: 'anthropic-ratelimit-output-tokens-remaining',
|
|
43
|
+
rateLimitType: 'otpm',
|
|
44
|
+
},
|
|
45
|
+
{
|
|
46
|
+
limit: 'x-ratelimit-limit-requests',
|
|
47
|
+
remaining: 'x-ratelimit-remaining-requests',
|
|
48
|
+
rateLimitType: 'rpm',
|
|
49
|
+
},
|
|
50
|
+
{
|
|
51
|
+
limit: 'x-ratelimit-limit-tokens',
|
|
52
|
+
remaining: 'x-ratelimit-remaining-tokens',
|
|
53
|
+
rateLimitType: 'tpm',
|
|
54
|
+
},
|
|
55
|
+
];
|
|
56
|
+
function normalizeHeaders(headers) {
|
|
57
|
+
if (!headers || typeof headers !== 'object') {
|
|
58
|
+
return {};
|
|
59
|
+
}
|
|
60
|
+
const get = headers.get;
|
|
61
|
+
if (typeof get === 'function') {
|
|
62
|
+
const out = {};
|
|
63
|
+
for (const pair of RATE_LIMIT_HEADER_PAIRS) {
|
|
64
|
+
const limit = get.call(headers, pair.limit);
|
|
65
|
+
const remaining = get.call(headers, pair.remaining);
|
|
66
|
+
if (typeof limit === 'string')
|
|
67
|
+
out[pair.limit] = limit;
|
|
68
|
+
if (typeof remaining === 'string')
|
|
69
|
+
out[pair.remaining] = remaining;
|
|
70
|
+
}
|
|
71
|
+
return out;
|
|
72
|
+
}
|
|
73
|
+
return Object.fromEntries(Object.entries(headers)
|
|
74
|
+
.filter((entry) => typeof entry[1] === 'string' || typeof entry[1] === 'number')
|
|
75
|
+
.map(([key, value]) => [key.toLowerCase(), String(value)]));
|
|
76
|
+
}
|
|
77
|
+
function numericHeader(headers, key) {
|
|
78
|
+
const value = Number(headers[key]);
|
|
79
|
+
return Number.isFinite(value) && value >= 0 ? value : undefined;
|
|
80
|
+
}
|
|
81
|
+
function utilizationForPair(headers, pair) {
|
|
82
|
+
const limit = numericHeader(headers, pair.limit);
|
|
83
|
+
const remaining = numericHeader(headers, pair.remaining);
|
|
84
|
+
if (limit === undefined || remaining === undefined || limit <= 0) {
|
|
85
|
+
return undefined;
|
|
86
|
+
}
|
|
87
|
+
return 1 - Math.min(limit, remaining) / limit;
|
|
88
|
+
}
|
|
89
|
+
function aiSdkHeaderRateLimitSignal(provider, result) {
|
|
90
|
+
const headers = normalizeHeaders(result.response?.headers);
|
|
91
|
+
let best;
|
|
92
|
+
for (const pair of RATE_LIMIT_HEADER_PAIRS) {
|
|
93
|
+
const utilization = utilizationForPair(headers, pair);
|
|
94
|
+
if (utilization === undefined) {
|
|
95
|
+
continue;
|
|
96
|
+
}
|
|
97
|
+
if (!best || utilization > best.utilization) {
|
|
98
|
+
best = { utilization, rateLimitType: pair.rateLimitType };
|
|
99
|
+
}
|
|
100
|
+
}
|
|
101
|
+
if (!best) {
|
|
102
|
+
return undefined;
|
|
103
|
+
}
|
|
104
|
+
return {
|
|
105
|
+
provider,
|
|
106
|
+
status: 'allowed',
|
|
107
|
+
rateLimitType: best.rateLimitType,
|
|
108
|
+
utilization: Number(best.utilization.toFixed(4)),
|
|
109
|
+
};
|
|
110
|
+
}
|
|
111
|
+
function retryAfterMs(error) {
|
|
112
|
+
const value = error.retryAfter;
|
|
113
|
+
if (typeof value === 'number' && Number.isFinite(value) && value > 0) {
|
|
114
|
+
return value < 1_000 ? value * 1_000 : value;
|
|
115
|
+
}
|
|
116
|
+
return undefined;
|
|
117
|
+
}
|
|
118
|
+
function isAiSdkRateLimitError(error) {
|
|
119
|
+
const record = error;
|
|
120
|
+
return record.name === 'TooManyRequestsError' || record.statusCode === 429 || record.status === 429;
|
|
121
|
+
}
|
|
19
122
|
export class AiSdkKtxLlmRuntime {
|
|
20
123
|
deps;
|
|
21
124
|
logger;
|
|
@@ -23,6 +126,37 @@ export class AiSdkKtxLlmRuntime {
|
|
|
23
126
|
this.deps = deps;
|
|
24
127
|
this.logger = deps.logger ?? noopLogger;
|
|
25
128
|
}
|
|
129
|
+
async generateTextWithRateLimitRetry(provider, abortSignal, run) {
|
|
130
|
+
// maxRetryAttempts() returns 1 when no governor is present or pacing is
|
|
131
|
+
// disabled, so a 429 throws immediately instead of hammering the provider
|
|
132
|
+
// with no backoff; the AI SDK's own maxRetries still handles transient 429s.
|
|
133
|
+
const maxAttempts = this.deps.rateLimitGovernor?.maxRetryAttempts() ?? 1;
|
|
134
|
+
let attempt = 0;
|
|
135
|
+
while (true) {
|
|
136
|
+
await this.deps.rateLimitGovernor?.waitForReady(abortSignal);
|
|
137
|
+
try {
|
|
138
|
+
const result = await run();
|
|
139
|
+
const signal = aiSdkHeaderRateLimitSignal(provider, result);
|
|
140
|
+
if (signal) {
|
|
141
|
+
this.deps.rateLimitGovernor?.report(signal);
|
|
142
|
+
}
|
|
143
|
+
return result;
|
|
144
|
+
}
|
|
145
|
+
catch (error) {
|
|
146
|
+
if (isAbortError(error) || !isAiSdkRateLimitError(error) || attempt >= maxAttempts - 1) {
|
|
147
|
+
throw error;
|
|
148
|
+
}
|
|
149
|
+
attempt += 1;
|
|
150
|
+
const retryAfter = retryAfterMs(error);
|
|
151
|
+
this.deps.rateLimitGovernor?.report({
|
|
152
|
+
provider,
|
|
153
|
+
status: 'rejected',
|
|
154
|
+
rateLimitType: 'http_429',
|
|
155
|
+
...(retryAfter !== undefined ? { retryAfterMs: retryAfter } : {}),
|
|
156
|
+
});
|
|
157
|
+
}
|
|
158
|
+
}
|
|
159
|
+
}
|
|
26
160
|
async generateText(input) {
|
|
27
161
|
const model = this.deps.llmProvider.getModel(input.role);
|
|
28
162
|
if (model.provider === 'deterministic') {
|
|
@@ -37,12 +171,13 @@ export class AiSdkKtxLlmRuntime {
|
|
|
37
171
|
});
|
|
38
172
|
const split = splitKtxSystemMessages(built.messages);
|
|
39
173
|
const startedAt = Date.now();
|
|
40
|
-
const
|
|
174
|
+
const request = {
|
|
41
175
|
model,
|
|
42
176
|
temperature: input.temperature ?? 0,
|
|
43
177
|
...(split.system ? { system: split.system } : {}),
|
|
44
178
|
messages: split.messages,
|
|
45
179
|
tools: built.tools,
|
|
180
|
+
...(input.abortSignal ? { abortSignal: input.abortSignal } : {}),
|
|
46
181
|
...(hasTools(tools)
|
|
47
182
|
? {
|
|
48
183
|
experimental_repairToolCall: this.deps.llmProvider.repairToolCallHandler({
|
|
@@ -50,7 +185,8 @@ export class AiSdkKtxLlmRuntime {
|
|
|
50
185
|
}),
|
|
51
186
|
}
|
|
52
187
|
: {}),
|
|
53
|
-
}
|
|
188
|
+
};
|
|
189
|
+
const result = await this.generateTextWithRateLimitRetry(modelProviderName(model), input.abortSignal, () => generateText(request));
|
|
54
190
|
input.onMetrics?.({ totalMs: Date.now() - startedAt, usage: toLlmTokenUsage(result.totalUsage ?? result.usage) });
|
|
55
191
|
if (typeof result.text !== 'string') {
|
|
56
192
|
throw new Error('KTX LLM text generation returned no text');
|
|
@@ -68,12 +204,13 @@ export class AiSdkKtxLlmRuntime {
|
|
|
68
204
|
});
|
|
69
205
|
const split = splitKtxSystemMessages(built.messages);
|
|
70
206
|
const startedAt = Date.now();
|
|
71
|
-
const
|
|
207
|
+
const request = {
|
|
72
208
|
model,
|
|
73
209
|
temperature: input.temperature ?? 0,
|
|
74
210
|
...(split.system ? { system: split.system } : {}),
|
|
75
211
|
messages: split.messages,
|
|
76
212
|
tools: built.tools,
|
|
213
|
+
...(input.abortSignal ? { abortSignal: input.abortSignal } : {}),
|
|
77
214
|
...(hasTools(tools)
|
|
78
215
|
? {
|
|
79
216
|
experimental_repairToolCall: this.deps.llmProvider.repairToolCallHandler({
|
|
@@ -82,7 +219,8 @@ export class AiSdkKtxLlmRuntime {
|
|
|
82
219
|
}
|
|
83
220
|
: {}),
|
|
84
221
|
output: Output.object({ schema: input.schema }),
|
|
85
|
-
}
|
|
222
|
+
};
|
|
223
|
+
const result = await this.generateTextWithRateLimitRetry(modelProviderName(model), input.abortSignal, () => generateText(request));
|
|
86
224
|
input.onMetrics?.({ totalMs: Date.now() - startedAt, usage: toLlmTokenUsage(result.totalUsage ?? result.usage) });
|
|
87
225
|
if (result.output == null) {
|
|
88
226
|
throw new Error('KTX LLM object generation returned no output');
|
|
@@ -114,7 +252,7 @@ export class AiSdkKtxLlmRuntime {
|
|
|
114
252
|
messages: built.messages,
|
|
115
253
|
tools: built.tools,
|
|
116
254
|
}));
|
|
117
|
-
const
|
|
255
|
+
const request = {
|
|
118
256
|
model,
|
|
119
257
|
temperature: 0,
|
|
120
258
|
stopWhen: stepCountIs(params.stepBudget),
|
|
@@ -125,20 +263,15 @@ export class AiSdkKtxLlmRuntime {
|
|
|
125
263
|
...(promptMessages.system ? { system: promptMessages.system } : {}),
|
|
126
264
|
messages: promptMessages.messages,
|
|
127
265
|
tools: built.tools,
|
|
128
|
-
|
|
266
|
+
...(params.abortSignal ? { abortSignal: params.abortSignal } : {}),
|
|
267
|
+
// Count model round-trips locally for metrics. `stepCountIs(stepBudget)`
|
|
268
|
+
// caps the loop, so this counter never exceeds the budget.
|
|
269
|
+
onStepFinish: () => {
|
|
129
270
|
stepIndex += 1;
|
|
130
271
|
stepBoundariesMs.push(Date.now() - startedAt);
|
|
131
|
-
if (!params.onStepFinish) {
|
|
132
|
-
return;
|
|
133
|
-
}
|
|
134
|
-
try {
|
|
135
|
-
await params.onStepFinish({ stepIndex, stepBudget: params.stepBudget });
|
|
136
|
-
}
|
|
137
|
-
catch (err) {
|
|
138
|
-
this.logger.warn(`[agent-runner] onStepFinish callback threw; ignoring: ${err instanceof Error ? err.message : String(err)}`);
|
|
139
|
-
}
|
|
140
272
|
},
|
|
141
|
-
}
|
|
273
|
+
};
|
|
274
|
+
const result = await this.generateTextWithRateLimitRetry(modelProviderName(model), params.abortSignal, () => generateText(request));
|
|
142
275
|
return {
|
|
143
276
|
stopReason: 'natural',
|
|
144
277
|
metrics: {
|
|
@@ -150,6 +283,9 @@ export class AiSdkKtxLlmRuntime {
|
|
|
150
283
|
};
|
|
151
284
|
}
|
|
152
285
|
catch (error) {
|
|
286
|
+
if (isAbortError(error)) {
|
|
287
|
+
throw error;
|
|
288
|
+
}
|
|
153
289
|
const err = error instanceof Error ? error : new Error(String(error));
|
|
154
290
|
this.logger.warn(`[agent-runner] loop failed: ${err.message}`);
|
|
155
291
|
return {
|
|
@@ -1,8 +1,11 @@
|
|
|
1
1
|
import { query as defaultQuery, type SDKMessage, type SDKResultMessage } from '@anthropic-ai/claude-agent-sdk';
|
|
2
2
|
import { z } from 'zod';
|
|
3
|
-
import {
|
|
3
|
+
import type { RateLimitGovernor } from './rate-limit-governor.js';
|
|
4
4
|
import type { KtxGenerateObjectInput, KtxGenerateTextInput, KtxLlmRuntimePort, RunLoopParams, RunLoopResult, RunLoopStopReason } from './runtime-port.js';
|
|
5
|
-
type
|
|
5
|
+
type QueryResult = AsyncIterable<SDKMessage> & {
|
|
6
|
+
interrupt?: () => void | Promise<void>;
|
|
7
|
+
};
|
|
8
|
+
type QueryFn = (params: Parameters<typeof defaultQuery>[0]) => QueryResult;
|
|
6
9
|
export interface ClaudeCodeKtxLlmRuntimeDeps {
|
|
7
10
|
projectDir: string;
|
|
8
11
|
modelSlots: {
|
|
@@ -10,14 +13,13 @@ export interface ClaudeCodeKtxLlmRuntimeDeps {
|
|
|
10
13
|
} & Partial<Record<string, string>>;
|
|
11
14
|
query?: QueryFn;
|
|
12
15
|
env?: NodeJS.ProcessEnv;
|
|
13
|
-
|
|
16
|
+
rateLimitGovernor?: Pick<RateLimitGovernor, 'waitForReady' | 'report' | 'maxRetryAttempts'>;
|
|
14
17
|
}
|
|
15
18
|
/** @internal */
|
|
16
19
|
export declare function mapClaudeCodeStopReason(result: SDKResultMessage): RunLoopStopReason;
|
|
17
20
|
export declare class ClaudeCodeKtxLlmRuntime implements KtxLlmRuntimePort {
|
|
18
21
|
private readonly deps;
|
|
19
22
|
private readonly runQuery;
|
|
20
|
-
private readonly logger;
|
|
21
23
|
constructor(deps: ClaudeCodeKtxLlmRuntimeDeps);
|
|
22
24
|
generateText(input: KtxGenerateTextInput): Promise<string>;
|
|
23
25
|
generateObject<TOutput, TSchema extends z.ZodType<TOutput>>(input: KtxGenerateObjectInput<TOutput, TSchema>): Promise<TOutput>;
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { createSdkMcpServer, query as defaultQuery, } from '@anthropic-ai/claude-agent-sdk';
|
|
2
2
|
import { z } from 'zod';
|
|
3
|
-
import {
|
|
3
|
+
import { createAbortError, isAbortError, throwIfAborted } from '../core/abort.js';
|
|
4
4
|
import { createKtxClaudeCodeEnv } from './claude-code-env.js';
|
|
5
5
|
import { resolveClaudeCodeModel } from './claude-code-models.js';
|
|
6
6
|
import { createClaudeSdkTools, mcpToolIds } from './runtime-tools.js';
|
|
@@ -41,21 +41,6 @@ const STRUCTURED_OUTPUT_TOOL_NAME = 'StructuredOutput';
|
|
|
41
41
|
function isResult(message) {
|
|
42
42
|
return message.type === 'result';
|
|
43
43
|
}
|
|
44
|
-
// Skip emissions the SDK does not count toward `num_turns`: `pause_turn` continuations and
|
|
45
|
-
// errored partials (e.g. `max_output_tokens`) it retries internally. Without this, the
|
|
46
|
-
// runtime's step counter outruns `maxTurns` and the HUD renders e.g. `step 69/40`.
|
|
47
|
-
function countsAsAssistantTurn(message) {
|
|
48
|
-
if (message.type !== 'assistant' || message.parent_tool_use_id !== null) {
|
|
49
|
-
return false;
|
|
50
|
-
}
|
|
51
|
-
if (message.error !== undefined) {
|
|
52
|
-
return false;
|
|
53
|
-
}
|
|
54
|
-
if (message.message.stop_reason === 'pause_turn') {
|
|
55
|
-
return false;
|
|
56
|
-
}
|
|
57
|
-
return true;
|
|
58
|
-
}
|
|
59
44
|
function resultError(result) {
|
|
60
45
|
if (result.subtype === 'success') {
|
|
61
46
|
return undefined;
|
|
@@ -104,6 +89,67 @@ function assertInitIsolation(message, allowedToolIds, expectedMcpServerNames) {
|
|
|
104
89
|
function expectedMcpServerNames(tools) {
|
|
105
90
|
return tools && Object.keys(tools).length > 0 ? new Set([KTX_MCP_SERVER_NAME]) : new Set();
|
|
106
91
|
}
|
|
92
|
+
const CLAUDE_RATE_LIMIT_ERROR_MARKERS = /\b429\b|rate limit|too many requests|quota exceeded|overloaded|max_retries/i;
|
|
93
|
+
function normalizeClaudeResetAtMs(value) {
|
|
94
|
+
if (typeof value === 'number' && Number.isFinite(value) && value > 0) {
|
|
95
|
+
return Math.round(value < 10_000_000_000 ? value * 1_000 : value);
|
|
96
|
+
}
|
|
97
|
+
if (typeof value === 'string') {
|
|
98
|
+
const numeric = Number(value);
|
|
99
|
+
if (Number.isFinite(numeric) && numeric > 0) {
|
|
100
|
+
return normalizeClaudeResetAtMs(numeric);
|
|
101
|
+
}
|
|
102
|
+
const parsed = Date.parse(value);
|
|
103
|
+
return Number.isFinite(parsed) ? parsed : undefined;
|
|
104
|
+
}
|
|
105
|
+
return undefined;
|
|
106
|
+
}
|
|
107
|
+
function isClaudeRateLimitResult(result, rejectedSignal) {
|
|
108
|
+
const error = resultError(result);
|
|
109
|
+
if (!error) {
|
|
110
|
+
return false;
|
|
111
|
+
}
|
|
112
|
+
if (rejectedSignal?.status === 'rejected') {
|
|
113
|
+
return true;
|
|
114
|
+
}
|
|
115
|
+
const resultDetails = result;
|
|
116
|
+
const details = [
|
|
117
|
+
error.message,
|
|
118
|
+
resultDetails.stop_reason,
|
|
119
|
+
resultDetails.terminal_reason,
|
|
120
|
+
...(resultDetails.errors ?? []),
|
|
121
|
+
]
|
|
122
|
+
.filter((value) => typeof value === 'string' && value.length > 0)
|
|
123
|
+
.join('\n');
|
|
124
|
+
return CLAUDE_RATE_LIMIT_ERROR_MARKERS.test(details);
|
|
125
|
+
}
|
|
126
|
+
function claudeRateLimitSignal(message) {
|
|
127
|
+
const record = message;
|
|
128
|
+
if (record.type === 'rate_limit_event') {
|
|
129
|
+
const info = record.rate_limit_info;
|
|
130
|
+
if (!info)
|
|
131
|
+
return null;
|
|
132
|
+
const rawStatus = typeof info.status === 'string' ? info.status : 'allowed';
|
|
133
|
+
const resetAtMs = normalizeClaudeResetAtMs(info.resetsAt);
|
|
134
|
+
return {
|
|
135
|
+
provider: 'claude-subscription',
|
|
136
|
+
status: rawStatus === 'rejected' ? 'rejected' : rawStatus === 'allowed_warning' ? 'warning' : 'allowed',
|
|
137
|
+
...(resetAtMs !== undefined ? { resetAtMs } : {}),
|
|
138
|
+
...(typeof info.rateLimitType === 'string' ? { rateLimitType: info.rateLimitType } : {}),
|
|
139
|
+
...(typeof info.utilization === 'number' ? { utilization: info.utilization } : {}),
|
|
140
|
+
};
|
|
141
|
+
}
|
|
142
|
+
if (record.subtype === 'api_retry' || record.type === 'api_retry') {
|
|
143
|
+
const retryDelayMs = typeof record.retry_delay_ms === 'number' ? record.retry_delay_ms : undefined;
|
|
144
|
+
return {
|
|
145
|
+
provider: 'claude-subscription',
|
|
146
|
+
status: 'warning',
|
|
147
|
+
...(retryDelayMs !== undefined ? { retryAfterMs: retryDelayMs } : {}),
|
|
148
|
+
rateLimitType: 'api_retry',
|
|
149
|
+
};
|
|
150
|
+
}
|
|
151
|
+
return null;
|
|
152
|
+
}
|
|
107
153
|
function managedMcpSettings(serverNames) {
|
|
108
154
|
return {
|
|
109
155
|
allowManagedMcpServersOnly: true,
|
|
@@ -150,28 +196,63 @@ function baseOptions(input) {
|
|
|
150
196
|
}
|
|
151
197
|
async function collectResult(params) {
|
|
152
198
|
let result;
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
199
|
+
let rejectedRateLimitSignal;
|
|
200
|
+
throwIfAborted(params.abortSignal);
|
|
201
|
+
await params.rateLimitGovernor?.waitForReady(params.abortSignal);
|
|
202
|
+
throwIfAborted(params.abortSignal);
|
|
203
|
+
const queryResult = params.query({ prompt: params.prompt, options: params.options });
|
|
204
|
+
const onAbort = () => {
|
|
205
|
+
void Promise.resolve(queryResult.interrupt?.()).catch(() => undefined);
|
|
206
|
+
};
|
|
207
|
+
params.abortSignal?.addEventListener('abort', onAbort, { once: true });
|
|
208
|
+
try {
|
|
209
|
+
for await (const message of queryResult) {
|
|
210
|
+
throwIfAborted(params.abortSignal);
|
|
211
|
+
const rateLimitSignal = claudeRateLimitSignal(message);
|
|
212
|
+
if (rateLimitSignal) {
|
|
213
|
+
if (rateLimitSignal.status === 'rejected') {
|
|
214
|
+
rejectedRateLimitSignal = rateLimitSignal;
|
|
215
|
+
}
|
|
216
|
+
params.rateLimitGovernor?.report(rateLimitSignal);
|
|
217
|
+
}
|
|
218
|
+
assertInitIsolation(message, params.allowedToolIds, params.expectedMcpServerNames);
|
|
219
|
+
if (isResult(message)) {
|
|
220
|
+
result = message;
|
|
221
|
+
}
|
|
160
222
|
}
|
|
161
223
|
}
|
|
224
|
+
finally {
|
|
225
|
+
params.abortSignal?.removeEventListener('abort', onAbort);
|
|
226
|
+
}
|
|
227
|
+
if (params.abortSignal?.aborted) {
|
|
228
|
+
throw createAbortError();
|
|
229
|
+
}
|
|
162
230
|
if (!result) {
|
|
163
231
|
throw new Error('Claude Code query returned no result message');
|
|
164
232
|
}
|
|
165
|
-
return
|
|
233
|
+
return {
|
|
234
|
+
result,
|
|
235
|
+
...(rejectedRateLimitSignal ? { rejectedRateLimitSignal } : {}),
|
|
236
|
+
};
|
|
237
|
+
}
|
|
238
|
+
async function collectResultWithRateLimitRetry(params) {
|
|
239
|
+
// maxRetryAttempts() returns 1 when no governor is present or pacing is
|
|
240
|
+
// disabled, so a rate-limited result surfaces without an extra query; the
|
|
241
|
+
// Claude Code SDK applies its own backoff for transient rejections.
|
|
242
|
+
const maxAttempts = params.rateLimitGovernor?.maxRetryAttempts() ?? 1;
|
|
243
|
+
for (let attempt = 0;; attempt += 1) {
|
|
244
|
+
const outcome = await collectResult(params);
|
|
245
|
+
if (!isClaudeRateLimitResult(outcome.result, outcome.rejectedRateLimitSignal) || attempt >= maxAttempts - 1) {
|
|
246
|
+
return outcome.result;
|
|
247
|
+
}
|
|
248
|
+
}
|
|
166
249
|
}
|
|
167
250
|
export class ClaudeCodeKtxLlmRuntime {
|
|
168
251
|
deps;
|
|
169
252
|
runQuery;
|
|
170
|
-
logger;
|
|
171
253
|
constructor(deps) {
|
|
172
254
|
this.deps = deps;
|
|
173
255
|
this.runQuery = deps.query ?? defaultQuery;
|
|
174
|
-
this.logger = deps.logger ?? noopLogger;
|
|
175
256
|
}
|
|
176
257
|
async generateText(input) {
|
|
177
258
|
const options = baseOptions({
|
|
@@ -182,12 +263,14 @@ export class ClaudeCodeKtxLlmRuntime {
|
|
|
182
263
|
tools: input.tools,
|
|
183
264
|
});
|
|
184
265
|
const startedAt = Date.now();
|
|
185
|
-
const result = await
|
|
266
|
+
const result = await collectResultWithRateLimitRetry({
|
|
186
267
|
query: this.runQuery,
|
|
187
268
|
prompt: [input.system, input.prompt].filter(Boolean).join('\n\n'),
|
|
188
269
|
options,
|
|
189
270
|
allowedToolIds: new Set(mcpToolIds(input.tools ?? {})),
|
|
190
271
|
expectedMcpServerNames: expectedMcpServerNames(input.tools),
|
|
272
|
+
rateLimitGovernor: this.deps.rateLimitGovernor,
|
|
273
|
+
abortSignal: input.abortSignal,
|
|
191
274
|
});
|
|
192
275
|
input.onMetrics?.({ totalMs: Date.now() - startedAt, usage: claudeTokenUsage(result) });
|
|
193
276
|
const error = resultError(result);
|
|
@@ -216,12 +299,14 @@ export class ClaudeCodeKtxLlmRuntime {
|
|
|
216
299
|
outputFormat: { type: 'json_schema', schema: jsonSchema(input.schema) },
|
|
217
300
|
};
|
|
218
301
|
const startedAt = Date.now();
|
|
219
|
-
const result = await
|
|
302
|
+
const result = await collectResultWithRateLimitRetry({
|
|
220
303
|
query: this.runQuery,
|
|
221
304
|
prompt: [input.system, input.prompt].filter(Boolean).join('\n\n'),
|
|
222
305
|
options,
|
|
223
306
|
allowedToolIds: new Set([...mcpToolIds(input.tools ?? {}), STRUCTURED_OUTPUT_TOOL_NAME]),
|
|
224
307
|
expectedMcpServerNames: expectedMcpServerNames(input.tools),
|
|
308
|
+
rateLimitGovernor: this.deps.rateLimitGovernor,
|
|
309
|
+
abortSignal: input.abortSignal,
|
|
225
310
|
});
|
|
226
311
|
input.onMetrics?.({ totalMs: Date.now() - startedAt, usage: claudeTokenUsage(result) });
|
|
227
312
|
const error = resultError(result);
|
|
@@ -234,9 +319,7 @@ export class ClaudeCodeKtxLlmRuntime {
|
|
|
234
319
|
return input.schema.parse(result.structured_output);
|
|
235
320
|
}
|
|
236
321
|
async runAgentLoop(params) {
|
|
237
|
-
let stepIndex = 0;
|
|
238
322
|
const startedAt = Date.now();
|
|
239
|
-
const stepBoundariesMs = [];
|
|
240
323
|
try {
|
|
241
324
|
const options = baseOptions({
|
|
242
325
|
projectDir: this.deps.projectDir,
|
|
@@ -245,25 +328,14 @@ export class ClaudeCodeKtxLlmRuntime {
|
|
|
245
328
|
maxTurns: params.stepBudget,
|
|
246
329
|
tools: params.toolSet,
|
|
247
330
|
});
|
|
248
|
-
const result = await
|
|
331
|
+
const result = await collectResultWithRateLimitRetry({
|
|
249
332
|
query: this.runQuery,
|
|
250
333
|
prompt: params.userPrompt,
|
|
251
334
|
options: { ...options, systemPrompt: params.systemPrompt },
|
|
252
335
|
allowedToolIds: new Set(mcpToolIds(params.toolSet)),
|
|
253
336
|
expectedMcpServerNames: expectedMcpServerNames(params.toolSet),
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
stepBoundariesMs.push(Date.now() - startedAt);
|
|
257
|
-
if (!params.onStepFinish) {
|
|
258
|
-
return;
|
|
259
|
-
}
|
|
260
|
-
try {
|
|
261
|
-
await params.onStepFinish({ stepIndex, stepBudget: params.stepBudget });
|
|
262
|
-
}
|
|
263
|
-
catch (error) {
|
|
264
|
-
this.logger.warn(`[claude-code-runner] onStepFinish callback threw; ignoring: ${error instanceof Error ? error.message : String(error)}`);
|
|
265
|
-
}
|
|
266
|
-
},
|
|
337
|
+
rateLimitGovernor: this.deps.rateLimitGovernor,
|
|
338
|
+
abortSignal: params.abortSignal,
|
|
267
339
|
});
|
|
268
340
|
const stopReason = mapClaudeCodeStopReason(result);
|
|
269
341
|
const error = resultError(result);
|
|
@@ -272,18 +344,25 @@ export class ClaudeCodeKtxLlmRuntime {
|
|
|
272
344
|
...(stopReason === 'error' && error ? { error } : {}),
|
|
273
345
|
metrics: {
|
|
274
346
|
totalMs: Date.now() - startedAt,
|
|
275
|
-
|
|
276
|
-
|
|
347
|
+
// Authoritative turn count from the SDK result. The runtime no longer
|
|
348
|
+
// re-derives a per-turn counter: it could not match the SDK's `num_turns`
|
|
349
|
+
// and overshot `maxTurns` (the source of the misleading `step 70/40`).
|
|
350
|
+
// Per-step boundaries require that counter and are not consumed anywhere.
|
|
351
|
+
stepCount: result.num_turns,
|
|
352
|
+
stepBoundariesMs: [],
|
|
277
353
|
usage: claudeTokenUsage(result),
|
|
278
354
|
},
|
|
279
355
|
};
|
|
280
356
|
}
|
|
281
357
|
catch (error) {
|
|
358
|
+
if (isAbortError(error)) {
|
|
359
|
+
throw error;
|
|
360
|
+
}
|
|
282
361
|
const err = error instanceof Error ? error : new Error(String(error));
|
|
283
362
|
return {
|
|
284
363
|
stopReason: 'error',
|
|
285
364
|
error: err,
|
|
286
|
-
metrics: { totalMs: Date.now() - startedAt, stepCount:
|
|
365
|
+
metrics: { totalMs: Date.now() - startedAt, stepCount: 0, stepBoundariesMs: [], usage: {} },
|
|
287
366
|
};
|
|
288
367
|
}
|
|
289
368
|
}
|
|
@@ -306,7 +385,7 @@ export async function runClaudeCodeAuthProbe(input) {
|
|
|
306
385
|
env: input.env,
|
|
307
386
|
maxTurns: 1,
|
|
308
387
|
});
|
|
309
|
-
const result = await
|
|
388
|
+
const result = await collectResultWithRateLimitRetry({
|
|
310
389
|
query: input.query ?? defaultQuery,
|
|
311
390
|
prompt: 'Reply with exactly: ok',
|
|
312
391
|
options,
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import { z } from 'zod';
|
|
2
|
-
import { type KtxLogger } from '../core/config.js';
|
|
3
2
|
import { type CodexRuntimeMcpServerHandle } from './codex-mcp-runtime-server.js';
|
|
4
3
|
import { type CodexSdkRunner } from './codex-sdk-runner.js';
|
|
4
|
+
import type { RateLimitGovernor } from './rate-limit-governor.js';
|
|
5
5
|
import type { KtxGenerateObjectInput, KtxGenerateTextInput, KtxLlmRuntimePort, KtxRuntimeToolSet, RunLoopParams, RunLoopResult } from './runtime-port.js';
|
|
6
6
|
export interface CodexKtxLlmRuntimeDeps {
|
|
7
7
|
projectDir: string;
|
|
@@ -13,13 +13,13 @@ export interface CodexKtxLlmRuntimeDeps {
|
|
|
13
13
|
projectDir: string;
|
|
14
14
|
toolSet: KtxRuntimeToolSet;
|
|
15
15
|
}) => Promise<CodexRuntimeMcpServerHandle>;
|
|
16
|
-
|
|
16
|
+
rateLimitGovernor?: Pick<RateLimitGovernor, 'waitForReady' | 'report' | 'maxRetryAttempts'>;
|
|
17
17
|
}
|
|
18
18
|
export declare class CodexKtxLlmRuntime implements KtxLlmRuntimePort {
|
|
19
19
|
private readonly deps;
|
|
20
20
|
private readonly runner;
|
|
21
|
-
private readonly logger;
|
|
22
21
|
constructor(deps: CodexKtxLlmRuntimeDeps);
|
|
22
|
+
private runWithRateLimitRetry;
|
|
23
23
|
generateText(input: KtxGenerateTextInput): Promise<string>;
|
|
24
24
|
generateObject<TOutput, TSchema extends z.ZodType<TOutput>>(input: KtxGenerateObjectInput<TOutput, TSchema>): Promise<TOutput>;
|
|
25
25
|
runAgentLoop(params: RunLoopParams): Promise<RunLoopResult>;
|