@arvoretech/pi-kiro-provider 0.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74) hide show
  1. package/README.md +121 -0
  2. package/dist/bracket-tool-parser.d.ts +12 -0
  3. package/dist/bracket-tool-parser.d.ts.map +1 -0
  4. package/dist/bracket-tool-parser.js +78 -0
  5. package/dist/bracket-tool-parser.js.map +1 -0
  6. package/dist/debug.d.ts +3 -0
  7. package/dist/debug.d.ts.map +1 -0
  8. package/dist/debug.js +49 -0
  9. package/dist/debug.js.map +1 -0
  10. package/dist/event-parser.d.ts +44 -0
  11. package/dist/event-parser.d.ts.map +1 -0
  12. package/dist/event-parser.js +66 -0
  13. package/dist/event-parser.js.map +1 -0
  14. package/dist/history.d.ts +13 -0
  15. package/dist/history.d.ts.map +1 -0
  16. package/dist/history.js +121 -0
  17. package/dist/history.js.map +1 -0
  18. package/dist/index.d.ts +6 -0
  19. package/dist/index.d.ts.map +1 -0
  20. package/dist/index.js +44 -0
  21. package/dist/index.js.map +1 -0
  22. package/dist/kiro-cli.d.ts +32 -0
  23. package/dist/kiro-cli.d.ts.map +1 -0
  24. package/dist/kiro-cli.js +271 -0
  25. package/dist/kiro-cli.js.map +1 -0
  26. package/dist/kiro-ide.d.ts +13 -0
  27. package/dist/kiro-ide.d.ts.map +1 -0
  28. package/dist/kiro-ide.js +74 -0
  29. package/dist/kiro-ide.js.map +1 -0
  30. package/dist/login-ui.d.ts +18 -0
  31. package/dist/login-ui.d.ts.map +1 -0
  32. package/dist/login-ui.js +124 -0
  33. package/dist/login-ui.js.map +1 -0
  34. package/dist/login.d.ts +16 -0
  35. package/dist/login.d.ts.map +1 -0
  36. package/dist/login.js +217 -0
  37. package/dist/login.js.map +1 -0
  38. package/dist/models.d.ts +72 -0
  39. package/dist/models.d.ts.map +1 -0
  40. package/dist/models.js +461 -0
  41. package/dist/models.js.map +1 -0
  42. package/dist/oauth.d.ts +30 -0
  43. package/dist/oauth.d.ts.map +1 -0
  44. package/dist/oauth.js +226 -0
  45. package/dist/oauth.js.map +1 -0
  46. package/dist/retry.d.ts +21 -0
  47. package/dist/retry.d.ts.map +1 -0
  48. package/dist/retry.js +51 -0
  49. package/dist/retry.js.map +1 -0
  50. package/dist/stream.d.ts +5 -0
  51. package/dist/stream.d.ts.map +1 -0
  52. package/dist/stream.js +858 -0
  53. package/dist/stream.js.map +1 -0
  54. package/dist/thinking-parser.d.ts +24 -0
  55. package/dist/thinking-parser.d.ts.map +1 -0
  56. package/dist/thinking-parser.js +205 -0
  57. package/dist/thinking-parser.js.map +1 -0
  58. package/dist/tokenizer.d.ts +2 -0
  59. package/dist/tokenizer.d.ts.map +1 -0
  60. package/dist/tokenizer.js +16 -0
  61. package/dist/tokenizer.js.map +1 -0
  62. package/dist/transform.d.ts +63 -0
  63. package/dist/transform.d.ts.map +1 -0
  64. package/dist/transform.js +200 -0
  65. package/dist/transform.js.map +1 -0
  66. package/dist/truncation.d.ts +4 -0
  67. package/dist/truncation.d.ts.map +1 -0
  68. package/dist/truncation.js +13 -0
  69. package/dist/truncation.js.map +1 -0
  70. package/dist/usage.d.ts +90 -0
  71. package/dist/usage.d.ts.map +1 -0
  72. package/dist/usage.js +169 -0
  73. package/dist/usage.js.map +1 -0
  74. package/package.json +61 -0
package/dist/stream.js ADDED
@@ -0,0 +1,858 @@
1
+ // ABOUTME: Core streaming integration for Kiro API requests and responses.
2
+ // ABOUTME: Handles request building, retry logic, event parsing, and token counting.
3
+ import { appendFile, mkdir } from "node:fs/promises";
4
+ import { homedir } from "node:os";
5
+ import { join } from "node:path";
6
+ import * as PiAi from "@earendil-works/pi-ai";
7
+ import { UniversalEventStreamMarshaller } from "@smithy/core/event-streams";
8
+ import { parseBracketToolCalls } from "./bracket-tool-parser.js";
9
+ import { debugEnabled, debugLog } from "./debug.js";
10
+ import { parseKiroEvent } from "./event-parser.js";
11
+ import { addPlaceholderTools, HISTORY_LIMIT, HISTORY_LIMIT_CONTEXT_WINDOW, truncateHistory, } from "./history.js";
12
+ import { getKiroCliCredentials, getKiroCliCredentialsAllowExpired, refreshViaKiroCli, } from "./kiro-cli.js";
13
+ import { resolveKiroModel } from "./models.js";
14
+ import { capacityRetryConfig, exponentialBackoff, firstTokenTimeoutForModel, isCapacityError, isNonRetryableBodyError, isTooBigError, MAX_RETRY_DELAY, } from "./retry.js";
15
+ import { ThinkingTagParser } from "./thinking-parser.js";
16
+ import { countTokens } from "./tokenizer.js";
17
+ import { buildHistory, convertImagesToKiro, convertToolsToKiro, extractImages, getContentText, normalizeMessages, sanitizeSurrogates, TOOL_RESULT_LIMIT, truncate, } from "./transform.js";
18
+ import { TRUNCATION_NOTICE, wasPreviousResponseTruncated, } from "./truncation.js";
19
/** Directory that receives this provider's diagnostic logs: `<home>/.pi/logs`. */
const CAPACITY_LOG_DIR = join(homedir(), ".pi", "logs");
/** Append-only log file that records capacity-retry events. */
const CAPACITY_LOG_FILE = join(CAPACITY_LOG_DIR, "capacity-retries.log");
/**
 * Shared marshaller used to deserialize the binary event stream returned by
 * the API.
 *
 * Note the direction of the two callbacks as wired here: `utf8Encoder` is
 * handed bytes and returns a string, while `utf8Decoder` is handed a string
 * and returns bytes.
 */
const eventStreamMarshaller = new UniversalEventStreamMarshaller({
    utf8Encoder(bytes) {
        return new TextDecoder().decode(bytes);
    },
    utf8Decoder(text) {
        return new TextEncoder().encode(text);
    },
});
/** Becomes true once the log directory has been created, so mkdir runs only once. */
let capacityLogDirCreated = false;
26
/**
 * Append a timestamped line to the capacity-retry log without making the
 * caller wait for the disk write.
 *
 * The write is started and deliberately not awaited. Any failure (directory
 * creation or append) is swallowed: logging is best-effort and must never
 * break the provider.
 *
 * @param {string} message Text written after the ISO timestamp.
 * @returns {void}
 */
function logCapacityEvent(message) {
    const writeEntry = async () => {
        try {
            // Create the log directory lazily, and only on the first successful attempt.
            if (capacityLogDirCreated === false) {
                await mkdir(CAPACITY_LOG_DIR, { recursive: true });
                capacityLogDirCreated = true;
            }
            const line = `${new Date().toISOString()} ${message}\n`;
            await appendFile(CAPACITY_LOG_FILE, line);
        }
        catch {
            // best-effort logging, don't break the provider
        }
    };
    // Intentionally floating: writeEntry never rejects because it catches everything.
    void writeEntry();
}
41
/**
 * Wait for `ms` milliseconds, rejecting early if the abort signal fires.
 *
 * If the signal is already aborted the returned promise rejects immediately
 * with `signal.reason`. Otherwise it resolves (with `undefined`) when the
 * timer elapses, or rejects with `signal.reason` as soon as the signal aborts.
 *
 * The abort listener is removed when the timer completes normally. The retry
 * loops call this repeatedly with the same long-lived signal, so a listener
 * left behind by every finished delay would pile up on that signal.
 *
 * @param {number} ms Delay in milliseconds.
 * @param {AbortSignal} [signal] Optional signal that cancels the wait.
 * @returns {Promise<void>}
 */
function abortableDelay(ms, signal) {
    if (signal?.aborted)
        return Promise.reject(signal.reason);
    return new Promise((resolve, reject) => {
        const onAbort = () => {
            clearTimeout(timer);
            reject(signal.reason);
        };
        const timer = setTimeout(() => {
            // Normal completion: detach from the signal so nothing is retained.
            signal?.removeEventListener("abort", onAbort);
            resolve();
        }, ms);
        signal?.addEventListener("abort", onAbort, { once: true });
    });
}
53
+ // --- profileArn resolution (cached per endpoint) ---
54
/** Resolved profile ARNs, keyed by the endpoint URL they were resolved for. */
const profileArnCache = new Map();
/**
 * Keys for which profileArn resolution is short-circuited. The special key
 * "__all__" disables resolution for every endpoint.
 */
const profileArnPending = new Set();
/**
 * Reset profileArn resolution state. Exported for tests.
 *
 * Empties both the cache and the pending set. When `resolved` is truthy the
 * "__all__" marker is then added, which makes resolveProfileArn return
 * `undefined` without issuing any network request.
 *
 * @param {boolean} [resolved=false] Whether to disable resolution after the reset.
 * @returns {void}
 */
export function resetProfileArnCache(resolved = false) {
    for (const store of [profileArnCache, profileArnPending]) {
        store.clear();
    }
    if (!resolved) {
        return;
    }
    profileArnPending.add("__all__");
}
63
/**
 * Look up the caller's profile ARN for `endpoint`, caching successful results.
 *
 * The lookup POSTs a `ListAvailableProfiles` request to the endpoint's base
 * URL: the trailing `/generateAssistantResponse` path segment, the query and
 * the fragment are all stripped. The first profile that carries an `arn` wins.
 *
 * Only successful lookups are cached. Every failure path returns `undefined`
 * and leaves the cache untouched, so the next request tries again. This
 * function never throws: network and parse errors are logged and turned into
 * `undefined`.
 *
 * @param {string} accessToken Bearer token sent in the Authorization header.
 * @param {string} endpoint Full API endpoint URL; also used as the cache key.
 * @returns {Promise<string | undefined>} The ARN, or `undefined` when
 *   resolution is disabled, fails, or yields no profile.
 */
async function resolveProfileArn(accessToken, endpoint) {
    // "__all__" disables resolution globally (set via resetProfileArnCache(true)).
    if (profileArnPending.has("__all__"))
        return undefined;
    if (profileArnCache.has(endpoint))
        return profileArnCache.get(endpoint);
    if (profileArnPending.has(endpoint))
        return undefined;
    try {
        const ep = new URL(endpoint);
        ep.pathname = ep.pathname.replace(/\/generateAssistantResponse\/?$/, "/");
        ep.search = "";
        ep.hash = "";
        const r = await fetch(ep.toString(), {
            method: "POST",
            headers: {
                "Content-Type": "application/x-amz-json-1.0",
                Authorization: `Bearer ${accessToken}`,
                "X-Amz-Target": "AmazonCodeWhispererService.ListAvailableProfiles",
            },
            body: "{}",
        });
        if (!r.ok) {
            // The error body is not needed. Cancel it so the underlying connection
            // is released right away instead of lingering until garbage collection.
            await r.body?.cancel().catch(() => { });
            console.warn(`[pi-provider-kiro] Failed to resolve profileArn: ListAvailableProfiles returned ${r.status} ${r.statusText}. Will retry on the next request.`);
            return undefined;
        }
        const j = (await r.json());
        const arn = j.profiles?.find((p) => p.arn)?.arn;
        if (!arn) {
            debugLog("profileArn.empty", {
                message: "ListAvailableProfiles returned no profile ARN; this is expected for some social-login tokens.",
            });
            return undefined;
        }
        profileArnCache.set(endpoint, arn);
        return arn;
    }
    catch (error) {
        console.warn(`[pi-provider-kiro] Failed to resolve profileArn: ${error instanceof Error ? error.message : String(error)}. Will retry on the next request.`);
        return undefined;
    }
}
104
/**
 * Turn an accumulated tool-call state into a `toolCall` content block and
 * announce it on the stream.
 *
 * On success the block is appended to `output.content` and three events are
 * pushed in order: `toolcall_start`, `toolcall_delta` (carrying the raw JSON
 * input) and `toolcall_end`. If the input is not valid JSON a warning is
 * logged and nothing is appended or pushed.
 *
 * Side effect: a blank `state.input` is rewritten to "{}" on the passed-in
 * state object.
 *
 * @param {{toolUseId: string, name: string, input: string}} state Accumulated tool call.
 * @param {{content: Array<object>}} output Assistant message being built.
 * @param {{push: (event: object) => void}} stream Event sink.
 * @returns {boolean} true if the tool call was emitted, false if its input could not be parsed.
 */
function emitToolCall(state, output, stream) {
    // Kiro API omits the input payload when the model calls a tool with no
    // arguments (e.g. mcp({})). Those are valid zero-arg calls rather than
    // truncations, so normalise blank input to an empty object.
    if (state.input.trim() === "") {
        state.input = "{}";
    }
    const rawInput = state.input;
    let parsedArgs;
    try {
        parsedArgs = JSON.parse(rawInput);
    }
    catch (e) {
        console.warn(`[pi-provider-kiro] Failed to parse tool input for "${state.name}" (toolUseId: ${state.toolUseId}): ${e instanceof Error ? e.message : String(e)}. Raw input (${state.input.length} chars): ${state.input.substring(0, 200)}`);
        return false;
    }
    const toolCall = {
        type: "toolCall",
        id: state.toolUseId,
        name: state.name,
        arguments: parsedArgs,
    };
    // push() returns the new length, so the block's index is one less.
    const contentIndex = output.content.push(toolCall) - 1;
    const announce = (type, extra) => {
        stream.push({ type, contentIndex, ...extra, partial: output });
    };
    announce("toolcall_start", {});
    announce("toolcall_delta", { delta: rawInput });
    announce("toolcall_end", { toolCall });
    return true;
}
142
+ export function streamKiro(model, context, options) {
143
+ // pi-ai's barrel re-exports the class as type-only before the runtime class re-export, so
144
+ // a named import of AssistantMessageEventStream resolves to a type. Read it from the
145
+ // namespace import to get the actual constructor. Replaces the removed
146
+ // createAssistantMessageEventStream() factory (gone in @oh-my-pi/pi-ai).
147
+ const StreamCtor = PiAi.AssistantMessageEventStream;
148
+ const stream = new StreamCtor();
149
+ (async () => {
150
+ const output = {
151
+ role: "assistant",
152
+ content: [],
153
+ api: model.api,
154
+ provider: model.provider,
155
+ model: model.id,
156
+ usage: {
157
+ input: 0,
158
+ output: 0,
159
+ cacheRead: 0,
160
+ cacheWrite: 0,
161
+ totalTokens: 0,
162
+ cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
163
+ },
164
+ stopReason: "stop",
165
+ timestamp: Date.now(),
166
+ };
167
+ try {
168
+ let accessToken = options?.apiKey;
169
+ if (!accessToken)
170
+ throw new Error("Kiro credentials not set. Run /login kiro or install kiro-cli.");
171
+ const endpoint = model.baseUrl ||
172
+ "https://q.us-east-1.amazonaws.com/generateAssistantResponse";
173
+ const optionProfileArn = options?.credentials?.profileArn ||
174
+ options?.profileArn;
175
+ const cliCreds = getKiroCliCredentials() ?? getKiroCliCredentialsAllowExpired();
176
+ const cliProfileArn = cliCreds?.access === accessToken ? cliCreds.profileArn : undefined;
177
+ let profileArn = optionProfileArn ||
178
+ cliProfileArn ||
179
+ (await resolveProfileArn(accessToken, endpoint));
180
+ // Trigger dynamic models cache update in the background if empty or stale
181
+ const ep = new URL(endpoint);
182
+ const region = ep.hostname.split(".")[1] || "us-east-1";
183
+ const { isCacheStale, updateKiroModelsCache } = await import("./models.js");
184
+ if (!process.env.VITEST && isCacheStale(region)) {
185
+ updateKiroModelsCache(accessToken, region, profileArn).catch(() => { });
186
+ }
187
+ const kiroModelId = resolveKiroModel(model.id);
188
+ const thinkingEnabled = !!options?.reasoning || model.reasoning;
189
+ debugLog("request.init", {
190
+ endpoint,
191
+ model: model.id,
192
+ kiroModelId,
193
+ contextWindow: model.contextWindow,
194
+ thinkingEnabled,
195
+ reasoning: options?.reasoning,
196
+ messageCount: context.messages.length,
197
+ toolCount: context.tools?.length ?? 0,
198
+ hasSystemPrompt: !!context.systemPrompt,
199
+ profileArn,
200
+ sessionId: options?.sessionId,
201
+ });
202
+ let systemPrompt = context.systemPrompt ?? "";
203
+ if (thinkingEnabled) {
204
+ const budget = options?.reasoning === "xhigh"
205
+ ? 50000
206
+ : options?.reasoning === "high"
207
+ ? 30000
208
+ : options?.reasoning === "medium"
209
+ ? 20000
210
+ : 10000;
211
+ systemPrompt = `<thinking_mode>enabled</thinking_mode><max_thinking_length>${budget}</max_thinking_length>${systemPrompt ? `\n${systemPrompt}` : ""}`;
212
+ }
213
+ let retryCount = 0;
214
+ const maxRetries = 3;
215
+ const conversationId = options?.sessionId ?? crypto.randomUUID();
216
+ while (retryCount <= maxRetries) {
217
+ if (options?.signal?.aborted)
218
+ throw options.signal.reason;
219
+ const effectiveSystemPrompt = systemPrompt;
220
+ const normalized = normalizeMessages(context.messages);
221
+ const { history: rawHistory, systemPrepended, currentMsgStartIdx, } = buildHistory(normalized, kiroModelId, effectiveSystemPrompt);
222
+ // Scale history limit to model context window
223
+ // HISTORY_LIMIT (850K chars) is sized for 200K token models
224
+ const dynamicHistoryLimit = Math.floor((model.contextWindow / HISTORY_LIMIT_CONTEXT_WINDOW) * HISTORY_LIMIT);
225
+ const history = truncateHistory(rawHistory, dynamicHistoryLimit);
226
+ const toolResultLimit = TOOL_RESULT_LIMIT;
227
+ const currentMessages = normalized.slice(currentMsgStartIdx);
228
+ const firstMsg = currentMessages[0];
229
+ let currentContent = "";
230
+ const currentToolResults = [];
231
+ let currentImages;
232
+ if (firstMsg?.role === "assistant") {
233
+ const am = firstMsg;
234
+ let armContent = "";
235
+ const armToolUses = [];
236
+ if (Array.isArray(am.content))
237
+ for (const b of am.content) {
238
+ if (b.type === "text")
239
+ armContent += b.text;
240
+ else if (b.type === "thinking")
241
+ armContent = `<thinking>${b.thinking}</thinking>\n\n${armContent}`;
242
+ else if (b.type === "toolCall") {
243
+ const tc = b;
244
+ armToolUses.push({
245
+ name: tc.name,
246
+ toolUseId: tc.id,
247
+ input: typeof tc.arguments === "string"
248
+ ? JSON.parse(tc.arguments)
249
+ : tc.arguments,
250
+ });
251
+ }
252
+ }
253
+ if (armContent || armToolUses.length > 0) {
254
+ const lastEntryForArm = history[history.length - 1];
255
+ const prevArm = lastEntryForArm?.assistantResponseMessage;
256
+ if (history.length > 0 &&
257
+ !lastEntryForArm?.userInputMessage &&
258
+ prevArm) {
259
+ // Merge into previous assistant message to maintain alternation without synthetic padding
260
+ prevArm.content += `\n\n${armContent}`;
261
+ if (armToolUses.length > 0)
262
+ prevArm.toolUses = [
263
+ ...(prevArm.toolUses || []),
264
+ ...armToolUses,
265
+ ];
266
+ }
267
+ else {
268
+ history.push({
269
+ assistantResponseMessage: {
270
+ content: armContent,
271
+ ...(armToolUses.length > 0 ? { toolUses: armToolUses } : {}),
272
+ },
273
+ });
274
+ }
275
+ }
276
+ const toolResultImages = [];
277
+ for (let i = 1; i < currentMessages.length; i++) {
278
+ const m = currentMessages[i];
279
+ if (m.role === "toolResult") {
280
+ const trm = m;
281
+ currentToolResults.push({
282
+ content: [
283
+ { text: truncate(getContentText(m), toolResultLimit) },
284
+ ],
285
+ status: trm.isError ? "error" : "success",
286
+ toolUseId: trm.toolCallId,
287
+ });
288
+ if (Array.isArray(trm.content))
289
+ for (const c of trm.content)
290
+ if (c.type === "image")
291
+ toolResultImages.push(c);
292
+ }
293
+ }
294
+ if (toolResultImages.length > 0) {
295
+ const converted = convertImagesToKiro(toolResultImages);
296
+ currentImages = currentImages
297
+ ? [...currentImages, ...converted]
298
+ : converted;
299
+ }
300
+ currentContent =
301
+ currentToolResults.length > 0
302
+ ? "Tool results provided."
303
+ : "Please proceed with the task.";
304
+ }
305
+ else if (firstMsg?.role === "toolResult") {
306
+ const toolResultImages2 = [];
307
+ for (const m of currentMessages)
308
+ if (m.role === "toolResult") {
309
+ const trm = m;
310
+ currentToolResults.push({
311
+ content: [
312
+ { text: truncate(getContentText(m), toolResultLimit) },
313
+ ],
314
+ status: trm.isError ? "error" : "success",
315
+ toolUseId: trm.toolCallId,
316
+ });
317
+ if (Array.isArray(trm.content))
318
+ for (const c of trm.content)
319
+ if (c.type === "image")
320
+ toolResultImages2.push(c);
321
+ }
322
+ if (toolResultImages2.length > 0) {
323
+ const converted = convertImagesToKiro(toolResultImages2);
324
+ currentImages = currentImages
325
+ ? [...currentImages, ...converted]
326
+ : converted;
327
+ }
328
+ currentContent = "Tool results provided.";
329
+ }
330
+ else if (firstMsg?.role === "user") {
331
+ currentContent =
332
+ typeof firstMsg.content === "string"
333
+ ? firstMsg.content
334
+ : getContentText(firstMsg);
335
+ if (effectiveSystemPrompt && !systemPrepended)
336
+ currentContent = `${effectiveSystemPrompt}\n\n${currentContent}`;
337
+ }
338
+ // Prepend truncation notice if the previous assistant response was cut off
339
+ if (wasPreviousResponseTruncated(context.messages)) {
340
+ currentContent = `${TRUNCATION_NOTICE}\n\n${currentContent}`;
341
+ }
342
+ // Always synthesize placeholder specs for tool names referenced in
343
+ // history, even when context.tools is empty/undefined. Without this,
344
+ // an "advisor-style" call that inherits a tool-rich conversation but
345
+ // declares no current tools is rejected by Kiro as "Improperly formed
346
+ // request" because history references toolUses with no tool catalog.
347
+ let uimc;
348
+ const baseTools = context.tools?.length
349
+ ? convertToolsToKiro(context.tools)
350
+ : [];
351
+ const finalTools = history.length > 0
352
+ ? addPlaceholderTools(baseTools, history)
353
+ : baseTools;
354
+ if (currentToolResults.length > 0 || finalTools.length > 0) {
355
+ uimc = {};
356
+ if (currentToolResults.length > 0)
357
+ uimc.toolResults = currentToolResults;
358
+ if (finalTools.length > 0)
359
+ uimc.tools = finalTools;
360
+ }
361
+ if (firstMsg?.role === "user") {
362
+ const imgs = extractImages(firstMsg);
363
+ if (imgs.length > 0)
364
+ currentImages = convertImagesToKiro(imgs);
365
+ }
366
+ // kiro-cli does not enforce alternation — the API accepts
367
+ // non-alternating history. No synthetic padding needed.
368
+ const request = {
369
+ conversationState: {
370
+ chatTriggerType: "MANUAL",
371
+ agentTaskType: "vibe",
372
+ conversationId,
373
+ currentMessage: {
374
+ userInputMessage: {
375
+ content: sanitizeSurrogates(currentContent),
376
+ modelId: kiroModelId,
377
+ origin: "KIRO_CLI",
378
+ ...(currentImages ? { images: currentImages } : {}),
379
+ ...(uimc ? { userInputMessageContext: uimc } : {}),
380
+ },
381
+ },
382
+ ...(history.length > 0 ? { history } : {}),
383
+ },
384
+ ...(profileArn ? { profileArn } : {}),
385
+ agentMode: "vibe",
386
+ };
387
+ let response;
388
+ // Reset per outer iteration — each 403 retry gets a fresh capacity budget
389
+ let capacityRetryCount = 0;
390
+ // Inner loop: retry capacity errors without consuming outer retry budget
391
+ while (true) {
392
+ const mid = crypto.randomUUID().replace(/-/g, "");
393
+ const ua = `aws-sdk-rust/1.0.0 ua/2.1 os/other lang/rust api/codewhispererstreaming#1.28.3 m/E app/AmazonQ-For-CLI md/appVersion-1.28.3-${mid}`;
394
+ debugLog("request.send", {
395
+ attempt: retryCount,
396
+ capacityAttempt: capacityRetryCount,
397
+ historyLen: history.length,
398
+ currentContentLen: currentContent.length,
399
+ hasImages: !!currentImages,
400
+ toolResultCount: currentToolResults.length,
401
+ request,
402
+ });
403
+ response = await fetch(endpoint, {
404
+ method: "POST",
405
+ headers: {
406
+ "Content-Type": "application/x-amz-json-1.0",
407
+ Accept: "application/json",
408
+ Authorization: `Bearer ${accessToken}`,
409
+ "X-Amz-Target": "AmazonCodeWhispererStreamingService.GenerateAssistantResponse",
410
+ "x-amzn-codewhisperer-optout": "true",
411
+ "amz-sdk-invocation-id": crypto.randomUUID(),
412
+ "amz-sdk-request": "attempt=1; max=1",
413
+ "x-amzn-kiro-agent-mode": "vibe",
414
+ "x-amz-user-agent": ua,
415
+ "user-agent": ua,
416
+ },
417
+ body: JSON.stringify(request),
418
+ signal: options?.signal,
419
+ });
420
+ if (!response.ok) {
421
+ let errText = "";
422
+ try {
423
+ errText = await response.text();
424
+ }
425
+ catch {
426
+ errText = "";
427
+ }
428
+ debugLog("response.error", {
429
+ status: response.status,
430
+ statusText: response.statusText,
431
+ body: errText,
432
+ });
433
+ // Retry transient capacity errors with longer backoff
434
+ if (isCapacityError(errText) &&
435
+ capacityRetryCount < capacityRetryConfig.maxRetries) {
436
+ capacityRetryCount++;
437
+ const delayMs = exponentialBackoff(capacityRetryCount - 1, capacityRetryConfig.baseDelayMs, 30_000);
438
+ const msg = `INSUFFICIENT_MODEL_CAPACITY — retrying in ${delayMs}ms (${capacityRetryCount}/${capacityRetryConfig.maxRetries})`;
439
+ console.error(`[pi-provider-kiro] ${msg}`);
440
+ logCapacityEvent(msg);
441
+ await abortableDelay(delayMs, options?.signal);
442
+ continue;
443
+ }
444
+ if (isCapacityError(errText)) {
445
+ logCapacityEvent(`INSUFFICIENT_MODEL_CAPACITY — exhausted ${capacityRetryConfig.maxRetries} retries, giving up`);
446
+ }
447
+ if (response.status === 403 &&
448
+ !isCapacityError(errText) &&
449
+ retryCount < maxRetries) {
450
+ retryCount++;
451
+ // On 403, try to get a fresh token before retrying — the current
452
+ // one may have been rotated by kiro-cli or another session. If
453
+ // the cached kiro-cli token is also stale, actively refresh it.
454
+ const freshCreds = getKiroCliCredentials() ?? refreshViaKiroCli();
455
+ if (freshCreds?.access)
456
+ accessToken = freshCreds.access;
457
+ // Re-resolve profileArn with fresh credentials
458
+ profileArnCache.delete(endpoint);
459
+ const refreshedProfileArn = options?.credentials?.profileArn ||
460
+ options?.profileArn ||
461
+ freshCreds?.profileArn;
462
+ profileArn =
463
+ refreshedProfileArn ||
464
+ (await resolveProfileArn(accessToken, endpoint));
465
+ const delayMs = exponentialBackoff(retryCount - 1, 500, MAX_RETRY_DELAY);
466
+ await abortableDelay(delayMs, options?.signal);
467
+ break; // break inner loop, continue outer loop
468
+ }
469
+ // Avoid pi-coding-agent's outer auto-retry from treating known
470
+ // Kiro quota/capacity body markers as generic retryable 429s.
471
+ // This covers both hard quota (MONTHLY_REQUEST_COUNT) and
472
+ // exhausted capacity retries (INSUFFICIENT_MODEL_CAPACITY).
473
+ if (isNonRetryableBodyError(errText) || isCapacityError(errText)) {
474
+ throw new Error(`Kiro API error: ${errText || response.statusText}`);
475
+ }
476
+ // Format error so pi-ai's isContextOverflow() recognizes it
477
+ if (isTooBigError(response.status, errText)) {
478
+ throw new Error(`Kiro API error: context_length_exceeded (${response.status} ${errText})`);
479
+ }
480
+ throw new Error(`Kiro API error: ${response.status} ${response.statusText} ${errText}`);
481
+ }
482
+ break; // success, break inner loop
483
+ }
484
+ if (capacityRetryCount > 0 && response.ok) {
485
+ logCapacityEvent(`INSUFFICIENT_MODEL_CAPACITY — succeeded after ${capacityRetryCount} retries`);
486
+ }
487
+ // 403 retry: continue outer loop
488
+ if (!response.ok)
489
+ continue;
490
+ stream.push({ type: "start", partial: output });
491
+ if (!response.body)
492
+ throw new Error("No response body");
493
+ const bodyReader = response.body.getReader();
494
+ let totalContent = "";
495
+ let lastContentData = "";
496
+ let usageEvent = null;
497
+ let receivedContextUsage = false;
498
+ const thinkingParser = thinkingEnabled
499
+ ? new ThinkingTagParser(output, stream)
500
+ : null;
501
+ let textBlockIndex = null;
502
+ let emittedToolCalls = 0;
503
+ let sawAnyToolCalls = false;
504
+ let currentToolCall = null;
505
+ const flushToolCall = () => {
506
+ if (!currentToolCall)
507
+ return;
508
+ if (emitToolCall(currentToolCall, output, stream))
509
+ emittedToolCalls++;
510
+ currentToolCall = null;
511
+ };
512
+ const IDLE_TIMEOUT = 300_000;
513
+ let idleTimer = null;
514
+ let idleCancelled = false;
515
+ const resetIdle = () => {
516
+ if (idleTimer)
517
+ clearTimeout(idleTimer);
518
+ idleTimer = setTimeout(() => {
519
+ idleCancelled = true;
520
+ void bodyReader.cancel().catch(() => { });
521
+ }, IDLE_TIMEOUT);
522
+ };
523
+ let gotFirstToken = false;
524
+ let firstTokenTimedOut = false;
525
+ let streamError = null;
526
+ const FIRST_TOKEN_SENTINEL = Symbol("firstTokenTimeout");
527
+ // Smithy EventStreamMarshaller handles: chunk reassembly, CRC validation,
528
+ // protocol error/exception detection, and payload deserialization.
529
+ const bodyIterable = {
530
+ async *[Symbol.asyncIterator]() {
531
+ try {
532
+ while (true) {
533
+ const { done, value } = await bodyReader.read();
534
+ if (done)
535
+ return;
536
+ yield value;
537
+ }
538
+ }
539
+ finally {
540
+ bodyReader.releaseLock();
541
+ }
542
+ },
543
+ };
544
+ const utf8Decoder = new TextDecoder();
545
+ const eventStream = eventStreamMarshaller.deserialize(bodyIterable, async (event) => {
546
+ const key = Object.keys(event)[0];
547
+ const msg = event[key];
548
+ const parsed = JSON.parse(utf8Decoder.decode(msg.body));
549
+ return { [key]: parsed };
550
+ });
551
+ const iterator = eventStream[Symbol.asyncIterator]();
552
+ while (true) {
553
+ let iterResult;
554
+ try {
555
+ if (!gotFirstToken) {
556
+ const readPromise = iterator.next();
557
+ const result = await Promise.race([
558
+ readPromise,
559
+ new Promise((resolve) => setTimeout(() => resolve(FIRST_TOKEN_SENTINEL), firstTokenTimeoutForModel(model.id))),
560
+ ]);
561
+ if (result === FIRST_TOKEN_SENTINEL) {
562
+ readPromise.catch(() => { }); // suppress dangling rejection
563
+ void bodyReader.cancel().catch(() => { });
564
+ firstTokenTimedOut = true;
565
+ break;
566
+ }
567
+ iterResult = result;
568
+ gotFirstToken = true;
569
+ resetIdle();
570
+ }
571
+ else {
572
+ iterResult = await iterator.next();
573
+ }
574
+ }
575
+ catch (e) {
576
+ // Smithy throws on :message-type error/exception headers
577
+ streamError =
578
+ e instanceof Error
579
+ ? e.message
580
+ : (typeof e === "object" && e !== null
581
+ ? JSON.stringify(e)
582
+ : String(e)) || "Unknown stream error";
583
+ break;
584
+ }
585
+ const { done, value } = iterResult;
586
+ if (done)
587
+ break;
588
+ resetIdle();
589
+ const eventPayload = Object.values(value)[0];
590
+ const event = parseKiroEvent(eventPayload);
591
+ if (!event)
592
+ continue;
593
+ if (debugEnabled())
594
+ debugLog("stream.events", [event]);
595
+ switch (event.type) {
596
+ case "contextUsage": {
597
+ const pct = event.data.contextUsagePercentage;
598
+ output.usage.input = Math.round((pct / 100) * model.contextWindow);
599
+ output.usage.contextPercent = pct;
600
+ receivedContextUsage = true;
601
+ break;
602
+ }
603
+ case "content": {
604
+ if (event.data === lastContentData)
605
+ continue;
606
+ lastContentData = event.data;
607
+ totalContent += event.data;
608
+ if (thinkingParser) {
609
+ thinkingParser.processChunk(event.data);
610
+ }
611
+ else {
612
+ if (textBlockIndex === null) {
613
+ textBlockIndex = output.content.length;
614
+ output.content.push({ type: "text", text: "" });
615
+ stream.push({
616
+ type: "text_start",
617
+ contentIndex: textBlockIndex,
618
+ partial: output,
619
+ });
620
+ }
621
+ output.content[textBlockIndex].text +=
622
+ event.data;
623
+ stream.push({
624
+ type: "text_delta",
625
+ contentIndex: textBlockIndex,
626
+ delta: event.data,
627
+ partial: output,
628
+ });
629
+ }
630
+ break;
631
+ }
632
+ case "toolUse": {
633
+ const tc = event.data;
634
+ sawAnyToolCalls = true;
635
+ if (!currentToolCall ||
636
+ currentToolCall.toolUseId !== tc.toolUseId) {
637
+ flushToolCall();
638
+ currentToolCall = {
639
+ toolUseId: tc.toolUseId,
640
+ name: tc.name,
641
+ input: "",
642
+ };
643
+ }
644
+ currentToolCall.input += tc.input || "";
645
+ if (tc.input)
646
+ totalContent += tc.input;
647
+ if (tc.stop)
648
+ flushToolCall();
649
+ break;
650
+ }
651
+ case "toolUseInput": {
652
+ if (currentToolCall)
653
+ currentToolCall.input += event.data.input || "";
654
+ if (event.data.input)
655
+ totalContent += event.data.input;
656
+ break;
657
+ }
658
+ case "toolUseStop": {
659
+ if (event.data.stop)
660
+ flushToolCall();
661
+ break;
662
+ }
663
+ case "usage": {
664
+ usageEvent = event.data;
665
+ break;
666
+ }
667
+ case "error": {
668
+ const errMsg = event.data.message
669
+ ? `${event.data.error}: ${event.data.message}`
670
+ : event.data.error;
671
+ streamError = errMsg;
672
+ void bodyReader.cancel().catch(() => { });
673
+ break;
674
+ }
675
+ // followupPrompt events are intentionally ignored
676
+ }
677
+ if (streamError)
678
+ break;
679
+ }
680
+ if (idleTimer)
681
+ clearTimeout(idleTimer);
682
+ if (firstTokenTimedOut || idleCancelled || streamError) {
683
+ // Timed out or received error mid-stream: retry with backoff
684
+ if (retryCount < maxRetries) {
685
+ retryCount++;
686
+ const delayMs = exponentialBackoff(retryCount - 1, 1000, MAX_RETRY_DELAY);
687
+ await abortableDelay(delayMs, options?.signal);
688
+ continue;
689
+ }
690
+ if (streamError) {
691
+ throw new Error(`Kiro API stream error after max retries: ${streamError}`);
692
+ }
693
+ throw new Error(`Kiro API error: ${firstTokenTimedOut ? "first token" : "idle"} timeout after max retries`);
694
+ }
695
+ if (currentToolCall && emitToolCall(currentToolCall, output, stream)) {
696
+ emittedToolCalls++;
697
+ }
698
+ if (thinkingParser) {
699
+ thinkingParser.finalize();
700
+ textBlockIndex = thinkingParser.getTextBlockIndex();
701
+ }
702
+ // Fallback: extract bracket-style tool calls from content if no native tool calls
703
+ if (!sawAnyToolCalls && textBlockIndex !== null) {
704
+ const textBlock = output.content[textBlockIndex];
705
+ const bracketResult = parseBracketToolCalls(textBlock.text);
706
+ if (bracketResult.toolCalls.length > 0) {
707
+ sawAnyToolCalls = true;
708
+ textBlock.text = bracketResult.cleanedText;
709
+ for (const btc of bracketResult.toolCalls) {
710
+ if (emitToolCall({
711
+ toolUseId: btc.toolUseId,
712
+ name: btc.name,
713
+ input: JSON.stringify(btc.arguments),
714
+ }, output, stream)) {
715
+ emittedToolCalls++;
716
+ }
717
+ }
718
+ }
719
+ }
720
+ // Strip echo noise: when tool calls are present and the text content
721
+ // is just "." or similar short echo from history padding, remove it.
722
+ // This prevents the echo from accumulating in conversation history
723
+ // and reinforcing the pattern in future turns.
724
+ if (emittedToolCalls > 0 && textBlockIndex !== null) {
725
+ const textBlock = output.content[textBlockIndex];
726
+ if (/^\s*(\.+|continue)\s*$/i.test(textBlock.text)) {
727
+ textBlock.text = "";
728
+ }
729
+ }
730
+ if (textBlockIndex !== null)
731
+ stream.push({
732
+ type: "text_end",
733
+ contentIndex: textBlockIndex,
734
+ content: output.content[textBlockIndex].text,
735
+ partial: output,
736
+ });
737
+ // The Kiro streaming API does not reliably emit per-response output
738
+ // token counts (unlike Anthropic's `output_tokens` or Bedrock's
739
+ // `usage.outputTokens`). When the `usage` event is missing or only
740
+ // reports `inputTokens`, fall back to a tiktoken estimate over
741
+ // everything the assistant emitted — text plus tool-call input JSON
742
+ // (accumulated into `totalContent` above). Otherwise tool-call-only
743
+ // turns report 0 output tokens and break consumers like the TPS
744
+ // extension that watch `usage.output`.
745
+ if (usageEvent?.inputTokens !== undefined)
746
+ output.usage.input = usageEvent.inputTokens;
747
+ output.usage.output =
748
+ usageEvent?.outputTokens ?? countTokens(totalContent);
749
+ output.usage.totalTokens = output.usage.input + output.usage.output;
750
+ try {
751
+ PiAi.calculateCost(model, output.usage);
752
+ }
753
+ catch {
754
+ // Model might not have cost info, use zeros
755
+ output.usage.cost = {
756
+ input: 0,
757
+ output: 0,
758
+ cacheRead: 0,
759
+ cacheWrite: 0,
760
+ total: 0,
761
+ };
762
+ }
763
+ // Detect degenerate responses: the API returned 200 but produced no
764
+ // usable content at all — no text and no tool calls (not even broken
765
+ // ones). This happens when the stream is truncated early or the API
766
+ // returns only a contextUsage event. Retry with backoff.
767
+ //
768
+ // Also detect "Continue" echo loops: the model's entire response is
769
+ // just "continue" (case-insensitive) or a run of dots, with no tool calls. This happens
770
+ // when synthetic history padding teaches the model to echo "Continue"
771
+ // as a valid response, causing an infinite loop where pi sends
772
+ // "continue" back and the model echoes it again.
773
+ //
774
+ // When tool calls *were* present but all got dropped (empty/unparseable
775
+ // input), don't retry — the API did respond, it just sent malformed
776
+ // tool calls. Retrying would likely produce the same result. The
777
+ // stopReason fix below prevents the agent loop stall.
778
+ const hasText = textBlockIndex !== null &&
779
+ output.content[textBlockIndex].text.length > 0;
780
+ const responseText = hasText
781
+ ? output.content[textBlockIndex].text
782
+ : "";
783
+ const isEchoLoop = hasText &&
784
+ !sawAnyToolCalls &&
785
+ /^\s*(continue|\.+)\s*$/i.test(responseText);
786
+ if ((!hasText && !sawAnyToolCalls) || isEchoLoop) {
787
+ if (retryCount < maxRetries) {
788
+ retryCount++;
789
+ const delayMs = exponentialBackoff(retryCount - 1, 1000, MAX_RETRY_DELAY);
790
+ console.warn(`[pi-provider-kiro] ${isEchoLoop ? 'Echo loop detected (model responded with just "Continue")' : "Empty response (no text, no tool calls)"} — retrying (${retryCount}/${maxRetries})`);
791
+ // Reset output content for the retry
792
+ output.content = [];
793
+ textBlockIndex = null;
794
+ await abortableDelay(delayMs, options?.signal);
795
+ continue;
796
+ }
797
+ if (isEchoLoop) {
798
+ // After max retries, strip the echo text to prevent the agent
799
+ // loop from interpreting "Continue" as a continuation signal.
800
+ output.content[textBlockIndex].text = "";
801
+ console.warn(`[pi-provider-kiro] Echo loop persisted after ${maxRetries} retries — stripping "Continue" response`);
802
+ }
803
+ else {
804
+ console.warn(`[pi-provider-kiro] Empty response after ${maxRetries} retries — returning stopReason:"stop" to avoid agent loop stall`);
805
+ }
806
+ }
807
+ // Use emittedToolCalls (not toolCalls.length) to avoid stopReason:"toolUse"
808
+ // when all tool calls were skipped due to empty/unparseable input — that
809
+ // combination (empty content + toolUse stop) causes pi's agent loop to
810
+ // stall waiting for tool results that will never arrive.
811
+ if (!receivedContextUsage && emittedToolCalls === 0) {
812
+ output.stopReason = "length";
813
+ }
814
+ else {
815
+ output.stopReason = emittedToolCalls > 0 ? "toolUse" : "stop";
816
+ }
817
+ stream.push({
818
+ type: "done",
819
+ reason: output.stopReason,
820
+ message: output,
821
+ });
822
+ debugLog("response.done", {
823
+ stopReason: output.stopReason,
824
+ emittedToolCalls,
825
+ sawAnyToolCalls,
826
+ textLen: textBlockIndex !== null
827
+ ? output.content[textBlockIndex].text.length
828
+ : 0,
829
+ usage: output.usage,
830
+ content: output.content,
831
+ });
832
+ stream.end();
833
+ break;
834
+ }
835
+ }
836
+ catch (error) {
837
+ output.stopReason = options?.signal?.aborted ? "aborted" : "error";
838
+ output.errorMessage =
839
+ error instanceof Error ? error.message : String(error);
840
+ debugLog("response.caught", {
841
+ stopReason: output.stopReason,
842
+ error: output.errorMessage,
843
+ });
844
+ stream.push({ type: "error", reason: output.stopReason, error: output });
845
+ stream.end();
846
+ }
847
+ })().catch(() => {
848
+ // Safety net: catch any rejection that escapes the inner try/catch
849
+ // (e.g., AbortError during signal teardown). Without this, the
850
+ // fire-and-forget IIFE produces an unhandled rejection that crashes pi.
851
+ try {
852
+ stream.end();
853
+ }
854
+ catch { }
855
+ });
856
+ return stream;
857
+ }
858
+ //# sourceMappingURL=stream.js.map