npm - @arvoretech/pi-kiro-provider - Versions diffs - 0.8.1 - Mend

@arvoretech/pi-kiro-provider 0.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (74) hide show

package/README.md +121 -0
package/dist/bracket-tool-parser.d.ts +12 -0
package/dist/bracket-tool-parser.d.ts.map +1 -0
package/dist/bracket-tool-parser.js +78 -0
package/dist/bracket-tool-parser.js.map +1 -0
package/dist/debug.d.ts +3 -0
package/dist/debug.d.ts.map +1 -0
package/dist/debug.js +49 -0
package/dist/debug.js.map +1 -0
package/dist/event-parser.d.ts +44 -0
package/dist/event-parser.d.ts.map +1 -0
package/dist/event-parser.js +66 -0
package/dist/event-parser.js.map +1 -0
package/dist/history.d.ts +13 -0
package/dist/history.d.ts.map +1 -0
package/dist/history.js +121 -0
package/dist/history.js.map +1 -0
package/dist/index.d.ts +6 -0
package/dist/index.d.ts.map +1 -0
package/dist/index.js +44 -0
package/dist/index.js.map +1 -0
package/dist/kiro-cli.d.ts +32 -0
package/dist/kiro-cli.d.ts.map +1 -0
package/dist/kiro-cli.js +271 -0
package/dist/kiro-cli.js.map +1 -0
package/dist/kiro-ide.d.ts +13 -0
package/dist/kiro-ide.d.ts.map +1 -0
package/dist/kiro-ide.js +74 -0
package/dist/kiro-ide.js.map +1 -0
package/dist/login-ui.d.ts +18 -0
package/dist/login-ui.d.ts.map +1 -0
package/dist/login-ui.js +124 -0
package/dist/login-ui.js.map +1 -0
package/dist/login.d.ts +16 -0
package/dist/login.d.ts.map +1 -0
package/dist/login.js +217 -0
package/dist/login.js.map +1 -0
package/dist/models.d.ts +72 -0
package/dist/models.d.ts.map +1 -0
package/dist/models.js +461 -0
package/dist/models.js.map +1 -0
package/dist/oauth.d.ts +30 -0
package/dist/oauth.d.ts.map +1 -0
package/dist/oauth.js +226 -0
package/dist/oauth.js.map +1 -0
package/dist/retry.d.ts +21 -0
package/dist/retry.d.ts.map +1 -0
package/dist/retry.js +51 -0
package/dist/retry.js.map +1 -0
package/dist/stream.d.ts +5 -0
package/dist/stream.d.ts.map +1 -0
package/dist/stream.js +858 -0
package/dist/stream.js.map +1 -0
package/dist/thinking-parser.d.ts +24 -0
package/dist/thinking-parser.d.ts.map +1 -0
package/dist/thinking-parser.js +205 -0
package/dist/thinking-parser.js.map +1 -0
package/dist/tokenizer.d.ts +2 -0
package/dist/tokenizer.d.ts.map +1 -0
package/dist/tokenizer.js +16 -0
package/dist/tokenizer.js.map +1 -0
package/dist/transform.d.ts +63 -0
package/dist/transform.d.ts.map +1 -0
package/dist/transform.js +200 -0
package/dist/transform.js.map +1 -0
package/dist/truncation.d.ts +4 -0
package/dist/truncation.d.ts.map +1 -0
package/dist/truncation.js +13 -0
package/dist/truncation.js.map +1 -0
package/dist/usage.d.ts +90 -0
package/dist/usage.d.ts.map +1 -0
package/dist/usage.js +169 -0
package/dist/usage.js.map +1 -0
package/package.json +61 -0

package/dist/stream.js ADDED Viewed

@@ -0,0 +1,858 @@
+// ABOUTME: Core streaming integration for Kiro API requests and responses.
+// ABOUTME: Handles request building, retry logic, event parsing, and token counting.
+import { appendFile, mkdir } from "node:fs/promises";
+import { homedir } from "node:os";
+import { join } from "node:path";
+import * as PiAi from "@earendil-works/pi-ai";
+import { UniversalEventStreamMarshaller } from "@smithy/core/event-streams";
+import { parseBracketToolCalls } from "./bracket-tool-parser.js";
+import { debugEnabled, debugLog } from "./debug.js";
+import { parseKiroEvent } from "./event-parser.js";
+import { addPlaceholderTools, HISTORY_LIMIT, HISTORY_LIMIT_CONTEXT_WINDOW, truncateHistory, } from "./history.js";
+import { getKiroCliCredentials, getKiroCliCredentialsAllowExpired, refreshViaKiroCli, } from "./kiro-cli.js";
+import { resolveKiroModel } from "./models.js";
+import { capacityRetryConfig, exponentialBackoff, firstTokenTimeoutForModel, isCapacityError, isNonRetryableBodyError, isTooBigError, MAX_RETRY_DELAY, } from "./retry.js";
+import { ThinkingTagParser } from "./thinking-parser.js";
+import { countTokens } from "./tokenizer.js";
+import { buildHistory, convertImagesToKiro, convertToolsToKiro, extractImages, getContentText, normalizeMessages, sanitizeSurrogates, TOOL_RESULT_LIMIT, truncate, } from "./transform.js";
+import { TRUNCATION_NOTICE, wasPreviousResponseTruncated, } from "./truncation.js";
+const CAPACITY_LOG_DIR = join(homedir(), ".pi", "logs");
+const CAPACITY_LOG_FILE = join(CAPACITY_LOG_DIR, "capacity-retries.log");
+const eventStreamMarshaller = new UniversalEventStreamMarshaller({
+    utf8Encoder: (input) => new TextDecoder().decode(input),
+    utf8Decoder: (input) => new TextEncoder().encode(input),
+});
+let capacityLogDirCreated = false;
+function logCapacityEvent(message) {
+    // Fire-and-forget async logging to avoid blocking the event loop
+    (async () => {
+        try {
+            if (!capacityLogDirCreated) {
+                await mkdir(CAPACITY_LOG_DIR, { recursive: true });
+                capacityLogDirCreated = true;
+            }
+            await appendFile(CAPACITY_LOG_FILE, `${new Date().toISOString()} ${message}\n`);
+        }
+        catch {
+            // best-effort logging, don't break the provider
+        }
+    })();
+}
+/** Delay that rejects early if the abort signal fires. */
+function abortableDelay(ms, signal) {
+    if (signal?.aborted)
+        return Promise.reject(signal.reason);
+    return new Promise((resolve, reject) => {
+        const timer = setTimeout(resolve, ms);
+        signal?.addEventListener("abort", () => {
+            clearTimeout(timer);
+            reject(signal.reason);
+        }, { once: true });
+    });
+}
+// --- profileArn resolution (cached per endpoint) ---
+const profileArnCache = new Map();
+const profileArnPending = new Set();
+/** Reset profileArn cache — exported for tests. */
+export function resetProfileArnCache(resolved = false) {
+    profileArnCache.clear();
+    profileArnPending.clear();
+    if (resolved)
+        profileArnPending.add("__all__");
+}
+async function resolveProfileArn(accessToken, endpoint) {
+    if (profileArnPending.has("__all__"))
+        return undefined;
+    if (profileArnCache.has(endpoint))
+        return profileArnCache.get(endpoint);
+    if (profileArnPending.has(endpoint))
+        return undefined;
+    try {
+        const ep = new URL(endpoint);
+        ep.pathname = ep.pathname.replace(/\/generateAssistantResponse\/?$/, "/");
+        ep.search = "";
+        ep.hash = "";
+        const r = await fetch(ep.toString(), {
+            method: "POST",
+            headers: {
+                "Content-Type": "application/x-amz-json-1.0",
+                Authorization: `Bearer ${accessToken}`,
+                "X-Amz-Target": "AmazonCodeWhispererService.ListAvailableProfiles",
+            },
+            body: "{}",
+        });
+        if (!r.ok) {
+            console.warn(`[pi-provider-kiro] Failed to resolve profileArn: ListAvailableProfiles returned ${r.status} ${r.statusText}. Will retry on the next request.`);
+            return undefined;
+        }
+        const j = (await r.json());
+        const arn = j.profiles?.find((p) => p.arn)?.arn;
+        if (!arn) {
+            debugLog("profileArn.empty", {
+                message: "ListAvailableProfiles returned no profile ARN; this is expected for some social-login tokens.",
+            });
+            return undefined;
+        }
+        profileArnCache.set(endpoint, arn);
+        return arn;
+    }
+    catch (error) {
+        console.warn(`[pi-provider-kiro] Failed to resolve profileArn: ${error instanceof Error ? error.message : String(error)}. Will retry on the next request.`);
+        return undefined;
+    }
+}
+function emitToolCall(state, output, stream) {
+    if (!state.input.trim()) {
+        // Kiro API omits the input payload when the model calls a tool with no
+        // arguments (e.g. mcp({})). Treat empty input as an empty object rather
+        // than skipping — these are valid zero-arg tool calls, not truncations.
+        state.input = "{}";
+    }
+    let args;
+    try {
+        args = JSON.parse(state.input);
+    }
+    catch (e) {
+        console.warn(`[pi-provider-kiro] Failed to parse tool input for "${state.name}" (toolUseId: ${state.toolUseId}): ${e instanceof Error ? e.message : String(e)}. Raw input (${state.input.length} chars): ${state.input.substring(0, 200)}`);
+        return false;
+    }
+    const contentIndex = output.content.length;
+    const toolCall = {
+        type: "toolCall",
+        id: state.toolUseId,
+        name: state.name,
+        arguments: args,
+    };
+    output.content.push(toolCall);
+    stream.push({ type: "toolcall_start", contentIndex, partial: output });
+    stream.push({
+        type: "toolcall_delta",
+        contentIndex,
+        delta: state.input,
+        partial: output,
+    });
+    stream.push({
+        type: "toolcall_end",
+        contentIndex,
+        toolCall,
+        partial: output,
+    });
+    return true;
+}
+export function streamKiro(model, context, options) {
+    // pi-ai's barrel re-exports the class as type-only before the runtime class re-export, so
+    // a named import of AssistantMessageEventStream resolves to a type. Read it from the
+    // namespace import to get the actual constructor. Replaces the removed
+    // createAssistantMessageEventStream() factory (gone in @oh-my-pi/pi-ai).
+    const StreamCtor = PiAi.AssistantMessageEventStream;
+    const stream = new StreamCtor();
+    (async () => {
+        const output = {
+            role: "assistant",
+            content: [],
+            api: model.api,
+            provider: model.provider,
+            model: model.id,
+            usage: {
+                input: 0,
+                output: 0,
+                cacheRead: 0,
+                cacheWrite: 0,
+                totalTokens: 0,
+                cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
+            },
+            stopReason: "stop",
+            timestamp: Date.now(),
+        };
+        try {
+            let accessToken = options?.apiKey;
+            if (!accessToken)
+                throw new Error("Kiro credentials not set. Run /login kiro or install kiro-cli.");
+            const endpoint = model.baseUrl ||
+                "https://q.us-east-1.amazonaws.com/generateAssistantResponse";
+            const optionProfileArn = options?.credentials?.profileArn ||
+                options?.profileArn;
+            const cliCreds = getKiroCliCredentials() ?? getKiroCliCredentialsAllowExpired();
+            const cliProfileArn = cliCreds?.access === accessToken ? cliCreds.profileArn : undefined;
+            let profileArn = optionProfileArn ||
+                cliProfileArn ||
+                (await resolveProfileArn(accessToken, endpoint));
+            // Trigger dynamic models cache update in the background if empty or stale
+            const ep = new URL(endpoint);
+            const region = ep.hostname.split(".")[1] || "us-east-1";
+            const { isCacheStale, updateKiroModelsCache } = await import("./models.js");
+            if (!process.env.VITEST && isCacheStale(region)) {
+                updateKiroModelsCache(accessToken, region, profileArn).catch(() => { });
+            }
+            const kiroModelId = resolveKiroModel(model.id);
+            const thinkingEnabled = !!options?.reasoning || model.reasoning;
+            debugLog("request.init", {
+                endpoint,
+                model: model.id,
+                kiroModelId,
+                contextWindow: model.contextWindow,
+                thinkingEnabled,
+                reasoning: options?.reasoning,
+                messageCount: context.messages.length,
+                toolCount: context.tools?.length ?? 0,
+                hasSystemPrompt: !!context.systemPrompt,
+                profileArn,
+                sessionId: options?.sessionId,
+            });
+            let systemPrompt = context.systemPrompt ?? "";
+            if (thinkingEnabled) {
+                const budget = options?.reasoning === "xhigh"
+                    ? 50000
+                    : options?.reasoning === "high"
+                        ? 30000
+                        : options?.reasoning === "medium"
+                            ? 20000
+                            : 10000;
+                systemPrompt = `<thinking_mode>enabled</thinking_mode><max_thinking_length>${budget}</max_thinking_length>${systemPrompt ? `\n${systemPrompt}` : ""}`;
+            }
+            let retryCount = 0;
+            const maxRetries = 3;
+            const conversationId = options?.sessionId ?? crypto.randomUUID();
+            while (retryCount <= maxRetries) {
+                if (options?.signal?.aborted)
+                    throw options.signal.reason;
+                const effectiveSystemPrompt = systemPrompt;
+                const normalized = normalizeMessages(context.messages);
+                const { history: rawHistory, systemPrepended, currentMsgStartIdx, } = buildHistory(normalized, kiroModelId, effectiveSystemPrompt);
+                // Scale history limit to model context window
+                // HISTORY_LIMIT (850K chars) is sized for 200K token models
+                const dynamicHistoryLimit = Math.floor((model.contextWindow / HISTORY_LIMIT_CONTEXT_WINDOW) * HISTORY_LIMIT);
+                const history = truncateHistory(rawHistory, dynamicHistoryLimit);
+                const toolResultLimit = TOOL_RESULT_LIMIT;
+                const currentMessages = normalized.slice(currentMsgStartIdx);
+                const firstMsg = currentMessages[0];
+                let currentContent = "";
+                const currentToolResults = [];
+                let currentImages;
+                if (firstMsg?.role === "assistant") {
+                    const am = firstMsg;
+                    let armContent = "";
+                    const armToolUses = [];
+                    if (Array.isArray(am.content))
+                        for (const b of am.content) {
+                            if (b.type === "text")
+                                armContent += b.text;
+                            else if (b.type === "thinking")
+                                armContent = `<thinking>${b.thinking}</thinking>\n\n${armContent}`;
+                            else if (b.type === "toolCall") {
+                                const tc = b;
+                                armToolUses.push({
+                                    name: tc.name,
+                                    toolUseId: tc.id,
+                                    input: typeof tc.arguments === "string"
+                                        ? JSON.parse(tc.arguments)
+                                        : tc.arguments,
+                                });
+                            }
+                        }
+                    if (armContent || armToolUses.length > 0) {
+                        const lastEntryForArm = history[history.length - 1];
+                        const prevArm = lastEntryForArm?.assistantResponseMessage;
+                        if (history.length > 0 &&
+                            !lastEntryForArm?.userInputMessage &&
+                            prevArm) {
+                            // Merge into previous assistant message to maintain alternation without synthetic padding
+                            prevArm.content += `\n\n${armContent}`;
+                            if (armToolUses.length > 0)
+                                prevArm.toolUses = [
+                                    ...(prevArm.toolUses || []),
+                                    ...armToolUses,
+                                ];
+                        }
+                        else {
+                            history.push({
+                                assistantResponseMessage: {
+                                    content: armContent,
+                                    ...(armToolUses.length > 0 ? { toolUses: armToolUses } : {}),
+                                },
+                            });
+                        }
+                    }
+                    const toolResultImages = [];
+                    for (let i = 1; i < currentMessages.length; i++) {
+                        const m = currentMessages[i];
+                        if (m.role === "toolResult") {
+                            const trm = m;
+                            currentToolResults.push({
+                                content: [
+                                    { text: truncate(getContentText(m), toolResultLimit) },
+                                ],
+                                status: trm.isError ? "error" : "success",
+                                toolUseId: trm.toolCallId,
+                            });
+                            if (Array.isArray(trm.content))
+                                for (const c of trm.content)
+                                    if (c.type === "image")
+                                        toolResultImages.push(c);
+                        }
+                    }
+                    if (toolResultImages.length > 0) {
+                        const converted = convertImagesToKiro(toolResultImages);
+                        currentImages = currentImages
+                            ? [...currentImages, ...converted]
+                            : converted;
+                    }
+                    currentContent =
+                        currentToolResults.length > 0
+                            ? "Tool results provided."
+                            : "Please proceed with the task.";
+                }
+                else if (firstMsg?.role === "toolResult") {
+                    const toolResultImages2 = [];
+                    for (const m of currentMessages)
+                        if (m.role === "toolResult") {
+                            const trm = m;
+                            currentToolResults.push({
+                                content: [
+                                    { text: truncate(getContentText(m), toolResultLimit) },
+                                ],
+                                status: trm.isError ? "error" : "success",
+                                toolUseId: trm.toolCallId,
+                            });
+                            if (Array.isArray(trm.content))
+                                for (const c of trm.content)
+                                    if (c.type === "image")
+                                        toolResultImages2.push(c);
+                        }
+                    if (toolResultImages2.length > 0) {
+                        const converted = convertImagesToKiro(toolResultImages2);
+                        currentImages = currentImages
+                            ? [...currentImages, ...converted]
+                            : converted;
+                    }
+                    currentContent = "Tool results provided.";
+                }
+                else if (firstMsg?.role === "user") {
+                    currentContent =
+                        typeof firstMsg.content === "string"
+                            ? firstMsg.content
+                            : getContentText(firstMsg);
+                    if (effectiveSystemPrompt && !systemPrepended)
+                        currentContent = `${effectiveSystemPrompt}\n\n${currentContent}`;
+                }
+                // Prepend truncation notice if the previous assistant response was cut off
+                if (wasPreviousResponseTruncated(context.messages)) {
+                    currentContent = `${TRUNCATION_NOTICE}\n\n${currentContent}`;
+                }
+                // Always synthesize placeholder specs for tool names referenced in
+                // history, even when context.tools is empty/undefined. Without this,
+                // an "advisor-style" call that inherits a tool-rich conversation but
+                // declares no current tools is rejected by Kiro as "Improperly formed
+                // request" because history references toolUses with no tool catalog.
+                let uimc;
+                const baseTools = context.tools?.length
+                    ? convertToolsToKiro(context.tools)
+                    : [];
+                const finalTools = history.length > 0
+                    ? addPlaceholderTools(baseTools, history)
+                    : baseTools;
+                if (currentToolResults.length > 0 || finalTools.length > 0) {
+                    uimc = {};
+                    if (currentToolResults.length > 0)
+                        uimc.toolResults = currentToolResults;
+                    if (finalTools.length > 0)
+                        uimc.tools = finalTools;
+                }
+                if (firstMsg?.role === "user") {
+                    const imgs = extractImages(firstMsg);
+                    if (imgs.length > 0)
+                        currentImages = convertImagesToKiro(imgs);
+                }
+                // kiro-cli does not enforce alternation — the API accepts
+                // non-alternating history. No synthetic padding needed.
+                const request = {
+                    conversationState: {
+                        chatTriggerType: "MANUAL",
+                        agentTaskType: "vibe",
+                        conversationId,
+                        currentMessage: {
+                            userInputMessage: {
+                                content: sanitizeSurrogates(currentContent),
+                                modelId: kiroModelId,
+                                origin: "KIRO_CLI",
+                                ...(currentImages ? { images: currentImages } : {}),
+                                ...(uimc ? { userInputMessageContext: uimc } : {}),
+                            },
+                        },
+                        ...(history.length > 0 ? { history } : {}),
+                    },
+                    ...(profileArn ? { profileArn } : {}),
+                    agentMode: "vibe",
+                };
+                let response;
+                // Reset per outer iteration — each 403 retry gets a fresh capacity budget
+                let capacityRetryCount = 0;
+                // Inner loop: retry capacity errors without consuming outer retry budget
+                while (true) {
+                    const mid = crypto.randomUUID().replace(/-/g, "");
+                    const ua = `aws-sdk-rust/1.0.0 ua/2.1 os/other lang/rust api/codewhispererstreaming#1.28.3 m/E app/AmazonQ-For-CLI md/appVersion-1.28.3-${mid}`;
+                    debugLog("request.send", {
+                        attempt: retryCount,
+                        capacityAttempt: capacityRetryCount,
+                        historyLen: history.length,
+                        currentContentLen: currentContent.length,
+                        hasImages: !!currentImages,
+                        toolResultCount: currentToolResults.length,
+                        request,
+                    });
+                    response = await fetch(endpoint, {
+                        method: "POST",
+                        headers: {
+                            "Content-Type": "application/x-amz-json-1.0",
+                            Accept: "application/json",
+                            Authorization: `Bearer ${accessToken}`,
+                            "X-Amz-Target": "AmazonCodeWhispererStreamingService.GenerateAssistantResponse",
+                            "x-amzn-codewhisperer-optout": "true",
+                            "amz-sdk-invocation-id": crypto.randomUUID(),
+                            "amz-sdk-request": "attempt=1; max=1",
+                            "x-amzn-kiro-agent-mode": "vibe",
+                            "x-amz-user-agent": ua,
+                            "user-agent": ua,
+                        },
+                        body: JSON.stringify(request),
+                        signal: options?.signal,
+                    });
+                    if (!response.ok) {
+                        let errText = "";
+                        try {
+                            errText = await response.text();
+                        }
+                        catch {
+                            errText = "";
+                        }
+                        debugLog("response.error", {
+                            status: response.status,
+                            statusText: response.statusText,
+                            body: errText,
+                        });
+                        // Retry transient capacity errors with longer backoff
+                        if (isCapacityError(errText) &&
+                            capacityRetryCount < capacityRetryConfig.maxRetries) {
+                            capacityRetryCount++;
+                            const delayMs = exponentialBackoff(capacityRetryCount - 1, capacityRetryConfig.baseDelayMs, 30_000);
+                            const msg = `INSUFFICIENT_MODEL_CAPACITY — retrying in ${delayMs}ms (${capacityRetryCount}/${capacityRetryConfig.maxRetries})`;
+                            console.error(`[pi-provider-kiro] ${msg}`);
+                            logCapacityEvent(msg);
+                            await abortableDelay(delayMs, options?.signal);
+                            continue;
+                        }
+                        if (isCapacityError(errText)) {
+                            logCapacityEvent(`INSUFFICIENT_MODEL_CAPACITY — exhausted ${capacityRetryConfig.maxRetries} retries, giving up`);
+                        }
+                        if (response.status === 403 &&
+                            !isCapacityError(errText) &&
+                            retryCount < maxRetries) {
+                            retryCount++;
+                            // On 403, try to get a fresh token before retrying — the current
+                            // one may have been rotated by kiro-cli or another session. If
+                            // the cached kiro-cli token is also stale, actively refresh it.
+                            const freshCreds = getKiroCliCredentials() ?? refreshViaKiroCli();
+                            if (freshCreds?.access)
+                                accessToken = freshCreds.access;
+                            // Re-resolve profileArn with fresh credentials
+                            profileArnCache.delete(endpoint);
+                            const refreshedProfileArn = options?.credentials?.profileArn ||
+                                options?.profileArn ||
+                                freshCreds?.profileArn;
+                            profileArn =
+                                refreshedProfileArn ||
+                                    (await resolveProfileArn(accessToken, endpoint));
+                            const delayMs = exponentialBackoff(retryCount - 1, 500, MAX_RETRY_DELAY);
+                            await abortableDelay(delayMs, options?.signal);
+                            break; // break inner loop, continue outer loop
+                        }
+                        // Avoid pi-coding-agent's outer auto-retry from treating known
+                        // Kiro quota/capacity body markers as generic retryable 429s.
+                        // This covers both hard quota (MONTHLY_REQUEST_COUNT) and
+                        // exhausted capacity retries (INSUFFICIENT_MODEL_CAPACITY).
+                        if (isNonRetryableBodyError(errText) || isCapacityError(errText)) {
+                            throw new Error(`Kiro API error: ${errText || response.statusText}`);
+                        }
+                        // Format error so pi-ai's isContextOverflow() recognizes it
+                        if (isTooBigError(response.status, errText)) {
+                            throw new Error(`Kiro API error: context_length_exceeded (${response.status} ${errText})`);
+                        }
+                        throw new Error(`Kiro API error: ${response.status} ${response.statusText} ${errText}`);
+                    }
+                    break; // success, break inner loop
+                }
+                if (capacityRetryCount > 0 && response.ok) {
+                    logCapacityEvent(`INSUFFICIENT_MODEL_CAPACITY — succeeded after ${capacityRetryCount} retries`);
+                }
+                // 403 retry: continue outer loop
+                if (!response.ok)
+                    continue;
+                stream.push({ type: "start", partial: output });
+                if (!response.body)
+                    throw new Error("No response body");
+                const bodyReader = response.body.getReader();
+                let totalContent = "";
+                let lastContentData = "";
+                let usageEvent = null;
+                let receivedContextUsage = false;
+                const thinkingParser = thinkingEnabled
+                    ? new ThinkingTagParser(output, stream)
+                    : null;
+                let textBlockIndex = null;
+                let emittedToolCalls = 0;
+                let sawAnyToolCalls = false;
+                let currentToolCall = null;
+                const flushToolCall = () => {
+                    if (!currentToolCall)
+                        return;
+                    if (emitToolCall(currentToolCall, output, stream))
+                        emittedToolCalls++;
+                    currentToolCall = null;
+                };
+                const IDLE_TIMEOUT = 300_000;
+                let idleTimer = null;
+                let idleCancelled = false;
+                const resetIdle = () => {
+                    if (idleTimer)
+                        clearTimeout(idleTimer);
+                    idleTimer = setTimeout(() => {
+                        idleCancelled = true;
+                        void bodyReader.cancel().catch(() => { });
+                    }, IDLE_TIMEOUT);
+                };
+                let gotFirstToken = false;
+                let firstTokenTimedOut = false;
+                let streamError = null;
+                const FIRST_TOKEN_SENTINEL = Symbol("firstTokenTimeout");
+                // Smithy EventStreamMarshaller handles: chunk reassembly, CRC validation,
+                // protocol error/exception detection, and payload deserialization.
+                const bodyIterable = {
+                    async *[Symbol.asyncIterator]() {
+                        try {
+                            while (true) {
+                                const { done, value } = await bodyReader.read();
+                                if (done)
+                                    return;
+                                yield value;
+                            }
+                        }
+                        finally {
+                            bodyReader.releaseLock();
+                        }
+                    },
+                };
+                const utf8Decoder = new TextDecoder();
+                const eventStream = eventStreamMarshaller.deserialize(bodyIterable, async (event) => {
+                    const key = Object.keys(event)[0];
+                    const msg = event[key];
+                    const parsed = JSON.parse(utf8Decoder.decode(msg.body));
+                    return { [key]: parsed };
+                });
+                const iterator = eventStream[Symbol.asyncIterator]();
+                while (true) {
+                    let iterResult;
+                    try {
+                        if (!gotFirstToken) {
+                            const readPromise = iterator.next();
+                            const result = await Promise.race([
+                                readPromise,
+                                new Promise((resolve) => setTimeout(() => resolve(FIRST_TOKEN_SENTINEL), firstTokenTimeoutForModel(model.id))),
+                            ]);
+                            if (result === FIRST_TOKEN_SENTINEL) {
+                                readPromise.catch(() => { }); // suppress dangling rejection
+                                void bodyReader.cancel().catch(() => { });
+                                firstTokenTimedOut = true;
+                                break;
+                            }
+                            iterResult = result;
+                            gotFirstToken = true;
+                            resetIdle();
+                        }
+                        else {
+                            iterResult = await iterator.next();
+                        }
+                    }
+                    catch (e) {
+                        // Smithy throws on :message-type error/exception headers
+                        streamError =
+                            e instanceof Error
+                                ? e.message
+                                : (typeof e === "object" && e !== null
+                                    ? JSON.stringify(e)
+                                    : String(e)) || "Unknown stream error";
+                        break;
+                    }
+                    const { done, value } = iterResult;
+                    if (done)
+                        break;
+                    resetIdle();
+                    const eventPayload = Object.values(value)[0];
+                    const event = parseKiroEvent(eventPayload);
+                    if (!event)
+                        continue;
+                    if (debugEnabled())
+                        debugLog("stream.events", [event]);
+                    switch (event.type) {
+                        case "contextUsage": {
+                            const pct = event.data.contextUsagePercentage;
+                            output.usage.input = Math.round((pct / 100) * model.contextWindow);
+                            output.usage.contextPercent = pct;
+                            receivedContextUsage = true;
+                            break;
+                        }
+                        case "content": {
+                            if (event.data === lastContentData)
+                                continue;
+                            lastContentData = event.data;
+                            totalContent += event.data;
+                            if (thinkingParser) {
+                                thinkingParser.processChunk(event.data);
+                            }
+                            else {
+                                if (textBlockIndex === null) {
+                                    textBlockIndex = output.content.length;
+                                    output.content.push({ type: "text", text: "" });
+                                    stream.push({
+                                        type: "text_start",
+                                        contentIndex: textBlockIndex,
+                                        partial: output,
+                                    });
+                                }
+                                output.content[textBlockIndex].text +=
+                                    event.data;
+                                stream.push({
+                                    type: "text_delta",
+                                    contentIndex: textBlockIndex,
+                                    delta: event.data,
+                                    partial: output,
+                                });
+                            }
+                            break;
+                        }
+                        case "toolUse": {
+                            const tc = event.data;
+                            sawAnyToolCalls = true;
+                            if (!currentToolCall ||
+                                currentToolCall.toolUseId !== tc.toolUseId) {
+                                flushToolCall();
+                                currentToolCall = {
+                                    toolUseId: tc.toolUseId,
+                                    name: tc.name,
+                                    input: "",
+                                };
+                            }
+                            currentToolCall.input += tc.input || "";
+                            if (tc.input)
+                                totalContent += tc.input;
+                            if (tc.stop)
+                                flushToolCall();
+                            break;
+                        }
+                        case "toolUseInput": {
+                            if (currentToolCall)
+                                currentToolCall.input += event.data.input || "";
+                            if (event.data.input)
+                                totalContent += event.data.input;
+                            break;
+                        }
+                        case "toolUseStop": {
+                            if (event.data.stop)
+                                flushToolCall();
+                            break;
+                        }
+                        case "usage": {
+                            usageEvent = event.data;
+                            break;
+                        }
+                        case "error": {
+                            const errMsg = event.data.message
+                                ? `${event.data.error}: ${event.data.message}`
+                                : event.data.error;
+                            streamError = errMsg;
+                            void bodyReader.cancel().catch(() => { });
+                            break;
+                        }
+                        // followupPrompt events are intentionally ignored
+                    }
+                    if (streamError)
+                        break;
+                }
+                if (idleTimer)
+                    clearTimeout(idleTimer);
+                if (firstTokenTimedOut || idleCancelled || streamError) {
+                    // Timed out or received error mid-stream: retry with backoff
+                    if (retryCount < maxRetries) {
+                        retryCount++;
+                        const delayMs = exponentialBackoff(retryCount - 1, 1000, MAX_RETRY_DELAY);
+                        await abortableDelay(delayMs, options?.signal);
+                        continue;
+                    }
+                    if (streamError) {
+                        throw new Error(`Kiro API stream error after max retries: ${streamError}`);
+                    }
+                    throw new Error(`Kiro API error: ${firstTokenTimedOut ? "first token" : "idle"} timeout after max retries`);
+                }
+                if (currentToolCall && emitToolCall(currentToolCall, output, stream)) {
+                    emittedToolCalls++;
+                }
+                if (thinkingParser) {
+                    thinkingParser.finalize();
+                    textBlockIndex = thinkingParser.getTextBlockIndex();
+                }
+                // Fallback: extract bracket-style tool calls from content if no native tool calls
+                if (!sawAnyToolCalls && textBlockIndex !== null) {
+                    const textBlock = output.content[textBlockIndex];
+                    const bracketResult = parseBracketToolCalls(textBlock.text);
+                    if (bracketResult.toolCalls.length > 0) {
+                        sawAnyToolCalls = true;
+                        textBlock.text = bracketResult.cleanedText;
+                        for (const btc of bracketResult.toolCalls) {
+                            if (emitToolCall({
+                                toolUseId: btc.toolUseId,
+                                name: btc.name,
+                                input: JSON.stringify(btc.arguments),
+                            }, output, stream)) {
+                                emittedToolCalls++;
+                            }
+                        }
+                    }
+                }
+                // Strip echo noise: when tool calls are present and the text content
+                // is just "." or similar short echo from history padding, remove it.
+                // This prevents the echo from accumulating in conversation history
+                // and reinforcing the pattern in future turns.
+                if (emittedToolCalls > 0 && textBlockIndex !== null) {
+                    const textBlock = output.content[textBlockIndex];
+                    if (/^\s*(\.+|continue)\s*$/i.test(textBlock.text)) {
+                        textBlock.text = "";
+                    }
+                }
+                if (textBlockIndex !== null)
+                    stream.push({
+                        type: "text_end",
+                        contentIndex: textBlockIndex,
+                        content: output.content[textBlockIndex].text,
+                        partial: output,
+                    });
+                // The Kiro streaming API does not reliably emit per-response output
+                // token counts (unlike Anthropic's `output_tokens` or Bedrock's
+                // `usage.outputTokens`). When the `usage` event is missing or only
+                // reports `inputTokens`, fall back to a tiktoken estimate over
+                // everything the assistant emitted — text plus tool-call input JSON
+                // (accumulated into `totalContent` above). Otherwise tool-call-only
+                // turns report 0 output tokens and break consumers like the TPS
+                // extension that watch `usage.output`.
+                if (usageEvent?.inputTokens !== undefined)
+                    output.usage.input = usageEvent.inputTokens;
+                output.usage.output =
+                    usageEvent?.outputTokens ?? countTokens(totalContent);
+                output.usage.totalTokens = output.usage.input + output.usage.output;
+                try {
+                    PiAi.calculateCost(model, output.usage);
+                }
+                catch {
+                    // Model might not have cost info, use zeros
+                    output.usage.cost = {
+                        input: 0,
+                        output: 0,
+                        cacheRead: 0,
+                        cacheWrite: 0,
+                        total: 0,
+                    };
+                }
+                // Detect degenerate responses: the API returned 200 but produced no
+                // usable content at all — no text and no tool calls (not even broken
+                // ones). This happens when the stream is truncated early or the API
+                // returns only a contextUsage event. Retry with backoff.
+                //
+                // Also detect "Continue" echo loops: the model's entire response is
+                // just "continue" (case-insensitive) with no tool calls. This happens
+                // when synthetic history padding teaches the model to echo "Continue"
+                // as a valid response, causing an infinite loop where pi sends
+                // "continue" back and the model echoes it again.
+                //
+                // When tool calls *were* present but all got dropped (empty/unparseable
+                // input), don't retry — the API did respond, it just sent malformed
+                // tool calls. Retrying would likely produce the same result. The
+                // stopReason fix below prevents the agent loop stall.
+                const hasText = textBlockIndex !== null &&
+                    output.content[textBlockIndex].text.length > 0;
+                const responseText = hasText
+                    ? output.content[textBlockIndex].text
+                    : "";
+                const isEchoLoop = hasText &&
+                    !sawAnyToolCalls &&
+                    /^\s*(continue|\.+)\s*$/i.test(responseText);
+                if ((!hasText && !sawAnyToolCalls) || isEchoLoop) {
+                    if (retryCount < maxRetries) {
+                        retryCount++;
+                        const delayMs = exponentialBackoff(retryCount - 1, 1000, MAX_RETRY_DELAY);
+                        console.warn(`[pi-provider-kiro] ${isEchoLoop ? 'Echo loop detected (model responded with just "Continue")' : "Empty response (no text, no tool calls)"} — retrying (${retryCount}/${maxRetries})`);
+                        // Reset output content for the retry
+                        output.content = [];
+                        textBlockIndex = null;
+                        await abortableDelay(delayMs, options?.signal);
+                        continue;
+                    }
+                    if (isEchoLoop) {
+                        // After max retries, strip the echo text to prevent the agent
+                        // loop from interpreting "Continue" as a continuation signal.
+                        output.content[textBlockIndex].text = "";
+                        console.warn(`[pi-provider-kiro] Echo loop persisted after ${maxRetries} retries — stripping "Continue" response`);
+                    }
+                    else {
+                        console.warn(`[pi-provider-kiro] Empty response after ${maxRetries} retries — returning stopReason:"stop" to avoid agent loop stall`);
+                    }
+                }
+                // Use emittedToolCalls (not toolCalls.length) to avoid stopReason:"toolUse"
+                // when all tool calls were skipped due to empty/unparseable input — that
+                // combination (empty content + toolUse stop) causes pi's agent loop to
+                // stall waiting for tool results that will never arrive.
+                if (!receivedContextUsage && emittedToolCalls === 0) {
+                    output.stopReason = "length";
+                }
+                else {
+                    output.stopReason = emittedToolCalls > 0 ? "toolUse" : "stop";
+                }
+                stream.push({
+                    type: "done",
+                    reason: output.stopReason,
+                    message: output,
+                });
+                debugLog("response.done", {
+                    stopReason: output.stopReason,
+                    emittedToolCalls,
+                    sawAnyToolCalls,
+                    textLen: textBlockIndex !== null
+                        ? output.content[textBlockIndex].text.length
+                        : 0,
+                    usage: output.usage,
+                    content: output.content,
+                });
+                stream.end();
+                break;
+            }
+        }
+        catch (error) {
+            output.stopReason = options?.signal?.aborted ? "aborted" : "error";
+            output.errorMessage =
+                error instanceof Error ? error.message : String(error);
+            debugLog("response.caught", {
+                stopReason: output.stopReason,
+                error: output.errorMessage,
+            });
+            stream.push({ type: "error", reason: output.stopReason, error: output });
+            stream.end();
+        }
+    })().catch(() => {
+        // Safety net: catch any rejection that escapes the inner try/catch
+        // (e.g., AbortError during signal teardown). Without this, the
+        // fire-and-forget IIFE produces an unhandled rejection that crashes pi.
+        try {
+            stream.end();
+        }
+        catch { }
+    });
+    return stream;
+}
+//# sourceMappingURL=stream.js.map