npm - @arcote.tech/arc-chat - Versions diffs - 0.7.7 → 0.7.9 - Mend

@arcote.tech/arc-chat 0.7.7 → 0.7.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7)

package/package.json +7 -6
package/src/aggregates/message.ts +131 -7
package/src/chat-builder.ts +8 -1
package/src/listeners/ai-generation-listener.ts +176 -36
package/src/react/chat-component.tsx +283 -164
package/src/routes/chat-stream-route.ts +7 -2
package/src/streaming/stream-registry.ts +24 -5

package/package.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "name": "@arcote.tech/arc-chat",
   "type": "module",
-  "version": "0.7.7",
+  "version": "0.7.9",
   "private": false,
   "description": "Chat module with AI integration for Arc framework",
   "main": "./src/index.ts",
@@ -10,11 +10,12 @@
     "type-check": "tsc --noEmit"
   },
   "peerDependencies": {
-    "@arcote.tech/arc": "^0.7.7",
-    "@arcote.tech/arc-ai": "^0.7.7",
-    "@arcote.tech/arc-auth": "^0.7.7",
-    "@arcote.tech/arc-ds": "^0.7.7",
-    "@arcote.tech/platform": "^0.7.7",
+    "@arcote.tech/arc": "^0.7.9",
+    "@arcote.tech/arc-ai": "^0.7.9",
+    "@arcote.tech/arc-ai-voice": "^0.7.9",
+    "@arcote.tech/arc-auth": "^0.7.9",
+    "@arcote.tech/arc-ds": "^0.7.9",
+    "@arcote.tech/platform": "^0.7.9",
     "lucide-react": ">=0.400.0",
     "react": ">=18.0.0",
     "typescript": "^5.0.0"

package/src/aggregates/message.ts CHANGED Viewed

@@ -4,6 +4,7 @@ import {
   boolean,
   date,
   id,
+  number,
   string,
   type ArcId,
 } from "@arcote.tech/arc";
@@ -78,6 +79,18 @@ export const createMessageAggregate = <
     previousResponseId: string().optional(),
     isGenerating: boolean().optional(),
     usage: string().optional(),
+    /**
+     * Partial snapshot blocks (JSON-serialized AssistantContentBlock[])
+     * zapisywane w trakcie streamingu co kilka chunków. Pozwala klientowi
+     * po reload przeglądarki przywrócić stan i kontynuować SSE od
+     * `partialLastSeq`. Czyszczone po `assistantTurnCompleted`.
+     */
+    partialBlocks: string().optional(),
+    /**
+     * Ostatni seq SSE event'u zaaplikowany do `partialBlocks`. Klient
+     * wysyła `?afterSeq=partialLastSeq` przy SSE resume.
+     */
+    partialLastSeq: number().optional(),
     createdAt: date(),
   })
@@ -92,6 +105,14 @@ export const createMessageAggregate = <
         content: string(),
         model: string().optional(),
         isGenerating: boolean().optional(),
+        /**
+         * Pre-utworzone messageId pustego assistant row'a który zostaje
+         * stworzony w tej samej mutacji (atomowo z `messageSent`). Listener
+         * AI generation używa go zamiast wołać `startAssistantTurn`. Dzięki
+         * temu klient widzi assistant row natychmiast (przez useQuery push),
+         * otwiera SSE i streaming jest visible od pierwszego chunka.
+         */
+        assistantMessageId: messageId.optional(),
       },
       async (ctx, event) => {
         const p = event.payload;
@@ -132,10 +153,30 @@ export const createMessageAggregate = <
       },
     )
+    // ─── assistantTurnProgressSnapshot — checkpoint w trakcie streamingu ─
+    // Listener emituje co N chunków lub T sekund — klient po reload czyta
+    // `partialBlocks` + `partialLastSeq` i kontynuuje SSE od miejsca w
+    // którym był.
+    .publicEvent(
+      "assistantTurnProgressSnapshot",
+      {
+        messageId,
+        partialBlocks: string(),
+        partialLastSeq: number(),
+      },
+      async (ctx, event) => {
+        const p = event.payload;
+        await ctx.modify(p.messageId, {
+          partialBlocks: p.partialBlocks,
+          partialLastSeq: p.partialLastSeq,
+        } as any);
+      },
+    )
     // ─── assistantTurnCompleted — finalize an in-progress turn row ───
     // Partial update on the SAME row — fills `blocks`, flips
     // `isGenerating` to false, optionally records `previousResponseId`,
-    // `usage`, or `error`.
+    // `usage`, or `error`. Czyści `partialBlocks` / `partialLastSeq`.
     .publicEvent(
       "assistantTurnCompleted",
       {
@@ -152,6 +193,8 @@ export const createMessageAggregate = <
           previousResponseId: p.previousResponseId,
           usage: p.usage,
           isGenerating: false,
+          partialBlocks: undefined,
+          partialLastSeq: undefined,
         } as any);
       },
     )
@@ -192,6 +235,8 @@ export const createMessageAggregate = <
         toolName: string(),
         toolCallId: string(),
         content: string(),
+        /** Patrz dokumentacja `messageSent.assistantMessageId`. */
+        assistantMessageId: messageId.optional(),
       },
       async (ctx, event) => {
         const p = event.payload;
@@ -208,6 +253,11 @@ export const createMessageAggregate = <
     )
     // ─── sendMessage — user sends message, creates session ──────
+    // Emit'uje DWA eventy w jednej transakcji: messageSent (user row) +
+    // assistantTurnStarted (empty assistant row z isGenerating=true). Dzięki
+    // temu klient widzi placeholder asystenta natychmiast (przez useQuery
+    // push) i otwiera SSE zanim AI listener zacznie emit chunków → streaming
+    // od pierwszego znaku visible.
     .mutateMethod(
       "sendMessage",
       (fn) => fn.withParams({
@@ -218,8 +268,23 @@ export const createMessageAggregate = <
         ONLY_SERVER &&
         (async (ctx, params) => {
           const userMsgId = messageId.generate();
+          const assistantMsgId = messageId.generate();
           const sessionId = `session_${Date.now()}_${Math.random().toString(36).slice(2, 9)}`;
+          // KOLEJNOŚĆ EMIT WAŻNA: assistantTurnStarted PRZED messageSent.
+          // aiGenerationListener listens to `messageSent` (async). Async
+          // listeners w arc fire'ują się "synchronicznie" w trakcie publish
+          // (handler startuje, suspends na pierwszym await). Gdybyśmy emit'owali
+          // messageSent jako pierwsze, listener mógłby zacząć pracować zanim
+          // assistantTurnStarted skomituje assistant row do DB → listener
+          // tries to use messageId którego nie ma jeszcze w stores.
+          await ctx.assistantTurnStarted.emit({
+            messageId: assistantMsgId,
+            scopeId: params.scopeId,
+            sessionId,
+            model: params.model,
+          });
           await ctx.messageSent.emit({
             messageId: userMsgId,
             scopeId: params.scopeId,
@@ -227,8 +292,10 @@ export const createMessageAggregate = <
             role: "user",
             content: params.content,
             model: params.model,
+            assistantMessageId: assistantMsgId,
           });
-          return { messageId: userMsgId, sessionId };
+          return { messageId: userMsgId, sessionId, assistantMessageId: assistantMsgId };
         }),
       ),
     )
@@ -258,6 +325,26 @@ export const createMessageAggregate = <
       ),
     )
+    // ─── saveProgressSnapshot — zapis partial JSON w trakcie streamingu ─
+    .mutateMethod(
+      "saveProgressSnapshot",
+      (fn) => fn.withParams({
+        messageId,
+        partialBlocks: string(),
+        partialLastSeq: number(),
+      }).handle(
+        ONLY_SERVER &&
+        (async (ctx, params) => {
+          await ctx.assistantTurnProgressSnapshot.emit({
+            messageId: params.messageId,
+            partialBlocks: params.partialBlocks,
+            partialLastSeq: params.partialLastSeq,
+          });
+          return { ok: true };
+        }),
+      ),
+    )
     // ─── completeAssistantTurn — partial update of the open turn row ─
     .mutateMethod(
       "completeAssistantTurn",
@@ -311,6 +398,9 @@ export const createMessageAggregate = <
     )
     // ─── respondToTool — user answers interactive tool ──────────
+    // Patrz `sendMessage` — analogicznie tworzy assistant row w tej samej
+    // transakcji, żeby resume listener wypełnił istniejący row a klient
+    // widział streaming live.
     .mutateMethod(
       "respondToTool",
       (fn) => fn.withParams({
@@ -322,8 +412,16 @@ export const createMessageAggregate = <
         ONLY_SERVER &&
         (async (ctx, params) => {
           const msgId = messageId.generate();
+          const assistantMsgId = messageId.generate();
           const sessionId = `session_${Date.now()}_${Math.random().toString(36).slice(2, 9)}`;
+          // KOLEJNOŚĆ EMIT — patrz komentarz w `sendMessage`.
+          await ctx.assistantTurnStarted.emit({
+            messageId: assistantMsgId,
+            scopeId: params.scopeId,
+            sessionId,
+          });
           await ctx.userResponded.emit({
             messageId: msgId,
             scopeId: params.scopeId,
@@ -331,8 +429,10 @@ export const createMessageAggregate = <
             toolName: params.toolName,
             toolCallId: params.toolCallId,
             content: params.result,
+            assistantMessageId: assistantMsgId,
           });
-          return { messageId: msgId, sessionId };
+          return { messageId: msgId, sessionId, assistantMessageId: assistantMsgId };
         }),
       ),
     )
@@ -351,7 +451,17 @@ export const createMessageAggregate = <
         ONLY_SERVER &&
         (async (ctx, params) => {
           const msgId = messageId.generate();
+          const assistantMsgId = messageId.generate();
           const sessionId = `session_${Date.now()}_${Math.random().toString(36).slice(2, 9)}`;
+          const model = params.model ?? "gpt-5";
+          // KOLEJNOŚĆ EMIT — patrz komentarz w `sendMessage`.
+          await ctx.assistantTurnStarted.emit({
+            messageId: assistantMsgId,
+            scopeId: params.scopeId,
+            sessionId,
+            model,
+          });
           await ctx.messageSent.emit({
             messageId: msgId,
@@ -359,9 +469,11 @@ export const createMessageAggregate = <
             sessionId,
             role: "system",
             content: "Rozpocznij ten etap. Przywitaj się i zadaj pierwsze pytanie.",
-            model: params.model ?? "gpt-5.4-mini",
+            model,
+            assistantMessageId: assistantMsgId,
           });
-          return { messageId: msgId, sessionId };
+          return { messageId: msgId, sessionId, assistantMessageId: assistantMsgId };
         }),
       ),
     )
@@ -382,7 +494,17 @@ export const createMessageAggregate = <
         ONLY_SERVER &&
         (async (ctx, params) => {
           const msgId = messageId.generate();
+          const assistantMsgId = messageId.generate();
           const sessionId = `session_${Date.now()}_${Math.random().toString(36).slice(2, 9)}`;
+          const model = params.model ?? "gpt-5";
+          // KOLEJNOŚĆ EMIT — patrz komentarz w `sendMessage`.
+          await ctx.assistantTurnStarted.emit({
+            messageId: assistantMsgId,
+            scopeId: params.scopeId,
+            sessionId,
+            model,
+          });
           await ctx.messageSent.emit({
             messageId: msgId,
@@ -390,9 +512,11 @@ export const createMessageAggregate = <
             sessionId,
             role: "system",
             content: params.content,
-            model: params.model ?? "gpt-5.4-mini",
+            model,
+            assistantMessageId: assistantMsgId,
           });
-          return { messageId: msgId, sessionId };
+          return { messageId: msgId, sessionId, assistantMessageId: assistantMsgId };
         }),
       ),
     )

package/src/chat-builder.ts CHANGED Viewed

@@ -17,7 +17,7 @@ import { createMessageId, createMessageAggregate } from "./aggregates/message";
 import { createAiGenerationListener, createAiResumeListener } from "./listeners/ai-generation-listener";
 import { createChatStreamRoute } from "./routes/chat-stream-route";
 import { createChatComponent } from "./react/chat-component";
-import type { ChatLabels } from "@arcote.tech/arc-ds";
+import type { ChatInputTextareaSlotProps, ChatLabels } from "@arcote.tech/arc-ds";
 import type { ComponentType, ReactNode } from "react";
 export interface ChatReactComponentOptions {
@@ -33,6 +33,12 @@ export interface ChatReactComponentOptions {
     onClick: () => void;
     disabled: boolean;
   }) => ReactNode;
+  /**
+   * Slot na pole tekstowe ChatInput. Pozwala podpiąć `VoiceTextarea` z
+   * `@arcote.tech/arc-ai-voice` żeby włączyć dyktowanie głosowe w chacie.
+   * Bez tego propsa używany jest domyślny `TextareaField`.
+   */
+  renderTextarea?: (props: ChatInputTextareaSlotProps) => ReactNode;
   /** Partial overrides for chat i18n labels. Falls back to English defaults. */
   labels?: Partial<ChatLabels>;
   /**
@@ -282,6 +288,7 @@ export class ArcChat<const Data extends ArcChatData = DefaultChatData> {
         showModelSelector: options.showModelSelector,
         showWebSearch: options.showWebSearch,
         renderSendButton: options.renderSendButton,
+        renderTextarea: options.renderTextarea,
         labels: options.labels,
         footer: options.footer,
       });

package/src/listeners/ai-generation-listener.ts CHANGED Viewed

@@ -3,6 +3,7 @@ import { listener, type ArcContextElement, type ArcFunction } from "@arcote.tech
 import type {
   ArcToolAny,
   AssistantContentBlock,
+  ChatStreamEvent,
   Conversation,
   ConversationTurn,
   LLMProvider,
@@ -187,6 +188,13 @@ interface RunLoopConfig {
   maxExecutionCount: number;
   toolChoice?: "auto" | "required" | { type: "function"; name: string };
   instruction?: ArcFunction<any>;
+  /** ID pustego assistant row'a utworzonego synchronicznie w mutacji
+   *  triggerującej generację (`sendMessage`/`systemMessage`/`startStage`/
+   *  `respondToTool`). Listener używa go w PIERWSZEJ iteracji zamiast
+   *  wołać `startAssistantTurn`. Dzięki temu klient widzi assistant row
+   *  natychmiast po mutacji i otwiera SSE zanim chunki zaczną lecieć.
+   *  Następne iteracje (multi-turn po server tool exec) tworzą fresh rows. */
+  preCreatedAssistantMessageId?: string;
 }
 async function runGenerationLoop(config: RunLoopConfig) {
@@ -208,11 +216,34 @@ async function runGenerationLoop(config: RunLoopConfig) {
   let history = config.history;
   let newTurnsStartIdx = config.initialNewTurnsStartIdx;
   let executionCount = 0;
-  /** The in-progress assistant row for the CURRENT iteration. Set at the top
-   *  of every iteration via `startAssistantTurn`; closed at the bottom via
-   *  `completeAssistantTurn`. The error handler uses it to mark the open turn
-   *  as failed. */
-  let currentTurnId: string | undefined;
+  /** The in-progress assistant row for the CURRENT iteration. Pre-set z
+   *  `preCreatedAssistantMessageId` dla pierwszej iteracji (atomowo utworzony
+   *  w mutacji). Wartość `undefined` przy iteracjach 2+ → loop wywoła
+   *  `startAssistantTurn` jak wcześniej. Closed at the bottom via
+   *  `completeAssistantTurn`. Error handler używa do mark open turn jako
+   *  failed. */
+  let currentTurnId: string | undefined = config.preCreatedAssistantMessageId;
+  /** True gdy w bieżącej iteracji `currentTurnId` był pre-utworzony przez
+   *  mutację. Wtedy skipujemy ponowne `startAssistantTurn`. */
+  let usingPreCreatedTurn = config.preCreatedAssistantMessageId != null;
+  /** Monotonicznie rosnący sequence number na całą sesję — klient po stronie
+   *  React trzyma `lastSeq` i dedupuje. */
+  let seqCounter = 0;
+  /** Wrapper na broadcast — wstrzykuje seq + messageId (gdy znany). */
+  const send = (
+    evt: Omit<ChatStreamEvent, "seq" | "sessionId"> & {
+      seq?: number;
+      sessionId?: string;
+    },
+  ) => {
+    seqCounter += 1;
+    broadcast(sessionId, {
+      ...evt,
+      sessionId,
+      seq: seqCounter,
+      messageId: evt.messageId ?? currentTurnId,
+    } as ChatStreamEvent);
+  };
   try {
     while (executionCount <= maxExecutionCount) {
@@ -236,10 +267,50 @@ async function runGenerationLoop(config: RunLoopConfig) {
       // Open a new in-progress assistant row before the stream starts. The
       // frontend detects `isGenerating: true` on this row and subscribes to
       // the SSE stream identified by `sessionId`.
-      const turnStart = await ctx
-        .mutate(messageElement)
-        .startAssistantTurn({ scopeId, sessionId, model });
-      currentTurnId = turnStart.messageId;
+      //
+      // Pierwsza iteracja: row już utworzony w mutacji triggerującej (przez
+      // `preCreatedAssistantMessageId`) → skipujemy. Kolejne iteracje
+      // (multi-turn po server tool exec): tworzymy fresh row.
+      if (usingPreCreatedTurn) {
+        usingPreCreatedTurn = false; // tylko dla 1. iteracji
+      } else {
+        const turnStart = await ctx
+          .mutate(messageElement)
+          .startAssistantTurn({ scopeId, sessionId, model });
+        currentTurnId = turnStart.messageId;
+      }
+      // Snapshot policy — co N=20 chunków LUB co T=2s zapisujemy `partialBlocks`
+      // do DB. Page reload mid-stream → klient czyta partial + kontynuuje SSE.
+      let chunksSinceSnapshot = 0;
+      let lastSnapshotAt = Date.now();
+      const SNAPSHOT_EVERY_N = 20;
+      const SNAPSHOT_EVERY_MS = 2000;
+      /** Aktualnie budowane bloki — accumulator dla snapshotu. */
+      const liveBlocks: AssistantContentBlock[] = [];
+      const liveToolCalls = new Map<
+        string,
+        { name: string; argumentsBuffer: string }
+      >();
+      const maybeSnapshot = async (force = false) => {
+        chunksSinceSnapshot += 1;
+        const due =
+          force ||
+          chunksSinceSnapshot >= SNAPSHOT_EVERY_N ||
+          Date.now() - lastSnapshotAt >= SNAPSHOT_EVERY_MS;
+        if (!due || !currentTurnId) return;
+        chunksSinceSnapshot = 0;
+        lastSnapshotAt = Date.now();
+        try {
+          await ctx.mutate(messageElement).saveProgressSnapshot({
+            messageId: currentTurnId,
+            partialBlocks: JSON.stringify(liveBlocks),
+            partialLastSeq: seqCounter,
+          });
+        } catch {
+          // snapshot best-effort — pojawi się przy kolejnym chunku
+        }
+      };
       const result = await provider.streamComplete(
         {
@@ -248,20 +319,76 @@ async function runGenerationLoop(config: RunLoopConfig) {
           conversation,
           tools: effectiveToolDefs,
           toolChoice,
+          // Skraca time-to-first-token dla gpt-5 / o-series — pomija reasoning
+          // step. Adaptery bez wsparcia ignorują.
+          reasoningEffort: "minimal",
         },
         (chunk) => {
-          if (chunk.type === "content_delta" && chunk.content) {
-            broadcast(sessionId, {
-              type: "content_delta",
-              sessionId,
-              content: chunk.content,
+          if (chunk.type === "text_delta" && chunk.textDelta) {
+            // accumulate w liveBlocks (last text block lub nowy)
+            const last = liveBlocks[liveBlocks.length - 1];
+            if (last && last.type === "text") {
+              last.text += chunk.textDelta;
+            } else {
+              liveBlocks.push({ type: "text", text: chunk.textDelta });
+            }
+            send({ type: "text_delta", textDelta: chunk.textDelta });
+            void maybeSnapshot();
+          } else if (chunk.type === "tool_call_started" && chunk.toolCallId) {
+            liveToolCalls.set(chunk.toolCallId, {
+              name: chunk.toolCallName ?? "",
+              argumentsBuffer: "",
             });
-          } else if (chunk.type === "usage_update") {
-            broadcast(sessionId, {
-              type: "usage_update",
-              sessionId,
-              usage: chunk.usage,
+            liveBlocks.push({
+              type: "tool_call",
+              id: chunk.toolCallId,
+              name: chunk.toolCallName ?? "",
+              arguments: {},
+            });
+            send({
+              type: "tool_call_pending",
+              toolCallId: chunk.toolCallId,
+              toolCallName: chunk.toolCallName,
+            });
+            void maybeSnapshot(true);
+          } else if (
+            chunk.type === "tool_call_arguments_delta" &&
+            chunk.toolCallId &&
+            chunk.argumentsDelta
+          ) {
+            const tc = liveToolCalls.get(chunk.toolCallId);
+            if (tc) tc.argumentsBuffer += chunk.argumentsDelta;
+            send({
+              type: "tool_call_arguments_delta",
+              toolCallId: chunk.toolCallId,
+              argumentsDelta: chunk.argumentsDelta,
             });
+          } else if (
+            chunk.type === "tool_call_arguments_complete" &&
+            chunk.toolCallId
+          ) {
+            // update accumulated block z complete args
+            const args = chunk.arguments ?? {};
+            const block = liveBlocks.find(
+              (b): b is Extract<AssistantContentBlock, { type: "tool_call" }> =>
+                b.type === "tool_call" && b.id === chunk.toolCallId,
+            );
+            if (block) block.arguments = args;
+            // toolCallName z liveBlocks (provider zna nazwę od tool_call_started)
+            // — bez tego klient pushuje tool z `toolName: ""` i nie znajduje
+            // viewComponent w toolsMap → fallback do generic ChatToolLog
+            // ("Wykonuję..."), AskQuestionsView nigdy nie mountuje się.
+            const toolCallName =
+              liveToolCalls.get(chunk.toolCallId)?.name ?? block?.name;
+            send({
+              type: "tool_call_arguments_complete",
+              toolCallId: chunk.toolCallId,
+              toolCallName,
+              arguments: args,
+            });
+            void maybeSnapshot(true);
+          } else if (chunk.type === "usage_update") {
+            send({ type: "usage_update", usage: chunk.usage });
           }
         },
       );
@@ -297,12 +424,12 @@ async function runGenerationLoop(config: RunLoopConfig) {
       currentTurnId = undefined;
       if (!hasToolCalls) {
-        broadcast(sessionId, {
+        send({
           type: "done",
-          sessionId,
           usage: result.usage,
           finishReason: result.finishReason,
           executionCount,
+          lastSeq: seqCounter,
         });
         endStream(sessionId);
         return;
@@ -316,10 +443,13 @@ async function runGenerationLoop(config: RunLoopConfig) {
       // Execute server tools — append each result to history as a separate turn
       const newToolResults: ConversationTurn[] = [];
       for (const tc of serverCalls) {
-        broadcast(sessionId, {
-          type: "server_tool_start",
-          sessionId,
-          toolCall: tc,
+        // `tool_call_pending` poszło już ze streamingu (przy `started`).
+        // Teraz `executing` po stronie servera.
+        send({
+          type: "tool_call_arguments_complete",
+          toolCallId: tc.id,
+          toolCallName: tc.name,
+          arguments: tc.arguments,
           executionCount,
         });
@@ -352,10 +482,10 @@ async function runGenerationLoop(config: RunLoopConfig) {
           isError,
         });
-        broadcast(sessionId, {
-          type: "server_tool_result",
-          sessionId,
-          toolCall: tc,
+        send({
+          type: "tool_call_executed",
+          toolCallId: tc.id,
+          toolCallName: tc.name,
           toolResult: {
             toolCallId: tc.id,
             name: tc.name,
@@ -378,9 +508,8 @@ async function runGenerationLoop(config: RunLoopConfig) {
       // The assistant turn (with the interactive tool_call) is already
       // persisted above. Listener B will resume.
       if (interactiveCalls.length > 0) {
-        broadcast(sessionId, {
+        send({
           type: "interactive_tool_request",
-          sessionId,
           toolCalls: interactiveCalls,
           executionCount,
         });
@@ -397,9 +526,8 @@ async function runGenerationLoop(config: RunLoopConfig) {
     }
   } catch (err) {
     const errorMsg = `AI error: ${err instanceof Error ? err.message : String(err)}`;
-    broadcast(sessionId, {
+    send({
       type: "error",
-      sessionId,
       error: errorMsg,
       executionCount,
     });
@@ -451,9 +579,11 @@ export function createAiGenerationListener(config: AiGenerationListenerConfig) {
         scopeId,
         content: userContent,
         model: modelName,
-      } = event.payload;
+        role,
+        assistantMessageId,
+      } = event.payload as any;
-      const model = modelName ?? "gpt-5.4-mini";
+      const model = modelName ?? "gpt-5";
       const provider = resolveProvider(model, scopeId);
       if (!provider) return;
@@ -483,6 +613,12 @@ export function createAiGenerationListener(config: AiGenerationListenerConfig) {
         maxExecutionCount,
         toolChoice: config.toolChoice,
         instruction,
+        // Pre-utworzony empty assistant row z mutacji `sendMessage`/
+        // `systemMessage`/`startStage` — pierwsza iteracja używa go zamiast
+        // wołać `startAssistantTurn`.
+        preCreatedAssistantMessageId: (
+          event.payload as { assistantMessageId?: string }
+        ).assistantMessageId,
       });
     });
 }
@@ -549,7 +685,7 @@ export function createAiResumeListener(config: AiGenerationListenerConfig) {
       const lastAssistantRow = [...dbMessages]
         .reverse()
         .find((m: any) => m.role === "assistant" && m.model);
-      const model = lastAssistantRow?.model ?? "gpt-5.4-mini";
+      const model = lastAssistantRow?.model ?? "gpt-5";
       const provider = resolveProvider(model, scopeId);
       if (!provider) return;
@@ -572,6 +708,10 @@ export function createAiResumeListener(config: AiGenerationListenerConfig) {
         maxExecutionCount,
         toolChoice: config.toolChoice,
         instruction,
+        // Pre-utworzony empty assistant row z mutacji `respondToTool`.
+        preCreatedAssistantMessageId: (
+          event.payload as { assistantMessageId?: string }
+        ).assistantMessageId,
       });
     });
 }