@arcote.tech/arc-chat 0.7.10 → 0.7.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,13 +3,17 @@ import { listener, type ArcContextElement, type ArcFunction } from "@arcote.tech
3
3
  import type {
4
4
  ArcToolAny,
5
5
  AssistantContentBlock,
6
- ChatStreamEvent,
7
6
  Conversation,
8
7
  ConversationTurn,
9
8
  LLMProvider,
10
9
  ToolCall,
11
10
  } from "@arcote.tech/arc-ai";
12
- import { broadcast, endStream } from "../streaming/stream-registry";
11
+ import {
12
+ finalize,
13
+ publish,
14
+ startStream,
15
+ type PublishableEvent,
16
+ } from "../streaming/stream-registry";
13
17
 
14
18
  // ─── Config ─────────────────────────────────────────────────────
15
19
 
@@ -24,6 +28,37 @@ export interface AiGenerationListenerConfig {
24
28
  allMutationElements: ArcContextElement<any>[];
25
29
  maxExecutionCount: number;
26
30
  toolChoice?: "auto" | "required" | { type: "function"; name: string };
31
+ /**
32
+ * Billing alias for this chat — written to the `usageRecorded` event
33
+ * payload so admin reports can attribute cost back to which chat (e.g.
34
+ * `"chat-identity"`, `"chat-create-content"`). Defaults to `name` if the
35
+ * builder didn't override via `.alias(...)`.
36
+ */
37
+ alias?: string;
38
+ /**
39
+ * Optional billing hook from `ai()` factory. Called after every
40
+ * `completeAssistantTurn` (each turn, including those that close with
41
+ * tool calls) so the credit ledger view sees consistent usage events.
42
+ * No-op when undefined (ai() built without `billing` config).
43
+ */
44
+ recordUsage?: (
45
+ ctx: any,
46
+ params: {
47
+ scopeId: string;
48
+ alias: string;
49
+ model: string;
50
+ usage: import("@arcote.tech/arc-ai").TokenUsage;
51
+ metadata?: Record<string, unknown>;
52
+ },
53
+ ) => Promise<void>;
54
+ /**
55
+ * Consumer-supplied function from chat-builder's `.billTo(...)` — maps
56
+ * decoded params of the chat's protection token (snapshotted at
57
+ * `messageSent` emit time) to the ledger scopeId we charge. Required when
58
+ * `recordUsage` is set (chat-builder enforces this at build time), so the
59
+ * listener can treat the pair as always-present in the call site.
60
+ */
61
+ billTo?: (tokenParams: Record<string, any>) => string;
27
62
  }
28
63
 
29
64
  // ─── History reconstruction ─────────────────────────────────────
@@ -125,7 +160,7 @@ async function buildInstructions(
125
160
  scopeId,
126
161
  };
127
162
  const result = await (instruction.handler as Function)(instructionCtx);
128
- if (typeof result === "string") return { prompt: result };
163
+ if (typeof result === "string") return result as unknown as InstructionResult;
129
164
  if (result && typeof result === "object" && "prompt" in result) return result as InstructionResult;
130
165
  return { prompt: "" };
131
166
  }
@@ -136,11 +171,6 @@ async function buildInstructions(
136
171
  * Decide whether to ask the provider for a continuation (delta) or send the
137
172
  * full conversation. Continuation is only used when the provider supports it
138
173
  * AND we have a known `responseId` to anchor the request.
139
- *
140
- * @param history Full conversation history including any new turns appended
141
- * for this call.
142
- * @param newTurnsStartIdx Index in `history` where "new" turns begin
143
- * (everything before is "already known" by the model).
144
174
  */
145
175
  function makeConversation(
146
176
  provider: LLMProvider,
@@ -190,11 +220,19 @@ interface RunLoopConfig {
190
220
  instruction?: ArcFunction<any>;
191
221
  /** ID pustego assistant row'a utworzonego synchronicznie w mutacji
192
222
  * triggerującej generację (`sendMessage`/`systemMessage`/`startStage`/
193
- * `respondToTool`). Listener używa go w PIERWSZEJ iteracji zamiast
194
- * wołać `startAssistantTurn`. Dzięki temu klient widzi assistant row
195
- * natychmiast po mutacji i otwiera SSE zanim chunki zaczną lecieć.
196
- * Następne iteracje (multi-turn po server tool exec) tworzą fresh rows. */
223
+ * `respondToTool`/`retryGeneration`). Listener używa go w PIERWSZEJ
224
+ * iteracji zamiast wołać `startAssistantTurn`. Dzięki temu klient widzi
225
+ * assistant row natychmiast po mutacji i otwiera SSE zanim chunki zaczną
226
+ * lecieć. Następne iteracje (multi-turn po server tool exec) tworzą fresh
227
+ * rows. */
197
228
  preCreatedAssistantMessageId?: string;
229
+ /** Billing alias — written to `usageRecorded` event payload. Optional;
230
+ * no-op when paired `recordUsage` is undefined. */
231
+ alias?: string;
232
+ /** Billing hook — see `AiGenerationListenerConfig.recordUsage`. */
233
+ recordUsage?: AiGenerationListenerConfig["recordUsage"];
234
+ /** Token-params → scopeId mapper from chat-builder `.billTo(...)`. */
235
+ billTo?: AiGenerationListenerConfig["billTo"];
198
236
  }
199
237
 
200
238
  async function runGenerationLoop(config: RunLoopConfig) {
@@ -213,36 +251,22 @@ async function runGenerationLoop(config: RunLoopConfig) {
213
251
  instruction,
214
252
  } = config;
215
253
 
216
- let history = config.history;
254
+ const history = config.history;
217
255
  let newTurnsStartIdx = config.initialNewTurnsStartIdx;
218
256
  let executionCount = 0;
219
257
  /** The in-progress assistant row for the CURRENT iteration. Pre-set z
220
- * `preCreatedAssistantMessageId` dla pierwszej iteracji (atomowo utworzony
221
- * w mutacji). Wartość `undefined` przy iteracjach 2+ → loop wywoła
222
- * `startAssistantTurn` jak wcześniej. Closed at the bottom via
223
- * `completeAssistantTurn`. Error handler używa do mark open turn jako
224
- * failed. */
258
+ * `preCreatedAssistantMessageId` dla pierwszej iteracji. Wartość
259
+ * `undefined` przy iteracjach 2+ → loop wywoła `startAssistantTurn`. */
225
260
  let currentTurnId: string | undefined = config.preCreatedAssistantMessageId;
226
261
  /** True gdy w bieżącej iteracji `currentTurnId` był pre-utworzony przez
227
262
  * mutację. Wtedy skipujemy ponowne `startAssistantTurn`. */
228
263
  let usingPreCreatedTurn = config.preCreatedAssistantMessageId != null;
229
- /** Monotonicznie rosnący sequence number na całą sesję — klient po stronie
230
- * React trzyma `lastSeq` i dedupuje. */
231
- let seqCounter = 0;
232
- /** Wrapper na broadcast — wstrzykuje seq + messageId (gdy znany). */
233
- const send = (
234
- evt: Omit<ChatStreamEvent, "seq" | "sessionId"> & {
235
- seq?: number;
236
- sessionId?: string;
237
- },
238
- ) => {
239
- seqCounter += 1;
240
- broadcast(sessionId, {
241
- ...evt,
242
- sessionId,
243
- seq: seqCounter,
244
- messageId: evt.messageId ?? currentTurnId,
245
- } as ChatStreamEvent);
264
+
265
+ /** Pushuje event do in-memory stream-registry per `currentTurnId`. Registry
266
+ * akumuluje `currentBlocks` i broadcast'uje do wszystkich subscriberów. */
267
+ const send = (event: PublishableEvent) => {
268
+ if (!currentTurnId) return;
269
+ publish(currentTurnId, event);
246
270
  };
247
271
 
248
272
  try {
@@ -266,7 +290,7 @@ async function runGenerationLoop(config: RunLoopConfig) {
266
290
 
267
291
  // Open a new in-progress assistant row before the stream starts. The
268
292
  // frontend detects `isGenerating: true` on this row and subscribes to
269
- // the SSE stream identified by `sessionId`.
293
+ // the per-messageId SSE stream.
270
294
  //
271
295
  // Pierwsza iteracja: row już utworzony w mutacji triggerującej (przez
272
296
  // `preCreatedAssistantMessageId`) → skipujemy. Kolejne iteracje
@@ -280,37 +304,11 @@ async function runGenerationLoop(config: RunLoopConfig) {
280
304
  currentTurnId = turnStart.messageId;
281
305
  }
282
306
 
283
- // Snapshot policy co N=20 chunków LUB co T=2s zapisujemy `partialBlocks`
284
- // do DB. Page reload mid-stream klient czyta partial + kontynuuje SSE.
285
- let chunksSinceSnapshot = 0;
286
- let lastSnapshotAt = Date.now();
287
- const SNAPSHOT_EVERY_N = 20;
288
- const SNAPSHOT_EVERY_MS = 2000;
289
- /** Aktualnie budowane bloki — accumulator dla snapshotu. */
290
- const liveBlocks: AssistantContentBlock[] = [];
291
- const liveToolCalls = new Map<
292
- string,
293
- { name: string; argumentsBuffer: string }
294
- >();
295
- const maybeSnapshot = async (force = false) => {
296
- chunksSinceSnapshot += 1;
297
- const due =
298
- force ||
299
- chunksSinceSnapshot >= SNAPSHOT_EVERY_N ||
300
- Date.now() - lastSnapshotAt >= SNAPSHOT_EVERY_MS;
301
- if (!due || !currentTurnId) return;
302
- chunksSinceSnapshot = 0;
303
- lastSnapshotAt = Date.now();
304
- try {
305
- await ctx.mutate(messageElement).saveProgressSnapshot({
306
- messageId: currentTurnId,
307
- partialBlocks: JSON.stringify(liveBlocks),
308
- partialLastSeq: seqCounter,
309
- });
310
- } catch {
311
- // snapshot best-effort — pojawi się przy kolejnym chunku
312
- }
313
- };
307
+ // Open the in-memory stream od teraz `subscribe(currentTurnId)`
308
+ // zwraca live SSE z `init` + zachowanymi chunkami. Idempotent: jeśli
309
+ // klient zdąży się zasubskrybować wcześniej (race), startStream nie
310
+ // robi nic.
311
+ startStream(currentTurnId!);
314
312
 
315
313
  const result = await provider.streamComplete(
316
314
  {
@@ -325,39 +323,18 @@ async function runGenerationLoop(config: RunLoopConfig) {
325
323
  },
326
324
  (chunk) => {
327
325
  if (chunk.type === "text_delta" && chunk.textDelta) {
328
- // accumulate w liveBlocks (last text block lub nowy)
329
- const last = liveBlocks[liveBlocks.length - 1];
330
- if (last && last.type === "text") {
331
- last.text += chunk.textDelta;
332
- } else {
333
- liveBlocks.push({ type: "text", text: chunk.textDelta });
334
- }
335
326
  send({ type: "text_delta", textDelta: chunk.textDelta });
336
- void maybeSnapshot();
337
327
  } else if (chunk.type === "tool_call_started" && chunk.toolCallId) {
338
- liveToolCalls.set(chunk.toolCallId, {
339
- name: chunk.toolCallName ?? "",
340
- argumentsBuffer: "",
341
- });
342
- liveBlocks.push({
343
- type: "tool_call",
344
- id: chunk.toolCallId,
345
- name: chunk.toolCallName ?? "",
346
- arguments: {},
347
- });
348
328
  send({
349
329
  type: "tool_call_pending",
350
330
  toolCallId: chunk.toolCallId,
351
331
  toolCallName: chunk.toolCallName,
352
332
  });
353
- void maybeSnapshot(true);
354
333
  } else if (
355
334
  chunk.type === "tool_call_arguments_delta" &&
356
335
  chunk.toolCallId &&
357
336
  chunk.argumentsDelta
358
337
  ) {
359
- const tc = liveToolCalls.get(chunk.toolCallId);
360
- if (tc) tc.argumentsBuffer += chunk.argumentsDelta;
361
338
  send({
362
339
  type: "tool_call_arguments_delta",
363
340
  toolCallId: chunk.toolCallId,
@@ -367,27 +344,13 @@ async function runGenerationLoop(config: RunLoopConfig) {
367
344
  chunk.type === "tool_call_arguments_complete" &&
368
345
  chunk.toolCallId
369
346
  ) {
370
- // update accumulated block z complete args
371
- const args = chunk.arguments ?? {};
372
- const block = liveBlocks.find(
373
- (b): b is Extract<AssistantContentBlock, { type: "tool_call" }> =>
374
- b.type === "tool_call" && b.id === chunk.toolCallId,
375
- );
376
- if (block) block.arguments = args;
377
- // toolCallName z liveBlocks (provider zna nazwę od tool_call_started)
378
- // — bez tego klient pushuje tool z `toolName: ""` i nie znajduje
379
- // viewComponent w toolsMap → fallback do generic ChatToolLog
380
- // ("Wykonuję..."), AskQuestionsView nigdy nie mountuje się.
381
- const toolCallName =
382
- liveToolCalls.get(chunk.toolCallId)?.name ?? block?.name;
383
347
  send({
384
348
  type: "tool_call_arguments_complete",
385
349
  toolCallId: chunk.toolCallId,
386
- toolCallName,
387
- arguments: args,
350
+ toolCallName: chunk.toolCallName,
351
+ arguments: chunk.arguments ?? {},
388
352
  });
389
- void maybeSnapshot(true);
390
- } else if (chunk.type === "usage_update") {
353
+ } else if (chunk.type === "usage_update" && chunk.usage) {
391
354
  send({ type: "usage_update", usage: chunk.usage });
392
355
  }
393
356
  },
@@ -412,47 +375,78 @@ async function runGenerationLoop(config: RunLoopConfig) {
412
375
  const hasToolCalls =
413
376
  result.finishReason === "tool_call" && toolCalls.length > 0;
414
377
 
415
- // Close the turn row — same row that was opened above. The final turn
416
- // (no tool calls) carries the usage; intermediate turns carry only the
417
- // blocks + responseId.
378
+ // Close the turn row — same row that was opened above. Final blocks
379
+ // are the SINGLE persistent write of message content in this turn.
380
+ // Usage zapisywane ZAWSZE (intermediate tool turns też zużywają tokeny
381
+ // i muszą być rozliczone w `recordUsage` poniżej; klient dostaje pełen
382
+ // history of cost per turn from message rows).
418
383
  await ctx.mutate(messageElement).completeAssistantTurn({
419
384
  messageId: currentTurnId!,
420
385
  blocks: JSON.stringify(result.blocks),
421
386
  previousResponseId: result.responseId,
422
- usage: hasToolCalls ? undefined : JSON.stringify(result.usage),
387
+ usage: JSON.stringify(result.usage),
423
388
  });
424
- currentTurnId = undefined;
425
389
 
426
- if (!hasToolCalls) {
427
- send({
428
- type: "done",
429
- usage: result.usage,
430
- finishReason: result.finishReason,
431
- executionCount,
432
- lastSeq: seqCounter,
433
- });
434
- endStream(sessionId);
435
- return;
390
+ // Billing hook — emit usageRecorded event for the credit ledger view.
391
+ // Called after `completeAssistantTurn` so the message row exists in DB
392
+ // before its cost is attributed.
393
+ //
394
+ // chat-builder enforces `.billTo()` is present whenever `recordUsage`
395
+ // is wired, so the only legitimate "skip" path is "billing not
396
+ // configured at all" — both undefined. We still guard defensively.
397
+ if (config.recordUsage && config.alias && config.billTo) {
398
+ const billingScopeId = config.billTo(
399
+ ((ctx as any).$auth?.params as Record<string, any>) ?? {},
400
+ );
401
+ try {
402
+ await config.recordUsage(ctx, {
403
+ scopeId: billingScopeId,
404
+ alias: config.alias,
405
+ model,
406
+ usage: result.usage,
407
+ metadata: {
408
+ messageId: currentTurnId!,
409
+ sessionId,
410
+ turnIndex: executionCount,
411
+ chatScopeId: scopeId,
412
+ },
413
+ });
414
+ } catch (err) {
415
+ // Best-effort: billing failure shouldn't break generation.
416
+ console.error("[arc-chat] recordUsage failed:", err);
417
+ }
436
418
  }
437
419
 
420
+ // Tear down the in-memory stream: broadcast `done` do subscriberów,
421
+ // close controllery, drop registry entry po grace window. Klient z
422
+ // `done` flippuje isStreaming=false i renderuje final blocks z DB.
423
+ finalize(
424
+ currentTurnId!,
425
+ hasToolCalls
426
+ ? undefined
427
+ : {
428
+ usage: result.usage,
429
+ finishReason: result.finishReason,
430
+ executionCount,
431
+ },
432
+ );
433
+ currentTurnId = undefined;
434
+
435
+ if (!hasToolCalls) return;
436
+
438
437
  const serverCalls = toolCalls.filter((tc) => serverToolsMap.has(tc.name));
439
438
  const interactiveCalls = toolCalls.filter((tc) =>
440
439
  interactiveToolNames.has(tc.name),
441
440
  );
442
441
 
443
- // Execute server tools — append each result to history as a separate turn
442
+ // Execute server tools — append each result to history as a separate turn.
443
+ // Note: ten turn (`finalize`d powyżej) już jest zamknięty, więc kolejne
444
+ // `send()` byłyby no-opem. Server-tool execution emit'uje eventy poprzez
445
+ // mutację `saveToolResult` (która tworzy tool_result row w DB) — klient
446
+ // dostaje je via aggregate query update. Nie korzystamy ze stream-registry
447
+ // dla tool execution.
444
448
  const newToolResults: ConversationTurn[] = [];
445
449
  for (const tc of serverCalls) {
446
- // `tool_call_pending` poszło już ze streamingu (przy `started`).
447
- // Teraz `executing` po stronie servera.
448
- send({
449
- type: "tool_call_arguments_complete",
450
- toolCallId: tc.id,
451
- toolCallName: tc.name,
452
- arguments: tc.arguments,
453
- executionCount,
454
- });
455
-
456
450
  const tool = serverToolsMap.get(tc.name);
457
451
  let resultContent: string;
458
452
  let isError = false;
@@ -482,19 +476,6 @@ async function runGenerationLoop(config: RunLoopConfig) {
482
476
  isError,
483
477
  });
484
478
 
485
- send({
486
- type: "tool_call_executed",
487
- toolCallId: tc.id,
488
- toolCallName: tc.name,
489
- toolResult: {
490
- toolCallId: tc.id,
491
- name: tc.name,
492
- content: resultContent,
493
- isError,
494
- },
495
- executionCount,
496
- });
497
-
498
479
  newToolResults.push({
499
480
  role: "tool_result",
500
481
  toolCallId: tc.id,
@@ -505,16 +486,9 @@ async function runGenerationLoop(config: RunLoopConfig) {
505
486
  }
506
487
 
507
488
  // Interactive tools — stop the loop, wait for userResponded.
508
- // The assistant turn (with the interactive tool_call) is already
509
- // persisted above. Listener B will resume.
510
- if (interactiveCalls.length > 0) {
511
- send({
512
- type: "interactive_tool_request",
513
- toolCalls: interactiveCalls,
514
- executionCount,
515
- });
516
- return;
517
- }
489
+ // The assistant turn (with the interactive tool_call) was already
490
+ // finalized above. Listener B (resume) will create a fresh turn.
491
+ if (interactiveCalls.length > 0) return;
518
492
 
519
493
  // Append tool results to history; mark them as the "new turns" for the
520
494
  // next iteration's continuation request.
@@ -526,12 +500,12 @@ async function runGenerationLoop(config: RunLoopConfig) {
526
500
  }
527
501
  } catch (err) {
528
502
  const errorMsg = `AI error: ${err instanceof Error ? err.message : String(err)}`;
529
- send({
530
- type: "error",
531
- error: errorMsg,
532
- executionCount,
533
- });
534
503
  if (currentTurnId) {
504
+ publish(currentTurnId, {
505
+ type: "error",
506
+ error: errorMsg,
507
+ executionCount,
508
+ });
535
509
  try {
536
510
  await ctx.mutate(messageElement).completeAssistantTurn({
537
511
  messageId: currentTurnId,
@@ -539,8 +513,8 @@ async function runGenerationLoop(config: RunLoopConfig) {
539
513
  error: errorMsg,
540
514
  });
541
515
  } catch {}
516
+ finalize(currentTurnId, { error: errorMsg, executionCount });
542
517
  }
543
- endStream(sessionId);
544
518
  }
545
519
  }
546
520
 
@@ -579,8 +553,6 @@ export function createAiGenerationListener(config: AiGenerationListenerConfig) {
579
553
  scopeId,
580
554
  content: userContent,
581
555
  model: modelName,
582
- role,
583
- assistantMessageId,
584
556
  } = event.payload as any;
585
557
 
586
558
  const model = modelName ?? "gpt-5";
@@ -613,9 +585,9 @@ export function createAiGenerationListener(config: AiGenerationListenerConfig) {
613
585
  maxExecutionCount,
614
586
  toolChoice: config.toolChoice,
615
587
  instruction,
616
- // Pre-utworzony empty assistant row z mutacji `sendMessage`/
617
- // `systemMessage`/`startStage` — pierwsza iteracja używa go zamiast
618
- // wołać `startAssistantTurn`.
588
+ alias: config.alias,
589
+ recordUsage: config.recordUsage,
590
+ billTo: config.billTo,
619
591
  preCreatedAssistantMessageId: (
620
592
  event.payload as { assistantMessageId?: string }
621
593
  ).assistantMessageId,
@@ -653,13 +625,7 @@ export function createAiResumeListener(config: AiGenerationListenerConfig) {
653
625
  .query([messageElement, ...allQueryElements])
654
626
  .mutate([messageElement, ...allMutationElements])
655
627
  .handle(async (ctx, event) => {
656
- const {
657
- sessionId,
658
- scopeId,
659
- toolCallId,
660
- toolName,
661
- content: toolResult,
662
- } = event.payload;
628
+ const { sessionId, scopeId, toolCallId } = event.payload;
663
629
 
664
630
  const dbMessages = await ctx
665
631
  .query(messageElement)
@@ -670,8 +636,6 @@ export function createAiResumeListener(config: AiGenerationListenerConfig) {
670
636
  const history = buildHistory(dbMessages);
671
637
 
672
638
  // Compute "new turns start" — index of the just-arrived tool_result.
673
- // Anything before it is "already known" (assistant emitted the matching
674
- // tool_call earlier and OpenAI has it server-side).
675
639
  let newTurnsStartIdx = history.length;
676
640
  for (let i = history.length - 1; i >= 0; i--) {
677
641
  const t = history[i];
@@ -681,7 +645,6 @@ export function createAiResumeListener(config: AiGenerationListenerConfig) {
681
645
  }
682
646
  }
683
647
 
684
- // Determine the model from the most recent assistant row in DB
685
648
  const lastAssistantRow = [...dbMessages]
686
649
  .reverse()
687
650
  .find((m: any) => m.role === "assistant" && m.model);
@@ -690,9 +653,6 @@ export function createAiResumeListener(config: AiGenerationListenerConfig) {
690
653
  const provider = resolveProvider(model, scopeId);
691
654
  if (!provider) return;
692
655
 
693
- void toolName;
694
- void toolResult;
695
-
696
656
  await runGenerationLoop({
697
657
  ctx,
698
658
  messageElement,
@@ -708,10 +668,105 @@ export function createAiResumeListener(config: AiGenerationListenerConfig) {
708
668
  maxExecutionCount,
709
669
  toolChoice: config.toolChoice,
710
670
  instruction,
711
- // Pre-utworzony empty assistant row z mutacji `respondToTool`.
671
+ alias: config.alias,
672
+ recordUsage: config.recordUsage,
673
+ billTo: config.billTo,
712
674
  preCreatedAssistantMessageId: (
713
675
  event.payload as { assistantMessageId?: string }
714
676
  ).assistantMessageId,
715
677
  });
716
678
  });
717
679
  }
680
+
681
+ // ─── Listener C: retryRequested → AI retry ──────────────────────
682
+
683
+ /**
684
+ * Reaguje na `retryRequested` emit'owany przez mutację `retryGeneration`
685
+ * (klient klika Retry po SSE 410). Interrupted assistant row jest już
686
+ * usunięty z DB przez projection; w event payload mamy fresh
687
+ * `preCreatedAssistantMessageId`. Listener buduje historię z DB (kończy
688
+ * się na ostatniej user message, bo fresh assistant ma `isGenerating=true`
689
+ * bez `blocks` → skip'owany przez `buildHistory`) i odpala generation loop.
690
+ */
691
+ export function createAiRetryListener(config: AiGenerationListenerConfig) {
692
+ const {
693
+ name,
694
+ messageElement,
695
+ resolveProvider,
696
+ instruction,
697
+ serverTools,
698
+ interactiveTools,
699
+ allQueryElements,
700
+ allMutationElements,
701
+ maxExecutionCount,
702
+ } = config;
703
+
704
+ const retryRequestedEvent = messageElement.getEvent("retryRequested");
705
+ const serverToolsMap = new Map(serverTools.map((t) => [t.name, t]));
706
+ const interactiveToolNames = new Set(interactiveTools.map((t) => t.name));
707
+ const allToolsForLLM = [...serverTools, ...interactiveTools];
708
+ const toolDefs =
709
+ allToolsForLLM.length > 0
710
+ ? allToolsForLLM.map((t) => t.toJsonSchema())
711
+ : undefined;
712
+
713
+ return listener(`${name}AiRetry`)
714
+ .listenTo([retryRequestedEvent])
715
+ .async()
716
+ .query([messageElement, ...allQueryElements])
717
+ .mutate([messageElement, ...allMutationElements])
718
+ .handle(async (ctx, event) => {
719
+ const {
720
+ messageId: assistantMsgId,
721
+ sessionId,
722
+ scopeId,
723
+ model: modelName,
724
+ } = event.payload as any;
725
+
726
+ const dbMessages = await ctx
727
+ .query(messageElement)
728
+ .getByScope({ scopeId });
729
+
730
+ const history = buildHistory(dbMessages);
731
+
732
+ // Find the last user turn — that's the boundary for "new turns" so the
733
+ // continuation request only sends it as delta. Pre-existing assistant
734
+ // turns w historii zatrzymują się przed nią.
735
+ let newTurnsStartIdx = history.length;
736
+ for (let i = history.length - 1; i >= 0; i--) {
737
+ if (history[i].role === "user") {
738
+ newTurnsStartIdx = i;
739
+ break;
740
+ }
741
+ }
742
+
743
+ const lastAssistantRow = [...dbMessages]
744
+ .reverse()
745
+ .find((m: any) => m.role === "assistant" && m.model);
746
+ const model = modelName ?? lastAssistantRow?.model ?? "gpt-5";
747
+
748
+ const provider = resolveProvider(model, scopeId);
749
+ if (!provider) return;
750
+
751
+ await runGenerationLoop({
752
+ ctx,
753
+ messageElement,
754
+ provider,
755
+ model,
756
+ history,
757
+ initialNewTurnsStartIdx: newTurnsStartIdx,
758
+ toolDefs,
759
+ serverToolsMap,
760
+ interactiveToolNames,
761
+ scopeId,
762
+ sessionId,
763
+ maxExecutionCount,
764
+ toolChoice: config.toolChoice,
765
+ instruction,
766
+ alias: config.alias,
767
+ recordUsage: config.recordUsage,
768
+ billTo: config.billTo,
769
+ preCreatedAssistantMessageId: assistantMsgId,
770
+ });
771
+ });
772
+ }