npm - @arcote.tech/arc-chat - Versions diffs - 0.7.10 → 0.7.12 - Mend

@arcote.tech/arc-chat 0.7.10 → 0.7.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9)

package/README.md +258 -0
package/package.json +7 -7
package/src/aggregates/message.ts +74 -58
package/src/chat-builder.ts +80 -6
package/src/index.ts +14 -2
package/src/listeners/ai-generation-listener.ts +234 -179
package/src/react/chat-component.tsx +241 -204
package/src/routes/chat-stream-route.ts +21 -10
package/src/streaming/stream-registry.ts +252 -118

package/src/listeners/ai-generation-listener.ts CHANGED Viewed

@@ -3,13 +3,17 @@ import { listener, type ArcContextElement, type ArcFunction } from "@arcote.tech
 import type {
   ArcToolAny,
   AssistantContentBlock,
-  ChatStreamEvent,
   Conversation,
   ConversationTurn,
   LLMProvider,
   ToolCall,
 } from "@arcote.tech/arc-ai";
-import { broadcast, endStream } from "../streaming/stream-registry";
+import {
+  finalize,
+  publish,
+  startStream,
+  type PublishableEvent,
+} from "../streaming/stream-registry";
 // ─── Config ─────────────────────────────────────────────────────
@@ -24,6 +28,37 @@ export interface AiGenerationListenerConfig {
   allMutationElements: ArcContextElement<any>[];
   maxExecutionCount: number;
   toolChoice?: "auto" | "required" | { type: "function"; name: string };
+  /**
+   * Billing alias for this chat — written to the `usageRecorded` event
+   * payload so admin reports can attribute cost back to which chat (e.g.
+   * `"chat-identity"`, `"chat-create-content"`). Defaults to `name` if the
+   * builder didn't override via `.alias(...)`.
+   */
+  alias?: string;
+  /**
+   * Optional billing hook from `ai()` factory. Called after every
+   * `completeAssistantTurn` (each turn, including those that close with
+   * tool calls) so the credit ledger view sees consistent usage events.
+   * No-op when undefined (ai() built without `billing` config).
+   */
+  recordUsage?: (
+    ctx: any,
+    params: {
+      scopeId: string;
+      alias: string;
+      model: string;
+      usage: import("@arcote.tech/arc-ai").TokenUsage;
+      metadata?: Record<string, unknown>;
+    },
+  ) => Promise<void>;
+  /**
+   * Consumer-supplied function from chat-builder's `.billTo(...)` — maps
+   * decoded params of the chat's protection token (snapshotted at
+   * `messageSent` emit time) to the ledger scopeId we charge. Required when
+   * `recordUsage` is set (chat-builder enforces this at build time), so the
+   * listener can treat the pair as always-present in the call site.
+   */
+  billTo?: (tokenParams: Record<string, any>) => string;
 }
 // ─── History reconstruction ─────────────────────────────────────
@@ -125,7 +160,7 @@ async function buildInstructions(
     scopeId,
   };
   const result = await (instruction.handler as Function)(instructionCtx);
-  if (typeof result === "string") return { prompt: result };
+  if (typeof result === "string") return { prompt: result }; // wrap bare strings so callers always receive an object with `.prompt`
   if (result && typeof result === "object" && "prompt" in result) return result as InstructionResult;
   return { prompt: "" };
 }
@@ -136,11 +171,6 @@ async function buildInstructions(
  * Decide whether to ask the provider for a continuation (delta) or send the
  * full conversation. Continuation is only used when the provider supports it
  * AND we have a known `responseId` to anchor the request.
- *
- * @param history    Full conversation history including any new turns appended
- *                   for this call.
- * @param newTurnsStartIdx  Index in `history` where "new" turns begin
- *                          (everything before is "already known" by the model).
  */
 function makeConversation(
   provider: LLMProvider,
@@ -190,11 +220,19 @@ interface RunLoopConfig {
   instruction?: ArcFunction<any>;
   /** ID pustego assistant row'a utworzonego synchronicznie w mutacji
    *  triggerującej generację (`sendMessage`/`systemMessage`/`startStage`/
-   *  `respondToTool`). Listener używa go w PIERWSZEJ iteracji zamiast
-   *  wołać `startAssistantTurn`. Dzięki temu klient widzi assistant row
-   *  natychmiast po mutacji i otwiera SSE zanim chunki zaczną lecieć.
-   *  Następne iteracje (multi-turn po server tool exec) tworzą fresh rows. */
+   *  `respondToTool`/`retryGeneration`). Listener używa go w PIERWSZEJ
+   *  iteracji zamiast wołać `startAssistantTurn`. Dzięki temu klient widzi
+   *  assistant row natychmiast po mutacji i otwiera SSE zanim chunki zaczną
+   *  lecieć. Następne iteracje (multi-turn po server tool exec) tworzą fresh
+   *  rows. */
   preCreatedAssistantMessageId?: string;
+  /** Billing alias — written to `usageRecorded` event payload. Optional;
+   *  no-op when paired `recordUsage` is undefined. */
+  alias?: string;
+  /** Billing hook — see `AiGenerationListenerConfig.recordUsage`. */
+  recordUsage?: AiGenerationListenerConfig["recordUsage"];
+  /** Token-params → scopeId mapper from chat-builder `.billTo(...)`. */
+  billTo?: AiGenerationListenerConfig["billTo"];
 }
 async function runGenerationLoop(config: RunLoopConfig) {
@@ -213,36 +251,22 @@ async function runGenerationLoop(config: RunLoopConfig) {
     instruction,
   } = config;
-  let history = config.history;
+  const history = config.history;
   let newTurnsStartIdx = config.initialNewTurnsStartIdx;
   let executionCount = 0;
   /** The in-progress assistant row for the CURRENT iteration. Pre-set z
-   *  `preCreatedAssistantMessageId` dla pierwszej iteracji (atomowo utworzony
-   *  w mutacji). Wartość `undefined` przy iteracjach 2+ → loop wywoła
-   *  `startAssistantTurn` jak wcześniej. Closed at the bottom via
-   *  `completeAssistantTurn`. Error handler używa do mark open turn jako
-   *  failed. */
+   *  `preCreatedAssistantMessageId` dla pierwszej iteracji. Wartość
+   *  `undefined` przy iteracjach 2+ → loop wywoła `startAssistantTurn`. */
   let currentTurnId: string | undefined = config.preCreatedAssistantMessageId;
   /** True gdy w bieżącej iteracji `currentTurnId` był pre-utworzony przez
    *  mutację. Wtedy skipujemy ponowne `startAssistantTurn`. */
   let usingPreCreatedTurn = config.preCreatedAssistantMessageId != null;
-  /** Monotonicznie rosnący sequence number na całą sesję — klient po stronie
-   *  React trzyma `lastSeq` i dedupuje. */
-  let seqCounter = 0;
-  /** Wrapper na broadcast — wstrzykuje seq + messageId (gdy znany). */
-  const send = (
-    evt: Omit<ChatStreamEvent, "seq" | "sessionId"> & {
-      seq?: number;
-      sessionId?: string;
-    },
-  ) => {
-    seqCounter += 1;
-    broadcast(sessionId, {
-      ...evt,
-      sessionId,
-      seq: seqCounter,
-      messageId: evt.messageId ?? currentTurnId,
-    } as ChatStreamEvent);
+  /** Pushuje event do in-memory stream-registry per `currentTurnId`. Registry
+   *  akumuluje `currentBlocks` i broadcast'uje do wszystkich subscriberów. */
+  const send = (event: PublishableEvent) => {
+    if (!currentTurnId) return;
+    publish(currentTurnId, event);
   };
   try {
@@ -266,7 +290,7 @@ async function runGenerationLoop(config: RunLoopConfig) {
       // Open a new in-progress assistant row before the stream starts. The
       // frontend detects `isGenerating: true` on this row and subscribes to
-      // the SSE stream identified by `sessionId`.
+      // the per-messageId SSE stream.
       //
       // Pierwsza iteracja: row już utworzony w mutacji triggerującej (przez
       // `preCreatedAssistantMessageId`) → skipujemy. Kolejne iteracje
@@ -280,37 +304,11 @@ async function runGenerationLoop(config: RunLoopConfig) {
         currentTurnId = turnStart.messageId;
       }
-      // Snapshot policy — co N=20 chunków LUB co T=2s zapisujemy `partialBlocks`
-      // do DB. Page reload mid-stream → klient czyta partial + kontynuuje SSE.
-      let chunksSinceSnapshot = 0;
-      let lastSnapshotAt = Date.now();
-      const SNAPSHOT_EVERY_N = 20;
-      const SNAPSHOT_EVERY_MS = 2000;
-      /** Aktualnie budowane bloki — accumulator dla snapshotu. */
-      const liveBlocks: AssistantContentBlock[] = [];
-      const liveToolCalls = new Map<
-        string,
-        { name: string; argumentsBuffer: string }
-      >();
-      const maybeSnapshot = async (force = false) => {
-        chunksSinceSnapshot += 1;
-        const due =
-          force ||
-          chunksSinceSnapshot >= SNAPSHOT_EVERY_N ||
-          Date.now() - lastSnapshotAt >= SNAPSHOT_EVERY_MS;
-        if (!due || !currentTurnId) return;
-        chunksSinceSnapshot = 0;
-        lastSnapshotAt = Date.now();
-        try {
-          await ctx.mutate(messageElement).saveProgressSnapshot({
-            messageId: currentTurnId,
-            partialBlocks: JSON.stringify(liveBlocks),
-            partialLastSeq: seqCounter,
-          });
-        } catch {
-          // snapshot best-effort — pojawi się przy kolejnym chunku
-        }
-      };
+      // Open the in-memory stream — od teraz `subscribe(currentTurnId)`
+      // zwraca live SSE z `init` + zachowanymi chunkami. Idempotent: jeśli
+      // klient zdąży się zasubskrybować wcześniej (race), startStream nie
+      // robi nic.
+      startStream(currentTurnId!);
       const result = await provider.streamComplete(
         {
@@ -325,39 +323,18 @@ async function runGenerationLoop(config: RunLoopConfig) {
         },
         (chunk) => {
           if (chunk.type === "text_delta" && chunk.textDelta) {
-            // accumulate w liveBlocks (last text block lub nowy)
-            const last = liveBlocks[liveBlocks.length - 1];
-            if (last && last.type === "text") {
-              last.text += chunk.textDelta;
-            } else {
-              liveBlocks.push({ type: "text", text: chunk.textDelta });
-            }
             send({ type: "text_delta", textDelta: chunk.textDelta });
-            void maybeSnapshot();
           } else if (chunk.type === "tool_call_started" && chunk.toolCallId) {
-            liveToolCalls.set(chunk.toolCallId, {
-              name: chunk.toolCallName ?? "",
-              argumentsBuffer: "",
-            });
-            liveBlocks.push({
-              type: "tool_call",
-              id: chunk.toolCallId,
-              name: chunk.toolCallName ?? "",
-              arguments: {},
-            });
             send({
               type: "tool_call_pending",
               toolCallId: chunk.toolCallId,
               toolCallName: chunk.toolCallName,
             });
-            void maybeSnapshot(true);
           } else if (
             chunk.type === "tool_call_arguments_delta" &&
             chunk.toolCallId &&
             chunk.argumentsDelta
           ) {
-            const tc = liveToolCalls.get(chunk.toolCallId);
-            if (tc) tc.argumentsBuffer += chunk.argumentsDelta;
             send({
               type: "tool_call_arguments_delta",
               toolCallId: chunk.toolCallId,
@@ -367,27 +344,13 @@ async function runGenerationLoop(config: RunLoopConfig) {
             chunk.type === "tool_call_arguments_complete" &&
             chunk.toolCallId
           ) {
-            // update accumulated block z complete args
-            const args = chunk.arguments ?? {};
-            const block = liveBlocks.find(
-              (b): b is Extract<AssistantContentBlock, { type: "tool_call" }> =>
-                b.type === "tool_call" && b.id === chunk.toolCallId,
-            );
-            if (block) block.arguments = args;
-            // toolCallName z liveBlocks (provider zna nazwę od tool_call_started)
-            // — bez tego klient pushuje tool z `toolName: ""` i nie znajduje
-            // viewComponent w toolsMap → fallback do generic ChatToolLog
-            // ("Wykonuję..."), AskQuestionsView nigdy nie mountuje się.
-            const toolCallName =
-              liveToolCalls.get(chunk.toolCallId)?.name ?? block?.name;
             send({
               type: "tool_call_arguments_complete",
               toolCallId: chunk.toolCallId,
-              toolCallName,
-              arguments: args,
+              toolCallName: chunk.toolCallName,
+              arguments: chunk.arguments ?? {},
             });
-            void maybeSnapshot(true);
-          } else if (chunk.type === "usage_update") {
+          } else if (chunk.type === "usage_update" && chunk.usage) {
             send({ type: "usage_update", usage: chunk.usage });
           }
         },
@@ -412,47 +375,78 @@ async function runGenerationLoop(config: RunLoopConfig) {
       const hasToolCalls =
         result.finishReason === "tool_call" && toolCalls.length > 0;
-      // Close the turn row — same row that was opened above. The final turn
-      // (no tool calls) carries the usage; intermediate turns carry only the
-      // blocks + responseId.
+      // Close the turn row — same row that was opened above. Final blocks
+      // are the SINGLE persistent write of message content in this turn.
+      // Usage zapisywane ZAWSZE (intermediate tool turns też zużywają tokeny
+      // i muszą być rozliczone w `recordUsage` poniżej; klient dostaje pełen
+      // history of cost per turn from message rows).
       await ctx.mutate(messageElement).completeAssistantTurn({
         messageId: currentTurnId!,
         blocks: JSON.stringify(result.blocks),
         previousResponseId: result.responseId,
-        usage: hasToolCalls ? undefined : JSON.stringify(result.usage),
+        usage: JSON.stringify(result.usage),
       });
-      currentTurnId = undefined;
-      if (!hasToolCalls) {
-        send({
-          type: "done",
-          usage: result.usage,
-          finishReason: result.finishReason,
-          executionCount,
-          lastSeq: seqCounter,
-        });
-        endStream(sessionId);
-        return;
+      // Billing hook — emit usageRecorded event for the credit ledger view.
+      // Called after `completeAssistantTurn` so the message row exists in DB
+      // before its cost is attributed.
+      //
+      // chat-builder enforces `.billTo()` is present whenever `recordUsage`
+      // is wired, so the only legitimate "skip" path is "billing not
+      // configured at all" — both undefined. We still guard defensively.
+      if (config.recordUsage && config.alias && config.billTo) {
+        try {
+          // Best-effort: neither `billTo` nor `recordUsage` may fail the turn.
+          const billingScopeId = config.billTo(
+            ((ctx as any).$auth?.params as Record<string, any>) ?? {},
+          );
+          await config.recordUsage(ctx, {
+            scopeId: billingScopeId,
+            alias: config.alias,
+            model,
+            usage: result.usage,
+            metadata: {
+              messageId: currentTurnId!,
+              sessionId,
+              turnIndex: executionCount,
+              chatScopeId: scopeId,
+            },
+          });
+        } catch (err) {
+          // The turn is already persisted; log and carry on to `finalize`.
+          console.error("[arc-chat] recordUsage failed:", err);
+        }
       }
+      // Tear down the in-memory stream: broadcast `done` do subscriberów,
+      // close controllery, drop registry entry po grace window. Klient z
+      // `done` flippuje isStreaming=false i renderuje final blocks z DB.
+      finalize(
+        currentTurnId!,
+        hasToolCalls
+          ? undefined
+          : {
+              usage: result.usage,
+              finishReason: result.finishReason,
+              executionCount,
+            },
+      );
+      currentTurnId = undefined;
+      if (!hasToolCalls) return;
       const serverCalls = toolCalls.filter((tc) => serverToolsMap.has(tc.name));
       const interactiveCalls = toolCalls.filter((tc) =>
         interactiveToolNames.has(tc.name),
       );
-      // Execute server tools — append each result to history as a separate turn
+      // Execute server tools — append each result to history as a separate turn.
+      // Note: ten turn (`finalize`d powyżej) już jest zamknięty, więc kolejne
+      // `send()` byłyby no-opem. Server-tool execution emit'uje eventy poprzez
+      // mutację `saveToolResult` (która tworzy tool_result row w DB) — klient
+      // dostaje je via aggregate query update. Nie korzystamy ze stream-registry
+      // dla tool execution.
       const newToolResults: ConversationTurn[] = [];
       for (const tc of serverCalls) {
-        // `tool_call_pending` poszło już ze streamingu (przy `started`).
-        // Teraz `executing` po stronie servera.
-        send({
-          type: "tool_call_arguments_complete",
-          toolCallId: tc.id,
-          toolCallName: tc.name,
-          arguments: tc.arguments,
-          executionCount,
-        });
         const tool = serverToolsMap.get(tc.name);
         let resultContent: string;
         let isError = false;
@@ -482,19 +476,6 @@ async function runGenerationLoop(config: RunLoopConfig) {
           isError,
         });
-        send({
-          type: "tool_call_executed",
-          toolCallId: tc.id,
-          toolCallName: tc.name,
-          toolResult: {
-            toolCallId: tc.id,
-            name: tc.name,
-            content: resultContent,
-            isError,
-          },
-          executionCount,
-        });
         newToolResults.push({
           role: "tool_result",
           toolCallId: tc.id,
@@ -505,16 +486,9 @@ async function runGenerationLoop(config: RunLoopConfig) {
       }
       // Interactive tools — stop the loop, wait for userResponded.
-      // The assistant turn (with the interactive tool_call) is already
-      // persisted above. Listener B will resume.
-      if (interactiveCalls.length > 0) {
-        send({
-          type: "interactive_tool_request",
-          toolCalls: interactiveCalls,
-          executionCount,
-        });
-        return;
-      }
+      // The assistant turn (with the interactive tool_call) was already
+      // finalized above. Listener B (resume) will create a fresh turn.
+      if (interactiveCalls.length > 0) return;
       // Append tool results to history; mark them as the "new turns" for the
       // next iteration's continuation request.
@@ -526,12 +500,12 @@ async function runGenerationLoop(config: RunLoopConfig) {
     }
   } catch (err) {
     const errorMsg = `AI error: ${err instanceof Error ? err.message : String(err)}`;
-    send({
-      type: "error",
-      error: errorMsg,
-      executionCount,
-    });
     if (currentTurnId) {
+      publish(currentTurnId, {
+        type: "error",
+        error: errorMsg,
+        executionCount,
+      });
       try {
         await ctx.mutate(messageElement).completeAssistantTurn({
           messageId: currentTurnId,
@@ -539,8 +513,8 @@ async function runGenerationLoop(config: RunLoopConfig) {
           error: errorMsg,
         });
       } catch {}
+      finalize(currentTurnId, { error: errorMsg, executionCount });
     }
-    endStream(sessionId);
   }
 }
@@ -579,8 +553,6 @@ export function createAiGenerationListener(config: AiGenerationListenerConfig) {
         scopeId,
         content: userContent,
         model: modelName,
-        role,
-        assistantMessageId,
       } = event.payload as any;
       const model = modelName ?? "gpt-5";
@@ -613,9 +585,9 @@ export function createAiGenerationListener(config: AiGenerationListenerConfig) {
         maxExecutionCount,
         toolChoice: config.toolChoice,
         instruction,
-        // Pre-utworzony empty assistant row z mutacji `sendMessage`/
-        // `systemMessage`/`startStage` — pierwsza iteracja używa go zamiast
-        // wołać `startAssistantTurn`.
+        alias: config.alias,
+        recordUsage: config.recordUsage,
+        billTo: config.billTo,
         preCreatedAssistantMessageId: (
           event.payload as { assistantMessageId?: string }
         ).assistantMessageId,
@@ -653,13 +625,7 @@ export function createAiResumeListener(config: AiGenerationListenerConfig) {
     .query([messageElement, ...allQueryElements])
     .mutate([messageElement, ...allMutationElements])
     .handle(async (ctx, event) => {
-      const {
-        sessionId,
-        scopeId,
-        toolCallId,
-        toolName,
-        content: toolResult,
-      } = event.payload;
+      const { sessionId, scopeId, toolCallId } = event.payload;
       const dbMessages = await ctx
         .query(messageElement)
@@ -670,8 +636,6 @@ export function createAiResumeListener(config: AiGenerationListenerConfig) {
       const history = buildHistory(dbMessages);
       // Compute "new turns start" — index of the just-arrived tool_result.
-      // Anything before it is "already known" (assistant emitted the matching
-      // tool_call earlier and OpenAI has it server-side).
       let newTurnsStartIdx = history.length;
       for (let i = history.length - 1; i >= 0; i--) {
         const t = history[i];
@@ -681,7 +645,6 @@ export function createAiResumeListener(config: AiGenerationListenerConfig) {
         }
       }
-      // Determine the model from the most recent assistant row in DB
       const lastAssistantRow = [...dbMessages]
         .reverse()
         .find((m: any) => m.role === "assistant" && m.model);
@@ -690,9 +653,6 @@ export function createAiResumeListener(config: AiGenerationListenerConfig) {
       const provider = resolveProvider(model, scopeId);
       if (!provider) return;
-      void toolName;
-      void toolResult;
       await runGenerationLoop({
         ctx,
         messageElement,
@@ -708,10 +668,105 @@ export function createAiResumeListener(config: AiGenerationListenerConfig) {
         maxExecutionCount,
         toolChoice: config.toolChoice,
         instruction,
-        // Pre-utworzony empty assistant row z mutacji `respondToTool`.
+        alias: config.alias,
+        recordUsage: config.recordUsage,
+        billTo: config.billTo,
         preCreatedAssistantMessageId: (
           event.payload as { assistantMessageId?: string }
         ).assistantMessageId,
       });
     });
 }
+// ─── Listener C: retryRequested → AI retry ──────────────────────
+/**
+ * Listener C: reacts to the `retryRequested` event and re-runs AI generation.
+ *
+ * Per the upstream author's notes, `retryRequested` is emitted by the
+ * `retryGeneration` mutation (the client clicks Retry after an SSE 410). By
+ * then the interrupted assistant row has already been removed from the DB by
+ * the projection, and the event payload carries a fresh pre-created assistant
+ * message id (read below as `messageId`). The history rebuilt from the DB is
+ * expected to end at the last user message, because the fresh assistant row
+ * has `isGenerating=true` and no `blocks`, so `buildHistory` skips it.
+ *
+ * What the handler does:
+ *  - loads the scope's messages via `getByScope` and rebuilds the history;
+ *  - sets the "new turns" start index to the last `user` turn, or to
+ *    `history.length` when the history contains no user turn;
+ *  - picks the model from the payload, else from the most recent assistant
+ *    row that has a `model`, else `"gpt-5"`;
+ *  - returns without doing anything when `resolveProvider` yields no provider;
+ *  - otherwise calls `runGenerationLoop`, forwarding tools, limits and the
+ *    billing hooks (`alias`, `recordUsage`, `billTo`) from `config`.
+ *
+ * @param config Shared AI listener configuration.
+ * @returns The listener produced by the `listener(...)` builder chain,
+ *   registered under the name `${name}AiRetry`.
+ */
+export function createAiRetryListener(config: AiGenerationListenerConfig) {
+  const {
+    name,
+    messageElement,
+    resolveProvider,
+    instruction,
+    serverTools,
+    interactiveTools,
+    allQueryElements,
+    allMutationElements,
+    maxExecutionCount,
+  } = config;
+  const retryRequestedEvent = messageElement.getEvent("retryRequested");
+  const serverToolsMap = new Map(serverTools.map((t) => [t.name, t]));
+  const interactiveToolNames = new Set(interactiveTools.map((t) => t.name));
+  const allToolsForLLM = [...serverTools, ...interactiveTools];
+  const toolDefs =
+    allToolsForLLM.length > 0
+      ? allToolsForLLM.map((t) => t.toJsonSchema())
+      : undefined;
+  return listener(`${name}AiRetry`)
+    .listenTo([retryRequestedEvent])
+    .async()
+    .query([messageElement, ...allQueryElements])
+    .mutate([messageElement, ...allMutationElements])
+    .handle(async (ctx, event) => {
+      const {
+        messageId: assistantMsgId,
+        sessionId,
+        scopeId,
+        model: modelName,
+      } = event.payload as any;
+      const dbMessages = await ctx
+        .query(messageElement)
+        .getByScope({ scopeId });
+      const history = buildHistory(dbMessages);
+      // Find the last user turn — that's the boundary for "new turns" so the
+      // continuation request only sends it as delta. Assistant turns that are
+      // already in the history end before it.
+      let newTurnsStartIdx = history.length;
+      for (let i = history.length - 1; i >= 0; i--) {
+        if (history[i].role === "user") {
+          newTurnsStartIdx = i;
+          break;
+        }
+      }
+      const lastAssistantRow = [...dbMessages]
+        .reverse()
+        .find((m: any) => m.role === "assistant" && m.model);
+      const model = modelName ?? lastAssistantRow?.model ?? "gpt-5";
+      const provider = resolveProvider(model, scopeId);
+      if (!provider) return;
+      await runGenerationLoop({
+        ctx,
+        messageElement,
+        provider,
+        model,
+        history,
+        initialNewTurnsStartIdx: newTurnsStartIdx,
+        toolDefs,
+        serverToolsMap,
+        interactiveToolNames,
+        scopeId,
+        sessionId,
+        maxExecutionCount,
+        toolChoice: config.toolChoice,
+        instruction,
+        alias: config.alias,
+        recordUsage: config.recordUsage,
+        billTo: config.billTo,
+        preCreatedAssistantMessageId: assistantMsgId,
+      });
+    });
+}