@arcote.tech/arc-chat 0.7.9 → 0.7.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -7,18 +7,29 @@ export function createChatStreamRoute(config: {
7
7
  userToken: Token;
8
8
  }) {
9
9
  return route(`${config.name}ChatStream`)
10
- .path(`/chat/${config.name}/stream/:streamId`)
10
+ .path(`/chat/${config.name}/stream/:messageId`)
11
11
  .protectBy(config.userToken, () => true)
12
12
  .handle({
13
- GET: async (_ctx, req: Request, params: Record<string, string>) => {
14
- // Klient po reload przekazuje `?afterSeq=N` z `partialLastSeq`
15
- // odczytanego z DB. Replay buffer pomija eventy już zaaplikowane.
16
- const url = new URL(req.url);
17
- const afterSeqRaw = url.searchParams.get("afterSeq");
18
- const afterSeq = afterSeqRaw ? Number.parseInt(afterSeqRaw, 10) || 0 : 0;
19
- const stream = subscribe(params.streamId, afterSeq);
20
-
21
- return new Response(stream, {
13
+ GET: async (_ctx, _req: Request, params: Record<string, string>) => {
14
+ const result = subscribe(params.messageId);
15
+ if (!result) {
16
+ // Brak in-memory streamu: generacja zakończona poza grace window
17
+ // ALBO proces zrestartował się mid-stream. Klient rozróżnia
18
+ // sytuacje po `isGenerating` w DB: jeśli row ma `isGenerating=false`
19
+ // użyj final `blocks`, jeśli `true` → "Generation interrupted"
20
+ // + retry.
21
+ return new Response(
22
+ JSON.stringify({
23
+ error: "stream_not_found",
24
+ messageId: params.messageId,
25
+ }),
26
+ {
27
+ status: 410,
28
+ headers: { "Content-Type": "application/json" },
29
+ },
30
+ );
31
+ }
32
+ return new Response(result.stream, {
22
33
  headers: {
23
34
  "Content-Type": "text/event-stream",
24
35
  "Cache-Control": "no-cache",
@@ -1,23 +1,46 @@
1
- import type { ChatStreamEvent } from "@arcote.tech/arc-ai";
1
+ import type {
2
+ AssistantContentBlock,
3
+ ChatStreamEvent,
4
+ FinishReason,
5
+ TokenUsage,
6
+ ToolCall,
7
+ ToolResult,
8
+ } from "@arcote.tech/arc-ai";
2
9
 
3
- // ─── ChatStreamManager per session SSE registry with replay buffer ───
10
+ // ─── Per-message in-memory stream registry ──────────────────────────
4
11
  //
5
- // Per-session state:
6
- // - `streams[sessionId]` live controllers currently subscribed
7
- // - `buffers[sessionId]` every event broadcast since the session started,
8
- // so a late subscriber (e.g. after a page refresh mid-generation) gets
9
- // the full prefix replayed before going live
10
- // - `keepAliveIntervals[sessionId]` — heartbeat ping interval
11
-
12
- const streams = new Map<string, Set<ReadableStreamDefaultController<Uint8Array>>>();
13
- const buffers = new Map<string, ChatStreamEvent[]>();
14
- const keepAliveIntervals = new Map<string, ReturnType<typeof setInterval>>();
15
- const encoder = new TextEncoder();
12
+ // Live state of an in-progress assistant message lives ONLY here, never in
13
+ // DB. `currentBlocks` accumulates text + tool_call blocks as the LLM streams.
14
+ // Each `publish()` mutates it AND broadcasts the SSE event to subscribers.
15
+ // A new subscriber receives `{ type: "init", currentBlocks }` as the first
16
+ // event — that's the snapshot of the in-memory state — and then live events.
17
+ //
18
+ // After `finalize()` the stream is kept for FINALIZE_GRACE_MS so a client
19
+ // subscribing in that small window gets a `done` event instead of 410. After
20
+ // grace, the stream is dropped — the client falls back to the final `blocks`
21
+ // from DB via the normal aggregate query.
22
+ //
23
+ // Server restart / process crash → all in-memory state is lost. A client
24
+ // observing `isGenerating: true` in DB but getting 410 from `subscribe()`
25
+ // renders an "interrupted" state with a retry button.
26
+
27
+ interface MessageStream {
28
+ messageId: string;
29
+ currentBlocks: AssistantContentBlock[];
30
+ toolCallsById: Map<
31
+ string,
32
+ Extract<AssistantContentBlock, { type: "tool_call" }>
33
+ >;
34
+ subscribers: Set<ReadableStreamDefaultController<Uint8Array>>;
35
+ keepAliveInterval?: ReturnType<typeof setInterval>;
36
+ finalized: boolean;
37
+ finalEvent?: ChatStreamEvent;
38
+ }
16
39
 
17
- /** Hard cap on per-session buffer size. Each typical generation produces a
18
- * few hundred chunks; 5000 is generous but bounds memory if a stream
19
- * somehow runs without `endStream`. */
20
- const MAX_BUFFER = 5000;
40
+ const streams = new Map<string, MessageStream>();
41
+ const FINALIZE_GRACE_MS = 5_000;
42
+ const KEEPALIVE_INTERVAL_MS = 5_000;
43
+ const encoder = new TextEncoder();
21
44
 
22
45
  function encode(event: ChatStreamEvent): Uint8Array {
23
46
  return encoder.encode(`data: ${JSON.stringify(event)}\n\n`);
@@ -27,126 +50,237 @@ function encodePing(): Uint8Array {
27
50
  return encoder.encode(`: ping\n\n`);
28
51
  }
29
52
 
30
- export function broadcast(sessionId: string, event: ChatStreamEvent): void {
31
- // Append to the replay buffer first — even if no client is currently
32
- // subscribed (initial connect race) the event survives for replay.
33
- let buf = buffers.get(sessionId);
34
- if (!buf) {
35
- buf = [];
36
- buffers.set(sessionId, buf);
53
+ function ensureKeepAlive(s: MessageStream): void {
54
+ if (s.keepAliveInterval) return;
55
+ s.keepAliveInterval = setInterval(() => {
56
+ if (s.subscribers.size === 0) return;
57
+ const ping = encodePing();
58
+ for (const ctrl of s.subscribers) {
59
+ try {
60
+ ctrl.enqueue(ping);
61
+ } catch {
62
+ s.subscribers.delete(ctrl);
63
+ }
64
+ }
65
+ }, KEEPALIVE_INTERVAL_MS);
66
+ }
67
+
68
+ /**
69
+ * Open an in-memory stream for `messageId`. Idempotent — safe to call when
70
+ * the assistant row was pre-created by a mutation (`preCreatedAssistantMessageId`).
71
+ *
72
+ * Listener calls this before the first `publish()`. From this point on
73
+ * `subscribe(messageId)` returns a live stream; before it, `subscribe()`
74
+ * returns null.
75
+ */
76
+ export function startStream(messageId: string): void {
77
+ if (streams.has(messageId)) return;
78
+ streams.set(messageId, {
79
+ messageId,
80
+ currentBlocks: [],
81
+ toolCallsById: new Map(),
82
+ subscribers: new Set(),
83
+ finalized: false,
84
+ });
85
+ }
86
+
87
+ /**
88
+ * Event shape that the listener pushes into the stream. `messageId` is
89
+ * injected by the registry — caller doesn't pass it.
90
+ */
91
+ export type PublishableEvent =
92
+ | { type: "text_delta"; textDelta: string }
93
+ | { type: "tool_call_pending"; toolCallId: string; toolCallName?: string }
94
+ | {
95
+ type: "tool_call_arguments_delta";
96
+ toolCallId: string;
97
+ argumentsDelta: string;
98
+ }
99
+ | {
100
+ type: "tool_call_arguments_complete";
101
+ toolCallId: string;
102
+ toolCallName?: string;
103
+ arguments: Record<string, unknown>;
104
+ executionCount?: number;
105
+ }
106
+ | {
107
+ type: "tool_call_executed";
108
+ toolCallId: string;
109
+ toolCallName: string;
110
+ toolResult: ToolResult;
111
+ executionCount?: number;
112
+ }
113
+ | {
114
+ type: "interactive_tool_request";
115
+ toolCalls: ToolCall[];
116
+ executionCount?: number;
117
+ }
118
+ | { type: "usage_update"; usage: TokenUsage }
119
+ | { type: "error"; error: string; executionCount?: number };
120
+
121
+ /**
122
+ * Mutate the in-memory `currentBlocks` for the relevant event types, then
123
+ * broadcast the event to all active subscribers as SSE.
124
+ *
125
+ * No-op if the stream has been finalized or never started (race with a
126
+ * client disconnect / listener teardown).
127
+ */
128
+ export function publish(messageId: string, event: PublishableEvent): void {
129
+ const s = streams.get(messageId);
130
+ if (!s || s.finalized) return;
131
+
132
+ switch (event.type) {
133
+ case "text_delta": {
134
+ const last = s.currentBlocks[s.currentBlocks.length - 1];
135
+ if (last && last.type === "text") {
136
+ last.text += event.textDelta;
137
+ } else {
138
+ s.currentBlocks.push({ type: "text", text: event.textDelta });
139
+ }
140
+ break;
141
+ }
142
+ case "tool_call_pending": {
143
+ const block: Extract<AssistantContentBlock, { type: "tool_call" }> = {
144
+ type: "tool_call",
145
+ id: event.toolCallId,
146
+ name: event.toolCallName ?? "",
147
+ arguments: {},
148
+ };
149
+ s.currentBlocks.push(block);
150
+ s.toolCallsById.set(event.toolCallId, block);
151
+ break;
152
+ }
153
+ case "tool_call_arguments_complete": {
154
+ const block = s.toolCallsById.get(event.toolCallId);
155
+ if (block) {
156
+ block.arguments = event.arguments;
157
+ if (event.toolCallName) block.name = event.toolCallName;
158
+ }
159
+ break;
160
+ }
161
+ // tool_call_arguments_delta, tool_call_executed, interactive_tool_request,
162
+ // usage_update, error — broadcast-only. They don't mutate `currentBlocks`:
163
+ // - args_delta is incremental JSON the listener replaces via args_complete
164
+ // - executed / interactive_tool_request relate to tool execution flow
165
+ // (tool_result rows exist separately in DB)
166
+ // - usage_update / error are transient signals to the UI
37
167
  }
38
- if (buf.length < MAX_BUFFER) buf.push(event);
39
168
 
40
- const controllers = streams.get(sessionId);
41
- if (!controllers || controllers.size === 0) return;
42
- const data = encode(event);
43
- for (const controller of controllers) {
169
+ const payload = { ...event, messageId } as ChatStreamEvent;
170
+ const data = encode(payload);
171
+ for (const ctrl of s.subscribers) {
44
172
  try {
45
- controller.enqueue(data);
173
+ ctrl.enqueue(data);
46
174
  } catch {
47
- controllers.delete(controller);
175
+ s.subscribers.delete(ctrl);
48
176
  }
49
177
  }
50
178
  }
51
179
 
52
- export function subscribe(
53
- sessionId: string,
54
- afterSeq = 0,
55
- ): ReadableStream<Uint8Array> {
56
- return new ReadableStream<Uint8Array>({
57
- async start(controller) {
58
- // Replay buffered events with seq > afterSeq. Klient po reload czyta
59
- // `partialLastSeq` z DB i przekazuje go jako afterSeq buffer pomija
60
- // już-zaaplikowane chunki (eliminuje duplikację typu "Dobrze Dobrze —").
61
- //
62
- // Co 10 eventów yield (`setTimeout(16)`) — bez tego cały bufor leci
63
- // w jednym chunku TCP, klient nie ma szansy zrobić reader.read() +
64
- // render między burstami → React batchuje setTimeline w jeden render,
65
- // streaming niewidoczny. 16ms ≈ rAF tick: 100 eventów = 160ms widocznego
66
- // streamu zamiast 0ms burst.
67
- const buf = buffers.get(sessionId);
68
- if (buf) {
69
- let count = 0;
70
- for (const e of buf) {
71
- if (e.seq <= afterSeq) continue;
72
- try {
73
- controller.enqueue(encode(e));
74
- } catch {
75
- return;
76
- }
77
- count++;
78
- if (count % 10 === 0) {
79
- await new Promise<void>((r) => setTimeout(r, 16));
80
- }
81
- }
82
- }
180
+ /**
181
+ * Open an SSE stream for a subscriber. Returns null if the stream has been
182
+ * dropped (server restart / past the FINALIZE_GRACE_MS window). The route
183
+ * handler maps null → HTTP 410, which signals the UI to show "Generation
184
+ * interrupted" + Retry.
185
+ *
186
+ * The stream starts with an `init` event carrying the current in-memory
187
+ * snapshot of `currentBlocks` so the subscriber sees what's already been
188
+ * generated. If the stream is already finalized within grace, the controller
189
+ * receives `init` then `done` then closes.
190
+ */
191
+ export function subscribe(messageId: string): {
192
+ stream: ReadableStream<Uint8Array>;
193
+ currentBlocks: AssistantContentBlock[];
194
+ } | null {
195
+ const s = streams.get(messageId);
196
+ if (!s) return null;
83
197
 
84
- let set = streams.get(sessionId);
85
- if (!set) {
86
- set = new Set();
87
- streams.set(sessionId, set);
88
- }
89
- set.add(controller);
90
-
91
- // Start keep-alive if not running
92
- if (!keepAliveIntervals.has(sessionId)) {
93
- const interval = setInterval(() => {
94
- const s = streams.get(sessionId);
95
- if (s && s.size > 0) {
96
- const ping = encodePing();
97
- for (const c of s) {
98
- try { c.enqueue(ping); } catch { s.delete(c); }
99
- }
100
- } else if (!buffers.has(sessionId)) {
101
- // Stream truly inactive: no live clients AND no buffer. Stop
102
- // pinging. We never proactively drop the buffer here — that
103
- // happens in `endStream` so a late re-subscribe still gets the
104
- // full replay.
105
- cleanup(sessionId);
106
- }
107
- }, 5000);
108
- keepAliveIntervals.set(sessionId, interval);
198
+ // Deep snapshot — the subscriber sees a stable `init` payload even if the
199
+ // listener mutates `currentBlocks` on a parallel publish.
200
+ const snapshot: AssistantContentBlock[] = JSON.parse(
201
+ JSON.stringify(s.currentBlocks),
202
+ );
203
+
204
+ const stream = new ReadableStream<Uint8Array>({
205
+ start(controller) {
206
+ controller.enqueue(
207
+ encode({
208
+ type: "init",
209
+ messageId,
210
+ currentBlocks: snapshot,
211
+ }),
212
+ );
213
+ if (s.finalized) {
214
+ if (s.finalEvent) controller.enqueue(encode(s.finalEvent));
215
+ try {
216
+ controller.close();
217
+ } catch {}
218
+ return;
109
219
  }
220
+ s.subscribers.add(controller);
221
+ ensureKeepAlive(s);
110
222
  },
111
223
  cancel() {
112
- // One client disconnected don't tear down session state. The buffer
113
- // and other subscribers (if any) remain.
224
+ // Subscriber disconnected. Garbage-collection of dead controllers
225
+ // happens in `publish()` via the enqueue-catch path.
114
226
  },
115
227
  });
228
+
229
+ return { stream, currentBlocks: snapshot };
116
230
  }
117
231
 
118
- /** Called by the AI generation listener when a turn finishes (success or
119
- * error). Closes all live SSE streams and drops the replay buffer. After
120
- * this, a fresh `subscribe(sessionId)` returns an empty stream the
121
- * client should fall back to reading the final `blocks` from DB. */
122
- export function endStream(sessionId: string): void {
123
- const controllers = streams.get(sessionId);
124
- if (controllers) {
125
- // seq dla `done` — przerzucamy ostatni z bufora lub 0 gdy pusto.
126
- const buf = buffers.get(sessionId);
127
- const lastSeq = buf && buf.length > 0 ? buf[buf.length - 1].seq : 0;
128
- const done = encode({ type: "done", sessionId, seq: lastSeq + 1 } as ChatStreamEvent);
129
- for (const controller of controllers) {
130
- try {
131
- controller.enqueue(done);
132
- controller.close();
133
- } catch {}
134
- }
232
+ /**
233
+ * Called by the listener after `completeAssistantTurn` returns. Broadcasts
234
+ * `done` to all live subscribers, closes the controllers, then drops the
235
+ * stream from the registry after FINALIZE_GRACE_MS.
236
+ */
237
+ export function finalize(
238
+ messageId: string,
239
+ finalDetails?: {
240
+ usage?: TokenUsage;
241
+ finishReason?: FinishReason;
242
+ error?: string;
243
+ executionCount?: number;
244
+ },
245
+ ): void {
246
+ const s = streams.get(messageId);
247
+ if (!s || s.finalized) return;
248
+ s.finalized = true;
249
+
250
+ const done: ChatStreamEvent = {
251
+ type: "done",
252
+ messageId,
253
+ ...finalDetails,
254
+ };
255
+ s.finalEvent = done;
256
+
257
+ const data = encode(done);
258
+ for (const ctrl of s.subscribers) {
259
+ try {
260
+ ctrl.enqueue(data);
261
+ ctrl.close();
262
+ } catch {}
263
+ }
264
+ s.subscribers.clear();
265
+ if (s.keepAliveInterval) {
266
+ clearInterval(s.keepAliveInterval);
267
+ s.keepAliveInterval = undefined;
135
268
  }
136
- cleanup(sessionId);
269
+
270
+ setTimeout(() => {
271
+ streams.delete(messageId);
272
+ }, FINALIZE_GRACE_MS);
137
273
  }
138
274
 
139
- export function hasActiveStream(sessionId: string): boolean {
140
- const s = streams.get(sessionId);
141
- return !!s && s.size > 0;
275
+ export function isActive(messageId: string): boolean {
276
+ const s = streams.get(messageId);
277
+ return !!s && !s.finalized;
142
278
  }
143
279
 
144
- function cleanup(sessionId: string): void {
145
- const interval = keepAliveIntervals.get(sessionId);
146
- if (interval) {
147
- clearInterval(interval);
148
- keepAliveIntervals.delete(sessionId);
149
- }
150
- streams.delete(sessionId);
151
- buffers.delete(sessionId);
280
+ /** Test/debug readonly snapshot of `currentBlocks` for a stream. */
281
+ export function getCurrentBlocks(
282
+ messageId: string,
283
+ ): AssistantContentBlock[] | null {
284
+ const s = streams.get(messageId);
285
+ return s ? s.currentBlocks : null;
152
286
  }
@@ -40,12 +40,15 @@ function AskQuestionsView({
40
40
  const { answerBelowLabel } = useChatLabels();
41
41
 
42
42
  useEffect(() => {
43
- if (calling) {
43
+ // `params.questions` może być undefined podczas streamingu tool args —
44
+ // partial JSON nie ma jeszcze tablicy. Czekamy aż streaming skończy
45
+ // wypełniać argumenty.
46
+ if (calling && Array.isArray(params.questions)) {
44
47
  const questions: Question[] = params.questions.map((q) => ({
45
48
  id: q.id,
46
49
  label: q.label,
47
50
  description: q.description,
48
- options: [...q.options],
51
+ options: [...(q.options ?? [])],
49
52
  }));
50
53
 
51
54
  registerInputOverride(