lumiverse-spindle-types 0.4.21 → 0.4.24

package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "lumiverse-spindle-types",
-  "version": "0.4.21",
+  "version": "0.4.24",
   "types": "./src/index.ts",
   "keywords": [
     "lumiverse",
package/src/api.ts CHANGED
@@ -66,8 +66,67 @@ export interface GenerationRequestDTO {
    * is inferred from the extension owner and can be omitted.
    */
   userId?: string;
+  /**
+   * Optional `AbortSignal` to cancel an in-flight generation. When the
+   * signal fires, the upstream LLM HTTP request is torn down and the
+   * returned promise rejects with an `AbortError` (`err.name === "AbortError"`).
+   *
+   * The signal is consumed inside the extension worker and never crosses
+   * the host boundary — it is stripped before the RPC message is posted.
+   * The worker notifies the host via an internal `cancel_generation`
+   * message so the host can abort the in-flight request.
+   *
+   * @example
+   * ```ts
+   * const controller = new AbortController()
+   * const timer = setTimeout(() => controller.abort(), 10_000)
+   * try {
+   *   const result = await spindle.generate.raw({
+   *     provider: "openai",
+   *     model: "gpt-4o-mini",
+   *     messages: [{ role: "user", content: "hello" }],
+   *     signal: controller.signal,
+   *   })
+   * } catch (err) {
+   *   if (err instanceof Error && err.name === "AbortError") {
+   *     // user/timeout cancelled — not an error condition
+   *   }
+   * } finally {
+   *   clearTimeout(timer)
+   * }
+   * ```
+   */
+  signal?: AbortSignal;
 }
 
+/**
+ * Streamed chunk yielded by `spindle.generate.rawStream()` and
+ * `spindle.generate.quietStream()`.
+ *
+ * The stream emits one or more `token` / `reasoning` chunks and then
+ * exactly one terminal `done` chunk carrying the aggregated response.
+ * If the stream fails or is aborted, the async generator rejects instead
+ * of emitting `done`.
+ */
+export type StreamChunkDTO =
+  /** Incremental content token. */
+  | { type: "token"; token: string }
+  /** Incremental chain-of-thought / reasoning token. */
+  | { type: "reasoning"; token: string }
+  /** Terminal chunk — emitted exactly once, on successful completion. */
+  | {
+      type: "done";
+      content: string;
+      reasoning?: string;
+      finish_reason: string;
+      tool_calls?: ToolCallDTO[];
+      usage?: {
+        prompt_tokens: number;
+        completion_tokens: number;
+        total_tokens: number;
+      };
+    };
+
 export interface RequestInitDTO {
   method?: string;
   headers?: Record<string, string>;
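
The `type` discriminant makes exhaustive consumption of the new union straightforward. As a minimal sketch of how downstream code might drain such a stream (the `collectStream` helper and its `onToken` callback are illustrative, not part of the package):

```ts
import type { StreamChunkDTO } from "lumiverse-spindle-types";

// Illustrative helper: forward incremental tokens, return the terminal
// `done` chunk, and treat a stream that ends without one as abnormal.
async function collectStream(
  stream: AsyncGenerator<StreamChunkDTO, void, void>,
  onToken?: (token: string) => void,
): Promise<Extract<StreamChunkDTO, { type: "done" }>> {
  for await (const chunk of stream) {
    if (chunk.type === "token") onToken?.(chunk.token);
    // `reasoning` chunks are skipped here; a UI could surface them separately.
    if (chunk.type === "done") return chunk;
  }
  // Per the doc comment, a successful stream always terminates with `done`.
  throw new Error("stream ended without a terminal 'done' chunk");
}
```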
@@ -924,6 +983,18 @@ export type WorkerToHost =
   | { type: "register_tool"; tool: ToolRegistrationDTO }
   | { type: "unregister_tool"; name: string }
   | { type: "request_generation"; requestId: string; input: GenerationRequestDTO }
+  /**
+   * Start a streaming generation. The host responds asynchronously with
+   * one or more `generation_stream_chunk` messages, terminating with a
+   * `done` chunk on success or a `generation_stream_error` on failure.
+   */
+  | { type: "request_generation_stream"; requestId: string; input: GenerationRequestDTO }
+  /**
+   * Cancel an in-flight generation started via `request_generation` or
+   * `request_generation_stream`. `requestId` matches the original request.
+   * The host aborts the upstream LLM fetch and responds with an `AbortError`.
+   */
+  | { type: "cancel_generation"; requestId: string }
   | { type: "storage_read"; requestId: string; path: string }
   | { type: "storage_write"; requestId: string; path: string; data: string }
   | { type: "storage_read_binary"; requestId: string; path: string }
@@ -1243,4 +1314,16 @@ export type HostToWorker =
       commandId: string;
       context: SpindleCommandContextDTO;
       userId: string;
-    };
+    }
+  /**
+   * One streamed chunk for a generation started via
+   * `request_generation_stream`. Multiple `token` / `reasoning` chunks
+   * may arrive, terminating with exactly one `done` chunk on success.
+   */
+  | { type: "generation_stream_chunk"; requestId: string; chunk: StreamChunkDTO }
+  /**
+   * Terminal failure for a generation started via
+   * `request_generation_stream`. Mutually exclusive with the `done`
+   * chunk in `generation_stream_chunk`. Aborts surface here too.
+   */
+  | { type: "generation_stream_error"; requestId: string; error: string };
package/src/index.ts CHANGED
@@ -20,6 +20,7 @@ export type {
   ToolSchemaDTO,
   ToolCallDTO,
   GenerationRequestDTO,
+  StreamChunkDTO,
   RequestInitDTO,
   ConnectionProfileDTO,
   PermissionDeniedDetail,
@@ -45,6 +46,7 @@ import type {
   GenerationStoppedPayloadDTO,
   GenerationObserver,
   MessageSwipedPayloadDTO,
+  StreamChunkDTO,
 } from "./api";
 
 /** The global `spindle` object available in backend extension workers */
@@ -102,11 +104,76 @@ export interface SpindleAPI {
   /** Unregister an LLM tool */
   unregisterTool(name: string): void;
 
-  /** Generation helpers */
+  /**
+   * Generation helpers.
+   *
+   * All three entry points (`raw`, `quiet`, `batch`) accept a standard
+   * `AbortSignal` via `input.signal`. Aborting the signal tears down the
+   * upstream LLM HTTP request and rejects the returned promise with an
+   * `AbortError` (`err.name === "AbortError"`). This is the same pattern
+   * `fetch()` uses, so it composes with `AbortSignal.timeout()` and
+   * `AbortSignal.any([...])`.
+   *
+   * @example
+   * ```ts
+   * const controller = new AbortController()
+   * const result = spindle.generate.raw({
+   *   provider: "openai",
+   *   model: "gpt-4o-mini",
+   *   messages,
+   *   signal: controller.signal,
+   * })
+   * // Cancel from elsewhere — e.g. user closed the panel
+   * controller.abort()
+   * ```
+   */
   generate: {
     raw(input: GenerationRequestDTO): Promise<unknown>;
     quiet(input: GenerationRequestDTO): Promise<unknown>;
     batch(input: GenerationRequestDTO): Promise<unknown>;
+    /**
+     * Streaming variant of {@link raw}. Returns an async generator that
+     * yields incremental {@link StreamChunkDTO} values:
+     *
+     * - `{ type: 'token', token }` — content chunk
+     * - `{ type: 'reasoning', token }` — chain-of-thought chunk
+     * - `{ type: 'done', ... }` — final aggregated response (emitted exactly once)
+     *
+     * Tool-call deltas, finish reason, and token usage live on the terminal
+     * `done` chunk. If the upstream call fails or the request is aborted
+     * via `input.signal`, the generator rejects with the underlying error
+     * (`AbortError` for cancellations).
+     *
+     * @example
+     * ```ts
+     * const ctrl = new AbortController()
+     * setTimeout(() => ctrl.abort(), 30_000)
+     * try {
+     *   for await (const chunk of spindle.generate.rawStream({
+     *     provider: 'openai',
+     *     model: 'gpt-4o-mini',
+     *     messages,
+     *     signal: ctrl.signal,
+     *   })) {
+     *     if (chunk.type === 'token') process.stdout.write(chunk.token)
+     *     else if (chunk.type === 'done') usage = chunk.usage
+     *   }
+     * } catch (err) {
+     *   if (err instanceof Error && err.name === 'AbortError') return
+     *   throw err
+     * }
+     * ```
+     */
+    rawStream(input: GenerationRequestDTO): AsyncGenerator<StreamChunkDTO, void, void>;
+    /**
+     * Streaming variant of {@link quiet}. Same chunk schema and abort
+     * semantics as {@link rawStream}.
+     *
+     * Note: streaming is not exposed for `batch` — compose multiple
+     * `rawStream` / `quietStream` calls yourself if you need parallel
+     * streamed responses.
+     */
+    quietStream(input: GenerationRequestDTO): AsyncGenerator<StreamChunkDTO, void, void>;
     /** Run a dry-run prompt assembly without calling the LLM. */
     dryRun(input: DryRunRequestDTO, userId?: string): Promise<DryRunResultDTO>;
     /**
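
Since the `generate` doc comment claims parity with `fetch()` abort semantics, the standard signal combinators should compose directly. A sketch, assuming a runtime with `AbortSignal.any` and `AbortSignal.timeout` (Node 20+ or a modern browser), placeholder provider/model values, and an enclosing async context:

```ts
const userCancel = new AbortController();

const result = await spindle.generate.quiet({
  provider: "openai",
  model: "gpt-4o-mini",
  messages: [{ role: "user", content: "Summarize the last scene." }],
  // Whichever fires first aborts the request: a 30 s deadline or an
  // explicit user cancel via userCancel.abort().
  signal: AbortSignal.any([AbortSignal.timeout(30_000), userCancel.signal]),
});
```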