blazen 0.5.4 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/index.d.ts CHANGED
@@ -10,7 +10,7 @@ export type JsActiveWorkflowSnapshot = ActiveWorkflowSnapshot
10
10
  /** The result of an agent run. */
11
11
  export declare class AgentResult {
12
12
  /** The final completion response from the model. */
13
- get response(): JsCompletionResponse
13
+ get response(): JsModelResponse
14
14
  /** Full message history including all tool calls and results. */
15
15
  get messages(): Array<any>
16
16
  /** Number of tool-calling iterations that occurred. */
@@ -42,13 +42,13 @@ export declare class AnthropicProvider {
42
42
  /** Get the model ID. */
43
43
  get modelId(): string
44
44
  /** Perform a chat completion. */
45
- complete(messages: Array<JsChatMessage>): Promise<JsCompletionResponse>
45
+ complete(messages: Array<JsChatMessage>): Promise<JsModelResponse>
46
46
  /** Perform a chat completion with additional options. */
47
- completeWithOptions(messages: Array<JsChatMessage>, options: JsCompletionOptions): Promise<JsCompletionResponse>
47
+ completeWithOptions(messages: Array<JsChatMessage>, options: JsModelOptions): Promise<JsModelResponse>
48
48
  /** Stream a chat completion. */
49
49
  stream(messages: Array<JsChatMessage>, onChunk: StreamChunkCallbackTsfn): Promise<void>
50
50
  /** Stream a chat completion with additional options. */
51
- streamWithOptions(messages: Array<JsChatMessage>, onChunk: StreamChunkCallbackTsfn, options: JsCompletionOptions): Promise<void>
51
+ streamWithOptions(messages: Array<JsChatMessage>, onChunk: StreamChunkCallbackTsfn, options: JsModelOptions): Promise<void>
52
52
  }
53
53
  export type JsAnthropicProvider = AnthropicProvider
54
54
 
@@ -113,6 +113,53 @@ export declare class AssignmentContext {
113
113
  }
114
114
  export type JsAssignmentContext = AssignmentContext
115
115
 
116
+ /**
117
+ * AudioGen text-to-SFX + text-to-music backend.
118
+ *
119
+ * Use the [`JsAudioGenBackend::create`] factory to construct an instance.
120
+ */
121
+ export declare class AudioGenBackend {
122
+ /** Construct an AudioGen backend handle. */
123
+ static create(options?: JsAudioGenOptions | undefined | null): AudioGenBackend
124
+ /** Backend identifier, always `"audiogen-medium"`. */
125
+ get modelId(): string
126
+ /**
127
+ * Generate music conditioned on `prompt`.
128
+ *
129
+ * # Errors
130
+ * Returns `MusicInvalidInputError` for empty prompts or non-positive
131
+ * / out-of-range durations, `MusicHfHubError` on weight-download
132
+ * failure, `MusicCandleError` on inference failure, or
133
+ * `MusicEngineNotAvailableError` when the engine feature was
134
+ * compiled out.
135
+ */
136
+ generateMusic(prompt: string, durationSeconds: number): Promise<JsMusicResult>
137
+ /**
138
+ * Generate sound-effect audio conditioned on `prompt`.
139
+ *
140
+ * # Errors
141
+ * Same surface as [`Self::generate_music`].
142
+ */
143
+ generateSfx(prompt: string, durationSeconds: number): Promise<JsMusicResult>
144
+ /**
145
+ * Stream music generation, invoking `onChunk` for each emitted
146
+ * `JsMusicChunk` until the final chunk arrives (`isFinal === true`).
147
+ *
148
+ * # Errors
149
+ * Same surface as [`Self::generate_music`].
150
+ */
151
+ streamGenerateMusic(prompt: string, durationSeconds: number, onChunk: StreamMusicChunkCallbackTsfn): Promise<void>
152
+ /**
153
+ * Stream SFX generation, invoking `onChunk` for each emitted
154
+ * `JsMusicChunk` until the final chunk arrives (`isFinal === true`).
155
+ *
156
+ * # Errors
157
+ * Same surface as [`Self::generate_music`].
158
+ */
159
+ streamGenerateSfx(prompt: string, durationSeconds: number, onChunk: StreamMusicChunkCallbackTsfn): Promise<void>
160
+ }
161
+ export type JsAudioGenBackend = AudioGenBackend
162
+
116
163
  export declare class AudioMusicProviderDefaults {
117
164
  /** Construct role-specific defaults. */
118
165
  constructor(base?: BaseProviderDefaults | undefined | null, before?: BeforeRoleTsfn | undefined | null)
@@ -151,13 +198,13 @@ export declare class AzureOpenAiProvider {
151
198
  /** Get the model ID. */
152
199
  get modelId(): string
153
200
  /** Perform a chat completion. */
154
- complete(messages: Array<JsChatMessage>): Promise<JsCompletionResponse>
201
+ complete(messages: Array<JsChatMessage>): Promise<JsModelResponse>
155
202
  /** Perform a chat completion with additional options. */
156
- completeWithOptions(messages: Array<JsChatMessage>, options: JsCompletionOptions): Promise<JsCompletionResponse>
203
+ completeWithOptions(messages: Array<JsChatMessage>, options: JsModelOptions): Promise<JsModelResponse>
157
204
  /** Stream a chat completion. */
158
205
  stream(messages: Array<JsChatMessage>, onChunk: StreamChunkCallbackTsfn): Promise<void>
159
206
  /** Stream a chat completion with additional options. */
160
- streamWithOptions(messages: Array<JsChatMessage>, onChunk: StreamChunkCallbackTsfn, options: JsCompletionOptions): Promise<void>
207
+ streamWithOptions(messages: Array<JsChatMessage>, onChunk: StreamChunkCallbackTsfn, options: JsModelOptions): Promise<void>
161
208
  }
162
209
  export type JsAzureOpenAiProvider = AzureOpenAiProvider
163
210
 
@@ -193,118 +240,6 @@ export declare class BackgroundRemovalProviderDefaults {
193
240
  }
194
241
  export type JsBackgroundRemovalProviderDefaults = BackgroundRemovalProviderDefaults
195
242
 
196
- /**
197
- * A completion provider wrapper that applies a
198
- * [`JsCompletionProviderDefaults`] to every completion request before
199
- * delegating to the inner model.
200
- *
201
- * `BaseProvider` is intended to be subclassed from JavaScript:
202
- *
203
- * ```javascript
204
- * import { BaseProvider, CompletionModel } from "blazen";
205
- *
206
- * class TerseLlm extends BaseProvider {
207
- * constructor() {
208
- * const inner = CompletionModel.openai({ apiKey: "sk-..." });
209
- * super(inner);
210
- * this.withSystemPrompt("Be terse.");
211
- * }
212
- * }
213
- * ```
214
- *
215
- * Today (V1) the constructor stores an opaque reference to the inner
216
- * object — Phase D will wire `class extends` to fire the JS `complete`
217
- * override before falling back to the inner Rust model.
218
- */
219
- export declare class BaseProvider {
220
- /**
221
- * Construct a new [`BaseProvider`].
222
- *
223
- * `inner` is the underlying completion model — pass a
224
- * [`JsCompletionModel`] instance. JS subclasses that fully
225
- * override `complete` may pass `null` here (Phase D will wire
226
- * subclass dispatch end-to-end; today calls to `complete` on a
227
- * subclass-only provider report unsupported).
228
- *
229
- * `defaults` optionally seeds the
230
- * [`JsCompletionProviderDefaults`]; when omitted, an empty
231
- * defaults bag is created.
232
- */
233
- constructor(inner?: JsCompletionModel | undefined | null, defaults?: JsCompletionProviderDefaults | undefined | null)
234
- /**
235
- * Set the default system prompt prepended to requests when no
236
- * system message is already present.
237
- */
238
- withSystemPrompt(prompt: string): BaseProvider
239
- /** Replace the default tools appended to every completion request. */
240
- withTools(tools: Array<JsToolDefinition>): BaseProvider
241
- /** Set the default `responseFormat` (JSON Schema object). */
242
- withResponseFormat(format: any): BaseProvider
243
- /**
244
- * Set the universal `beforeRequest` hook (fires for any request
245
- * type). V1: stored only — Phase B wires dispatch.
246
- */
247
- withBeforeRequest(hook: BeforeRequestTsfn): BaseProvider
248
- /**
249
- * Set the typed `beforeCompletion` hook (fires after the universal
250
- * hook, with a typed completion request). V1: stored only — Phase
251
- * B wires dispatch.
252
- */
253
- withBeforeCompletion(hook: BeforeCompletionTsfn): BaseProvider
254
- /** Replace the entire defaults bag. */
255
- withDefaults(defaults: JsCompletionProviderDefaults): BaseProvider
256
- /** The currently-configured defaults. */
257
- get defaults(): JsCompletionProviderDefaults
258
- /**
259
- * The inner model's `modelId`. Returns the empty string when the
260
- * provider was constructed without a Rust-side `inner` (JS subclass
261
- * path).
262
- */
263
- get modelId(): string
264
- /**
265
- * The provider identifier used for logging. Defaults to the inner
266
- * model's `modelId` when present, otherwise `"base"`. Subclasses
267
- * may override.
268
- */
269
- get providerId(): string
270
- /**
271
- * Typed structured extraction.
272
- *
273
- * Sends a completion request with a JSON Schema `response_format`
274
- * envelope and parses the model's response as JSON. The schema
275
- * argument is a plain JSON Schema object (callers using zod can
276
- * convert with `zodToJsonSchema(zSchema)` from the `zod-to-json-schema`
277
- * package).
278
- *
279
- * The `response_format` is wired up as the `OpenAI`-style
280
- * `{"type":"json_schema","json_schema":{"name":"Extract","schema":...,"strict":true}}`
281
- * envelope; provider implementations that don't natively support
282
- * structured outputs fall back to a system-instruction shim (see
283
- * `crates/blazen-llm/src/providers/anthropic.rs::build_json_schema_system_instruction`).
284
- *
285
- * Returns the parsed JSON value. The TypeScript surface declares
286
- * the return as `any` because the schema shape is only known at
287
- * runtime; callers can narrow via TS generics on their wrapper.
288
- *
289
- * ```typescript
290
- * const schema = {
291
- * type: "object",
292
- * properties: {
293
- * name: { type: "string" },
294
- * age: { type: "integer" },
295
- * },
296
- * required: ["name", "age"],
297
- * };
298
- * const result = await provider.extract(schema, [
299
- * ChatMessage.user("My name is Alice and I am 30."),
300
- * ]);
301
- * // -> { name: "Alice", age: 30 }
302
- * ```
303
- */
304
- extract(schema: any, messages: Array<JsChatMessage>): Promise<any>
305
- }
306
- export type JsBaseProvider = BaseProvider
307
-
308
243
  /**
309
244
  * Universal provider defaults applicable to every provider role.
310
245
  *
@@ -372,7 +307,7 @@ export type JsBatchConfig = BatchConfig
372
307
  */
373
308
  export declare class BatchResult {
374
309
  /** One response per input request. `null` for failed requests. */
375
- get responses(): Array<JsCompletionResponse | undefined | null>
310
+ get responses(): Array<JsModelResponse | undefined | null>
376
311
  /** One error message per input request. `null` for successful requests. */
377
312
  get errors(): Array<string | undefined | null>
378
313
  /** Aggregated token usage across all successful responses. */
@@ -408,13 +343,13 @@ export declare class BedrockProvider {
408
343
  /** Get the model ID. */
409
344
  get modelId(): string
410
345
  /** Perform a chat completion. */
411
- complete(messages: Array<JsChatMessage>): Promise<JsCompletionResponse>
346
+ complete(messages: Array<JsChatMessage>): Promise<JsModelResponse>
412
347
  /** Perform a chat completion with additional options. */
413
- completeWithOptions(messages: Array<JsChatMessage>, options: JsCompletionOptions): Promise<JsCompletionResponse>
348
+ completeWithOptions(messages: Array<JsChatMessage>, options: JsModelOptions): Promise<JsModelResponse>
414
349
  /** Stream a chat completion. */
415
350
  stream(messages: Array<JsChatMessage>, onChunk: StreamChunkCallbackTsfn): Promise<void>
416
351
  /** Stream a chat completion with additional options. */
417
- streamWithOptions(messages: Array<JsChatMessage>, onChunk: StreamChunkCallbackTsfn, options: JsCompletionOptions): Promise<void>
352
+ streamWithOptions(messages: Array<JsChatMessage>, onChunk: StreamChunkCallbackTsfn, options: JsModelOptions): Promise<void>
418
353
  }
419
354
  export type JsBedrockProvider = BedrockProvider
420
355
 
@@ -505,27 +440,27 @@ export type JsBytesWrapper = BytesWrapper
505
440
  * inner model.
506
441
  *
507
442
  * ```javascript
508
- * const cached = new CachedCompletionModel(
509
- * CompletionModel.openai(),
443
+ * const cached = new CachedModel(
444
+ * Model.openai(),
510
445
  * { ttlSeconds: 300, maxEntries: 1000 },
511
446
  * );
512
447
  * ```
513
448
  */
514
- export declare class CachedCompletionModel {
449
+ export declare class CachedModel {
515
450
  /** Wrap `model` with an in-memory response cache. */
516
- constructor(model: CompletionModel, config?: JsCacheConfig | undefined | null)
451
+ constructor(model: Model, config?: JsCacheConfig | undefined | null)
517
452
  /** The wrapped model's id. */
518
453
  get modelId(): string
519
454
  /**
520
455
  * Perform a chat completion, returning a cached response on a
521
456
  * hit and otherwise delegating to the inner model.
522
457
  */
523
- complete(messages: Array<JsChatMessage>): Promise<JsCompletionResponse>
458
+ complete(messages: Array<JsChatMessage>): Promise<JsModelResponse>
524
459
  /**
525
460
  * Perform a chat completion with options. The full options object
526
461
  * is included in the cache key.
527
462
  */
528
- completeWithOptions(messages: Array<JsChatMessage>, options: JsCompletionOptions): Promise<JsCompletionResponse>
463
+ completeWithOptions(messages: Array<JsChatMessage>, options: JsModelOptions): Promise<JsModelResponse>
529
464
  /**
530
465
  * Stream a chat completion. Streaming requests bypass the cache
531
466
  * entirely.
@@ -535,19 +470,19 @@ export declare class CachedCompletionModel {
535
470
  * Stream a chat completion with options. Streaming requests bypass
536
471
  * the cache entirely.
537
472
  */
538
- streamWithOptions(messages: Array<JsChatMessage>, onChunk: StreamChunkCallbackTsfn, options: JsCompletionOptions): Promise<void>
473
+ streamWithOptions(messages: Array<JsChatMessage>, onChunk: StreamChunkCallbackTsfn, options: JsModelOptions): Promise<void>
539
474
  /**
540
- * Convert this cache wrapper into a plain [`JsCompletionModel`] so
475
+ * Convert this cache wrapper into a plain [`JsModel`] so
541
476
  * it can be passed to APIs that expect the base type.
542
477
  */
543
- toCompletionModel(): CompletionModel
478
+ toModel(): Model
544
479
  }
545
- export type JsCachedCompletionModel = CachedCompletionModel
480
+ export type JsCachedModel = CachedModel
546
481
 
547
482
  /**
548
483
  * Built-in middleware that wraps the inner model with an in-memory
549
484
  * response cache. Equivalent to constructing a
550
- * [`super::wrappers::JsCachedCompletionModel`] but composable inside a
485
+ * [`super::wrappers::JsCachedModel`] but composable inside a
551
486
  * [`JsMiddlewareStack`].
552
487
  *
553
488
  * ```javascript
@@ -659,9 +594,9 @@ export declare class CandleLlmProvider {
659
594
  /** Get the model ID. */
660
595
  get modelId(): string
661
596
  /** Perform a chat completion. */
662
- complete(messages: Array<JsChatMessage>): Promise<JsCompletionResponse>
597
+ complete(messages: Array<JsChatMessage>): Promise<JsModelResponse>
663
598
  /** Perform a chat completion with additional options. */
664
- completeWithOptions(messages: Array<JsChatMessage>, options: JsCompletionOptions): Promise<JsCompletionResponse>
599
+ completeWithOptions(messages: Array<JsChatMessage>, options: JsModelOptions): Promise<JsModelResponse>
665
600
  /** Stream a chat completion. */
666
601
  stream(messages: Array<JsChatMessage>, onChunk: StreamChunkCallbackTsfn): Promise<void>
667
602
  /** Explicitly load the model weights into memory / `VRAM`. */
@@ -879,7 +814,7 @@ export type JsCheckpointStore = CheckpointStore
879
814
  export declare class Citation {
880
815
  /**
881
816
  * Construct a citation. Most callers receive these via
882
- * `CompletionResponse.citations` rather than building them by hand.
817
+ * `ModelResponse.citations` rather than building them by hand.
883
818
  */
884
819
  constructor(options: CitationOptions)
885
820
  /** The cited URL. */
@@ -899,6 +834,21 @@ export declare class Citation {
899
834
  }
900
835
  export type JsCitationClass = Citation
901
836
 
837
+ /**
838
+ * Typed handle wrapping an `EnCodec` neural audio codec backend.
839
+ *
840
+ * Mirrors [`blazen_llm::CodecBackendHandle`]. Construct it directly to
841
+ * get a default `facebook/encodec_24khz` handle; weights load lazily on
842
+ * first encode/decode.
843
+ */
844
+ export declare class CodecBackendHandle {
845
+ /** Build a default-configured `EnCodec` codec backend handle. */
846
+ constructor()
847
+ /** The wrapped backend's stable identifier. */
848
+ get id(): string
849
+ }
850
+ export type JsCodecBackendHandle = CodecBackendHandle
851
+
902
852
  /** A Cohere chat completion provider. */
903
853
  export declare class CohereProvider {
904
854
  /** Create a new Cohere provider. */
@@ -912,409 +862,94 @@ export declare class CohereProvider {
912
862
  */
913
863
  static embeddingModel(options?: JsProviderOptions | undefined | null): JsOpenAiCompatEmbeddingModel
914
864
  /** Perform a chat completion. */
915
- complete(messages: Array<JsChatMessage>): Promise<JsCompletionResponse>
865
+ complete(messages: Array<JsChatMessage>): Promise<JsModelResponse>
916
866
  /** Perform a chat completion with additional options. */
917
- completeWithOptions(messages: Array<JsChatMessage>, options: JsCompletionOptions): Promise<JsCompletionResponse>
867
+ completeWithOptions(messages: Array<JsChatMessage>, options: JsModelOptions): Promise<JsModelResponse>
918
868
  /** Stream a chat completion. */
919
869
  stream(messages: Array<JsChatMessage>, onChunk: StreamChunkCallbackTsfn): Promise<void>
920
870
  /** Stream a chat completion with additional options. */
921
- streamWithOptions(messages: Array<JsChatMessage>, onChunk: StreamChunkCallbackTsfn, options: JsCompletionOptions): Promise<void>
871
+ streamWithOptions(messages: Array<JsChatMessage>, onChunk: StreamChunkCallbackTsfn, options: JsModelOptions): Promise<void>
922
872
  }
923
873
  export type JsCohereProvider = CohereProvider
924
874
 
925
875
  /**
926
- * A chat completion model.
927
- *
928
- * Use the static factory methods to create an instance for your provider:
929
- *
930
- * ```javascript
931
- * const model = CompletionModel.openai({ apiKey: "sk-..." });
932
- * const response = await model.complete([
933
- * ChatMessage.user("What is 2 + 2?")
934
- * ]);
935
- * ```
936
- *
937
- * Or extend the class to implement a custom provider:
876
+ * Pluggable registry for multimodal content. Wraps
877
+ * [`Arc<dyn blazen_llm::content::ContentStore>`].
938
878
  *
939
- * ```javascript
940
- * class MyLLM extends CompletionModel {
941
- * constructor() {
942
- * super({ modelId: "my-custom-model" });
943
- * }
944
- * async complete(messages) { /* ... *\/ }
945
- * }
946
- * ```
879
+ * Construct via the static factories (e.g. `ContentStore.inMemory()`,
880
+ * `ContentStore.custom({ put, resolve, fetchBytes })`) or by extending
881
+ * `ContentStore` and overriding the async methods. Stores are cheap to
882
+ * clone — internally an `Arc` — so passing the same instance across
883
+ * multiple agents / requests is fine.
947
884
  */
948
- export declare class CompletionModel {
885
+ export declare class ContentStore {
949
886
  /**
950
- * Construct a base `CompletionModel`.
951
- *
952
- * Called by JavaScript subclasses via `super(config)`. The `config`
953
- * parameter is optional and carries metadata such as `modelId`.
954
- *
955
- * Instances created this way have no inner Rust provider -- calling
956
- * `complete()` or `stream()` without overriding them in the subclass
957
- * will throw.
887
+ * Base-class constructor. Call from your subclass via `super()`.
888
+ * On its own, the base class is not useful — the default method
889
+ * implementations raise.
958
890
  */
959
- constructor(config?: CompletionModelConfig | undefined | null)
960
- /** Create an `OpenAI` completion model. */
961
- static openai(options?: JsProviderOptions | undefined | null): CompletionModel
962
- /** Create an Anthropic completion model. */
963
- static anthropic(options?: JsProviderOptions | undefined | null): CompletionModel
964
- /** Create a Google Gemini completion model. */
965
- static gemini(options?: JsProviderOptions | undefined | null): CompletionModel
966
- /** Create an Azure `OpenAI` completion model. */
967
- static azure(options: JsAzureOptions): CompletionModel
891
+ constructor()
892
+ /** Build a default ephemeral in-memory store. */
893
+ static inMemory(): ContentStore
894
+ /** Build a store backed by the `OpenAI` Files API. */
895
+ static openaiFiles(apiKey: string, baseUrl?: string | undefined | null): ContentStore
896
+ /** Build a store backed by the Anthropic Files API. */
897
+ static anthropicFiles(apiKey: string, baseUrl?: string | undefined | null): ContentStore
898
+ /** Build a store backed by the Gemini Files API. */
899
+ static geminiFiles(apiKey: string, baseUrl?: string | undefined | null): ContentStore
900
+ /** Build a store backed by fal.ai's storage API. */
901
+ static falStorage(apiKey: string, baseUrl?: string | undefined | null): ContentStore
968
902
  /**
969
- * Create a fal.ai completion model.
903
+ * Build a store backed by user-supplied async callbacks.
970
904
  *
971
- * `options` optionally configures the LLM model, endpoint family,
972
- * enterprise tier, and modality auto-routing. Defaults to the
973
- * OpenAI-compatible chat-completions endpoint.
974
- */
975
- static fal(options?: JsFalOptions | undefined | null): CompletionModel
976
- /** Create an `OpenRouter` completion model. */
977
- static openrouter(options?: JsProviderOptions | undefined | null): CompletionModel
978
- /** Create a Groq completion model. */
979
- static groq(options?: JsProviderOptions | undefined | null): CompletionModel
980
- /** Create a Together AI completion model. */
981
- static together(options?: JsProviderOptions | undefined | null): CompletionModel
982
- /** Create a Mistral AI completion model. */
983
- static mistral(options?: JsProviderOptions | undefined | null): CompletionModel
984
- /** Create a `DeepSeek` completion model. */
985
- static deepseek(options?: JsProviderOptions | undefined | null): CompletionModel
986
- /** Create a Fireworks AI completion model. */
987
- static fireworks(options?: JsProviderOptions | undefined | null): CompletionModel
988
- /** Create a Perplexity completion model. */
989
- static perplexity(options?: JsProviderOptions | undefined | null): CompletionModel
990
- /** Create an xAI (Grok) completion model. */
991
- static xai(options?: JsProviderOptions | undefined | null): CompletionModel
992
- /** Create a Cohere completion model. */
993
- static cohere(options?: JsProviderOptions | undefined | null): CompletionModel
994
- /** Create an AWS Bedrock completion model. */
995
- static bedrock(options: JsBedrockOptions): CompletionModel
996
- /**
997
- * Create a local Ollama completion model.
905
+ * Mirrors the Rust [`CustomContentStore::builder`] API. The
906
+ * `options` object must provide at least `put`, `resolve`, and
907
+ * `fetchBytes`; `fetchStream` and `delete` are optional. All
908
+ * callbacks must be `async` (or return a `Promise`).
998
909
  *
999
- * Talks to a running Ollama server (defaults to `http://host:port/v1`).
1000
- * No API key is required.
910
+ * Argument shapes seen by JS:
1001
911
  *
1002
- * ```javascript
1003
- * const model = CompletionModel.ollama("localhost", 11434, "llama3.1:8b");
1004
- * ```
912
+ * - `put(body, hint)`: `body` is a JSON-tagged
913
+ * [`ContentBody`] (`{type: "bytes", data: number[]}`,
914
+ * `{type: "url", url: string}`, `{type: "local_path", path: string}`,
915
+ * or `{type: "provider_file", provider: string, id: string}`).
916
+ * `hint` is a [`ContentHint`] dict (all fields optional). Must
917
+ * resolve with a [`ContentHandle`]-shaped object
918
+ * `{id, kind, mimeType?, byteSize?, displayName?}`.
919
+ * - `resolve(handle)`: `handle` is a [`ContentHandle`] dict. Must
920
+ * resolve with a serialized [`MediaSource`] object
921
+ * (e.g. `{type: "url", url: "..."}`).
922
+ * - `fetchBytes(handle)`: must resolve with a `Buffer`,
923
+ * `Uint8Array`, or `number[]` of bytes.
924
+ * - `fetchStream(handle)` (optional): may resolve with either bytes
925
+ * (`Buffer` / `Uint8Array` / `number[]` / base64 string) or an
926
+ * `AsyncIterable<Uint8Array>` for chunk-by-chunk streaming.
927
+ * - `delete(handle)` (optional): must resolve with `undefined`.
1005
928
  */
1006
- static ollama(host: string, port: number, model: string): CompletionModel
929
+ static custom(options: CustomContentStoreOptions): ContentStore
1007
930
  /**
1008
- * Create a local LM Studio completion model.
1009
- *
1010
- * Talks to a running LM Studio server's OpenAI-compatible endpoint.
931
+ * Persist content and return a freshly-issued handle.
1011
932
  *
1012
- * ```javascript
1013
- * const model = CompletionModel.lmStudio("localhost", 1234, "my-model");
1014
- * ```
933
+ * `body` is either:
934
+ * - a `Buffer` — inline bytes uploaded to the store, or
935
+ * - a `string` — interpreted as a URL when it contains `"://"` (the
936
+ * store records the reference) and as a local filesystem path
937
+ * otherwise (the store reads or copies the file as needed).
1015
938
  */
1016
- static lmStudio(host: string, port: number, model: string): CompletionModel
939
+ put(body: Buffer | string, options: PutOptions): Promise<JsContentHandle>
1017
940
  /**
1018
- * Create a generic OpenAI-compatible completion model.
1019
- *
1020
- * Drives any OpenAI-compatible chat-completions endpoint with the
1021
- * supplied [`JsOpenAiCompatConfig`].
1022
- *
1023
- * ```javascript
1024
- * const model = CompletionModel.openaiCompat("my-host", {
1025
- * providerName: "my-host",
1026
- * baseUrl: "https://api.example.com/v1",
1027
- * apiKey: "sk-...",
1028
- * defaultModel: "my-model",
1029
- * });
1030
- * ```
941
+ * Resolve a handle to a wire-renderable [`MediaSource`] (returned as a
942
+ * JS object — the same JSON shape Blazen's request builders accept).
1031
943
  */
1032
- static openaiCompat(providerId: string, config: JsOpenAiCompatConfig): CompletionModel
944
+ resolve(handle: JsContentHandle): Promise<any>
1033
945
  /**
1034
- * Create a fully user-defined completion model backed by a JavaScript
1035
- * host object.
1036
- *
1037
- * `hostObject` must expose Blazen capability methods (e.g.
1038
- * `complete`, `stream`) using the camelCase trait-method names. The
1039
- * optional `providerId` is used for logging; defaults to `"custom"`.
1040
- *
1041
- * ```javascript
1042
- * class MyProvider {
1043
- * async complete(request) { /* ... *\/ }
1044
- * }
1045
- * const model = CompletionModel.custom(new MyProvider(), "my-provider");
1046
- * ```
1047
- */
1048
- static custom(hostObject: object, providerId?: string | undefined | null): CompletionModel
1049
- /** Get the model ID. */
1050
- get modelId(): string
1051
- /**
1052
- * Wrap this model with automatic retry on transient failures.
1053
- *
1054
- * ```javascript
1055
- * const model = CompletionModel.openrouter({ apiKey: key });
1056
- * const withRetry = model.withRetry({ maxRetries: 3, initialDelayMs: 1000 });
1057
- * ```
1058
- */
1059
- withRetry(config?: JsRetryConfig | undefined | null): CompletionModel
1060
- /**
1061
- * Wrap this model with an in-memory response cache.
1062
- *
1063
- * Streaming requests are never cached and always delegate directly to the
1064
- * underlying model.
1065
- *
1066
- * ```javascript
1067
- * const cached = model.withCache({ ttlSeconds: 300, maxEntries: 1000 });
1068
- * ```
1069
- */
1070
- withCache(config?: JsCacheConfig | undefined | null): CompletionModel
1071
- /**
1072
- * Create a fallback model that tries multiple providers in order.
1073
- *
1074
- * When the primary provider fails with a transient error (rate limit,
1075
- * timeout, server error) the request is automatically forwarded to the
1076
- * next provider. Non-retryable errors short-circuit immediately.
1077
- *
1078
- * ```javascript
1079
- * const model = CompletionModel.withFallback([modelA, modelB]);
1080
- * ```
1081
- */
1082
- static withFallback(models: Array<CompletionModel>): CompletionModel
1083
- /**
1084
- * Perform a chat completion.
1085
- *
1086
- * Messages should be an array of `ChatMessage` instances.
1087
- *
1088
- * Returns a typed response with `content`, `toolCalls`, `usage`, `model`,
1089
- * and `finishReason` fields.
1090
- */
1091
- complete(messages: Array<JsChatMessage>): Promise<JsCompletionResponse>
1092
- /**
1093
- * Perform a chat completion with additional options.
1094
- *
1095
- * Options object may include:
1096
- * - `temperature` (number): Sampling temperature (0.0 - 2.0)
1097
- * - `maxTokens` (number): Maximum tokens to generate
1098
- * - `topP` (number): Nucleus sampling parameter
1099
- * - `model` (string): Override the default model
1100
- * - `tools` (array): Tool definitions for function calling
1101
- */
1102
- completeWithOptions(messages: Array<JsChatMessage>, options: JsCompletionOptions): Promise<JsCompletionResponse>
1103
- /**
1104
- * Stream a chat completion.
1105
- *
1106
- * The `onChunk` callback receives each chunk as a typed `StreamChunk` with
1107
- * `delta`, `finishReason`, and `toolCalls` fields.
1108
- *
1109
- * ```javascript
1110
- * await model.stream(
1111
- * [ChatMessage.user("Tell me a story")],
1112
- * (chunk) => { if (chunk.delta) process.stdout.write(chunk.delta); }
1113
- * );
1114
- * ```
1115
- */
1116
- stream(messages: Array<JsChatMessage>, onChunk: StreamChunkCallbackTsfn): Promise<void>
1117
- /**
1118
- * Stream a chat completion with additional options.
1119
- *
1120
- * Options object may include:
1121
- * - `temperature` (number): Sampling temperature (0.0 - 2.0)
1122
- * - `maxTokens` (number): Maximum tokens to generate
1123
- * - `topP` (number): Nucleus sampling parameter
1124
- * - `model` (string): Override the default model
1125
- * - `tools` (array): Tool definitions for function calling
1126
- */
1127
- streamWithOptions(messages: Array<JsChatMessage>, onChunk: StreamChunkCallbackTsfn, options: JsCompletionOptions): Promise<void>
1128
- /**
1129
- * Explicitly load the model weights into memory / `VRAM`.
1130
- *
1131
- * For remote providers (`OpenAI`, Anthropic, fal, etc.) this throws --
1132
- * there is no local model to load. For local providers (mistral.rs,
1133
- * llama.cpp, candle) this triggers the download + load synchronously,
1134
- * so the next inference call does not pay the startup cost.
1135
- *
1136
- * Idempotent: calling `load` on an already-loaded model is a no-op
1137
- * that resolves immediately.
1138
- */
1139
- load(): Promise<void>
1140
- /**
1141
- * Drop the loaded model and free its memory / `VRAM`.
1142
- *
1143
- * For remote providers this throws. For local providers this frees
1144
- * `GPU` memory so the process can load a different model. Idempotent.
1145
- */
1146
- unload(): Promise<void>
1147
- /**
1148
- * Whether the model is currently loaded in memory / `VRAM`.
1149
- *
1150
- * Always returns `false` for remote providers (they have no local
1151
- * model to load). Returns the real state for local providers.
1152
- */
1153
- isLoaded(): Promise<boolean>
1154
- /**
1155
- * Approximate memory footprint in bytes (host RAM if the
1156
- * provider targets the CPU, GPU VRAM otherwise), if the
1157
- * implementation can report it. Returns `null` for remote
1158
- * providers or for local providers that do not expose memory
1159
- * usage.
1160
- *
1161
- * Note: napi-rs exposes this as a JS `number`. The underlying
1162
- * [`blazen_llm::LocalModel::memory_bytes`] returns `u64`; we clamp
1163
- * to `i64::MAX` (~9.2 exabytes) when surfacing through
1164
- * `JSON`-compatible types, which is effectively lossless for any
1165
- * realistic footprint.
1166
- */
1167
- memoryBytes(): Promise<number | null>
1168
- /**
1169
- * Create a local mistral.rs completion model.
1170
- *
1171
- * Runs LLM inference entirely on-device -- no API key required.
1172
- *
1173
- * ```javascript
1174
- * const model = CompletionModel.mistralrs({
1175
- * modelId: "mistralai/Mistral-7B-Instruct-v0.3",
1176
- * });
1177
- * ```
1178
- */
1179
- static mistralrs(options: JsMistralRsOptions): CompletionModel
1180
- /**
1181
- * Wrap this model in a [`TracingCompletionModel`] that emits a
1182
- * structured `tracing` span around every `complete` and `stream`
1183
- * call.
1184
- *
1185
- * `name` is recorded on the span as the `provider` field. It is
1186
- * leaked into a `&'static str` because the underlying span macro
1187
- * captures it by reference for the process lifetime; this is
1188
- * intentional and bounded by the small set of distinct provider
1189
- * names a typical application uses.
1190
- *
1191
- * ```javascript
1192
- * const traced = CompletionModel.openai({ apiKey }).withTracing("openai");
1193
- * ```
1194
- */
1195
- withTracing(name: string): CompletionModel
1196
- }
1197
- export type JsCompletionModel = CompletionModel
1198
-
1199
- /**
1200
- * Completion-role provider defaults: system prompt, default tools,
1201
- * `responseFormat`, and a typed `beforeCompletion` hook.
1202
- *
1203
- * ```javascript
1204
- * import { BaseProviderDefaults, CompletionProviderDefaults } from "blazen";
1205
- *
1206
- * const d = new CompletionProviderDefaults(
1207
- * new BaseProviderDefaults(),
1208
- * "Be terse.",
1209
- * [], // default tools
1210
- * { type: "json_object" },
1211
- * async (request) => { /* mutate request *\/ },
1212
- * );
1213
- * ```
1214
- */
1215
- export declare class CompletionProviderDefaults {
1216
- /** Construct completion-role defaults. */
1217
- constructor(base?: BaseProviderDefaults | undefined | null, systemPrompt?: string | undefined | null, tools?: Array<JsToolDefinition> | undefined | null, responseFormat?: any | undefined | null, beforeCompletion?: BeforeCompletionTsfn | undefined | null)
1218
- /**
1219
- * The system prompt prepended to requests when the request itself
1220
- * carries no system message.
1221
- */
1222
- get systemPrompt(): string | null
1223
- /** Replace the system prompt. Pass `null` to clear. */
1224
- set systemPrompt(value: string | undefined | null)
1225
- /** The default tools appended to every completion request. */
1226
- get tools(): Array<JsToolDefinition>
1227
- /** Replace the default tools. */
1228
- set tools(value: Array<JsToolDefinition> | undefined | null)
1229
- /** Default `response_format` (JSON Schema or similar object). */
1230
- get responseFormat(): any | null
1231
- /** Replace the default `responseFormat`. Pass `null` to clear. */
1232
- set responseFormat(value: any | undefined | null)
1233
- /** Returns `true` when a `beforeCompletion` hook is configured. */
1234
- get hasBeforeCompletion(): boolean
1235
- /** Replace the typed `beforeCompletion` hook. Pass `null` to clear. */
1236
- set beforeCompletion(hook: BeforeCompletionTsfn | undefined | null)
1237
- }
1238
- export type JsCompletionProviderDefaults = CompletionProviderDefaults
1239
-
1240
- /**
1241
- * Pluggable registry for multimodal content. Wraps
1242
- * [`Arc<dyn blazen_llm::content::ContentStore>`].
1243
- *
1244
- * Construct via the static factories (e.g. `ContentStore.inMemory()`,
1245
- * `ContentStore.custom({ put, resolve, fetchBytes })`) or by extending
1246
- * `ContentStore` and overriding the async methods. Stores are cheap to
1247
- * clone — internally an `Arc` — so passing the same instance across
1248
- * multiple agents / requests is fine.
1249
- */
1250
- export declare class ContentStore {
1251
- /**
1252
- * Base-class constructor. Call from your subclass via `super()`.
1253
- * On its own, the base class is not useful — the default method
1254
- * implementations raise.
1255
- */
1256
- constructor()
1257
- /** Build a default ephemeral in-memory store. */
1258
- static inMemory(): ContentStore
1259
- /** Build a store backed by the `OpenAI` Files API. */
1260
- static openaiFiles(apiKey: string, baseUrl?: string | undefined | null): ContentStore
1261
- /** Build a store backed by the Anthropic Files API. */
1262
- static anthropicFiles(apiKey: string, baseUrl?: string | undefined | null): ContentStore
1263
- /** Build a store backed by the Gemini Files API. */
1264
- static geminiFiles(apiKey: string, baseUrl?: string | undefined | null): ContentStore
1265
- /** Build a store backed by fal.ai's storage API. */
1266
- static falStorage(apiKey: string, baseUrl?: string | undefined | null): ContentStore
1267
- /**
1268
- * Build a store backed by user-supplied async callbacks.
1269
- *
1270
- * Mirrors the Rust [`CustomContentStore::builder`] API. The
1271
- * `options` object must provide at least `put`, `resolve`, and
1272
- * `fetchBytes`; `fetchStream` and `delete` are optional. All
1273
- * callbacks must be `async` (or return a `Promise`).
1274
- *
1275
- * Argument shapes seen by JS:
1276
- *
1277
- * - `put(body, hint)`: `body` is a JSON-tagged
1278
- * [`ContentBody`] (`{type: "bytes", data: number[]}`,
1279
- * `{type: "url", url: string}`, `{type: "local_path", path: string}`,
1280
- * or `{type: "provider_file", provider: string, id: string}`).
1281
- * `hint` is a [`ContentHint`] dict (all fields optional). Must
1282
- * resolve with a [`ContentHandle`]-shaped object
1283
- * `{id, kind, mimeType?, byteSize?, displayName?}`.
1284
- * - `resolve(handle)`: `handle` is a [`ContentHandle`] dict. Must
1285
- * resolve with a serialized [`MediaSource`] object
1286
- * (e.g. `{type: "url", url: "..."}`).
1287
- * - `fetchBytes(handle)`: must resolve with a `Buffer`,
1288
- * `Uint8Array`, or `number[]` of bytes.
1289
- * - `fetchStream(handle)` (optional): may resolve with either bytes
1290
- * (`Buffer` / `Uint8Array` / `number[]` / base64 string) or an
1291
- * `AsyncIterable<Uint8Array>` for chunk-by-chunk streaming.
1292
- * - `delete(handle)` (optional): must resolve with `undefined`.
1293
- */
1294
- static custom(options: CustomContentStoreOptions): ContentStore
1295
- /**
1296
- * Persist content and return a freshly-issued handle.
1297
- *
1298
- * `body` is either:
1299
- * - a `Buffer` — inline bytes uploaded to the store, or
1300
- * - a `string` — interpreted as a URL when it contains `"://"` (the
1301
- * store records the reference) and as a local filesystem path
1302
- * otherwise (the store reads or copies the file as needed).
1303
- */
1304
- put(body: Buffer | string, options: PutOptions): Promise<JsContentHandle>
1305
- /**
1306
- * Resolve a handle to a wire-renderable [`MediaSource`] (returned as a
1307
- * JS object — the same JSON shape Blazen's request builders accept).
1308
- */
1309
- resolve(handle: JsContentHandle): Promise<any>
1310
- /**
1311
- * Fetch raw bytes for a handle. Tools that need to operate on the
1312
- * actual content (parse a PDF, transcribe audio) call this; most tools
1313
- * reason over the handle and let `resolve` produce the wire form.
1314
- */
1315
- fetchBytes(handle: JsContentHandle): Promise<Buffer>
1316
- /**
1317
- * Stream raw bytes for a handle chunk-by-chunk.
946
+ * Fetch raw bytes for a handle. Tools that need to operate on the
947
+ * actual content (parse a PDF, transcribe audio) call this; most tools
948
+ * reason over the handle and let `resolve` produce the wire form.
949
+ */
950
+ fetchBytes(handle: JsContentHandle): Promise<Buffer>
951
+ /**
952
+ * Stream raw bytes for a handle chunk-by-chunk.
1318
953
  *
1319
954
  * Returns a `Promise<AsyncIterable<Uint8Array>>`. Each `next()` call on
1320
955
  * the iterator pulls one chunk from the underlying [`ByteStream`]; the
@@ -1373,7 +1008,7 @@ export declare class Context {
1373
1008
  * The event will be routed to any step whose `eventTypes` list includes
1374
1009
  * its event type. The event object must have a `type` field.
1375
1010
  */
1376
- sendEvent(event: any): Promise<void>
1011
+ sendEvent(event: Event): Promise<void>
1377
1012
  /**
1378
1013
  * Publish an event to the external broadcast stream.
1379
1014
  *
@@ -1381,7 +1016,7 @@ export declare class Context {
1381
1016
  * Unlike `sendEvent`, this does NOT route the event through the
1382
1017
  * internal step registry.
1383
1018
  */
1384
- writeEventToStream(event: any): Promise<void>
1019
+ writeEventToStream(event: Event): Promise<void>
1385
1020
  /**
1386
1021
  * Store raw binary data under the given key.
1387
1022
  *
@@ -1784,13 +1419,13 @@ export declare class DeepSeekProvider {
1784
1419
  /** Get the model ID. */
1785
1420
  get modelId(): string
1786
1421
  /** Perform a chat completion. */
1787
- complete(messages: Array<JsChatMessage>): Promise<JsCompletionResponse>
1422
+ complete(messages: Array<JsChatMessage>): Promise<JsModelResponse>
1788
1423
  /** Perform a chat completion with additional options. */
1789
- completeWithOptions(messages: Array<JsChatMessage>, options: JsCompletionOptions): Promise<JsCompletionResponse>
1424
+ completeWithOptions(messages: Array<JsChatMessage>, options: JsModelOptions): Promise<JsModelResponse>
1790
1425
  /** Stream a chat completion. */
1791
1426
  stream(messages: Array<JsChatMessage>, onChunk: StreamChunkCallbackTsfn): Promise<void>
1792
1427
  /** Stream a chat completion with additional options. */
1793
- streamWithOptions(messages: Array<JsChatMessage>, onChunk: StreamChunkCallbackTsfn, options: JsCompletionOptions): Promise<void>
1428
+ streamWithOptions(messages: Array<JsChatMessage>, onChunk: StreamChunkCallbackTsfn, options: JsModelOptions): Promise<void>
1794
1429
  }
1795
1430
  export type JsDeepSeekProvider = DeepSeekProvider
1796
1431
 
@@ -1925,9 +1560,35 @@ export declare class EmbeddingModel {
1925
1560
  export type JsEmbeddingModel = EmbeddingModel
1926
1561
 
1927
1562
  /**
1928
- * Embedding-role provider defaults. V1 just wraps a
1929
- * [`JsBaseProviderDefaults`].
1930
- */
1563
+ * Base class for vector-embedding providers.
1564
+ *
1565
+ * Mirrors the [`blazen_llm::providers::EmbeddingProvider`] capability
1566
+ * trait. Subclass and override `embed()` to implement a custom embedding
1567
+ * backend.
1568
+ */
1569
+ export declare class EmbeddingProvider {
1570
+ constructor(config: CapabilityProviderConfig)
1571
+ /** The provider identifier. */
1572
+ get providerId(): string | null
1573
+ /** The base URL, if set. */
1574
+ get baseUrl(): string | null
1575
+ /**
1576
+ * Estimated memory footprint in bytes (host RAM if the
1577
+ * provider targets the CPU, GPU VRAM otherwise), if set.
1578
+ */
1579
+ get memoryEstimateBytes(): number | null
1580
+ /**
1581
+ * Embed a batch of texts. Receives an array of strings and returns
1582
+ * an array of float vectors (one per input).
1583
+ */
1584
+ embed(texts: any): Promise<any>
1585
+ }
1586
+ export type JsEmbeddingProvider = EmbeddingProvider
1587
+
1588
+ /**
1589
+ * Embedding-role provider defaults. V1 just wraps a
1590
+ * [`JsBaseProviderDefaults`].
1591
+ */
1931
1592
  export declare class EmbeddingProviderDefaults {
1932
1593
  /** Construct embedding-role defaults. */
1933
1594
  constructor(base?: BaseProviderDefaults | undefined | null)
@@ -2046,26 +1707,26 @@ export declare class FallbackModel {
2046
1707
  * degenerate -- prefer using the underlying model directly in that
2047
1708
  * case.
2048
1709
  */
2049
- constructor(models: Array<CompletionModel>)
1710
+ constructor(models: Array<Model>)
2050
1711
  /** The model id of the primary provider. */
2051
1712
  get modelId(): string
2052
1713
  /** Perform a chat completion, falling back across providers. */
2053
- complete(messages: Array<JsChatMessage>): Promise<JsCompletionResponse>
1714
+ complete(messages: Array<JsChatMessage>): Promise<JsModelResponse>
2054
1715
  /** Perform a chat completion with options, falling back across providers. */
2055
- completeWithOptions(messages: Array<JsChatMessage>, options: JsCompletionOptions): Promise<JsCompletionResponse>
1716
+ completeWithOptions(messages: Array<JsChatMessage>, options: JsModelOptions): Promise<JsModelResponse>
2056
1717
  /**
2057
1718
  * Stream a chat completion, falling back across providers on
2058
1719
  * retryable initial-stream failures.
2059
1720
  */
2060
1721
  stream(messages: Array<JsChatMessage>, onChunk: StreamChunkCallbackTsfn): Promise<void>
2061
1722
  /** Stream a chat completion with options. */
2062
- streamWithOptions(messages: Array<JsChatMessage>, onChunk: StreamChunkCallbackTsfn, options: JsCompletionOptions): Promise<void>
1723
+ streamWithOptions(messages: Array<JsChatMessage>, onChunk: StreamChunkCallbackTsfn, options: JsModelOptions): Promise<void>
2063
1724
  /**
2064
- * Convert this fallback wrapper into a plain [`JsCompletionModel`]
1725
+ * Convert this fallback wrapper into a plain [`JsModel`]
2065
1726
  * so it can be passed to APIs that expect the base type
2066
1727
  * (`Agent`, `Batch`, further wrappers, etc.).
2067
1728
  */
2068
- toCompletionModel(): CompletionModel
1729
+ toModel(): Model
2069
1730
  }
2070
1731
  export type JsFallbackModel = FallbackModel
2071
1732
 
@@ -2141,7 +1802,7 @@ export declare class FalProvider {
2141
1802
  /** Alias for [`awaitCompletion`](Self::await_completion). */
2142
1803
  result(handle: JsJobHandle): Promise<JsComputeResult>
2143
1804
  /** Perform a chat completion via fal.ai's `any-llm` proxy. */
2144
- complete(messages: Array<JsChatMessage>): Promise<JsCompletionResponse>
1805
+ complete(messages: Array<JsChatMessage>): Promise<JsModelResponse>
2145
1806
  }
2146
1807
  export type JsFalProvider = FalProvider
2147
1808
 
@@ -2183,6 +1844,26 @@ export declare class FastEmbedModel {
2183
1844
  }
2184
1845
  export type JsFastEmbedModel = FastEmbedModel
2185
1846
 
1847
+ /**
1848
+ * The faster-whisper STT backend.
1849
+ *
1850
+ * Mirrors [`blazen_llm::FasterWhisperBackend`]. Construct with an
1851
+ * optional [`FasterWhisperConfig`](JsFasterWhisperConfig); weights load
1852
+ * lazily on first transcription.
1853
+ */
1854
+ export declare class FasterWhisperBackend {
1855
+ /**
1856
+ * Build a faster-whisper backend. No weights are loaded until the
1857
+ * first transcription call.
1858
+ */
1859
+ constructor(config?: FasterWhisperConfig | undefined | null)
1860
+ /** The stable backend identifier (`faster-whisper:<model_id>`). */
1861
+ get id(): string
1862
+ /** Wrap this backend in a typed [`SttBackendHandle`]. */
1863
+ intoHandle(): JsSttBackendHandle
1864
+ }
1865
+ export type JsFasterWhisperBackend = FasterWhisperBackend
1866
+
2186
1867
  /** A Fireworks AI chat completion provider. */
2187
1868
  export declare class FireworksProvider {
2188
1869
  /** Create a new Fireworks provider. */
@@ -2196,13 +1877,13 @@ export declare class FireworksProvider {
2196
1877
  */
2197
1878
  static embeddingModel(options?: JsProviderOptions | undefined | null): OpenAiCompatEmbeddingModel
2198
1879
  /** Perform a chat completion. */
2199
- complete(messages: Array<JsChatMessage>): Promise<JsCompletionResponse>
1880
+ complete(messages: Array<JsChatMessage>): Promise<JsModelResponse>
2200
1881
  /** Perform a chat completion with additional options. */
2201
- completeWithOptions(messages: Array<JsChatMessage>, options: JsCompletionOptions): Promise<JsCompletionResponse>
1882
+ completeWithOptions(messages: Array<JsChatMessage>, options: JsModelOptions): Promise<JsModelResponse>
2202
1883
  /** Stream a chat completion. */
2203
1884
  stream(messages: Array<JsChatMessage>, onChunk: StreamChunkCallbackTsfn): Promise<void>
2204
1885
  /** Stream a chat completion with additional options. */
2205
- streamWithOptions(messages: Array<JsChatMessage>, onChunk: StreamChunkCallbackTsfn, options: JsCompletionOptions): Promise<void>
1886
+ streamWithOptions(messages: Array<JsChatMessage>, onChunk: StreamChunkCallbackTsfn, options: JsModelOptions): Promise<void>
2206
1887
  }
2207
1888
  export type JsFireworksProvider = FireworksProvider
2208
1889
 
@@ -2220,13 +1901,13 @@ export declare class GeminiProvider {
2220
1901
  /** Get the model ID. */
2221
1902
  get modelId(): string
2222
1903
  /** Perform a chat completion. */
2223
- complete(messages: Array<JsChatMessage>): Promise<JsCompletionResponse>
1904
+ complete(messages: Array<JsChatMessage>): Promise<JsModelResponse>
2224
1905
  /** Perform a chat completion with additional options. */
2225
- completeWithOptions(messages: Array<JsChatMessage>, options: JsCompletionOptions): Promise<JsCompletionResponse>
1906
+ completeWithOptions(messages: Array<JsChatMessage>, options: JsModelOptions): Promise<JsModelResponse>
2226
1907
  /** Stream a chat completion. */
2227
1908
  stream(messages: Array<JsChatMessage>, onChunk: StreamChunkCallbackTsfn): Promise<void>
2228
1909
  /** Stream a chat completion with additional options. */
2229
- streamWithOptions(messages: Array<JsChatMessage>, onChunk: StreamChunkCallbackTsfn, options: JsCompletionOptions): Promise<void>
1910
+ streamWithOptions(messages: Array<JsChatMessage>, onChunk: StreamChunkCallbackTsfn, options: JsModelOptions): Promise<void>
2230
1911
  }
2231
1912
  export type JsGeminiProvider = GeminiProvider
2232
1913
 
@@ -2237,13 +1918,13 @@ export declare class GroqProvider {
2237
1918
  /** Get the model ID. */
2238
1919
  get modelId(): string
2239
1920
  /** Perform a chat completion. */
2240
- complete(messages: Array<JsChatMessage>): Promise<JsCompletionResponse>
1921
+ complete(messages: Array<JsChatMessage>): Promise<JsModelResponse>
2241
1922
  /** Perform a chat completion with additional options. */
2242
- completeWithOptions(messages: Array<JsChatMessage>, options: JsCompletionOptions): Promise<JsCompletionResponse>
1923
+ completeWithOptions(messages: Array<JsChatMessage>, options: JsModelOptions): Promise<JsModelResponse>
2243
1924
  /** Stream a chat completion. */
2244
1925
  stream(messages: Array<JsChatMessage>, onChunk: StreamChunkCallbackTsfn): Promise<void>
2245
1926
  /** Stream a chat completion with additional options. */
2246
- streamWithOptions(messages: Array<JsChatMessage>, onChunk: StreamChunkCallbackTsfn, options: JsCompletionOptions): Promise<void>
1927
+ streamWithOptions(messages: Array<JsChatMessage>, onChunk: StreamChunkCallbackTsfn, options: JsModelOptions): Promise<void>
2247
1928
  }
2248
1929
  export type JsGroqProvider = GroqProvider
2249
1930
 
@@ -2412,6 +2093,31 @@ export declare class ImageGenerationProviderDefaults {
2412
2093
  }
2413
2094
  export type JsImageGenerationProviderDefaults = ImageGenerationProviderDefaults
2414
2095
 
2096
+ /**
2097
+ * Base class for 2D image-generation providers.
2098
+ *
2099
+ * Mirrors the [`blazen_llm::providers::ImageGenProvider`] capability
2100
+ * trait. Subclass and override `generateImage()` (and optionally
2101
+ * `upscaleImage()`) to implement a custom image backend.
2102
+ */
2103
+ export declare class ImageGenProvider {
2104
+ constructor(config: CapabilityProviderConfig)
2105
+ /** The provider identifier. */
2106
+ get providerId(): string | null
2107
+ /** The base URL, if set. */
2108
+ get baseUrl(): string | null
2109
+ /**
2110
+ * Estimated memory footprint in bytes (host RAM if the
2111
+ * provider targets the CPU, GPU VRAM otherwise), if set.
2112
+ */
2113
+ get memoryEstimateBytes(): number | null
2114
+ /** Generate images from a text prompt. */
2115
+ generateImage(request: any): Promise<any>
2116
+ /** Upscale an existing image. */
2117
+ upscaleImage(request: any): Promise<any>
2118
+ }
2119
+ export type JsImageGenProvider = ImageGenProvider
2120
+
2415
2121
  /**
2416
2122
  * Base class for custom image-generation providers.
2417
2123
  *
@@ -2689,6 +2395,26 @@ export declare class JsonlBackend {
2689
2395
  }
2690
2396
  export type JsJsonlBackend = JsonlBackend
2691
2397
 
2398
+ /**
2399
+ * JSONL-backed training dataset.
2400
+ *
2401
+ * Each line of the input file must deserialize to either
2402
+ * `{"messages": [{"role": ..., "content": ...}, ...]}` (OpenAI shape)
2403
+ * or `{"prompt": "...", "completion": "..."}` (legacy SFT).
2404
+ */
2405
+ export declare class JsonlDataset {
2406
+ /**
2407
+ * Load a JSONL training file using the tokenizer at `tokenizerPath`.
2408
+ *
2409
+ * # Errors
2410
+ *
2411
+ * Throws if the tokenizer cannot be loaded, the device string is
2412
+ * invalid, or the JSONL file fails to parse.
2413
+ */
2414
+ static fromPath(path: string, tokenizerPath: string, opts?: JsJsonlDatasetOptions | undefined | null): JsonlDataset
2415
+ }
2416
+ export type JsJsonlDataset = JsonlDataset
2417
+
2692
2418
  /**
2693
2419
  * Configuration for the Langfuse exporter.
2694
2420
  *
@@ -2831,9 +2557,9 @@ export declare class LlamaCppProvider {
2831
2557
  /** Get the model ID. */
2832
2558
  get modelId(): string
2833
2559
  /** Perform a chat completion. */
2834
- complete(messages: Array<JsChatMessage>): Promise<JsCompletionResponse>
2560
+ complete(messages: Array<JsChatMessage>): Promise<JsModelResponse>
2835
2561
  /** Perform a chat completion with additional options. */
2836
- completeWithOptions(messages: Array<JsChatMessage>, options: JsCompletionOptions): Promise<JsCompletionResponse>
2562
+ completeWithOptions(messages: Array<JsChatMessage>, options: JsModelOptions): Promise<JsModelResponse>
2837
2563
  /** Stream a chat completion. */
2838
2564
  stream(messages: Array<JsChatMessage>, onChunk: StreamChunkCallbackTsfn): Promise<void>
2839
2565
  /** Explicitly load the model weights into memory / `VRAM`. */
@@ -2850,6 +2576,149 @@ export declare class LlamaCppProvider {
2850
2576
  }
2851
2577
  export type JsLlamaCppProvider = LlamaCppProvider
2852
2578
 
2579
+ /**
2580
+ * Base class for large-language-model providers.
2581
+ *
2582
+ * Mirrors the [`blazen_llm::providers::LLMProvider`] capability trait.
2583
+ * Subclass and override `complete()` (and optionally `stream()`) to
2584
+ * implement a custom chat/completion backend.
2585
+ */
2586
+ export declare class LLMProvider {
2587
+ constructor(config: CapabilityProviderConfig)
2588
+ /** The provider identifier. */
2589
+ get providerId(): string | null
2590
+ /** The base URL, if set. */
2591
+ get baseUrl(): string | null
2592
+ /**
2593
+ * Estimated memory footprint in bytes (host RAM if the
2594
+ * provider targets the CPU, GPU VRAM otherwise), if set.
2595
+ */
2596
+ get memoryEstimateBytes(): number | null
2597
+ /**
2598
+ * Non-streaming completion. Receives a `ModelRequest`-shaped object
2599
+ * and returns a `ModelResponse`-shaped object.
2600
+ */
2601
+ complete(request: any): Promise<any>
2602
+ /**
2603
+ * Streaming completion. Receives a `ModelRequest`-shaped object and
2604
+ * returns the accumulated stream chunks.
2605
+ */
2606
+ stream(request: any): Promise<any>
2607
+ }
2608
+ export type JsLLMProvider = LLMProvider
2609
+
2610
+ /**
2611
+ * A completion provider wrapper that applies a
2612
+ * [`JsProviderDefaults`] to every completion request before
2613
+ * delegating to the inner model.
2614
+ *
2615
+ * `LlmProviderDefaults` is intended to be subclassed from JavaScript:
2616
+ *
2617
+ * ```javascript
2618
+ * import { LlmProviderDefaults, Model } from "blazen";
2619
+ *
2620
+ * class TerseLlm extends LlmProviderDefaults {
2621
+ * constructor() {
2622
+ * const inner = Model.openai({ apiKey: "sk-..." });
2623
+ * super(inner);
2624
+ * this.withSystemPrompt("Be terse.");
2625
+ * }
2626
+ * }
2627
+ * ```
2628
+ *
2629
+ * Today (V1) the constructor stores an opaque reference to the inner
2630
+ * object — Phase D will wire `class extends` to fire the JS `complete`
2631
+ * override before falling back to the inner Rust model.
2632
+ */
2633
+ export declare class LlmProviderDefaults {
2634
+ /**
2635
+ * Construct a new [`LlmProviderDefaults`].
2636
+ *
2637
+ * `inner` is the underlying completion model — pass a
2638
+ * [`JsModel`] instance. JS subclasses that fully
2639
+ * override `complete` may pass `null` here (Phase D will wire
2640
+ * subclass dispatch end-to-end; today calls to `complete` on a
2641
+ * subclass-only provider report unsupported).
2642
+ *
2643
+ * `defaults` optionally seeds the
2644
+ * [`JsProviderDefaults`]; when omitted, an empty
2645
+ * defaults bag is created.
2646
+ */
2647
+ constructor(inner?: JsModel | undefined | null, defaults?: JsProviderDefaults | undefined | null)
2648
+ /**
2649
+ * Set the default system prompt prepended to requests when no
2650
+ * system message is already present.
2651
+ */
2652
+ withSystemPrompt(prompt: string): LlmProviderDefaults
2653
+ /** Replace the default tools appended to every completion request. */
2654
+ withTools(tools: Array<JsToolDefinition>): LlmProviderDefaults
2655
+ /** Set the default `responseFormat` (JSON Schema object). */
2656
+ withResponseFormat(format: any): LlmProviderDefaults
2657
+ /**
2658
+ * Set the universal `beforeRequest` hook (fires for any request
2659
+ * type). V1: stored only — Phase B wires dispatch.
2660
+ */
2661
+ withBeforeRequest(hook: BeforeRequestTsfn): LlmProviderDefaults
2662
+ /**
2663
+ * Set the typed `beforeModel` hook (fires after the universal
2664
+ * hook, with a typed completion request). V1: stored only — Phase
2665
+ * B wires dispatch.
2666
+ */
2667
+ withBeforeModel(hook: BeforeModelTsfn): LlmProviderDefaults
2668
+ /** Replace the entire defaults bag. */
2669
+ withDefaults(defaults: JsProviderDefaults): LlmProviderDefaults
2670
+ /** The currently-configured defaults. */
2671
+ get defaults(): JsProviderDefaults
2672
+ /**
2673
+ * The inner model's `modelId`. Returns the empty string when the
2674
+ * provider was constructed without a Rust-side `inner` (JS subclass
2675
+ * path).
2676
+ */
2677
+ get modelId(): string
2678
+ /**
2679
+ * The provider identifier used for logging. Defaults to the inner
2680
+ * model's `modelId` when present, otherwise `"base"`. Subclasses
2681
+ * may override.
2682
+ */
2683
+ get providerId(): string
2684
+ /**
2685
+ * Typed structured extraction.
2686
+ *
2687
+ * Sends a completion request with a JSON Schema `response_format`
2688
+ * envelope and parses the model's response as JSON. The schema
2689
+ * argument is a plain JSON Schema object (callers using zod can
2690
+ * convert with `zodToJsonSchema(zSchema)` from the `zod-to-json-schema`
2691
+ * package).
2692
+ *
2693
+ * The `response_format` is wired up as the `OpenAI`-style
2694
+ * `{"type":"json_schema","json_schema":{"name":"Extract","schema":...,"strict":true}}`
2695
+ * envelope; provider implementations that don't natively support
2696
+ * structured outputs fall back to a system-instruction shim (see
2697
+ * `crates/blazen-llm/src/providers/anthropic.rs::build_json_schema_system_instruction`).
2698
+ *
2699
+ * Returns the parsed JSON value. The TypeScript surface declares
2700
+ * the return as `any` because the schema shape is only known at
2701
+ * runtime; callers can narrow via TS generics on their wrapper.
2702
+ *
2703
+ * ```typescript
2704
+ * const schema = {
2705
+ * type: "object",
2706
+ * properties: {
2707
+ * name: { type: "string" },
2708
+ * age: { type: "integer" },
2709
+ * },
2710
+ * required: ["name", "age"],
2711
+ * };
2712
+ * const result = await provider.extract(schema, [
2713
+ * ChatMessage.user("My name is Alice and I am 30."),
2714
+ * ]);
2715
+ * // -> { name: "Alice", age: 30 }
2716
+ * ```
2717
+ */
2718
+ extract(schema: any, messages: Array<JsChatMessage>): Promise<any>
2719
+ }
2720
+ export type JsBaseProvider = LlmProviderDefaults
2721
+
2853
2722
  /**
2854
2723
  * Base class for in-process model providers that load weights into
2855
2724
  * memory / VRAM.
@@ -2892,6 +2761,51 @@ export declare class LocalModel {
2892
2761
  }
2893
2762
  export type JsLocalModel = LocalModel
2894
2763
 
2764
+ /**
2765
+ * A pipeline stage that re-runs an inner stage until a hard iteration cap is
2766
+ * reached.
2767
+ *
2768
+ * The inner stage is a [`JsStage`] (sequential) or [`JsParallelStage`]
2769
+ * (parallel); it is consumed at construction time, so the same `Stage` /
2770
+ * `ParallelStage` instance cannot be reused. As noted in the module docs,
2771
+ * the v1 loop runs the inner stage exactly `maxIterations` times.
2772
+ *
2773
+ * ```typescript
2774
+ * const inner = new Stage("refine", wf);
2775
+ * const loop = new LoopStage("refine-loop", 3, inner);
2776
+ * ```
2777
+ */
2778
+ export declare class LoopStage {
2779
+ /**
2780
+ * Create a loop stage from a sequential [`JsStage`].
2781
+ *
2782
+ * `maxIterations` is the hard cap on the number of rounds. The inner
2783
+ * stage is consumed at construction time.
2784
+ */
2785
+ constructor(name: string, maxIterations: number, inner: JsStage)
2786
+ /**
2787
+ * Create a loop stage whose inner body is a parallel fan-out stage.
2788
+ *
2789
+ * `maxIterations` is the hard cap on the number of rounds. The inner
2790
+ * parallel stage is consumed at construction time.
2791
+ */
2792
+ static fromParallel(name: string, maxIterations: number, inner: JsParallelStage): LoopStage
2793
+ /**
2794
+ * The loop stage's human-readable name.
2795
+ *
2796
+ * Returns an empty string if the stage has already been consumed by a
2797
+ * `Pipeline`.
2798
+ */
2799
+ get name(): string
2800
+ /**
2801
+ * The hard iteration cap.
2802
+ *
2803
+ * Returns `0` if the stage has already been consumed by a `Pipeline`.
2804
+ */
2805
+ get maxIterations(): number
2806
+ }
2807
+ export type JsLoopStage = LoopStage
2808
+
2895
2809
  /**
2896
2810
  * A memory store that uses ELID for vector indexing and similarity search.
2897
2811
  *
@@ -3139,7 +3053,7 @@ export type JsMiddleware = Middleware
3139
3053
  * const stack = new MiddlewareStack();
3140
3054
  * stack.withRetry({ maxRetries: 3 });
3141
3055
  * stack.withCache({ ttlSeconds: 300 });
3142
- * const wrapped = stack.apply(CompletionModel.openai());
3056
+ * const wrapped = stack.apply(Model.openai());
3143
3057
  * ```
3144
3058
  */
3145
3059
  export declare class MiddlewareStack {
@@ -3171,12 +3085,12 @@ export declare class MiddlewareStack {
3171
3085
  get length(): number
3172
3086
  /**
3173
3087
  * Apply every registered layer to `model` and return the wrapped
3174
- * model as a fresh [`JsCompletionModel`].
3088
+ * model as a fresh [`JsModel`].
3175
3089
  *
3176
3090
  * The stack itself is left intact and can be re-applied to other
3177
3091
  * models.
3178
3092
  */
3179
- apply(model: CompletionModel): CompletionModel
3093
+ apply(model: JsModel): JsModel
3180
3094
  }
3181
3095
  export type JsMiddlewareStack = MiddlewareStack
3182
3096
 
@@ -3187,13 +3101,13 @@ export declare class MistralProvider {
3187
3101
  /** Get the model ID. */
3188
3102
  get modelId(): string
3189
3103
  /** Perform a chat completion. */
3190
- complete(messages: Array<JsChatMessage>): Promise<JsCompletionResponse>
3104
+ complete(messages: Array<JsChatMessage>): Promise<JsModelResponse>
3191
3105
  /** Perform a chat completion with additional options. */
3192
- completeWithOptions(messages: Array<JsChatMessage>, options: JsCompletionOptions): Promise<JsCompletionResponse>
3106
+ completeWithOptions(messages: Array<JsChatMessage>, options: JsModelOptions): Promise<JsModelResponse>
3193
3107
  /** Stream a chat completion. */
3194
3108
  stream(messages: Array<JsChatMessage>, onChunk: StreamChunkCallbackTsfn): Promise<void>
3195
3109
  /** Stream a chat completion with additional options. */
3196
- streamWithOptions(messages: Array<JsChatMessage>, onChunk: StreamChunkCallbackTsfn, options: JsCompletionOptions): Promise<void>
3110
+ streamWithOptions(messages: Array<JsChatMessage>, onChunk: StreamChunkCallbackTsfn, options: JsModelOptions): Promise<void>
3197
3111
  }
3198
3112
  export type JsMistralProvider = MistralProvider
3199
3113
 
@@ -3216,9 +3130,9 @@ export declare class MistralRsProvider {
3216
3130
  /** Get the model ID. */
3217
3131
  get modelId(): string
3218
3132
  /** Perform a chat completion. */
3219
- complete(messages: Array<JsChatMessage>): Promise<JsCompletionResponse>
3133
+ complete(messages: Array<JsChatMessage>): Promise<JsModelResponse>
3220
3134
  /** Perform a chat completion with additional options. */
3221
- completeWithOptions(messages: Array<JsChatMessage>, options: JsCompletionOptions): Promise<JsCompletionResponse>
3135
+ completeWithOptions(messages: Array<JsChatMessage>, options: JsModelOptions): Promise<JsModelResponse>
3222
3136
  /** Stream a chat completion. */
3223
3137
  stream(messages: Array<JsChatMessage>, onChunk: StreamChunkCallbackTsfn): Promise<void>
3224
3138
  /** Explicitly load the model weights into memory / `VRAM`. */
@@ -3236,61 +3150,554 @@ export declare class MistralRsProvider {
3236
3150
  export type JsMistralRsProvider = MistralRsProvider
3237
3151
 
3238
3152
  /**
3239
- * Local cache for ML models downloaded from `HuggingFace` Hub.
3153
+ * A chat completion model.
3240
3154
  *
3241
- * Models are stored under `{cacheDir}/{repoId}/{filename}`. Files are
3242
- * downloaded only once; subsequent calls return the cached path immediately.
3155
+ * Use the static factory methods to create an instance for your provider:
3243
3156
  *
3244
3157
  * ```javascript
3245
- * import { ModelCache } from 'blazen';
3158
+ * const model = Model.openai({ apiKey: "sk-..." });
3159
+ * const response = await model.complete([
3160
+ * ChatMessage.user("What is 2 + 2?")
3161
+ * ]);
3162
+ * ```
3246
3163
  *
3247
- * const cache = ModelCache.create();
3248
- * if (!cache.isCached('bert-base-uncased', 'config.json')) {
3249
- * await cache.download('bert-base-uncased', 'config.json', (downloaded, total) => {
3250
- * if (total !== null) {
3251
- * console.log(`${(downloaded / total * 100).toFixed(1)}%`);
3252
- * }
3253
- * });
3164
+ * Or extend the class to implement a custom provider:
3165
+ *
3166
+ * ```javascript
3167
+ * class MyLLM extends Model {
3168
+ * constructor() {
3169
+ * super({ modelId: "my-custom-model" });
3170
+ * }
3171
+ * async complete(messages) { /* ... *\/ }
3254
3172
  * }
3255
3173
  * ```
3256
3174
  */
3257
- export declare class ModelCache {
3175
+ export declare class Model {
3258
3176
  /**
3259
- * Create a cache in the default location.
3177
+ * Construct a base `Model`.
3260
3178
  *
3261
- * Uses `$BLAZEN_CACHE_DIR/models/` if set, otherwise falls back to
3262
- * `~/.cache/blazen/models/`.
3179
+ * Called by JavaScript subclasses via `super(config)`. The `config`
3180
+ * parameter is optional and carries metadata such as `modelId`.
3181
+ *
3182
+ * Instances created this way have no inner Rust provider -- calling
3183
+ * `complete()` or `stream()` without overriding them in the subclass
3184
+ * will throw.
3263
3185
  */
3264
- static create(): ModelCache
3186
+ constructor(config?: ModelConfig | undefined | null)
3187
+ /** Create an `OpenAI` completion model. */
3188
+ static openai(options?: JsProviderOptions | undefined | null): Model
3189
+ /** Create an Anthropic completion model. */
3190
+ static anthropic(options?: JsProviderOptions | undefined | null): Model
3191
+ /** Create a Google Gemini completion model. */
3192
+ static gemini(options?: JsProviderOptions | undefined | null): Model
3193
+ /** Create an Azure `OpenAI` completion model. */
3194
+ static azure(options: JsAzureOptions): Model
3265
3195
  /**
3266
- * Create a cache rooted at a specific directory.
3196
+ * Create a fal.ai completion model.
3197
+ *
3198
+ * `options` optionally configures the LLM model, endpoint family,
3199
+ * enterprise tier, and modality auto-routing. Defaults to the
3200
+ * OpenAI-compatible chat-completions endpoint.
3201
+ */
3202
+ static fal(options?: JsFalOptions | undefined | null): Model
3203
+ /** Create an `OpenRouter` completion model. */
3204
+ static openrouter(options?: JsProviderOptions | undefined | null): Model
3205
+ /** Create a Groq completion model. */
3206
+ static groq(options?: JsProviderOptions | undefined | null): Model
3207
+ /** Create a Together AI completion model. */
3208
+ static together(options?: JsProviderOptions | undefined | null): Model
3209
+ /** Create a Mistral AI completion model. */
3210
+ static mistral(options?: JsProviderOptions | undefined | null): Model
3211
+ /** Create a `DeepSeek` completion model. */
3212
+ static deepseek(options?: JsProviderOptions | undefined | null): Model
3213
+ /** Create a Fireworks AI completion model. */
3214
+ static fireworks(options?: JsProviderOptions | undefined | null): Model
3215
+ /** Create a Perplexity completion model. */
3216
+ static perplexity(options?: JsProviderOptions | undefined | null): Model
3217
+ /** Create an xAI (Grok) completion model. */
3218
+ static xai(options?: JsProviderOptions | undefined | null): Model
3219
+ /** Create a Cohere completion model. */
3220
+ static cohere(options?: JsProviderOptions | undefined | null): Model
3221
+ /** Create an AWS Bedrock completion model. */
3222
+ static bedrock(options: JsBedrockOptions): Model
3223
+ /**
3224
+ * Create a local Ollama completion model.
3225
+ *
3226
+ * Talks to a running Ollama server (reached at `http://host:port/v1`).
3227
+ * No API key is required.
3228
+ *
3229
+ * ```javascript
3230
+ * const model = Model.ollama("localhost", 11434, "llama3.1:8b");
3231
+ * ```
3232
+ */
3233
+ static ollama(host: string, port: number, model: string): Model
3234
+ /**
3235
+ * Create a local LM Studio completion model.
3236
+ *
3237
+ * Talks to a running LM Studio server's OpenAI-compatible endpoint.
3238
+ *
3239
+ * ```javascript
3240
+ * const model = Model.lmStudio("localhost", 1234, "my-model");
3241
+ * ```
3242
+ */
3243
+ static lmStudio(host: string, port: number, model: string): Model
3244
+ /**
3245
+ * Create a generic OpenAI-compatible completion model.
3246
+ *
3247
+ * Drives any OpenAI-compatible chat-completions endpoint with the
3248
+ * supplied [`JsOpenAiCompatConfig`].
3249
+ *
3250
+ * ```javascript
3251
+ * const model = Model.openaiCompat("my-host", {
3252
+ * providerName: "my-host",
3253
+ * baseUrl: "https://api.example.com/v1",
3254
+ * apiKey: "sk-...",
3255
+ * defaultModel: "my-model",
3256
+ * });
3257
+ * ```
3258
+ */
3259
+ static openaiCompat(providerId: string, config: JsOpenAiCompatConfig): Model
3260
+ /**
3261
+ * Create a fully user-defined completion model backed by a JavaScript
3262
+ * host object.
3263
+ *
3264
+ * `hostObject` must expose Blazen capability methods (e.g.
3265
+ * `complete`, `stream`) using the camelCase trait-method names. The
3266
+ * optional `providerId` is used for logging; defaults to `"custom"`.
3267
+ *
3268
+ * ```javascript
3269
+ * class MyProvider {
3270
+ * async complete(request) { /* ... *\/ }
3271
+ * }
3272
+ * const model = Model.custom(new MyProvider(), "my-provider");
3273
+ * ```
3274
+ */
3275
+ static custom(hostObject: object, providerId?: string | undefined | null): Model
3276
+ /** Get the model ID. */
3277
+ get modelId(): string
3278
+ /**
3279
+ * Wrap this model with automatic retry on transient failures.
3280
+ *
3281
+ * ```javascript
3282
+ * const model = Model.openrouter({ apiKey: key });
3283
+ * const withRetry = model.withRetry({ maxRetries: 3, initialDelayMs: 1000 });
3284
+ * ```
3285
+ */
3286
+ withRetry(config?: JsRetryConfig | undefined | null): Model
3287
+ /**
3288
+ * Wrap this model with an in-memory response cache.
3289
+ *
3290
+ * Streaming requests are never cached and always delegate directly to the
3291
+ * underlying model.
3292
+ *
3293
+ * ```javascript
3294
+ * const cached = model.withCache({ ttlSeconds: 300, maxEntries: 1000 });
3295
+ * ```
3296
+ */
3297
+ withCache(config?: JsCacheConfig | undefined | null): Model
3298
+ /**
3299
+ * Create a fallback model that tries multiple providers in order.
3300
+ *
3301
+ * When the primary provider fails with a transient error (rate limit,
3302
+ * timeout, server error) the request is automatically forwarded to the
3303
+ * next provider. Non-retryable errors short-circuit immediately.
3304
+ *
3305
+ * ```javascript
3306
+ * const model = Model.withFallback([modelA, modelB]);
3307
+ * ```
3308
+ */
3309
+ static withFallback(models: Array<Model>): Model
3310
+ /**
3311
+ * Perform a chat completion.
3312
+ *
3313
+ * Messages should be an array of `ChatMessage` instances.
3314
+ *
3315
+ * Returns a typed response with `content`, `toolCalls`, `usage`, `model`,
3316
+ * and `finishReason` fields.
3317
+ */
3318
+ complete(messages: Array<JsChatMessage>): Promise<JsModelResponse>
3319
+ /**
3320
+ * Perform a chat completion with additional options.
3321
+ *
3322
+ * Options object may include:
3323
+ * - `temperature` (number): Sampling temperature (0.0 - 2.0)
3324
+ * - `maxTokens` (number): Maximum tokens to generate
3325
+ * - `topP` (number): Nucleus sampling parameter
3326
+ * - `model` (string): Override the default model
3327
+ * - `tools` (array): Tool definitions for function calling
3328
+ */
3329
+ completeWithOptions(messages: Array<JsChatMessage>, options: JsModelOptions): Promise<JsModelResponse>
3330
+ /**
3331
+ * Stream a chat completion.
3332
+ *
3333
+ * The `onChunk` callback receives each chunk as a typed `StreamChunk` with
3334
+ * `delta`, `finishReason`, and `toolCalls` fields.
3335
+ *
3336
+ * ```javascript
3337
+ * await model.stream(
3338
+ * [ChatMessage.user("Tell me a story")],
3339
+ * (chunk) => { if (chunk.delta) process.stdout.write(chunk.delta); }
3340
+ * );
3341
+ * ```
3342
+ */
3343
+ stream(messages: Array<JsChatMessage>, onChunk: StreamChunkCallbackTsfn): Promise<void>
3344
+ /**
3345
+ * Stream a chat completion with additional options.
3346
+ *
3347
+ * Options object may include:
3348
+ * - `temperature` (number): Sampling temperature (0.0 - 2.0)
3349
+ * - `maxTokens` (number): Maximum tokens to generate
3350
+ * - `topP` (number): Nucleus sampling parameter
3351
+ * - `model` (string): Override the default model
3352
+ * - `tools` (array): Tool definitions for function calling
3353
+ */
3354
+ streamWithOptions(messages: Array<JsChatMessage>, onChunk: StreamChunkCallbackTsfn, options: JsModelOptions): Promise<void>
3355
+ /**
3356
+ * Explicitly load the model weights into memory / `VRAM`.
3357
+ *
3358
+ * For remote providers (`OpenAI`, Anthropic, fal, etc.) this throws --
3359
+ * there is no local model to load. For local providers (mistral.rs,
3360
+ * llama.cpp, candle) this triggers the download + load synchronously,
3361
+ * so the next inference call does not pay the startup cost.
3362
+ *
3363
+ * Idempotent: calling `load` on an already-loaded model is a no-op
3364
+ * that resolves immediately.
3365
+ */
3366
+ load(): Promise<void>
3367
+ /**
3368
+ * Drop the loaded model and free its memory / `VRAM`.
3369
+ *
3370
+ * For remote providers this throws. For local providers this frees
3371
+ * `GPU` memory so the process can load a different model. Idempotent.
3372
+ */
3373
+ unload(): Promise<void>
3374
+ /**
3375
+ * Whether the model is currently loaded in memory / `VRAM`.
3376
+ *
3377
+ * Always returns `false` for remote providers (they have no local
3378
+ * model to load). Returns the real state for local providers.
3379
+ */
3380
+ isLoaded(): Promise<boolean>
3381
+ /**
3382
+ * Approximate memory footprint in bytes (host RAM if the
3383
+ * provider targets the CPU, GPU VRAM otherwise), if the
3384
+ * implementation can report it. Returns `null` for remote
3385
+ * providers or for local providers that do not expose memory
3386
+ * usage.
3387
+ *
3388
+ * Note: napi-rs exposes this as a JS `number`. The underlying
3389
+ * [`blazen_llm::LocalModel::memory_bytes`] returns `u64`; we clamp
3390
+ * to `i64::MAX` (~9.2 exabytes) when surfacing through
3391
+ * `JSON`-compatible types, which is effectively lossless for any
3392
+ * realistic footprint.
3393
+ */
3394
+ memoryBytes(): Promise<number | null>
3395
+ /**
3396
+ * Create a local mistral.rs completion model.
3397
+ *
3398
+ * Runs LLM inference entirely on-device -- no API key required.
3399
+ *
3400
+ * ```javascript
3401
+ * const model = Model.mistralrs({
3402
+ * modelId: "mistralai/Mistral-7B-Instruct-v0.3",
3403
+ * });
3404
+ * ```
3405
+ */
3406
+ static mistralrs(options: JsMistralRsOptions): Model
3407
+ /**
3408
+ * Wrap this model in a [`TracingModel`] that emits a
3409
+ * structured `tracing` span around every `complete` and `stream`
3410
+ * call.
3411
+ *
3412
+ * `name` is recorded on the span as the `provider` field plus the
3413
+ * `OpenInference` / `gen_ai.*` aliases (`gen_ai.system`, etc.). It is
3414
+ * leaked into a `&'static str` because the underlying span macro
3415
+ * captures it by reference for the process lifetime; this is
3416
+ * intentional and bounded by the small set of distinct provider
3417
+ * names a typical application uses.
3418
+ *
3419
+ * `captureMessages` (default `false`) opts into recording the raw
3420
+ * prompt + completion text as `llm.input_messages` /
3421
+ * `llm.output_messages` for Phoenix eval-grade ingest. Leave off for
3422
+ * privacy-sensitive deployments.
3423
+ *
3424
+ * ```javascript
3425
+ * const traced = Model.openai({ apiKey }).withTracing("openai");
3426
+ * const evalGrade = Model.openai({ apiKey }).withTracing("openai", true);
3427
+ * ```
3428
+ */
3429
+ withTracing(name: string, captureMessages?: boolean | undefined | null): Model
3430
+ /**
3431
+ * Wrap this model in a [`TracingModel`] using an explicit
3432
+ * [`TracingConfig`](JsTracingConfig) object.
3433
+ *
3434
+ * Equivalent to [`withTracing`](Self::with_tracing) but takes the
3435
+ * structured config record (`{ captureMessages?: boolean }`) instead of
3436
+ * a positional boolean. `name` is leaked into a `&'static str` for the
3437
+ * span macro, exactly as in [`withTracing`](Self::with_tracing).
3438
+ *
3439
+ * ```javascript
3440
+ * const traced = Model.openai({ apiKey })
3441
+ * .withTracingConfig("openai", { captureMessages: true });
3442
+ * ```
3443
+ */
3444
+ withTracingConfig(name: string, config: TracingConfig): Model
3445
+ }
3446
+ export type JsModel = Model
3447
+
3448
+ /**
3449
+ * Local cache for ML models downloaded from `HuggingFace` Hub.
3450
+ *
3451
+ * Models are stored under `{cacheDir}/{repoId}/{filename}`. Files are
3452
+ * downloaded only once; subsequent calls return the cached path immediately.
3453
+ *
3454
+ * ```javascript
3455
+ * import { ModelCache } from 'blazen';
3456
+ *
3457
+ * const cache = ModelCache.create();
3458
+ * if (!cache.isCached('bert-base-uncased', 'config.json')) {
3459
+ * await cache.download('bert-base-uncased', 'config.json', (downloaded, total) => {
3460
+ * if (total !== null) {
3461
+ * console.log(`${(downloaded / total * 100).toFixed(1)}%`);
3462
+ * }
3463
+ * });
3464
+ * }
3465
+ * ```
3466
+ */
3467
+ export declare class ModelCache {
3468
+ /**
3469
+ * Create a cache in the default location.
3470
+ *
3471
+ * Uses `$BLAZEN_CACHE_DIR/models/` if set, otherwise falls back to
3472
+ * `~/.cache/blazen/models/`.
3473
+ */
3474
+ static create(): ModelCache
3475
+ /**
3476
+ * Create a cache rooted at a specific directory.
3477
+ *
3478
+ * The directory does not need to exist yet; it will be created on the
3479
+ * first download.
3480
+ */
3481
+ static withDir(path: string): ModelCache
3482
+ /** The root cache directory path as a string. */
3483
+ get cacheDir(): string
3484
+ /** Check if a file is already present in the cache (without downloading). */
3485
+ isCached(repo: string, file: string): boolean
3486
+ /**
3487
+ * Download a file from `HuggingFace` Hub if it is not already cached.
3488
+ *
3489
+ * Returns the local filesystem path to the cached file.
3490
+ *
3491
+ * The optional `onProgress` argument accepts either:
3492
+ * - A raw callback `(downloaded: number, total: number | null) => void`
3493
+ * for a quick inline progress hook, or
3494
+ * - A [`JsProgressCallback`] subclass instance (recommended for stateful
3495
+ * reporters), whose `onProgress(downloaded, total)` method receives
3496
+ * byte counts as `bigint` values.
3497
+ *
3498
+ * `total` is `null` when the server does not report the file size up
3499
+ * front.
3500
+ */
3501
+ download(repo: string, file: string, onProgress?: ProgressTsfn | object | undefined | null): Promise<string>
3502
+ }
3503
+ export type JsModelCache = ModelCache
3504
+
3505
+ /**
3506
+ * gRPC client for the `BlazenModelServer` service.
3507
+ *
3508
+ * Connect with [`connect`](Self::connect) (plaintext) or
3509
+ * [`connectWithTls`](Self::connect_with_tls) (TLS / mTLS), then issue
3510
+ * RPCs. Besides the status RPCs, the client also exposes load /
3511
+ * unload, adapters, completions, embeddings, media, and blobs.
3512
+ *
3513
+ * ```typescript
3514
+ * const client = await ModelClient.connect("http://model-server:50051");
3515
+ * const status = await client.status();
3516
+ * if (await client.isLoaded("gpt-oss-120b")) {
3517
+ * // ...
3518
+ * }
3519
+ * ```
3520
+ */
3521
+ export declare class ModelClient {
3522
+ /**
3523
+ * Open a plaintext connection to a `BlazenModelServer` at
3524
+ * `endpoint` (e.g. `"http://localhost:50051"`).
3525
+ */
3526
+ static connect(endpoint: string): Promise<ModelClient>
3527
+ /**
3528
+ * Open a TLS (or mTLS, when `opts.clientCert` + `opts.clientKey`
3529
+ * are supplied) connection to a `BlazenModelServer` at `endpoint`.
3530
+ */
3531
+ static connectWithTls(endpoint: string, opts: JsModelClientTlsOptions): Promise<ModelClient>
3532
+ /**
3533
+ * Fetch a snapshot of every registered model on the server.
3534
+ *
3535
+ * `modelId` is currently unused (the server returns every model
3536
+ * either way) but is reserved for a future per-model filter. Pass
3537
+ * `undefined` / omit the argument for the full snapshot.
3538
+ *
3539
+ * Returns a plain JS object with the wire shape of
3540
+ * [`StatusResponse`] (`{ envelopeVersion, models: [...] }`),
3541
+ * produced via `serde_json` so the model + adapter wires serialize
3542
+ * recursively without per-field napi glue.
3543
+ */
3544
+ status(modelId?: string): Promise<any>
3545
+ /** Liveness check for a specific registered model. */
3546
+ isLoaded(modelId: string): Promise<boolean>
3547
+ /**
3548
+ * Issue a `Load` RPC for a previously-registered model.
3549
+ *
3550
+ * `request` is a plain JS object matching the wire shape of
3551
+ * [`LoadRequest`] (`{ envelopeVersion?, modelId }`). The
3552
+ * `envelopeVersion` field is filled in from
3553
+ * [`MODEL_ENVELOPE_VERSION`] when omitted by the caller — pass the
3554
+ * shorthand `{ modelId: "qwen3-7b" }` and the binding will set the
3555
+ * rest. Returns the wire-shaped [`LoadResponse`] as a plain JS
3556
+ * object.
3557
+ */
3558
+ load(request: any): Promise<any>
3559
+ /**
3560
+ * Issue an `Unload` RPC to drop a loaded model from memory.
3561
+ *
3562
+ * `request` mirrors [`UnloadRequest`] on the wire
3563
+ * (`{ envelopeVersion?, modelId }`). Returns the wire-shaped
3564
+ * [`UnloadResponse`] as a plain JS object.
3565
+ */
3566
+ unload(request: any): Promise<any>
3567
+ /**
3568
+ * Issue a `LoadFromHf` RPC — register-and-load a model from a
3569
+ * Hugging Face Hub repo. Whether the server actually honors the
3570
+ * request depends on it having been built with the `hf-loader`
3571
+ * feature; the client speaks the wire either way.
3572
+ *
3573
+ * `request` matches [`LoadFromHfRequest`] on the wire
3574
+ * (`{ envelopeVersion?, modelId, repo, memoryEstimateBytes?,
3575
+ * backendHint?, ggufFile?, revision?, hfToken?,
3576
+ * extraOptionsJson? }`). Returns the wire-shaped
3577
+ * [`LoadFromHfResponse`] as a plain JS object.
3578
+ */
3579
+ loadFromHf(request: any): Promise<any>
3580
+ /**
3581
+ * Issue a `LoadAdapter` RPC.
3582
+ *
3583
+ * `request` matches [`LoadAdapterRequest`] on the wire. Returns the
3584
+ * wire-shaped [`LoadAdapterResponse`] as a plain JS object.
3585
+ */
3586
+ loadAdapter(request: any): Promise<any>
3587
+ /**
3588
+ * Issue an `UnloadAdapter` RPC.
3589
+ *
3590
+ * `request` matches [`UnloadAdapterRequest`] on the wire. Returns the
3591
+ * wire-shaped [`UnloadAdapterResponse`] as a plain JS object.
3592
+ */
3593
+ unloadAdapter(request: any): Promise<any>
3594
+ /**
3595
+ * Issue a `ListAdapters` RPC.
3596
+ *
3597
+ * `request` matches [`ListAdaptersRequest`] on the wire. Returns the
3598
+ * wire-shaped [`ListAdaptersResponse`] as a plain JS object.
3599
+ */
3600
+ listAdapters(request: any): Promise<any>
3601
+ /**
3602
+ * Issue a `Complete` RPC.
3603
+ *
3604
+ * `request` matches [`CompleteRequest`] on the wire. Returns the
3605
+ * wire-shaped [`CompleteResponse`] as a plain JS object.
3606
+ */
3607
+ complete(request: any): Promise<any>
3608
+ /**
3609
+ * Issue an `Embed` RPC.
3610
+ *
3611
+ * `request` matches [`EmbedRequest`] on the wire. Returns the
3612
+ * wire-shaped [`EmbedResponse`] as a plain JS object.
3613
+ */
3614
+ embed(request: any): Promise<any>
3615
+ /**
3616
+ * Issue a `GenerateImage` RPC.
3617
+ *
3618
+ * `request` matches [`GenerateImageRequest`] on the wire. Returns the
3619
+ * wire-shaped [`GenerateImageResponse`] as a plain JS object.
3620
+ */
3621
+ generateImage(request: any): Promise<any>
3622
+ /**
3623
+ * Issue a `TextToSpeech` RPC.
3624
+ *
3625
+ * `request` matches [`TextToSpeechRequest`] on the wire. Returns the
3626
+ * wire-shaped [`TextToSpeechResponse`] as a plain JS object.
3627
+ */
3628
+ textToSpeech(request: any): Promise<any>
3629
+ /**
3630
+ * Issue a `GenerateMusic` RPC.
3631
+ *
3632
+ * `request` matches [`GenerateMusicRequest`] on the wire. Returns the
3633
+ * wire-shaped [`GenerateMusicResponse`] as a plain JS object.
3634
+ */
3635
+ generateMusic(request: any): Promise<any>
3636
+ /**
3637
+ * Issue a `Transcribe` RPC.
3638
+ *
3639
+ * `request` matches [`TranscribeRequest`] on the wire. Returns the
3640
+ * wire-shaped [`TranscribeResponse`] as a plain JS object.
3641
+ */
3642
+ transcribe(request: any): Promise<any>
3643
+ /**
3644
+ * Issue a `StreamComplete` server-streaming RPC.
3645
+ *
3646
+ * `request` matches [`CompleteRequest`] on the wire (same shape as
3647
+ * the unary `complete` method). Returns a JS
3648
+ * `AsyncIterableIterator` that yields wire-shaped
3649
+ * [`StreamCompleteChunk`](blazen_controlplane::model_protocol::StreamCompleteChunk)
3650
+ * objects (each a plain JS object — `{ kind: "delta", ... }` or
3651
+ * `{ kind: "done", ... }` depending on the variant) until the
3652
+ * server closes the stream.
3653
+ *
3654
+ * The stream is opened lazily on the first `next()` call so the
3655
+ * initial RPC error (if any) surfaces to the consumer rather than
3656
+ * to the synchronous call site.
3267
3657
  *
3268
- * The directory does not need to exist yet; it will be created on the
3269
- * first download.
3658
+ * Mirrors the lazy-open `AsyncIterableIterator` pattern used by
3659
+ * [`crate::controlplane::client::JsControlPlaneClient::subscribe_run_events`].
3270
3660
  */
3271
- static withDir(path: string): ModelCache
3272
- /** The root cache directory path as a string. */
3273
- get cacheDir(): string
3274
- /** Check if a file is already present in the cache (without downloading). */
3275
- isCached(repo: string, file: string): boolean
3661
+ streamComplete(request: object): AsyncIterableIterator<object>
3276
3662
  /**
3277
- * Download a file from `HuggingFace` Hub if it is not already cached.
3663
+ * Issue an `UploadBlob` client-streaming RPC.
3278
3664
  *
3279
- * Returns the local filesystem path to the cached file.
3665
+ * `chunks` is the pre-collected blob payload split into one or more
3666
+ * `Buffer` (or `Uint8Array`) pieces. The binding wraps them in the
3667
+ * canonical `Start` / `Data*` / `End` frame sequence — callers do
3668
+ * not need to construct envelope frames themselves. `options.blobId`
3669
+ * names the upload (defaults to a freshly-generated UUID-shaped
3670
+ * string when omitted); `options.mime` is forwarded as the
3671
+ * `content_type` hint on the `Start` frame.
3280
3672
  *
3281
- * The optional `onProgress` argument accepts either:
3282
- * - A raw callback `(downloaded: number, total: number | null) => void`
3283
- * for a quick inline progress hook, or
3284
- * - A [`JsProgressCallback`] subclass instance (recommended for stateful
3285
- * reporters), whose `onProgress(downloaded, total)` method receives
3286
- * byte counts as `bigint` values.
3673
+ * This binding uses the **pre-collected `Vec<Buffer>`** approach
3674
+ * rather than consuming a JS `AsyncIterable`: napi-rs does not yet
3675
+ * surface a first-class JS-async-iterator Rust-`Stream` adapter,
3676
+ * and a hand-rolled `iterator.next()`-pumping bridge would have run
3677
+ * well past the ~50-line budget called out in the wave plan. The
3678
+ * streaming-over-the-wire shape (multiple postcard frames) is
3679
+ * preserved — only the JS-side ergonomics differ from a true async
3680
+ * iterable. Returns the wire-shaped [`UploadBlobResponse`] as a
3681
+ * plain JS object.
3682
+ */
3683
+ uploadBlob(chunks: Array<Buffer | Uint8Array>, options?: { blobId?: string, mime?: string }): Promise<any>
3684
+ /**
3685
+ * Issue a `FetchBlob` server-streaming RPC.
3287
3686
  *
3288
- * `total` is `null` when the server does not report the file size up
3289
- * front.
3687
+ * `request` matches [`FetchBlobRequest`] on the wire
3688
+ * (`{ envelopeVersion?, blobId, offset?, chunkSize? }`). Returns a
3689
+ * JS `AsyncIterableIterator<Buffer>` that yields the blob body in
3690
+ * order — only the `Data` frames are surfaced as `Buffer` values;
3691
+ * the `Start` / `End` envelope frames are consumed transparently
3692
+ * (`Start` is dropped so callers see only bytes, `End` terminates
3693
+ * iteration). The stream is opened lazily on the first `next()`
3694
+ * call so the initial RPC error (if any) surfaces to the consumer.
3695
+ *
3696
+ * Mirrors the lazy-open pattern used by [`Self::stream_complete`].
3290
3697
  */
3291
- download(repo: string, file: string, onProgress?: ProgressTsfn | object | undefined | null): Promise<string>
3698
+ fetchBlob(request: object): AsyncIterableIterator<Buffer>
3292
3699
  }
3293
- export type JsModelCache = ModelCache
3700
+ export type JsModelClient = ModelClient
3294
3701
 
3295
3702
  /**
3296
3703
  * Memory-budget-aware model manager with per-pool LRU eviction.
@@ -3331,7 +3738,7 @@ export declare class ModelManager {
3331
3738
  */
3332
3739
  constructor(config?: ModelManagerConfig | undefined | null)
3333
3740
  /**
3334
- * Register a `CompletionModel`-backed local model with the manager.
3741
+ * Register a `Model`-backed local model with the manager.
3335
3742
  *
3336
3743
  * The model starts in the unloaded state. An optional
3337
3744
  * `memoryEstimateBytes` overrides the model's self-reported
@@ -3343,11 +3750,11 @@ export declare class ModelManager {
3343
3750
  * tokenizer, custom runtime, …), use
3344
3751
  * [`Self::register_local_model`] instead.
3345
3752
  */
3346
- register(id: string, model: JsCompletionModel, memoryEstimateBytes?: bigint | undefined | null): Promise<void>
3753
+ register(id: string, model: JsModel, memoryEstimateBytes?: bigint | undefined | null): Promise<void>
3347
3754
  /**
3348
3755
  * Register an arbitrary JS-managed local model with the manager.
3349
3756
  *
3350
- * Unlike [`Self::register`] — which expects a [`JsCompletionModel`]
3757
+ * Unlike [`Self::register`] — which expects a [`JsModel`]
3351
3758
  * backed by an in-process provider — this entrypoint takes raw
3352
3759
  * lifecycle callbacks. The manager will invoke `load()` when the
3353
3760
  * model is brought into memory (potentially after evicting an LRU
@@ -3377,10 +3784,14 @@ export declare class ModelManager {
3377
3784
  * );
3378
3785
  * ```
3379
3786
  *
3380
- * `isLoaded`, `memoryEstimateBytes`, and `device` are all
3381
- * nullable / optional (pass `null` or `undefined` to omit).
3787
+ * `isLoaded`, `memoryEstimateBytes`, `device`, `loadAdapter`,
3788
+ * `unloadAdapter`, and `listAdapters` are all nullable / optional
3789
+ * (pass `null` or `undefined` to omit). Omitted adapter callbacks
3790
+ * cause `loadAdapter` / `unloadAdapter` /
3791
+ * `listAdapters` to surface the upstream "backend does not support
3792
+ * `LoRA` adapters" error for this model.
3382
3793
  */
3383
- registerLocalModel(id: string, load: LifecycleTsfn, unload: LifecycleTsfn, isLoaded?: IsLoadedTsfn | undefined | null, memoryEstimateBytes?: bigint | undefined | null, device?: string | undefined | null): Promise<void>
3794
+ registerLocalModel(id: string, load: LifecycleTsfn, unload: LifecycleTsfn, isLoaded?: IsLoadedTsfn | undefined | null, memoryEstimateBytes?: bigint | undefined | null, device?: string | undefined | null, loadAdapter?: LoadAdapterTsfn | undefined | null, unloadAdapter?: UnloadAdapterTsfn | undefined | null, listAdapters?: ListAdaptersTsfn | undefined | null): Promise<void>
3384
3795
  /**
3385
3796
  * Load a model, evicting LRU peers in the same pool if the budget
3386
3797
  * would be exceeded.
@@ -3424,6 +3835,161 @@ export declare class ModelManager {
3424
3835
  pools(): Array<JsPoolBudget>
3425
3836
  /** Status of all registered models. */
3426
3837
  status(): Promise<Array<JsModelStatus>>
3838
+ /**
3839
+ * Auto-detect the right local-inference backend for a Hugging Face
3840
+ * repo, then register and budget the model with this manager.
3841
+ *
3842
+ * Performs a single metadata request against the Hub to enumerate
3843
+ * the repo's siblings, picks a backend (mistral.rs / candle /
3844
+ * llama.cpp) per the rules documented on
3845
+ * [`blazen_manager::hf_loader::choose_backend`], computes a memory
3846
+ * estimate from the sibling sizes, and registers the model under
3847
+ * `id`. The model starts unloaded — call [`Self::load`] or
3848
+ * [`Self::ensure_loaded`] to materialize it.
3849
+ *
3850
+ * Returns the chosen backend as a lower-case string
3851
+ * (`"mistralrs"` / `"candle"` / `"llamacpp"`).
3852
+ *
3853
+ * Throws on empty repo id, gated/missing repo, PEFT-adapter-only
3854
+ * repo (use [`Self::load_adapter`] instead), missing backend
3855
+ * feature, or any provider construction failure.
3856
+ * (The preceding paragraphs describe `loadFromHf`, not this method.) Mount a PEFT-format `LoRA` adapter onto a registered model.
3857
+ *
3858
+ * `adapterDir` must contain the canonical PEFT layout
3859
+ * (`adapter_model.safetensors` + `adapter_config.json`). The base
3860
+ * model is implicitly loaded (`ensureLoaded`) before mounting.
3861
+ *
3862
+ * Returns the adapter id assigned by the backend (echoes
3863
+ * `options.adapterId`).
3864
+ *
3865
+ * Throws if the model is not registered, the adapter id is already
3866
+ * mounted, the pool budget would be exceeded, or the backend does
3867
+ * not support adapters.
3868
+ */
3869
+ loadAdapter(modelId: string, adapterDir: string, options: AdapterOptions): Promise<string>
3870
+ /**
3871
+ * Unmount a previously-loaded adapter from a registered model.
3872
+ *
3873
+ * Throws if the model is not registered or the adapter id is not
3874
+ * currently mounted on it.
3875
+ */
3876
+ unloadAdapter(modelId: string, adapterId: string): Promise<void>
3877
+ /**
3878
+ * List adapters currently mounted on a registered model.
3879
+ *
3880
+ * Throws if the model is not registered.
3881
+ */
3882
+ listAdapters(modelId: string): Promise<Array<JsAdapterStatus>>
3883
+ /**
3884
+ * Load a model from Hugging Face by repo id.
3885
+ *
3886
+ * Inspects the repo's siblings, picks a backend (mistral.rs /
3887
+ * candle / llama.cpp) per the rules documented on
3888
+ * [`blazen_manager::hf_loader::choose_backend`], computes a memory
3889
+ * estimate from the sibling sizes, and registers the model under
3890
+ * `id`. The model starts unloaded — call [`Self::load`] or
3891
+ * [`Self::ensure_loaded`] to materialize it.
3892
+ *
3893
+ * Returns the chosen backend as a lower-case string
3894
+ * (`"mistralrs"` / `"candle"` / `"llamacpp"`).
3895
+ */
3896
+ loadFromHf(id: string, repo: string, options?: JsHfLoadOptions | undefined | null): Promise<string>
3897
+ /**
3898
+ * Train a `LoRA` adapter end-to-end on the configured base model.
3899
+ *
3900
+ * Downloads the base model from HuggingFace (cached), builds a
3901
+ * VarMap, runs the AdamW + LoRA training loop driven by `dataset`,
3902
+ * and writes the resulting PEFT-format adapter to
3903
+ * `config.outputDir`. The returned `TrainedAdapter`'s `adapterDir`
3904
+ * is immediately mountable via [`Self::load_adapter`] on a
3905
+ * compatible backend.
3906
+ *
3907
+ * `progress`, when supplied, is invoked once per Started /
3908
+ * StepCompleted / Evaluating / EvalCompleted / CheckpointSaved /
3909
+ * Finished transition. The return value is ignored; throwing
3910
+ * inside the callback does not abort the run. A failure to queue
3911
+ * the call (closed function, etc.) cancels the run with a
3912
+ * `BlazenError::cancelled`.
3913
+ *
3914
+ * # Errors
3915
+ *
3916
+ * Throws on invalid config, unrecognised device, HF download
3917
+ * failure, dataset I/O failure, trainer failure, or queueing
3918
+ * failure on the progress callback.
3919
+ */
3920
+ trainLora(config: JsTrainConfig, dataset: JsonlDataset, progress?: ProgressTsfn | undefined | null): Promise<TrainedAdapter>
3921
+ /**
3922
+ * Train a `LoRA` adapter via Direct Preference Optimization (DPO).
3923
+ *
3924
+ * Like [`Self::train_lora`] but consumes a preference-pair dataset
3925
+ * of `(prompt, chosen, rejected)` triples and requires a frozen
3926
+ * reference model (defaults to `config.core.baseModelRepo` when
3927
+ * `config.referenceModelRepo` is `null`).
3928
+ *
3929
+ * # Errors
3930
+ *
3931
+ * Throws on invalid config, unrecognised device, HF download
3932
+ * failure, dataset I/O failure, trainer failure, or queueing
3933
+ * failure on the progress callback.
3934
+ */
3935
+ trainDpo(config: JsDpoConfig, dataset: PreferenceJsonlDataset, progress?: ProgressTsfn | undefined | null): Promise<TrainedAdapter>
3936
+ /**
3937
+ * Train a `LoRA` adapter via Odds Ratio Preference Optimization (ORPO).
3938
+ *
3939
+ * Reference-free; combines a standard SFT loss on chosen
3940
+ * completions with an odds-ratio preference term weighted by
3941
+ * `config.lambda`.
3942
+ *
3943
+ * # Errors
3944
+ *
3945
+ * Same surface as [`Self::train_dpo`].
3946
+ */
3947
+ trainOrpo(config: JsOrpoConfig, dataset: PreferenceJsonlDataset, progress?: ProgressTsfn | undefined | null): Promise<TrainedAdapter>
3948
+ /**
3949
+ * Train a `LoRA` adapter via Simple Preference Optimization (`SimPO`).
3950
+ *
3951
+ * Reference-free and length-normalized. `config.beta` scales the
3952
+ * preference logits and `config.gamma` sets the target reward
3953
+ * margin.
3954
+ *
3955
+ * # Errors
3956
+ *
3957
+ * Same surface as [`Self::train_dpo`].
3958
+ */
3959
+ trainSimpo(config: JsSimpoConfig, dataset: PreferenceJsonlDataset, progress?: ProgressTsfn | undefined | null): Promise<TrainedAdapter>
3960
+ /**
3961
+ * Train a `LoRA` adapter via Kahneman-Tversky Optimization (KTO).
3962
+ *
3963
+ * Like DPO, KTO requires a frozen reference model — but the
3964
+ * dataset schema differs: each row is a
3965
+ * `(prompt, completion, desirable)` triple
3966
+ * ([`JsRatedJsonlDataset`]), not a chosen/rejected pair.
3967
+ *
3968
+ * # Errors
3969
+ *
3970
+ * Same surface as [`Self::train_dpo`].
3971
+ */
3972
+ trainKto(config: JsKtoConfig, dataset: RatedJsonlDataset, progress?: ProgressTsfn | undefined | null): Promise<TrainedAdapter>
3973
+ /**
3974
+ * Run a full fine-tune (every parameter trainable; no `LoRA`
3975
+ * adapter).
3976
+ *
3977
+ * Returns [`JsFullFineTuneResult`] — not [`JsTrainedAdapter`] —
3978
+ * because the output is a complete set of model weights in
3979
+ * `config.core.outputDir` rather than a mountable PEFT delta.
3980
+ *
3981
+ * Setting `config.gradientCheckpointing = true` is rejected at
3982
+ * init time because candle 0.10.2 has no activation-checkpointing
3983
+ * primitive.
3984
+ *
3985
+ * # Errors
3986
+ *
3987
+ * Throws on invalid config, unrecognised device,
3988
+ * `gradientCheckpointing = true`, HF download failure, dataset
3989
+ * I/O failure, trainer failure, or queueing failure on the
3990
+ * progress callback.
3991
+ */
3992
+ fineTune(config: JsFullFineTuneConfig, dataset: JsonlDataset, progress?: ProgressTsfn | undefined | null): Promise<FullFineTuneResult>
3427
3993
  }
3428
3994
  export type JsModelManager = ModelManager
3429
3995
 
@@ -3460,6 +4026,137 @@ export declare class ModelRegistry {
3460
4026
  }
3461
4027
  export type JsModelRegistry = ModelRegistry
3462
4028
 
4029
+ /**
4030
+ * Typed handle wrapping a `MusicGen` text-to-music backend.
4031
+ *
4032
+ * Mirrors [`blazen_llm::MusicBackendHandle`]. Construct it directly to
4033
+ * get a default-configured `MusicGen` handle; weights load lazily on
4034
+ * first generation.
4035
+ */
4036
+ export declare class MusicBackendHandle {
4037
+ /** Build a default-configured `MusicGen` music backend handle. */
4038
+ constructor()
4039
+ /** The wrapped backend's stable identifier. */
4040
+ get id(): string
4041
+ }
4042
+ export type JsMusicBackendHandle = MusicBackendHandle
4043
+
4044
+ /**
4045
+ * MusicGen text-to-music + text-to-SFX backend.
4046
+ *
4047
+ * Use the [`JsMusicgenBackend::create`] factory to construct an instance.
4048
+ */
4049
+ export declare class MusicgenBackend {
4050
+ /**
4051
+ * Construct a MusicGen backend handle.
4052
+ *
4053
+ * # Errors
4054
+ * Returns the resulting `napi::Error` if option conversion fails;
4055
+ * in practice always succeeds.
4056
+ */
4057
+ static create(options?: JsMusicgenOptions | undefined | null): MusicgenBackend
4058
+ /** Backend identifier, e.g. `"musicgen-small"`. */
4059
+ get modelId(): string
4060
+ /**
4061
+ * Generate music conditioned on `prompt`.
4062
+ *
4063
+ * # Errors
4064
+ * Returns `MusicInvalidInputError` for empty prompts or non-positive
4065
+ * / out-of-range durations, `MusicHfHubError` on weight-download
4066
+ * failure, `MusicCandleError` on inference failure, or
4067
+ * `MusicEngineNotAvailableError` when the engine feature was
4068
+ * compiled out.
4069
+ */
4070
+ generateMusic(prompt: string, durationSeconds: number): Promise<JsMusicResult>
4071
+ /**
4072
+ * Generate sound-effect audio conditioned on `prompt`.
4073
+ *
4074
+ * MusicGen treats music and SFX as the same autoregressive pipeline
4075
+ * (the prompt is the only discriminator).
4076
+ *
4077
+ * # Errors
4078
+ * Same surface as [`Self::generate_music`].
4079
+ */
4080
+ generateSfx(prompt: string, durationSeconds: number): Promise<JsMusicResult>
4081
+ /**
4082
+ * Stream music generation, invoking `onChunk` for each emitted
4083
+ * `JsMusicChunk` until the final chunk arrives (`isFinal === true`).
4084
+ *
4085
+ * # Errors
4086
+ * Same surface as [`Self::generate_music`].
4087
+ */
4088
+ streamGenerateMusic(prompt: string, durationSeconds: number, onChunk: StreamMusicChunkCallbackTsfn): Promise<void>
4089
+ /**
4090
+ * Stream SFX generation, invoking `onChunk` for each emitted
4091
+ * `JsMusicChunk` until the final chunk arrives (`isFinal === true`).
4092
+ *
4093
+ * # Errors
4094
+ * Same surface as [`Self::generate_music`].
4095
+ */
4096
+ streamGenerateSfx(prompt: string, durationSeconds: number, onChunk: StreamMusicChunkCallbackTsfn): Promise<void>
4097
+ }
4098
+ export type JsMusicgenBackend = MusicgenBackend
4099
+
4100
+ /**
4101
+ * Unified music + SFX backend aggregator.
4102
+ *
4103
+ * ```javascript
4104
+ * // Pick a backend at construction time:
4105
+ * const m = MusicModel.musicgen({ variant: "small" });
4106
+ * const wav = await m.generateMusic("uplifting piano", 8);
4107
+ *
4108
+ * // Or swap to AudioGen / Stable Audio with the same method surface:
4109
+ * const sfx = MusicModel.audioGen({});
4110
+ * const ambient = MusicModel.stableAudio({});
4111
+ * ```
4112
+ */
4113
+ export declare class MusicModel {
4114
+ /** Build a [`JsMusicModel`] backed by MusicGen. */
4115
+ static musicgen(options?: JsMusicgenOptions | undefined | null): MusicModel
4116
+ /** Build a [`JsMusicModel`] backed by AudioGen. */
4117
+ static audioGen(options?: JsAudioGenOptions | undefined | null): MusicModel
4118
+ /** Build a [`JsMusicModel`] backed by Stable Audio Open. */
4119
+ static stableAudio(options?: JsStableAudioOptions | undefined | null): MusicModel
4120
+ /**
4121
+ * Backend identifier — same value `modelId` returns on the per-
4122
+ * backend `#[napi]` class (e.g. `"musicgen-small"`,
4123
+ * `"audiogen-medium"`, `"stable-audio"`).
4124
+ */
4125
+ get modelId(): string
4126
+ /**
4127
+ * Generate music conditioned on `prompt`.
4128
+ *
4129
+ * # Errors
4130
+ * See per-backend documentation
4131
+ * ([`JsMusicgenBackend::generate_music`], etc.).
4132
+ */
4133
+ generateMusic(prompt: string, durationSeconds: number): Promise<JsMusicResult>
4134
+ /**
4135
+ * Generate sound-effect audio conditioned on `prompt`.
4136
+ *
4137
+ * # Errors
4138
+ * See per-backend documentation.
4139
+ */
4140
+ generateSfx(prompt: string, durationSeconds: number): Promise<JsMusicResult>
4141
+ /**
4142
+ * Stream music generation, invoking `onChunk` for each emitted
4143
+ * `JsMusicChunk` until the final chunk arrives (`isFinal === true`).
4144
+ *
4145
+ * # Errors
4146
+ * See per-backend documentation.
4147
+ */
4148
+ streamGenerateMusic(prompt: string, durationSeconds: number, onChunk: StreamMusicChunkCallbackTsfn): Promise<void>
4149
+ /**
4150
+ * Stream SFX generation, invoking `onChunk` for each emitted
4151
+ * `JsMusicChunk` until the final chunk arrives (`isFinal === true`).
4152
+ *
4153
+ * # Errors
4154
+ * See per-backend documentation.
4155
+ */
4156
+ streamGenerateSfx(prompt: string, durationSeconds: number, onChunk: StreamMusicChunkCallbackTsfn): Promise<void>
4157
+ }
4158
+ export type JsMusicModel = MusicModel
4159
+
3463
4160
  /**
3464
4161
  * Base class for music generation providers.
3465
4162
  *
@@ -3490,7 +4187,7 @@ export type JsMusicProvider = MusicProvider
3490
4187
  * Useful as a default when no downstream observer is wired up:
3491
4188
  *
3492
4189
  * ```javascript
3493
- * const model = new UsageRecordingCompletionModel(base, new NoopUsageEmitter(), "openai");
4190
+ * const model = new UsageRecordingModel(base, new NoopUsageEmitter(), "openai");
3494
4191
  * ```
3495
4192
  */
3496
4193
  export declare class NoopUsageEmitter {
@@ -3534,13 +4231,13 @@ export declare class OpenAiCompatProvider {
3534
4231
  /** Get the model ID. */
3535
4232
  get modelId(): string
3536
4233
  /** Perform a chat completion. */
3537
- complete(messages: Array<JsChatMessage>): Promise<JsCompletionResponse>
4234
+ complete(messages: Array<JsChatMessage>): Promise<JsModelResponse>
3538
4235
  /** Perform a chat completion with additional options. */
3539
- completeWithOptions(messages: Array<JsChatMessage>, options: JsCompletionOptions): Promise<JsCompletionResponse>
4236
+ completeWithOptions(messages: Array<JsChatMessage>, options: JsModelOptions): Promise<JsModelResponse>
3540
4237
  /** Stream a chat completion. */
3541
4238
  stream(messages: Array<JsChatMessage>, onChunk: StreamChunkCallbackTsfn): Promise<void>
3542
4239
  /** Stream a chat completion with additional options. */
3543
- streamWithOptions(messages: Array<JsChatMessage>, onChunk: StreamChunkCallbackTsfn, options: JsCompletionOptions): Promise<void>
4240
+ streamWithOptions(messages: Array<JsChatMessage>, onChunk: StreamChunkCallbackTsfn, options: JsModelOptions): Promise<void>
3544
4241
  }
3545
4242
  export type JsOpenAiCompatProvider = OpenAiCompatProvider
3546
4243
 
@@ -3572,7 +4269,7 @@ export type JsOpenAiEmbeddingModel = OpenAiEmbeddingModel
3572
4269
  * An `OpenAI` compute provider exposing text-to-speech.
3573
4270
  *
3574
4271
  * For chat completions and embeddings, use
3575
- * [`CompletionModel.openai`](crate::providers::completion_model::JsCompletionModel::openai)
4272
+ * [`Model.openai`](crate::providers::model::JsModel::openai)
3576
4273
  * instead — this class is the standalone entry point for the compute
3577
4274
  * capabilities (currently text-to-speech) that the `OpenAI` provider
3578
4275
  * implements directly.
@@ -3607,13 +4304,13 @@ export declare class OpenRouterProvider {
3607
4304
  /** Get the model ID. */
3608
4305
  get modelId(): string
3609
4306
  /** Perform a chat completion. */
3610
- complete(messages: Array<JsChatMessage>): Promise<JsCompletionResponse>
4307
+ complete(messages: Array<JsChatMessage>): Promise<JsModelResponse>
3611
4308
  /** Perform a chat completion with additional options. */
3612
- completeWithOptions(messages: Array<JsChatMessage>, options: JsCompletionOptions): Promise<JsCompletionResponse>
4309
+ completeWithOptions(messages: Array<JsChatMessage>, options: JsModelOptions): Promise<JsModelResponse>
3613
4310
  /** Stream a chat completion. */
3614
4311
  stream(messages: Array<JsChatMessage>, onChunk: StreamChunkCallbackTsfn): Promise<void>
3615
4312
  /** Stream a chat completion with additional options. */
3616
- streamWithOptions(messages: Array<JsChatMessage>, onChunk: StreamChunkCallbackTsfn, options: JsCompletionOptions): Promise<void>
4313
+ streamWithOptions(messages: Array<JsChatMessage>, onChunk: StreamChunkCallbackTsfn, options: JsModelOptions): Promise<void>
3617
4314
  }
3618
4315
  export type JsOpenRouterProvider = OpenRouterProvider
3619
4316
 
@@ -3733,13 +4430,13 @@ export declare class PerplexityProvider {
3733
4430
  /** Get the model ID. */
3734
4431
  get modelId(): string
3735
4432
  /** Perform a chat completion. */
3736
- complete(messages: Array<JsChatMessage>): Promise<JsCompletionResponse>
4433
+ complete(messages: Array<JsChatMessage>): Promise<JsModelResponse>
3737
4434
  /** Perform a chat completion with additional options. */
3738
- completeWithOptions(messages: Array<JsChatMessage>, options: JsCompletionOptions): Promise<JsCompletionResponse>
4435
+ completeWithOptions(messages: Array<JsChatMessage>, options: JsModelOptions): Promise<JsModelResponse>
3739
4436
  /** Stream a chat completion. */
3740
4437
  stream(messages: Array<JsChatMessage>, onChunk: StreamChunkCallbackTsfn): Promise<void>
3741
4438
  /** Stream a chat completion with additional options. */
3742
- streamWithOptions(messages: Array<JsChatMessage>, onChunk: StreamChunkCallbackTsfn, options: JsCompletionOptions): Promise<void>
4439
+ streamWithOptions(messages: Array<JsChatMessage>, onChunk: StreamChunkCallbackTsfn, options: JsModelOptions): Promise<void>
3743
4440
  }
3744
4441
  export type JsPerplexityProvider = PerplexityProvider
3745
4442
 
@@ -3750,6 +4447,15 @@ export declare class Pipeline {
3750
4447
  * Consumes the pipeline -- calling start/resume a second time errors.
3751
4448
  */
3752
4449
  start(input: any): Promise<PipelineHandler>
4450
+ /**
4451
+ * Execute the pipeline and await its final result in one call.
4452
+ *
4453
+ * This is the result-shorthand mirror of [`crate::workflow::JsWorkflow::run`]:
4454
+ * equivalent to `(await pipeline.start(input)).result()`, but without
4455
+ * exposing the intermediate handler. Consumes the pipeline -- calling
4456
+ * `run`/`start`/`resume` a second time errors.
4457
+ */
4458
+ run(input: any): Promise<JsPipelineResult>
3753
4459
  /**
3754
4460
  * Inspect the pipeline-level default retry configuration, if any.
3755
4461
  * Mirrors [`blazen_pipeline::Pipeline::retry_config`] (Wave 2).
@@ -3771,6 +4477,14 @@ export declare class PipelineBuilder {
3771
4477
  stage(stage: JsStage): this
3772
4478
  /** Append a `ParallelStage` to the pipeline. */
3773
4479
  parallel(parallel: JsParallelStage): this
4480
+ /**
4481
+ * Append a `LoopStage` to the pipeline.
4482
+ *
4483
+ * The loop re-runs its inner stage up to `maxIterations` times (see the
4484
+ * [`LoopStage`](crate::pipeline::loop_stage::JsLoopStage) docs for the v1
4485
+ * iteration semantics). The `loopStage` instance is consumed.
4486
+ */
4487
+ loopStage(loopStage: JsLoopStage): this
3774
4488
  /** Set a per-stage timeout in seconds. Each stage's workflow gets this duration. */
3775
4489
  timeoutPerStage(seconds: number): this
3776
4490
  /**
@@ -3845,6 +4559,36 @@ export declare class PipelineHandler {
3845
4559
  * Returns `null` after [`Self::result`] has consumed the handler.
3846
4560
  */
3847
4561
  progress(): Promise<JsProgressSnapshot | null>
4562
+ /**
4563
+ * Respond to an input request from a stage that is paused on an
4564
+ * `InputRequestEvent`. The response is fanned out to the active
4565
+ * stage's inner workflow(s). Mirrors
4566
+ * [`blazen_pipeline::PipelineHandler::respond_to_input`].
4567
+ *
4568
+ * The `requestId` argument must match the `InputRequestEvent.request_id` that
4569
+ * was published by a step inside the running stage.
4570
+ */
4571
+ respondToInput(requestId: string, response: any): Promise<void>
4572
+ /**
4573
+ * Respond to an input request using a typed [`JsInputResponseEvent`].
4574
+ *
4575
+ * Equivalent to [`Self::respond_to_input`] but accepts the typed
4576
+ * event object so JS callers can pass a single value already shaped
4577
+ * like the input-response event they may have built earlier.
4578
+ */
4579
+ respondToInputTyped(event: JsInputResponseEvent): Promise<void>
4580
+ /**
4581
+ * Aggregated token usage across the pipeline run so far. Mirrors
4582
+ * [`blazen_pipeline::PipelineHandler::usage_total`]. Returns `null`
4583
+ * after the handler has been consumed by [`Self::result`].
4584
+ */
4585
+ usageTotal(): Promise<JsTokenUsageClass | null>
4586
+ /**
4587
+ * Aggregated cost in USD across the pipeline run so far. Mirrors
4588
+ * [`blazen_pipeline::PipelineHandler::cost_total_usd`]. Returns `null`
4589
+ * after the handler has been consumed by [`Self::result`].
4590
+ */
4591
+ costTotalUsd(): Promise<number | null>
3848
4592
  /** Abort the pipeline. */
3849
4593
  abort(): Promise<void>
3850
4594
  /**
@@ -3852,7 +4596,7 @@ export declare class PipelineHandler {
3852
4596
  * The callback `(eventJson) => void` is invoked for each `PipelineEvent`;
3853
4597
  * `eventJson` is a JS object with shape `{ stageName, branchName, workflowRunId, event }`.
3854
4598
  */
3855
- streamEvents(onEvent: StreamCallbackTsfn): Promise<void>
4599
+ streamEvents(onEvent: (event: { stageName: string; branchName: string; workflowRunId: string; event: Event }) => void): Promise<void>
3856
4600
  }
3857
4601
  export type JsPipelineHandler = PipelineHandler
3858
4602
 
@@ -3891,26 +4635,25 @@ export declare class PipelineSnapshot {
3891
4635
  export type JsPipelineSnapshot = PipelineSnapshot
3892
4636
 
3893
4637
  /**
3894
- * A local Piper TTS provider.
4638
+ * Preference-pair JSONL dataset for DPO / ORPO / SimPO.
3895
4639
  *
3896
- * ```javascript
3897
- * const provider = PiperProvider.create({
3898
- * modelId: "en_US-amy-medium",
3899
- * });
3900
- * ```
4640
+ * Each line of the input file must deserialize to either
4641
+ * `{"prompt": "...", "chosen": "...", "rejected": "..."}` or
4642
+ * `{"messages": [...], "chosen": "...", "rejected": "..."}` (chat shape).
3901
4643
  */
3902
- export declare class PiperProvider {
3903
- /** Create a new Piper provider. */
3904
- static create(options?: JsPiperOptions | undefined | null): PiperProvider
3905
- /** Get the configured voice model identifier, if any. */
3906
- get modelId(): string | null
4644
+ export declare class PreferenceJsonlDataset {
3907
4645
  /**
3908
- * Whether the engine feature is compiled in. When `false`,
3909
- * synthesis methods will return errors.
4646
+ * Load a preference JSONL file using the tokenizer at
4647
+ * `tokenizerPath`.
4648
+ *
4649
+ * # Errors
4650
+ *
4651
+ * Throws if the tokenizer cannot be loaded, the device string is
4652
+ * invalid, or the JSONL file fails to parse.
3910
4653
  */
3911
- get engineAvailable(): boolean
4654
+ static fromPath(path: string, tokenizerPath: string, opts?: JsJsonlDatasetOptions | undefined | null): PreferenceJsonlDataset
3912
4655
  }
3913
- export type JsPiperProvider = PiperProvider
4656
+ export type JsPreferenceJsonlDataset = PreferenceJsonlDataset
3914
4657
 
3915
4658
  /**
3916
4659
  * Subclassable base for download progress callbacks.
@@ -4109,6 +4852,66 @@ export declare class PromptTemplate {
4109
4852
  }
4110
4853
  export type JsPromptTemplate = PromptTemplate
4111
4854
 
4855
+ /**
4856
+ * Completion-role provider defaults: system prompt, default tools,
4857
+ * `responseFormat`, and a typed `beforeModel` hook.
4858
+ *
4859
+ * ```javascript
4860
+ * import { BaseProviderDefaults, ProviderDefaults } from "blazen";
4861
+ *
4862
+ * const d = new ProviderDefaults(
4863
+ * new BaseProviderDefaults(),
4864
+ * "Be terse.",
4865
+ * [], // default tools
4866
+ * { type: "json_object" },
4867
+ * async (request) => { /* mutate request *\/ },
4868
+ * );
4869
+ * ```
4870
+ */
4871
+ export declare class ProviderDefaults {
4872
+ /** Construct completion-role defaults. */
4873
+ constructor(base?: BaseProviderDefaults | undefined | null, systemPrompt?: string | undefined | null, tools?: Array<JsToolDefinition> | undefined | null, responseFormat?: any | undefined | null, beforeModel?: BeforeModelTsfn | undefined | null)
4874
+ /**
4875
+ * The system prompt prepended to requests when the request itself
4876
+ * carries no system message.
4877
+ */
4878
+ get systemPrompt(): string | null
4879
+ /** Replace the system prompt. Pass `null` to clear. */
4880
+ set systemPrompt(value: string | undefined | null)
4881
+ /** The default tools appended to every completion request. */
4882
+ get tools(): Array<JsToolDefinition>
4883
+ /** Replace the default tools. */
4884
+ set tools(value: Array<JsToolDefinition> | undefined | null)
4885
+ /** Default `response_format` (JSON Schema or similar object). */
4886
+ get responseFormat(): any | null
4887
+ /** Replace the default `responseFormat`. Pass `null` to clear. */
4888
+ set responseFormat(value: any | undefined | null)
4889
+ /** Returns `true` when a `beforeModel` hook is configured. */
4890
+ get hasBeforeCompletion(): boolean
4891
+ /** Replace the typed `beforeModel` hook. Pass `null` to clear. */
4892
+ set beforeModel(hook: BeforeModelTsfn | undefined | null)
4893
+ }
4894
+ export type JsProviderDefaults = ProviderDefaults
4895
+
4896
+ /**
4897
+ * Rated JSONL dataset for KTO.
4898
+ *
4899
+ * Each line of the input file must deserialize to
4900
+ * `{"prompt"|"messages": ..., "completion": "...", "label": true|false}`.
4901
+ */
4902
+ export declare class RatedJsonlDataset {
4903
+ /**
4904
+ * Load a rated JSONL file using the tokenizer at `tokenizerPath`.
4905
+ *
4906
+ * # Errors
4907
+ *
4908
+ * Throws if the tokenizer cannot be loaded, the device string is
4909
+ * invalid, or the JSONL file fails to parse.
4910
+ */
4911
+ static fromPath(path: string, tokenizerPath: string, opts?: JsJsonlDatasetOptions | undefined | null): RatedJsonlDataset
4912
+ }
4913
+ export type JsRatedJsonlDataset = RatedJsonlDataset
4914
+
4112
4915
  /**
4113
4916
  * Class wrapper around [`ReasoningTrace`].
4114
4917
  *
@@ -4182,67 +4985,25 @@ export declare class RegistryKey {
4182
4985
  * Alias for [`Self::render_uuid`] exposed as a getter so JS can do
4183
4986
  * `key.uuid` in addition to `key.toString()`.
4184
4987
  */
4185
- get uuid(): string
4186
- }
4187
- export type JsRegistryKey = RegistryKey
4188
-
4189
- /** Class wrapper around [`RequestTiming`]. */
4190
- export declare class RequestTiming {
4191
- /** Construct a request-timing instance. */
4192
- constructor(options: RequestTimingOptions)
4193
- get queueMs(): number | null
4194
- get executionMs(): number | null
4195
- get totalMs(): number | null
4196
- }
4197
- export type JsRequestTimingClass = RequestTiming
4198
-
4199
- /**
4200
- * A completion model that retries transient failures with exponential
4201
- * backoff.
4202
- *
4203
- * ```javascript
4204
- * const model = new RetryCompletionModel(
4205
- * CompletionModel.openrouter(),
4206
- * { maxRetries: 5, initialDelayMs: 500 },
4207
- * );
4208
- * const response = await model.complete([ChatMessage.user("hi")]);
4209
- * ```
4210
- */
4211
- export declare class RetryCompletionModel {
4212
- /**
4213
- * Wrap `model` with retry-on-transient-error behaviour.
4214
- *
4215
- * `config` defaults to [`RetryConfig::default()`] (3 retries, 1s
4216
- * initial delay, 30s cap, jitter on, `Retry-After` honoured) when
4217
- * omitted.
4218
- */
4219
- constructor(model: CompletionModel, config?: JsRetryConfig | undefined | null)
4220
- /** The wrapped model's id. */
4221
- get modelId(): string
4222
- /** Perform a chat completion with automatic retries. */
4223
- complete(messages: Array<JsChatMessage>): Promise<JsCompletionResponse>
4224
- /** Perform a chat completion with options and automatic retries. */
4225
- completeWithOptions(messages: Array<JsChatMessage>, options: JsCompletionOptions): Promise<JsCompletionResponse>
4226
- /**
4227
- * Stream a chat completion. Retries the initial request on transient
4228
- * failures; mid-stream errors are not retried.
4229
- */
4230
- stream(messages: Array<JsChatMessage>, onChunk: StreamChunkCallbackTsfn): Promise<void>
4231
- /** Stream a chat completion with options. */
4232
- streamWithOptions(messages: Array<JsChatMessage>, onChunk: StreamChunkCallbackTsfn, options: JsCompletionOptions): Promise<void>
4233
- /**
4234
- * Convert this retry wrapper into a plain [`JsCompletionModel`] so
4235
- * it can be passed to APIs that expect the base type.
4236
- */
4237
- toCompletionModel(): CompletionModel
4988
+ get uuid(): string
4989
+ }
4990
+ export type JsRegistryKey = RegistryKey
4991
+
4992
+ /** Class wrapper around [`RequestTiming`]. */
4993
+ export declare class RequestTiming {
4994
+ /** Construct a request-timing instance. */
4995
+ constructor(options: RequestTimingOptions)
4996
+ get queueMs(): number | null
4997
+ get executionMs(): number | null
4998
+ get totalMs(): number | null
4238
4999
  }
4239
- export type JsRetryCompletionModel = RetryCompletionModel
5000
+ export type JsRequestTimingClass = RequestTiming
4240
5001
 
4241
5002
  /**
4242
5003
  * A `MemoryBackend` decorator that retries transient errors with
4243
5004
  * exponential backoff.
4244
5005
  *
4245
- * Mirrors `RetryCompletionModel` for `MemoryBackend`. Use one of the
5006
+ * Mirrors `RetryModel` for `MemoryBackend`. Use one of the
4246
5007
  * `wrapInMemory` / `wrapJsonl` / `wrapValkey` factories to wrap the
4247
5008
  * matching backend.
4248
5009
  *
@@ -4271,7 +5032,7 @@ export type JsRetryMemoryBackend = RetryMemoryBackend
4271
5032
  /**
4272
5033
  * Built-in middleware that wraps the inner model with retry-on-transient-
4273
5034
  * error behaviour. Equivalent to constructing a
4274
- * [`super::wrappers::JsRetryCompletionModel`] but composable inside a
5035
+ * [`super::wrappers::JsRetryModel`] but composable inside a
4275
5036
  * [`JsMiddlewareStack`].
4276
5037
  *
4277
5038
  * ```javascript
@@ -4294,6 +5055,115 @@ export declare class RetryMiddleware {
4294
5055
  }
4295
5056
  export type JsRetryMiddleware = RetryMiddleware
4296
5057
 
5058
+ /**
5059
+ * A completion model that retries transient failures with exponential
5060
+ * backoff.
5061
+ *
5062
+ * ```javascript
5063
+ * const model = new RetryModel(
5064
+ * Model.openrouter(),
5065
+ * { maxRetries: 5, initialDelayMs: 500 },
5066
+ * );
5067
+ * const response = await model.complete([ChatMessage.user("hi")]);
5068
+ * ```
5069
+ */
5070
+ export declare class RetryModel {
5071
+ /**
5072
+ * Wrap `model` with retry-on-transient-error behaviour.
5073
+ *
5074
+ * `config` defaults to [`RetryConfig::default()`] (3 retries, 1s
5075
+ * initial delay, 30s cap, jitter on, `Retry-After` honoured) when
5076
+ * omitted.
5077
+ */
5078
+ constructor(model: Model, config?: JsRetryConfig | undefined | null)
5079
+ /** The wrapped model's id. */
5080
+ get modelId(): string
5081
+ /** Perform a chat completion with automatic retries. */
5082
+ complete(messages: Array<JsChatMessage>): Promise<JsModelResponse>
5083
+ /** Perform a chat completion with options and automatic retries. */
5084
+ completeWithOptions(messages: Array<JsChatMessage>, options: JsModelOptions): Promise<JsModelResponse>
5085
+ /**
5086
+ * Stream a chat completion. Retries the initial request on transient
5087
+ * failures; mid-stream errors are not retried.
5088
+ */
5089
+ stream(messages: Array<JsChatMessage>, onChunk: StreamChunkCallbackTsfn): Promise<void>
5090
+ /** Stream a chat completion with options. */
5091
+ streamWithOptions(messages: Array<JsChatMessage>, onChunk: StreamChunkCallbackTsfn, options: JsModelOptions): Promise<void>
5092
+ /**
5093
+ * Convert this retry wrapper into a plain [`JsModel`] so
5094
+ * it can be passed to APIs that expect the base type.
5095
+ */
5096
+ toModel(): Model
5097
+ }
5098
+ export type JsRetryModel = RetryModel
5099
+
5100
+ /**
5101
+ * Retrieval-based Voice Conversion backend.
5102
+ *
5103
+ * Use the [`JsRvcBackend::create`] factory to construct an instance.
5104
+ */
5105
+ export declare class RvcBackend {
5106
+ /** Construct an RVC backend handle. */
5107
+ static create(options?: JsRvcOptions | undefined | null): RvcBackend
5108
+ /** Backend identifier, always `"rvc"`. */
5109
+ get modelId(): string
5110
+ /**
5111
+ * Convert a source utterance to the voice of a registered target
5112
+ * speaker, returning the rendered audio as a self-describing WAV
5113
+ * payload + parsed sample-rate / duration metadata.
5114
+ *
5115
+ * # Errors
5116
+ * Returns `VcVoiceNotFoundError` when `targetVoiceId` is not
5117
+ * registered, `VcIoError` on file-read failures, `VcModelLoadError`
5118
+ * on weight-load failures, `VcConversionError` on inference
5119
+ * failures, or `VcEngineNotAvailableError` when the engine
5120
+ * feature was compiled out.
5121
+ */
5122
+ convertVoice(inputAudioPath: string, targetVoiceId: string): Promise<JsVcResult>
5123
+ /**
5124
+ * Stream voice conversion over an in-memory PCM buffer, invoking
5125
+ * `onChunk` for each emitted [`crate::vc::JsVcChunk`] until the
5126
+ * stream ends (the last chunk arrives with `isFinal === true`).
5127
+ *
5128
+ * The input samples are wrapped in a single-item stream and fed to
5129
+ * the backend's chunked streaming entry point; the backend
5130
+ * internally buffers windows (typically 2 seconds at 16 kHz) and
5131
+ * emits the converted PCM at the target voice's native sample
5132
+ * rate.
5133
+ *
5134
+ * # Errors
5135
+ * Same surface as [`Self::convert_voice`]; additionally surfaces
5136
+ * `VcUnsupportedError` from a backend that does not support
5137
+ * streaming (the default-impl path).
5138
+ */
5139
+ streamConvertPcm(inputSamples: Float32Array, targetVoiceId: string, onChunk: StreamVcChunkCallbackTsfn): Promise<void>
5140
+ /**
5141
+ * List the target voices this backend can currently render.
5142
+ *
5143
+ * # Errors
5144
+ * Returns `VcUnsupportedError` from backends that don't expose a
5145
+ * voice catalogue; `VcIoError` when probing the voice directory
5146
+ * fails.
5147
+ */
5148
+ listTargetVoices(): Promise<Array<JsTargetVoice>>
5149
+ /**
5150
+ * Register a new target voice from a reference utterance.
5151
+ *
5152
+ * RVC voice registration is intentionally unsupported at runtime
5153
+ * (training a voice profile requires an offline pipeline of 1+
5154
+ * hours); this method therefore surfaces
5155
+ * `VcUnsupportedError`. Pre-trained voice profiles can be placed
5156
+ * under `$BLAZEN_RVC_VOICE_DIR/<voice_id>/` and will surface
5157
+ * through [`Self::list_target_voices`] / [`Self::convert_voice`].
5158
+ *
5159
+ * # Errors
5160
+ * Returns `VcUnsupportedError` from RVC; other backends may
5161
+ * override this with a real implementation.
5162
+ */
5163
+ registerTargetVoice(voiceId: string, referenceAudioPath: string): Promise<void>
5164
+ }
5165
+ export type JsRvcBackend = RvcBackend
5166
+
4297
5167
  /**
4298
5168
  * Namespace for in-process-only workflow values.
4299
5169
  *
@@ -4408,6 +5278,83 @@ export declare class SessionRefRegistry {
4408
5278
  }
4409
5279
  export type JsSessionRefRegistry = SessionRefRegistry
4410
5280
 
5281
+ /**
5282
+ * The Spark-TTS backend.
5283
+ *
5284
+ * Mirrors [`blazen_llm::SparkTtsBackend`]. Construct with an optional
5285
+ * [`SparkTtsConfig`](JsSparkTtsConfig); weights load lazily on first
5286
+ * synthesis.
5287
+ */
5288
+ export declare class SparkTtsBackend {
5289
+ /**
5290
+ * Build a Spark-TTS backend. No weights are loaded until the first
5291
+ * synthesis call.
5292
+ */
5293
+ constructor(config?: SparkTtsConfig | undefined | null)
5294
+ /** The configured model id. */
5295
+ get modelId(): string
5296
+ /** Wrap this backend in a typed [`TtsBackendHandle`]. */
5297
+ intoHandle(): JsTtsBackendHandle
5298
+ }
5299
+ export type JsSparkTtsBackend = SparkTtsBackend
5300
+
5301
+ /**
5302
+ * Stable Audio Open backend.
5303
+ *
5304
+ * Use the [`JsStableAudioBackend::create`] factory to construct an
5305
+ * instance. In stub mode (feature `audio-music-stable-audio` OFF), every
5306
+ * `generate*` entry point surfaces `MusicNotYetImplementedError`.
5307
+ */
5308
+ export declare class StableAudioBackend {
5309
+ /**
5310
+ * Construct a Stable Audio backend handle.
5311
+ *
5312
+ * In stub mode (`audio-music-stable-audio` OFF), the returned
5313
+ * handle's `generate*` calls all surface
5314
+ * `MusicNotYetImplementedError`. With the feature ON, the first
5315
+ * `generate*` call lazily downloads weights and loads the model.
5316
+ */
5317
+ static create(options?: JsStableAudioOptions | undefined | null): StableAudioBackend
5318
+ /** Backend identifier, always `"stable-audio"`. */
5319
+ get modelId(): string
5320
+ /**
5321
+ * Generate music conditioned on `prompt`.
5322
+ *
5323
+ * # Errors
5324
+ * Returns `MusicNotYetImplementedError` in stub mode (feature
5325
+ * `audio-music-stable-audio` OFF). With the feature ON, may return
5326
+ * `MusicInvalidInputError`, `MusicHfHubError`, or `MusicCandleError`.
5327
+ */
5328
+ generateMusic(prompt: string, durationSeconds: number): Promise<JsMusicResult>
5329
+ /**
5330
+ * Generate sound-effect audio conditioned on `prompt`.
5331
+ *
5332
+ * # Errors
5333
+ * Same surface as [`Self::generate_music`].
5334
+ */
5335
+ generateSfx(prompt: string, durationSeconds: number): Promise<JsMusicResult>
5336
+ /**
5337
+ * Stream music generation, invoking `onChunk` for each emitted
5338
+ * `JsMusicChunk` until the final chunk arrives (`isFinal === true`).
5339
+ *
5340
+ * # Errors
5341
+ * Same surface as [`Self::generate_music`]. In stub mode (without
5342
+ * the streaming path on the upstream trait), this surfaces
5343
+ * `MusicNotYetImplementedError` because the trait default
5344
+ * implementation routes there.
5345
+ */
5346
+ streamGenerateMusic(prompt: string, durationSeconds: number, onChunk: StreamMusicChunkCallbackTsfn): Promise<void>
5347
+ /**
5348
+ * Stream SFX generation, invoking `onChunk` for each emitted
5349
+ * `JsMusicChunk` until the final chunk arrives (`isFinal === true`).
5350
+ *
5351
+ * # Errors
5352
+ * Same surface as [`Self::stream_generate_music`].
5353
+ */
5354
+ streamGenerateSfx(prompt: string, durationSeconds: number, onChunk: StreamMusicChunkCallbackTsfn): Promise<void>
5355
+ }
5356
+ export type JsStableAudioBackend = StableAudioBackend
5357
+
4411
5358
  /**
4412
5359
  * A single sequential pipeline stage.
4413
5360
  *
@@ -4614,9 +5561,9 @@ export type JsStepDeserializerRegistry = StepDeserializerRegistry
4614
5561
  */
4615
5562
  export declare class StepOutput {
4616
5563
  /** Construct a single-event output. */
4617
- static single(event: any): StepOutput
5564
+ static single(event: Event): StepOutput
4618
5565
  /** Construct a fan-out output from an array of events. */
4619
- static multiple(events: Array<any>): StepOutput
5566
+ static multiple(events: Array<Event>): StepOutput
4620
5567
  /** Construct a no-output result (side-effect only). */
4621
5568
  static none(): StepOutput
4622
5569
  /** Active variant tag. */
@@ -4699,7 +5646,7 @@ export type JsStopEventClass = StopEvent
4699
5646
  * Base class for the structured-output extraction surface.
4700
5647
  *
4701
5648
  * Mirrors [`blazen_llm::traits::StructuredOutput`]. Most callers should
4702
- * use [`crate::providers::JsCompletionModel`]'s built-in structured
5649
+ * use [`crate::providers::JsModel`]'s built-in structured
4703
5650
  * output (every completion model supports it via the blanket impl);
4704
5651
  * this class exists so users can write a custom `extract` that does
4705
5652
  * something different (e.g. multi-pass extraction, retries, custom
@@ -4716,6 +5663,100 @@ export declare class StructuredOutput {
4716
5663
  }
4717
5664
  export type JsStructuredOutput = StructuredOutput
4718
5665
 
5666
+ /**
5667
+ * Typed handle wrapping a faster-whisper STT backend.
5668
+ *
5669
+ * Mirrors [`blazen_llm::SttBackendHandle`]. Obtain one from
5670
+ * [`FasterWhisperBackend.intoHandle`](JsFasterWhisperBackend::into_handle).
5671
+ */
5672
+ export declare class SttBackendHandle {
5673
+ /** The wrapped backend's stable identifier. */
5674
+ get id(): string
5675
+ /** The wrapped backend's capability tag. */
5676
+ get providerKind(): string
5677
+ /** Load the wrapped backend's weights. */
5678
+ load(): Promise<void>
5679
+ }
5680
+ export type JsSttBackendHandle = SttBackendHandle
5681
+
5682
+ /**
5683
+ * A user-defined child runner embeddable inside a parent `Workflow`.
5684
+ *
5685
+ * Subclass `SubExecutable` and override `execute(input)` to run an opaque
5686
+ * JSON payload to completion, returning the terminal JSON value. The
5687
+ * resulting object can be embedded as a step via `SubPipelineStep`'s
5688
+ * `fromExecutable` factory.
5689
+ *
5690
+ * ```typescript
5691
+ * import { SubExecutable, SubPipelineStep, Workflow } from "blazen";
5692
+ *
5693
+ * class Doubler extends SubExecutable {
5694
+ * async execute(input) {
5695
+ * return { value: input.value * 2 };
5696
+ * }
5697
+ * }
5698
+ *
5699
+ * const step = SubPipelineStep.fromExecutable(
5700
+ * "double", ["blazen::StartEvent"], ["double::output"], new Doubler(),
5701
+ * );
5702
+ * ```
5703
+ *
5704
+ * Constructing `SubExecutable` directly (without a subclass override)
5705
+ * yields a runner whose `execute` reports an error — override `execute` to
5706
+ * give it behavior.
5707
+ */
5708
+ export declare class SubExecutable {
5709
+ /**
5710
+ * Construct a `SubExecutable`.
5711
+ *
5712
+ * When invoked through a JS subclass that overrides `execute`, the
5713
+ * constructor binds that override and dispatches every Rust
5714
+ * [`SubExecutable::execute`](blazen_core::SubExecutable::execute) call
5715
+ * to it. When invoked directly (no override), `execute` reports an
5716
+ * error until overridden.
5717
+ */
5718
+ constructor()
5719
+ }
5720
+ export type JsSubExecutable = SubExecutable
5721
+
5722
+ /**
5723
+ * A workflow step that delegates to a `Pipeline`.
5724
+ *
5725
+ * The child pipeline is cloned (from a built [`Pipeline`](JsPipeline)) at
5726
+ * construction time and stored as an `Arc<dyn SubExecutable>` so this step
5727
+ * instance can be reused across multiple parent workflows.
5728
+ */
5729
+ export declare class SubPipelineStep {
5730
+ /**
5731
+ * Create a sub-pipeline step.
5732
+ *
5733
+ * `name` / `accepts` / `emits` describe routing. `inner` is the child
5734
+ * pipeline whose stages are run for each parent dispatch. The inner
5735
+ * pipeline is cloned at construction time, so `inner` must not have
5736
+ * been consumed (by `start`/`run`/`resume`) yet and this step instance
5737
+ * can be reused across builders.
5738
+ */
5739
+ constructor(name: string, accepts: Array<string>, emits: Array<string>, inner: Pipeline, timeoutSecs?: number | undefined | null, retryConfig?: JsRetryConfig | undefined | null)
5740
+ /**
5741
+ * Create a sub-pipeline step from any [`SubExecutable`](JsSubExecutable)
5742
+ * child runner.
5743
+ *
5744
+ * Unlike [`new`](Self::new) (which embeds a built `Pipeline`), this
5745
+ * accepts a user-defined `SubExecutable` subclass instance, letting an
5746
+ * arbitrary JS-implemented child runner be embedded inside a parent
5747
+ * `Workflow`. The executable handle is cloned, so the instance can be
5748
+ * reused across builders.
5749
+ */
5750
+ static fromExecutable(name: string, accepts: Array<string>, emits: Array<string>, executable: SubExecutable, timeoutSecs?: number | undefined | null, retryConfig?: JsRetryConfig | undefined | null): SubPipelineStep
5751
+ /** The step name. */
5752
+ get name(): string
5753
+ /** Event type identifiers this step accepts. */
5754
+ get accepts(): Array<string>
5755
+ /** Event type identifiers this step may emit. */
5756
+ get emits(): Array<string>
5757
+ }
5758
+ export type JsSubPipelineStep = SubPipelineStep
5759
+
4719
5760
  /**
4720
5761
  * A workflow step that delegates to another `Workflow`.
4721
5762
  *
@@ -4825,13 +5866,13 @@ export declare class TogetherProvider {
4825
5866
  */
4826
5867
  static embeddingModel(options?: JsProviderOptions | undefined | null): OpenAiCompatEmbeddingModel
4827
5868
  /** Perform a chat completion. */
4828
- complete(messages: Array<JsChatMessage>): Promise<JsCompletionResponse>
5869
+ complete(messages: Array<JsChatMessage>): Promise<JsModelResponse>
4829
5870
  /** Perform a chat completion with additional options. */
4830
- completeWithOptions(messages: Array<JsChatMessage>, options: JsCompletionOptions): Promise<JsCompletionResponse>
5871
+ completeWithOptions(messages: Array<JsChatMessage>, options: JsModelOptions): Promise<JsModelResponse>
4831
5872
  /** Stream a chat completion. */
4832
5873
  stream(messages: Array<JsChatMessage>, onChunk: StreamChunkCallbackTsfn): Promise<void>
4833
5874
  /** Stream a chat completion with additional options. */
4834
- streamWithOptions(messages: Array<JsChatMessage>, onChunk: StreamChunkCallbackTsfn, options: JsCompletionOptions): Promise<void>
5875
+ streamWithOptions(messages: Array<JsChatMessage>, onChunk: StreamChunkCallbackTsfn, options: JsModelOptions): Promise<void>
4835
5876
  }
4836
5877
  export type JsTogetherProvider = TogetherProvider
4837
5878
 
@@ -5099,6 +6140,42 @@ export declare class TranscriptionProviderDefaults {
5099
6140
  }
5100
6141
  export type JsTranscriptionProviderDefaults = TranscriptionProviderDefaults
5101
6142
 
6143
+ /**
6144
+ * Typed handle wrapping a Spark-TTS backend.
6145
+ *
6146
+ * Mirrors [`blazen_llm::TtsBackendHandle`]. Obtain one from
6147
+ * [`SparkTtsBackend.intoHandle`](JsSparkTtsBackend::into_handle).
6148
+ */
6149
+ export declare class TtsBackendHandle {
6150
+ /** The wrapped backend's stable identifier. */
6151
+ get id(): string
6152
+ }
6153
+ export type JsTtsBackendHandle = TtsBackendHandle
6154
+
6155
+ /**
6156
+ * A local TTS provider backed by `any-tts`.
6157
+ *
6158
+ * ```javascript
6159
+ * const provider = TtsProvider.create({
6160
+ * model: "kokoro82m",
6161
+ * voice: "af_bella",
6162
+ * });
6163
+ * ```
6164
+ */
6165
+ export declare class TtsProvider {
6166
+ /** Create a new TTS provider. */
6167
+ static create(options?: JsTtsOptions | undefined | null): TtsProvider
6168
+ /** The configured model kind, as a string (`"kokoro"`, `"vibevoice"`, `"qwen3_tts"`). */
6169
+ get model(): string
6170
+ /**
6171
+ * Whether the engine feature is compiled in. When the `anytts`
6172
+ * feature is on, this returns `true` — the provider can be
6173
+ * constructed regardless of the runtime model-load outcome.
6174
+ */
6175
+ get engineAvailable(): boolean
6176
+ }
6177
+ export type JsTtsProvider = TtsProvider
6178
+
5102
6179
  /**
5103
6180
  * r" Base class for text-to-speech providers.
5104
6181
  * r"
@@ -5211,7 +6288,7 @@ export type JsUpstashBackend = UpstashBackend
5211
6288
  * ```javascript
5212
6289
  * const events = [];
5213
6290
  * const emitter = new UsageEmitter((event) => { events.push(event); });
5214
- * const model = new UsageRecordingCompletionModel(base, emitter, "openai");
6291
+ * const model = new UsageRecordingModel(base, emitter, "openai");
5215
6292
  * ```
5216
6293
  */
5217
6294
  export declare class UsageEmitter {
@@ -5224,44 +6301,44 @@ export declare class UsageEmitter {
5224
6301
  export type JsUsageEmitter = UsageEmitter
5225
6302
 
5226
6303
  /**
5227
- * A `CompletionModel` decorator that emits a `UsageEvent` after each
6304
+ * An `EmbeddingModel` decorator that emits a `UsageEvent` after each
6305
+ * successful `embed` call.
6306
+ */
6307
+ export declare class UsageRecordingEmbeddingModel {
6308
+ /** Wrap an `EmbeddingModel` with a usage-recording layer. */
6309
+ constructor(model: EmbeddingModel, emitter: AnyEmitter, providerLabel: string, runId?: string | undefined | null)
6310
+ /** The underlying provider's model id. */
6311
+ get modelId(): string
6312
+ /** Output dimensionality. */
6313
+ get dimensions(): number
6314
+ }
6315
+ export type JsUsageRecordingEmbeddingModel = UsageRecordingEmbeddingModel
6316
+
6317
+ /**
6318
+ * A `Model` decorator that emits a `UsageEvent` after each
5228
6319
  * successful `complete` call. Mirrors
5229
- * `blazen_llm::usage_recording::UsageRecordingCompletionModel`.
6320
+ * `blazen_llm::usage_recording::UsageRecordingModel`.
5230
6321
  *
5231
6322
  * ```javascript
5232
- * const base = CompletionModel.openai();
6323
+ * const base = Model.openai();
5233
6324
  * const events = [];
5234
6325
  * const emitter = new UsageEmitter((e) => events.push(e));
5235
- * const model = new UsageRecordingCompletionModel(base, emitter, "openai");
6326
+ * const model = new UsageRecordingModel(base, emitter, "openai");
5236
6327
  * const response = await model.complete([ChatMessage.user("hi")]);
5237
6328
  * ```
5238
6329
  */
5239
- export declare class UsageRecordingCompletionModel {
5240
- /** Wrap a `CompletionModel` with a usage-recording layer. */
5241
- constructor(model: CompletionModel, emitter: AnyEmitter, providerLabel: string, runId?: string | undefined | null)
6330
+ export declare class UsageRecordingModel {
6331
+ /** Wrap a `Model` with a usage-recording layer. */
6332
+ constructor(model: Model, emitter: AnyEmitter, providerLabel: string, runId?: string | undefined | null)
5242
6333
  /** The underlying provider's model id. */
5243
6334
  get modelId(): string
5244
6335
  /**
5245
- * Convert this decorator into a `CompletionModel` so it can be passed to
6336
+ * Convert this decorator into a `Model` so it can be passed to
5246
6337
  * APIs that expect the base type (`runAgent`, further decorators, …).
5247
6338
  */
5248
- toCompletionModel(): CompletionModel
6339
+ toModel(): Model
5249
6340
  }
5250
- export type JsUsageRecordingCompletionModel = UsageRecordingCompletionModel
5251
-
5252
- /**
5253
- * An `EmbeddingModel` decorator that emits a `UsageEvent` after each
5254
- * successful `embed` call.
5255
- */
5256
- export declare class UsageRecordingEmbeddingModel {
5257
- /** Wrap an `EmbeddingModel` with a usage-recording layer. */
5258
- constructor(model: EmbeddingModel, emitter: AnyEmitter, providerLabel: string, runId?: string | undefined | null)
5259
- /** The underlying provider's model id. */
5260
- get modelId(): string
5261
- /** Output dimensionality. */
5262
- get dimensions(): number
5263
- }
5264
- export type JsUsageRecordingEmbeddingModel = UsageRecordingEmbeddingModel
6341
+ export type JsUsageRecordingModel = UsageRecordingModel
5265
6342
 
5266
6343
  /**
5267
6344
  * A Valkey/Redis-backed backend for the memory store.
@@ -5319,6 +6396,87 @@ export declare class ValkeyCheckpointStore {
5319
6396
  }
5320
6397
  export type JsValkeyCheckpointStore = ValkeyCheckpointStore
5321
6398
 
6399
+ /**
6400
+ * Unified voice-conversion backend aggregator.
6401
+ *
6402
+ * ```javascript
6403
+ * // Pick a backend at construction time:
6404
+ * const m = VcModel.rvc({ topK: 8, retrievalBlend: 0.75 });
6405
+ * const result = await m.convertVoice('input.wav', 'speaker-01');
6406
+ * ```
6407
+ */
6408
+ export declare class VcModel {
6409
+ /** Build a [`JsVcModel`] backed by RVC. */
6410
+ static rvc(options?: JsRvcOptions | undefined | null): VcModel
6411
+ /**
6412
+ * Backend identifier — same value `modelId` returns on the per-
6413
+ * backend `#[napi]` class (e.g. `"rvc"`).
6414
+ */
6415
+ get modelId(): string
6416
+ /**
6417
+ * Convert a source utterance to the voice of a registered target
6418
+ * speaker.
6419
+ *
6420
+ * # Errors
6421
+ * See per-backend documentation
6422
+ * ([`JsRvcBackend::convert_voice`]).
6423
+ */
6424
+ convertVoice(inputAudioPath: string, targetVoiceId: string): Promise<JsVcResult>
6425
+ /**
6426
+ * Stream voice conversion over an in-memory PCM buffer.
6427
+ *
6428
+ * # Errors
6429
+ * See per-backend documentation
6430
+ * ([`JsRvcBackend::stream_convert_pcm`]).
6431
+ */
6432
+ streamConvertPcm(inputSamples: Float32Array, targetVoiceId: string, onChunk: StreamVcChunkCallbackTsfn): Promise<void>
6433
+ /**
6434
+ * List the target voices the active backend can currently render.
6435
+ *
6436
+ * # Errors
6437
+ * See per-backend documentation.
6438
+ */
6439
+ listTargetVoices(): Promise<Array<JsTargetVoice>>
6440
+ /**
6441
+ * Register a new target voice from a reference utterance.
6442
+ *
6443
+ * # Errors
6444
+ * See per-backend documentation.
6445
+ */
6446
+ registerTargetVoice(voiceId: string, referenceAudioPath: string): Promise<void>
6447
+ }
6448
+ export type JsVcModel = VcModel
6449
+
6450
+ /**
6451
+ * Base class for voice-conversion providers.
6452
+ *
6453
+ * Mirrors the [`blazen_llm::providers::VcProvider`] capability trait —
6454
+ * source utterance + target voice → re-voiced audio, plus voice
6455
+ * cloning. Subclass and override `convertVoice()` (and optionally
6456
+ * `cloneVoice()`, `listVoices()`, `deleteVoice()`).
6457
+ */
6458
+ export declare class VcProvider {
6459
+ constructor(config: CapabilityProviderConfig)
6460
+ /** The provider identifier. */
6461
+ get providerId(): string | null
6462
+ /** The base URL, if set. */
6463
+ get baseUrl(): string | null
6464
+ /**
6465
+ * Estimated memory footprint in bytes (host RAM if the
6466
+ * provider targets the CPU, GPU VRAM otherwise), if set.
6467
+ */
6468
+ get memoryEstimateBytes(): number | null
6469
+ /** Convert the source utterance into the target voice. */
6470
+ convertVoice(request: any): Promise<any>
6471
+ /** Clone a voice from reference audio clips. */
6472
+ cloneVoice(request: any): Promise<any>
6473
+ /** List all voices known to this provider. */
6474
+ listVoices(): Promise<any>
6475
+ /** Delete a previously-cloned voice. */
6476
+ deleteVoice(voice: any): Promise<any>
6477
+ }
6478
+ export type JsVcProvider = VcProvider
6479
+
5322
6480
  /**
5323
6481
  * r" Base class for video generation providers.
5324
6482
  * r"
@@ -5519,6 +6677,28 @@ export declare class Workflow {
5519
6677
  * awkward parallel-arrays signature into a single class instance.
5520
6678
  */
5521
6679
  addParallelSubworkflowsObj(step: ParallelSubWorkflowsStep): void
6680
+ /**
6681
+ * Register a sub-pipeline step that delegates to a `Pipeline`.
6682
+ * Mirrors [`blazen_core::WorkflowBuilder::add_subpipeline_step`] and is
6683
+ * the pipeline analogue of [`Self::add_subworkflow_step`].
6684
+ *
6685
+ * - `name`: human-readable step name.
6686
+ * - `accepts`: array of event type strings this step handles.
6687
+ * - `emits`: array of event type strings this step may emit (informational).
6688
+ * - `inner`: the child `Pipeline` to run (cloned at registration time,
6689
+ * so it must not have been consumed by `start`/`run`/`resume`).
6690
+ * - `timeoutSecs`: optional wall-clock timeout for the child run.
6691
+ * - `retryConfig`: optional retry policy for the child run.
6692
+ */
6693
+ addSubpipelineStep(name: string, accepts: Array<string>, emits: Array<string>, inner: Pipeline, timeoutSecs?: number | undefined | null, retryConfig?: JsRetryConfig | undefined | null): void
6694
+ /**
6695
+ * Register a pre-built [`SubPipelineStep`] wrapper.
6696
+ *
6697
+ * Object-form of [`Self::add_subpipeline_step`]: the same step instance
6698
+ * can be reused across multiple workflows since its inner child pipeline
6699
+ * is captured in `Arc<dyn SubExecutable>` form at construction time.
6700
+ */
6701
+ addSubpipelineStepObj(step: SubPipelineStep): void
5522
6702
  /**
5523
6703
  * Add a step to the workflow.
5524
6704
  *
@@ -5527,7 +6707,7 @@ export declare class Workflow {
5527
6707
  * - `handler`: Sync or async function `(event, ctx) => Event | Event[] | null | void`
5528
6708
  * that processes events and returns the next event, an array of events, or nothing.
5529
6709
  */
5530
- addStep(name: string, eventTypes: Array<string>, handler: StepHandlerTsfn): void
6710
+ addStep(name: string, eventTypes: Array<string>, handler: (event: Event, ctx: Context) => Event | Event[] | null | void | Promise<Event | Event[] | null | void>): void
5531
6711
  /**
5532
6712
  * Set the workflow timeout in seconds.
5533
6713
  *
@@ -5549,7 +6729,7 @@ export declare class Workflow {
5549
6729
  *
5550
6730
  * Returns the final result when the workflow completes.
5551
6731
  */
5552
- runStreaming(input: any, onEvent: StreamCallbackTsfn): Promise<JsWorkflowResult>
6732
+ runStreaming(input: any, onEvent: (event: Event) => void): Promise<JsWorkflowResult>
5553
6733
  /**
5554
6734
  * Run the workflow and return a handler object.
5555
6735
  *
@@ -5652,7 +6832,7 @@ export declare class WorkflowBuilder {
5652
6832
  * JavaScript function; the workflow engine routes events whose
5653
6833
  * `type` matches one of `eventTypes` to it.
5654
6834
  */
5655
- addStep(name: string, eventTypes: Array<string>, handler: StepHandlerTsfn): this
6835
+ addStep(name: string, eventTypes: Array<string>, handler: (event: Event, ctx: Context) => Event | Event[] | null | void | Promise<Event | Event[] | null | void>): this
5656
6836
  /**
5657
6837
  * Set the workflow timeout in seconds. A non-positive value
5658
6838
  * disables the timeout (equivalent to [`Self::no_timeout`]).
@@ -5838,7 +7018,7 @@ export declare class WorkflowHandler {
5838
7018
  * are captured. Subsequent calls subscribe a fresh stream that
5839
7019
  * starts from the current point in time.
5840
7020
  */
5841
- streamEvents(onEvent: StreamCallbackTsfn): Promise<void>
7021
+ streamEvents(onEvent: (event: Event) => void): Promise<void>
5842
7022
  }
5843
7023
  export type JsWorkflowHandler = WorkflowHandler
5844
7024
 
@@ -5937,16 +7117,36 @@ export declare class XaiProvider {
5937
7117
  /** Get the model ID. */
5938
7118
  get modelId(): string
5939
7119
  /** Perform a chat completion. */
5940
- complete(messages: Array<JsChatMessage>): Promise<JsCompletionResponse>
7120
+ complete(messages: Array<JsChatMessage>): Promise<JsModelResponse>
5941
7121
  /** Perform a chat completion with additional options. */
5942
- completeWithOptions(messages: Array<JsChatMessage>, options: JsCompletionOptions): Promise<JsCompletionResponse>
7122
+ completeWithOptions(messages: Array<JsChatMessage>, options: JsModelOptions): Promise<JsModelResponse>
5943
7123
  /** Stream a chat completion. */
5944
7124
  stream(messages: Array<JsChatMessage>, onChunk: StreamChunkCallbackTsfn): Promise<void>
5945
7125
  /** Stream a chat completion with additional options. */
5946
- streamWithOptions(messages: Array<JsChatMessage>, onChunk: StreamChunkCallbackTsfn, options: JsCompletionOptions): Promise<void>
7126
+ streamWithOptions(messages: Array<JsChatMessage>, onChunk: StreamChunkCallbackTsfn, options: JsModelOptions): Promise<void>
5947
7127
  }
5948
7128
  export type JsXaiProvider = XaiProvider
5949
7129
 
7130
+ /**
7131
+ * Caller-supplied options when mounting a `LoRA` adapter via
7132
+ * [`JsModelManager::load_adapter`].
7133
+ *
7134
+ * Mirrors [`blazen_llm::AdapterOptions`]; `scale` is optional and
7135
+ * defaults to `1.0` (full strength, PEFT convention) when omitted.
7136
+ */
7137
+ export interface AdapterOptions {
7138
+ /**
7139
+ * Caller-chosen identifier for this adapter mount. Must be unique
7140
+ * per `(model, adapter)` pair within a manager.
7141
+ */
7142
+ adapterId: string
7143
+ /**
7144
+ * Scaling factor applied to the adapter's delta-weights. Defaults
7145
+ * to `1.0` when not provided.
7146
+ */
7147
+ scale?: number
7148
+ }
7149
+
5950
7150
  /**
5951
7151
  * Aggregate one [`JsUsageEvent`] into a [`crate::types::JsTokenUsageClass`].
5952
7152
  * Returns a fresh class instance that adds the seven token counters from the
@@ -6010,22 +7210,41 @@ export interface AgentConfig {
6010
7210
  * `toolName`, `toolCallId`, and `arguments` are populated.
6011
7211
  * - `"toolResult"` -- a tool execution completed. `iteration`, `toolName`,
6012
7212
  * and `result` are populated.
7213
+ * - `"toolError"` -- a tool execution failed and the error was fed back to
7214
+ * the model as a `tool_result` so it can retry (the run is NOT aborted).
7215
+ * `iteration`, `toolName`, `result` (the `{"error": ...}` payload), and
7216
+ * `error` are populated.
6013
7217
  * - `"iterationComplete"` -- the model produced a response. `iteration`
6014
7218
  * and `hadToolCalls` are populated.
6015
7219
  */
6016
7220
  export interface AgentEvent {
6017
- /** Discriminant: `"toolCalled"`, `"toolResult"`, or `"iterationComplete"`. */
7221
+ /**
7222
+ * Discriminant: `"toolCalled"`, `"toolResult"`, `"toolError"`, or
7223
+ * `"iterationComplete"`.
7224
+ */
6018
7225
  kind: string
6019
7226
  /** Iteration index (0-based). */
6020
7227
  iteration: number
6021
- /** Tool name. Populated for `"toolCalled"` and `"toolResult"`. */
7228
+ /**
7229
+ * Tool name. Populated for `"toolCalled"`, `"toolResult"`, and
7230
+ * `"toolError"`.
7231
+ */
6022
7232
  toolName?: string
6023
7233
  /** Tool call ID. Populated for `"toolCalled"`. */
6024
7234
  toolCallId?: string
6025
7235
  /** Tool arguments. Populated for `"toolCalled"`. */
6026
7236
  arguments?: any
6027
- /** Tool result payload. Populated for `"toolResult"`. */
7237
+ /**
7238
+ * Tool result payload. Populated for `"toolResult"`, and for
7239
+ * `"toolError"` (where it holds the `{"error": ...}` payload sent to the
7240
+ * model).
7241
+ */
6028
7242
  result?: any
7243
+ /**
7244
+ * Error message. Populated only for `"toolError"` -- the failure that was
7245
+ * fed back to the model as a `tool_result` so it could retry.
7246
+ */
7247
+ error?: string
6029
7248
  /**
6030
7249
  * Whether this iteration contained tool calls. Populated for
6031
7250
  * `"iterationComplete"`.
@@ -6039,6 +7258,35 @@ export declare function audioInput(name: string, description: string): any
6039
7258
  /** Build a JSON Schema declaring a single required CAD-file-handle input. */
6040
7259
  export declare function cadInput(name: string, description: string): any
6041
7260
 
7261
+ /**
7262
+ * The capability a provider serves. Mirrors
7263
+ * [`blazen_llm::providers::CapabilityKind`].
7264
+ */
7265
+ export declare const enum CapabilityKind {
7266
+ /** Large language model — chat / completion / streaming. */
7267
+ Llm = 'Llm',
7268
+ /** Text-to-speech audio synthesis. */
7269
+ Tts = 'Tts',
7270
+ /** Speech-to-text transcription. */
7271
+ Stt = 'Stt',
7272
+ /** Text-to-music / text-to-sfx audio generation. */
7273
+ Music = 'Music',
7274
+ /** Voice conversion. */
7275
+ Vc = 'Vc',
7276
+ /** 3D mesh generation. */
7277
+ ThreeD = 'ThreeD',
7278
+ /** 2D image generation. */
7279
+ ImageGen = 'ImageGen',
7280
+ /** Vector embedding generation. */
7281
+ Embedding = 'Embedding',
7282
+ /** Neural audio codec. */
7283
+ Codec = 'Codec',
7284
+ /** Background removal on existing images. */
7285
+ BackgroundRemoval = 'BackgroundRemoval',
7286
+ /** Video generation. */
7287
+ Video = 'Video'
7288
+ }
7289
+
6042
7290
  /** Configuration passed to any capability provider constructor. */
6043
7291
  export interface CapabilityProviderConfig {
6044
7292
  /** Short identifier for this provider (e.g. `"elevenlabs"`, `"fal"`). */
@@ -6090,9 +7338,9 @@ export interface CitationOptions {
6090
7338
  * order as the input.
6091
7339
  *
6092
7340
  * ```typescript
6093
- * import { CompletionModel, ChatMessage, completeBatch } from 'blazen';
7341
+ * import { Model, ChatMessage, completeBatch } from 'blazen';
6094
7342
  *
6095
- * const model = CompletionModel.openai({ apiKey: "sk-..." });
7343
+ * const model = Model.openai({ apiKey: "sk-..." });
6096
7344
  *
6097
7345
  * const result = await completeBatch(
6098
7346
  * model,
@@ -6113,7 +7361,7 @@ export interface CitationOptions {
6113
7361
  * }
6114
7362
  * ```
6115
7363
  */
6116
- export declare function completeBatch(model: JsCompletionModel, messageSets: Array<Array<JsChatMessage>>, options?: JsBatchOptions | undefined | null): Promise<BatchResult>
7364
+ export declare function completeBatch(model: JsModel, messageSets: Array<Array<JsChatMessage>>, options?: JsBatchOptions | undefined | null): Promise<BatchResult>
6117
7365
 
6118
7366
  /**
6119
7367
  * Run a batch using a typed [`JsBatchConfig`] instance instead of an options
@@ -6124,85 +7372,13 @@ export declare function completeBatch(model: JsCompletionModel, messageSets: Arr
6124
7372
  * multiple calls.
6125
7373
  *
6126
7374
  * ```typescript
6127
- * import { CompletionModel, ChatMessage, BatchConfig, completeBatchConfig } from 'blazen';
7375
+ * import { Model, ChatMessage, BatchConfig, completeBatchConfig } from 'blazen';
6128
7376
  *
6129
7377
  * const cfg = new BatchConfig(4);
6130
7378
  * const result = await completeBatchConfig(model, messageSets, cfg);
6131
7379
  * ```
6132
7380
  */
6133
- export declare function completeBatchConfig(model: JsCompletionModel, messageSets: Array<Array<JsChatMessage>>, config: BatchConfig): Promise<BatchResult>
6134
-
6135
- /**
6136
- * Configuration for subclassed `CompletionModel` instances.
6137
- *
6138
- * When extending `CompletionModel` from JavaScript/TypeScript, pass this
6139
- * to `super()` so the base class can report `modelId` and other metadata
6140
- * without a concrete provider.
6141
- *
6142
- * ```javascript
6143
- * class MyLLM extends CompletionModel {
6144
- * constructor() {
6145
- * super({ modelId: "my-custom-model", contextLength: 8192 });
6146
- * }
6147
- * }
6148
- * ```
6149
- */
6150
- export interface CompletionModelConfig {
6151
- /** Model identifier (e.g. `"my-org/custom-llama"`). */
6152
- modelId?: string
6153
- /** Maximum context window in tokens. */
6154
- contextLength?: number
6155
- /** Base URL for HTTP-based providers. */
6156
- baseUrl?: string
6157
- /**
6158
- * Estimated memory footprint in bytes when loaded (host RAM if
6159
- * the provider targets the CPU, GPU VRAM otherwise).
6160
- */
6161
- memoryEstimateBytes?: number
6162
- /** Maximum output tokens the model supports. */
6163
- maxOutputTokens?: number
6164
- }
6165
-
6166
- /**
6167
- * Provider-agnostic request for a chat completion.
6168
- *
6169
- * Mirrors [`blazen_llm::CompletionRequest`]. Most callers reach for the
6170
- * [`crate::providers::JsCompletionModel`] factory + per-call options
6171
- * path; this typed shape exists for callers who need to build a request
6172
- * envelope explicitly (e.g. forwarding the same request through multiple
6173
- * middleware layers).
6174
- */
6175
- export interface CompletionRequest {
6176
- /**
6177
- * The conversation history as JSON-serialized `ChatMessage` values.
6178
- *
6179
- * Each entry must round-trip through `serde_json` into a Rust
6180
- * [`blazen_llm::ChatMessage`]. Use the `ChatMessage` class to build
6181
- * these in JS.
6182
- */
6183
- messages: Array<any>
6184
- /** Tools available for the model to invoke. */
6185
- tools?: Array<JsToolDefinition>
6186
- /** Sampling temperature. */
6187
- temperature?: number
6188
- /** Maximum number of tokens to generate. */
6189
- maxTokens?: number
6190
- /** Nucleus sampling parameter. */
6191
- topP?: number
6192
- /**
6193
- * JSON-encoded response format hint (raw, matching the `OpenAI` shape
6194
- * or the typed [`crate::types::JsResponseFormat`] when serialized).
6195
- */
6196
- responseFormat?: any
6197
- /** Override the provider's default model for this request. */
6198
- model?: string
6199
- /** Output modalities (e.g., `["text"]`, `["image", "text"]`). */
6200
- modalities?: Array<string>
6201
- /** Image generation configuration (model-specific). */
6202
- imageConfig?: any
6203
- /** Audio output configuration (voice, format, etc.). */
6204
- audioConfig?: any
6205
- }
7381
+ export declare function completeBatchConfig(model: JsModel, messageSets: Array<Array<JsChatMessage>>, config: BatchConfig): Promise<BatchResult>
6206
7382
 
6207
7383
  /**
6208
7384
  * Compute the cost in USD for an audio call (TTS / STT) given the model id
@@ -6414,6 +7590,28 @@ export interface EventEnvelope {
6414
7590
  */
6415
7591
  export declare function extractInlineArtifacts(content: string): Array<JsArtifact>
6416
7592
 
7593
+ /**
7594
+ * Configuration for the faster-whisper (`CTranslate2`) STT backend.
7595
+ *
7596
+ * Mirrors [`blazen_llm::FasterWhisperConfig`]. All fields are optional;
7597
+ * unset fields fall back to the upstream defaults
7598
+ * (`Systran/faster-whisper-tiny`, HF download on first use).
7599
+ */
7600
+ export interface FasterWhisperConfig {
7601
+ /** Hugging Face repo id for the `CTranslate2` Whisper bundle. */
7602
+ modelId?: string
7603
+ /**
7604
+ * Local path to a pre-downloaded bundle directory. When unset, the
7605
+ * bundle is fetched from Hugging Face on first transcription.
7606
+ */
7607
+ modelDir?: string
7608
+ /**
7609
+ * Optional Hugging Face Hub revision pin (branch, tag, or commit
7610
+ * SHA).
7611
+ */
7612
+ revision?: string
7613
+ }
7614
+
6417
7615
  /**
6418
7616
  * Fetch a single model's pricing from `DEFAULT_MODEL_PRICING_URL_BASE` using
6419
7617
  * the platform-default HTTP client and register it. Resolves to the registered
@@ -6678,6 +7876,41 @@ export declare const enum JoinStrategy {
6678
7876
  FirstCompletes = 'FirstCompletes'
6679
7877
  }
6680
7878
 
7879
+ /**
7880
+ * Handle returned by [`JsModelManager::load_adapter`] and accepted by
7881
+ * JS-side `unloadAdapter` lifecycle callbacks (see
7882
+ * [`JsModelManager::register_local_model`]).
7883
+ *
7884
+ * Mirrors [`blazen_llm::AdapterHandle`]; `mountStrategy` is one of
7885
+ * `"attached"`, `"rebuilt"`, or `"merged"`.
7886
+ */
7887
+ export interface JsAdapterHandle {
7888
+ /** Echoes [`AdapterOptions::adapter_id`]. */
7889
+ adapterId: string
7890
+ /** Bytes the adapter occupies on top of the base model. */
7891
+ memoryBytes: bigint
7892
+ /**
7893
+ * One of `"attached"`, `"rebuilt"`, or `"merged"` — what the
7894
+ * backend actually did to honor the mount request.
7895
+ */
7896
+ mountStrategy: string
7897
+ }
7898
+
7899
+ /**
7900
+ * Snapshot of one mounted adapter, returned by
7901
+ * [`JsModelManager::list_adapters`]. Mirrors [`blazen_llm::AdapterStatus`].
7902
+ */
7903
+ export interface JsAdapterStatus {
7904
+ /** Caller-supplied adapter identifier. */
7905
+ adapterId: string
7906
+ /** Scaling factor applied at mount time. */
7907
+ scale: number
7908
+ /** Absolute filesystem path to the adapter directory. */
7909
+ sourceDir: string
7910
+ /** Bytes the adapter occupies on top of the base model. */
7911
+ memoryBytes: bigint
7912
+ }
7913
+
6681
7914
  /** An entry to add to the memory store (used by `addMany`). */
6682
7915
  export interface JsAddEntry {
6683
7916
  /** Unique identifier. If empty, one will be generated. */
@@ -6829,6 +8062,28 @@ export interface JsAudioContent {
6829
8062
  durationSeconds?: number
6830
8063
  }
6831
8064
 
8065
+ /**
8066
+ * Construction-time options for [`JsAudioGenBackend`]. All fields
8067
+ * optional — defaults match `facebook/audiogen-medium`.
8068
+ */
8069
+ export interface JsAudioGenOptions {
8070
+ /**
8071
+ * Override the Hugging Face Hub repo id. Defaults to
8072
+ * `"facebook/audiogen-medium"`.
8073
+ */
8074
+ repoId?: string
8075
+ /** Optional pinned revision (commit SHA or tag) for the HF repo. */
8076
+ revision?: string
8077
+ /** Optional override for the Hugging Face cache directory. */
8078
+ cacheDir?: string
8079
+ /**
8080
+ * Hard safety cap on the requested duration (seconds). Defaults to
8081
+ * 30 s. AudioGen-medium's absolute upper bound is 30 s; requests past
8082
+ * either limit surface `MusicInvalidInputError`.
8083
+ */
8084
+ maxDurationSeconds?: number
8085
+ }
8086
+
6832
8087
  export interface JsAudioResult {
6833
8088
  audio: Array<JsGeneratedAudio>
6834
8089
  timing: JsRequestTiming
@@ -6857,6 +8112,26 @@ export interface JsAzureOptions {
6857
8112
  apiVersion?: string
6858
8113
  }
6859
8114
 
8115
+ /**
8116
+ * Local-inference backend identifier returned by
8117
+ * [`JsModelManager::load_from_hf`] and accepted as a forced override on
8118
+ * [`JsHfLoadOptions::backend_hint`].
8119
+ */
8120
+ export declare const enum JsBackendHint {
8121
+ /**
8122
+ * `mistral.rs` — broad architecture coverage, handles both safetensors
8123
+ * and GGUF, supports vision/multimodal models.
8124
+ */
8125
+ mistralrs = 'mistralrs',
8126
+ /**
8127
+ * `candle` — pure-Rust, supports safetensors and GGUF for the subset of
8128
+ * architectures candle ships.
8129
+ */
8130
+ candle = 'candle',
8131
+ /** `llama.cpp` — GGUF only, best CPU performance and lowest memory. */
8132
+ llamacpp = 'llamacpp'
8133
+ }
8134
+
6860
8135
  export interface JsBackgroundRemovalRequest {
6861
8136
  imageUrl: string
6862
8137
  model?: string
@@ -6973,35 +8248,6 @@ export interface JsClientConnectOptions {
6973
8248
  mtls?: JsMtlsOptions
6974
8249
  }
6975
8250
 
6976
- /** Options for a chat completion request. */
6977
- export interface JsCompletionOptions {
6978
- temperature?: number
6979
- maxTokens?: number
6980
- topP?: number
6981
- model?: string
6982
- tools?: Array<JsToolDefinition>
6983
- /** JSON Schema for structured output / response format. */
6984
- responseFormat?: any
6985
- }
6986
-
6987
- /** The result of a chat completion. */
6988
- export interface JsCompletionResponse {
6989
- content?: string
6990
- toolCalls: Array<JsToolCall>
6991
- usage?: JsTokenUsage
6992
- model: string
6993
- finishReason?: string
6994
- cost?: number
6995
- timing?: JsRequestTiming
6996
- images: Array<JsGeneratedImage>
6997
- audio: Array<JsGeneratedAudio>
6998
- videos: Array<JsGeneratedVideo>
6999
- reasoning?: JsReasoningTrace
7000
- citations: Array<JsCitation>
7001
- artifacts: Array<JsArtifact>
7002
- metadata: any
7003
- }
7004
-
7005
8251
  export interface JsComputeRequest {
7006
8252
  model: string
7007
8253
  input: any
@@ -7082,7 +8328,7 @@ export interface JsContentPart {
7082
8328
  image?: JsImageContent
7083
8329
  audio?: JsAudioContent
7084
8330
  video?: JsVideoContent
7085
- file?: FileContent
8331
+ file?: JsFileContent
7086
8332
  }
7087
8333
 
7088
8334
  /** Request to dereference a remote session ref. */
@@ -7151,6 +8397,43 @@ export declare const enum JsDiffusionScheduler {
7151
8397
  Ddim = 'ddim'
7152
8398
  }
7153
8399
 
8400
+ /**
8401
+ * Configuration for distributed (ring-AllReduce) training. Pass to
8402
+ * the training verbs to enable gradient averaging across
8403
+ * `worldSize` workers connected via gRPC. Each worker holds an
8404
+ * identical-shape gradient tensor; the ring algorithm sums and
8405
+ * averages per-parameter gradients before the optimizer step.
8406
+ *
8407
+ * `rank` is the 0-indexed rank of this worker; `worldSize` is the
8408
+ * total number of workers. `peers` is the ordered list of
8409
+ * `"host:port"` gRPC endpoints — one entry per rank. `masterAddr`
8410
+ * + `masterPort` identify the bootstrap node (typically the host
8411
+ * part of `peers[0]`).
8412
+ */
8413
+ export interface JsDistributedConfig {
8414
+ rank: number
8415
+ worldSize: number
8416
+ peers: Array<string>
8417
+ masterAddr: string
8418
+ masterPort: number
8419
+ }
8420
+
8421
+ /** Direct Preference Optimization (DPO) configuration. */
8422
+ export interface JsDpoConfig {
8423
+ /** Shared training hyperparameters. */
8424
+ core: JsTrainCoreConfig
8425
+ /** LoRA hyperparameters applied to the policy model. */
8426
+ lora?: JsLoraConfig
8427
+ /** KL-regularization strength. Default `0.1`. */
8428
+ beta?: number
8429
+ /** Conservative DPO label smoothing (cDPO). Default `0.0`. */
8430
+ labelSmoothing?: number
8431
+ /** Reference model repo. When omitted, reuses `core.baseModelRepo`. */
8432
+ referenceModelRepo?: string
8433
+ /** Optional revision for the reference model. */
8434
+ referenceModelRevision?: string
8435
+ }
8436
+
7154
8437
  /** The result of an embedding operation. */
7155
8438
  export interface JsEmbeddingResponse {
7156
8439
  /** The embedding vectors (one per input text). */
@@ -7277,6 +8560,30 @@ export interface JsFinishReason {
7277
8560
  value: string
7278
8561
  }
7279
8562
 
8563
+ /**
8564
+ * Full fine-tune configuration (every parameter trains; no LoRA adapter).
8565
+ *
8566
+ * `gradientCheckpointing = true` is accepted for forward compatibility
8567
+ * but the trainer currently rejects it at init time because candle
8568
+ * 0.10.2 has no activation-checkpointing primitive.
8569
+ */
8570
+ export interface JsFullFineTuneConfig {
8571
+ /** Shared training hyperparameters. */
8572
+ core: JsTrainCoreConfig
8573
+ /** Activation checkpointing (currently unsupported in the trainer). */
8574
+ gradientCheckpointing?: boolean
8575
+ }
8576
+
8577
+ /** Result of a completed full fine-tune run. */
8578
+ export interface JsFullFineTuneResult {
8579
+ /** Directory the trained model weights were written to. */
8580
+ outputDir: string
8581
+ /** Final training loss. */
8582
+ finalLoss: number
8583
+ /** Total optimizer steps executed. */
8584
+ stepsCompleted: number
8585
+ }
8586
+
7280
8587
  export interface JsGenerated3DModel {
7281
8588
  media: JsMediaOutput
7282
8589
  vertexCount?: number
@@ -7306,6 +8613,42 @@ export interface JsGeneratedVideo {
7306
8613
  fps?: number
7307
8614
  }
7308
8615
 
8616
+ /**
8617
+ * Caller-supplied options for [`JsModelManager::load_from_hf`].
8618
+ *
8619
+ * Mirrors [`blazen_manager::HfLoadOptions`]; every field is optional.
8620
+ */
8621
+ export interface JsHfLoadOptions {
8622
+ /**
8623
+ * Force a specific backend; skips engine inference but still probes
8624
+ * the repo for memory sizing.
8625
+ */
8626
+ backendHint?: JsBackendHint
8627
+ /**
8628
+ * Git revision (branch, tag, or commit sha). Defaults to the repo's
8629
+ * default branch.
8630
+ */
8631
+ revision?: string
8632
+ /**
8633
+ * Hugging Face access token. When omitted, falls back to the
8634
+ * `HF_TOKEN` environment variable, then to anonymous access.
8635
+ */
8636
+ hfToken?: string
8637
+ /** Override the on-disk cache directory used by `hf-hub`. */
8638
+ cacheDir?: string
8639
+ /**
8640
+ * Device specifier forwarded to the chosen provider (`"cpu"`,
8641
+ * `"cuda:0"`, `"metal"`, …).
8642
+ */
8643
+ device?: string
8644
+ /** Explicit GGUF filename for repos that ship multiple quantizations. */
8645
+ ggufFile?: string
8646
+ /** Override the auto-derived memory estimate, in bytes. */
8647
+ memoryEstimateBytes?: bigint
8648
+ /** Pool label (`"cpu"`, `"gpu"`, `"gpu:N"`). Defaults to `"cpu"`. */
8649
+ pool?: string
8650
+ }
8651
+
7309
8652
  /** An outgoing HTTP request, as seen by a JavaScript [`HttpClient`] subclass. */
7310
8653
  export interface JsHttpRequest {
7311
8654
  /** HTTP method (`"GET"`, `"POST"`, `"PUT"`, `"DELETE"`, `"PATCH"`). */
@@ -7408,6 +8751,39 @@ export declare const enum JsJobStatus {
7408
8751
  Cancelled = 'cancelled'
7409
8752
  }
7410
8753
 
8754
+ /** Optional knobs for [`JsJsonlDataset::from_path`]. */
8755
+ export interface JsJsonlDatasetOptions {
8756
+ /**
8757
+ * Jinja2 chat template (from `tokenizer_config.json`). Required when
8758
+ * rows use the `messages` shape.
8759
+ */
8760
+ chatTemplate?: string
8761
+ /** Maximum tokenized sequence length per example. Default `2048`. */
8762
+ maxSeqLen?: number
8763
+ /** Candle device string. Default `"cpu"`. */
8764
+ device?: string
8765
+ /** Token id to write into padded positions. Default `0`. */
8766
+ padTokenId?: number
8767
+ }
8768
+
8769
+ /** Kahneman-Tversky Optimization (KTO) configuration. */
8770
+ export interface JsKtoConfig {
8771
+ /** Shared training hyperparameters. */
8772
+ core: JsTrainCoreConfig
8773
+ /** LoRA hyperparameters applied to the policy model. */
8774
+ lora?: JsLoraConfig
8775
+ /** KL-regularization strength. Default `0.1`. */
8776
+ beta?: number
8777
+ /** Loss weight applied to desirable examples. Default `1.0`. */
8778
+ lambdaD?: number
8779
+ /** Loss weight applied to undesirable examples. Default `1.0`. */
8780
+ lambdaU?: number
8781
+ /** Reference model repo. When omitted, reuses `core.baseModelRepo`. */
8782
+ referenceModelRepo?: string
8783
+ /** Optional revision for the reference model. */
8784
+ referenceModelRevision?: string
8785
+ }
8786
+
7411
8787
  /**
7412
8788
  * Options for the local llama.cpp LLM backend.
7413
8789
  *
@@ -7436,6 +8812,21 @@ export interface JsLlamaCppOptions {
7436
8812
  cacheDir?: string
7437
8813
  }
7438
8814
 
8815
+ /** LoRA hyperparameters. */
8816
+ export interface JsLoraConfig {
8817
+ /** Low-rank dimension (PEFT "r"). Default `16`. */
8818
+ rank?: number
8819
+ /** Scaling numerator; effective per-layer scale is `alpha / rank`. Default `32`. */
8820
+ alpha?: number
8821
+ /** Dropout applied to LoRA-A input. Default `0.0`. */
8822
+ dropout?: number
8823
+ /**
8824
+ * Module-name suffixes to inject LoRA into. Default
8825
+ * `["q_proj","k_proj","v_proj","o_proj"]`.
8826
+ */
8827
+ targetModules?: Array<string>
8828
+ }
8829
+
7439
8830
  export interface JsMediaOutput {
7440
8831
  url?: string
7441
8832
  base64?: string
@@ -7483,7 +8874,7 @@ export interface JsMiddlewareConfig {
7483
8874
  * All other fields are optional.
7484
8875
  *
7485
8876
  * ```javascript
7486
- * const model = CompletionModel.mistralrs({
8877
+ * const model = Model.mistralrs({
7487
8878
  * modelId: "mistralai/Mistral-7B-Instruct-v0.3",
7488
8879
  * device: "cuda:0",
7489
8880
  * quantization: "q4_k_m",
@@ -7507,6 +8898,42 @@ export interface JsMistralRsOptions {
7507
8898
  cacheDir?: string
7508
8899
  }
7509
8900
 
8901
+ /** Mixed-precision mode passed to [`JsTrainConfig`]. */
8902
+ export declare const enum JsMixedPrecision {
8903
+ None = 'none',
8904
+ Bf16 = 'bf16'
8905
+ }
8906
+
8907
+ /** TLS options accepted by [`JsModelClient::connect_with_tls`]. */
8908
+ export interface JsModelClientTlsOptions {
8909
+ /**
8910
+ * Filesystem path to the PEM-encoded CA certificate used to verify
8911
+ * the server.
8912
+ */
8913
+ caCert: string
8914
+ /**
8915
+ * Optional path to the PEM-encoded client certificate (mTLS). Must
8916
+ * be paired with `clientKey`.
8917
+ */
8918
+ clientCert?: string
8919
+ /**
8920
+ * Optional path to the PEM-encoded client private key (mTLS). Must
8921
+ * be paired with `clientCert`.
8922
+ */
8923
+ clientKey?: string
8924
+ }
8925
+
8926
+ /** Options for a chat completion request. */
8927
+ export interface JsModelOptions {
8928
+ temperature?: number
8929
+ maxTokens?: number
8930
+ topP?: number
8931
+ model?: string
8932
+ tools?: Array<JsToolDefinition>
8933
+ /** JSON Schema for structured output / response format. */
8934
+ responseFormat?: any
8935
+ }
8936
+
7510
8937
  /**
7511
8938
  * Pricing information for a model in USD per million tokens.
7512
8939
  *
@@ -7524,6 +8951,24 @@ export interface JsModelPricing {
7524
8951
  perSecond?: number
7525
8952
  }
7526
8953
 
8954
+ /** The result of a chat completion. */
8955
+ export interface JsModelResponse {
8956
+ content?: string
8957
+ toolCalls: Array<JsToolCall>
8958
+ usage?: JsTokenUsage
8959
+ model: string
8960
+ finishReason?: string
8961
+ cost?: number
8962
+ timing?: JsRequestTiming
8963
+ images: Array<JsGeneratedImage>
8964
+ audio: Array<JsGeneratedAudio>
8965
+ videos: Array<JsGeneratedVideo>
8966
+ reasoning?: JsReasoningTrace
8967
+ citations: Array<JsCitation>
8968
+ artifacts: Array<JsArtifact>
8969
+ metadata: any
8970
+ }
8971
+
7527
8972
  /** Status snapshot for a single registered model. */
7528
8973
  export interface JsModelStatus {
7529
8974
  /** Model identifier. */
@@ -7540,16 +8985,69 @@ export interface JsModelStatus {
7540
8985
  }
7541
8986
 
7542
8987
  /**
7543
- * PEM file paths for mTLS configuration. Used by
7544
- * [`crate::controlplane::client::JsControlPlaneClient::connect`].
8988
+ * PEM file paths for mTLS configuration. Used by
8989
+ * [`crate::controlplane::client::JsControlPlaneClient::connect`].
8990
+ */
8991
+ export interface JsMtlsOptions {
8992
+ /** Path to the client certificate PEM file. */
8993
+ cert: string
8994
+ /** Path to the client private-key PEM file. */
8995
+ key: string
8996
+ /** Path to the CA PEM file used to authenticate the server. */
8997
+ ca: string
8998
+ }
8999
+
9000
+ /**
9001
+ * One emission from a streaming music backend.
9002
+ *
9003
+ * Carries a `Float32Array` slice of 32-bit float PCM samples in `[-1, 1]`
9004
+ * at the backend's native output sample rate (32 kHz for MusicGen,
9005
+ * 16 kHz for AudioGen, 44.1 kHz stereo for Stable Audio), an `isFinal`
9006
+ * flag, and an optional measured per-chunk latency in seconds.
9007
+ */
9008
+ export interface JsMusicChunk {
9009
+ /**
9010
+ * 32-bit float PCM samples in `[-1, 1]` at the backend's native
9011
+ * output sample rate (interleaved for multi-channel outputs).
9012
+ */
9013
+ samples: Float32Array
9014
+ /**
9015
+ * `true` when this is the final chunk emitted for the generation
9016
+ * call; `false` for intermediate chunks.
9017
+ */
9018
+ isFinal: boolean
9019
+ /**
9020
+ * Optional measured latency-from-call-start for this chunk, in
9021
+ * seconds. Omitted when the backend does not surface a timestamp.
9022
+ */
9023
+ latencySeconds?: number
9024
+ }
9025
+
9026
+ /**
9027
+ * Construction-time options for [`JsMusicgenBackend`]. All fields
9028
+ * optional — defaults match the small CPU-friendly variant.
7545
9029
  */
7546
- export interface JsMtlsOptions {
7547
- /** Path to the client certificate PEM file. */
7548
- cert: string
7549
- /** Path to the client private-key PEM file. */
7550
- key: string
7551
- /** Path to the CA PEM file used to authenticate the server. */
7552
- ca: string
9030
+ export interface JsMusicgenOptions {
9031
+ /** Which checkpoint to load. Defaults to `"small"`. */
9032
+ variant?: JsMusicgenVariant
9033
+ /** Optional override for the Hugging Face cache directory. */
9034
+ cacheDir?: string
9035
+ /**
9036
+ * Hard safety cap on the requested duration (seconds). Defaults to
9037
+ * 30 s. The absolute upper bound enforced by MusicGen itself is
9038
+ * 60 s — requests past either limit surface `MusicInvalidInputError`.
9039
+ */
9040
+ maxDurationSeconds?: number
9041
+ }
9042
+
9043
+ /** Available MusicGen checkpoints on Hugging Face Hub. */
9044
+ export declare const enum JsMusicgenVariant {
9045
+ /** `facebook/musicgen-small` -- ~300M params, 32 kHz mono. */
9046
+ Small = 'small',
9047
+ /** `facebook/musicgen-medium` -- ~1.5B params, 32 kHz mono. */
9048
+ Medium = 'medium',
9049
+ /** `facebook/musicgen-large` -- ~3.3B params, 32 kHz mono. */
9050
+ Large = 'large'
7553
9051
  }
7554
9052
 
7555
9053
  export interface JsMusicRequest {
@@ -7559,6 +9057,33 @@ export interface JsMusicRequest {
7559
9057
  parameters?: any
7560
9058
  }
7561
9059
 
9060
+ /**
9061
+ * Fully-rendered music + SFX result returned by the non-streaming
9062
+ * `generateMusic` / `generateSfx` entry points.
9063
+ *
9064
+ * `bytes` carries the encoded clip — typically a WAV container holding
9065
+ * PCM samples; `format` distinguishes the container so callers can route
9066
+ * directly to a player without re-sniffing the payload.
9067
+ */
9068
+ export interface JsMusicResult {
9069
+ /**
9070
+ * Encoded audio bytes (typically WAV for MusicGen / AudioGen /
9071
+ * Stable Audio).
9072
+ */
9073
+ bytes: Uint8Array
9074
+ /**
9075
+ * Container format: one of `"wav"`, `"mp3"`, `"flac"`, `"opus"`,
9076
+ * or `"pcm"`.
9077
+ */
9078
+ format: string
9079
+ /** Sample rate in hertz. */
9080
+ sampleRate: number
9081
+ /** Channel count (mono = 1, stereo = 2). */
9082
+ channels: number
9083
+ /** Duration of the clip in seconds, if known. */
9084
+ durationSeconds?: number
9085
+ }
9086
+
7562
9087
  /**
7563
9088
  * Configuration for an OpenAI-compatible provider.
7564
9089
  *
@@ -7594,6 +9119,32 @@ export interface JsOpenAiCompatConfig {
7594
9119
  supportsModelListing?: boolean
7595
9120
  }
7596
9121
 
9122
+ /** AdamW optimizer hyperparameters. */
9123
+ export interface JsOptimConfig {
9124
+ /** Peak learning rate (applied at end of warmup). Default `2e-4`. */
9125
+ learningRate?: number
9126
+ /** AdamW beta1. Default `0.9`. */
9127
+ beta1?: number
9128
+ /** AdamW beta2. Default `0.999`. */
9129
+ beta2?: number
9130
+ /** AdamW numerical-stability epsilon. Default `1e-8`. */
9131
+ epsilon?: number
9132
+ /** Decoupled weight decay. Default `0.0`. */
9133
+ weightDecay?: number
9134
+ /** Global gradient L2-norm clip; `null` disables clipping. Default `1.0`. */
9135
+ gradientClip?: number
9136
+ }
9137
+
9138
+ /** Odds Ratio Preference Optimization (ORPO) configuration. */
9139
+ export interface JsOrpoConfig {
9140
+ /** Shared training hyperparameters. */
9141
+ core: JsTrainCoreConfig
9142
+ /** LoRA hyperparameters. */
9143
+ lora?: JsLoraConfig
9144
+ /** Weight of the odds-ratio term relative to the SFT term. Default `0.1`. */
9145
+ lambda?: number
9146
+ }
9147
+
7597
9148
  /**
7598
9149
  * Metadata describing a remote session ref handed back by an
7599
9150
  * `invokeSubWorkflow` call.
@@ -7610,31 +9161,6 @@ export interface JsPeerRemoteRefDescriptor {
7610
9161
  createdAtEpochMs: number
7611
9162
  }
7612
9163
 
7613
- /**
7614
- * Options for the local Piper TTS backend.
7615
- *
7616
- * All fields are optional. `modelId` selects the voice (e.g.
7617
- * `"en_US-amy-medium"`); when `null`, callers must set it before
7618
- * synthesis can run.
7619
- *
7620
- * ```javascript
7621
- * const provider = PiperProvider.create({
7622
- * modelId: "en_US-amy-medium",
7623
- * sampleRate: 22050,
7624
- * });
7625
- * ```
7626
- */
7627
- export interface JsPiperOptions {
7628
- /** Piper voice model identifier. */
7629
- modelId?: string
7630
- /** Speaker ID for multi-speaker models. */
7631
- speakerId?: number
7632
- /** Output audio sample rate in Hz. */
7633
- sampleRate?: number
7634
- /** Path to cache downloaded voice models. */
7635
- cacheDir?: string
7636
- }
7637
-
7638
9164
  /** Reported per-pool budget pair returned by [`JsModelManager::pools`]. */
7639
9165
  export interface JsPoolBudget {
7640
9166
  /** Pool label (`"cpu"` or `"gpu:N"`). */
@@ -7826,6 +9352,57 @@ export declare const enum JsRunStatus {
7826
9352
  Cancelled = 'Cancelled'
7827
9353
  }
7828
9354
 
9355
+ /**
9356
+ * Construction-time options for [`JsRvcBackend`]. All fields optional
9357
+ * — defaults match the upstream RVC reference (top-k = 8, blend = 0.75,
9358
+ * V2 content encoder).
9359
+ */
9360
+ export interface JsRvcOptions {
9361
+ /**
9362
+ * kNN neighbour count for the retrieval blend (`top_k`). Defaults
9363
+ * to 8. Clamped to `>= 1` at query time.
9364
+ */
9365
+ topK?: number
9366
+ /**
9367
+ * Retrieval blend factor (`index_rate` in the upstream
9368
+ * reference). Defaults to 0.75. Clamped into `[0.0, 1.0]`.
9369
+ */
9370
+ retrievalBlend?: number
9371
+ /**
9372
+ * Which ContentVec family to use for the shared HuBERT encoder.
9373
+ * One of `"v1"` or `"v2"` (case-insensitive). Defaults to `"v2"`
9374
+ * — the family contemporary RVC checkpoints target.
9375
+ */
9376
+ rvcVersion?: string
9377
+ }
9378
+
9379
+ /** Learning-rate scheduler configuration. */
9380
+ export interface JsSchedulerConfig {
9381
+ /** Schedule shape. Default `Cosine`. */
9382
+ kind?: JsSchedulerKind
9383
+ /** Linear-warmup duration in steps applied before the main shape. Default `0`. */
9384
+ warmupSteps?: number
9385
+ }
9386
+
9387
+ /** Learning-rate schedule shape passed to [`JsSchedulerConfig`]. */
9388
+ export declare const enum JsSchedulerKind {
9389
+ Constant = 'constant',
9390
+ Linear = 'linear',
9391
+ Cosine = 'cosine'
9392
+ }
9393
+
9394
+ /** Simple Preference Optimization (`SimPO`) configuration. */
9395
+ export interface JsSimpoConfig {
9396
+ /** Shared training hyperparameters. */
9397
+ core: JsTrainCoreConfig
9398
+ /** LoRA hyperparameters. */
9399
+ lora?: JsLoraConfig
9400
+ /** Logit scaling for the length-normalized preference margin. Default `2.0`. */
9401
+ beta?: number
9402
+ /** Target reward margin between chosen and rejected. Default `1.0`. */
9403
+ gamma?: number
9404
+ }
9405
+
7829
9406
  export interface JsSpeechRequest {
7830
9407
  text: string
7831
9408
  voice?: string
@@ -7836,6 +9413,46 @@ export interface JsSpeechRequest {
7836
9413
  parameters?: any
7837
9414
  }
7838
9415
 
9416
+ /**
9417
+ * Construction-time options for [`JsStableAudioBackend`]. All fields
9418
+ * optional — defaults to the Small variant on CPU with F32 precision.
9419
+ */
9420
+ export interface JsStableAudioOptions {
9421
+ /** Which variant to load. Defaults to `"small"`. */
9422
+ variant?: JsStableAudioVariant
9423
+ /**
9424
+ * Override the Hugging Face Hub repo id. Defaults to the variant's
9425
+ * canonical repo (`stabilityai/stable-audio-open-{small,1.0}`).
9426
+ */
9427
+ hfRepo?: string
9428
+ /**
9429
+ * Path to a local `tokenizer.json` for the T5 conditioner. Required
9430
+ * when the `audio-music-stable-audio` feature is enabled; ignored in
9431
+ * stub mode.
9432
+ */
9433
+ tokenizerPath?: string
9434
+ /**
9435
+ * Optional override for a pre-downloaded safetensors weights file.
9436
+ * When unset, weights are pulled from the configured HF repo on
9437
+ * first generation.
9438
+ */
9439
+ localWeightsPath?: string
9440
+ }
9441
+
9442
+ /** Hyperparameter pack describing which Stable Audio Open checkpoint to load. */
9443
+ export declare const enum JsStableAudioVariant {
9444
+ /**
9445
+ * `stabilityai/stable-audio-open-small` -- 341 M params, 8-step
9446
+ * distilled sampler, 11 s output cap.
9447
+ */
9448
+ Small = 'small',
9449
+ /**
9450
+ * `stabilityai/stable-audio-open-1.0` -- 1.21 B params, 100-step
9451
+ * DPM-Solver++, 47 s output cap.
9452
+ */
9453
+ Open10 = 'open1_0'
9454
+ }
9455
+
7839
9456
  /**
7840
9457
  * A persisted memory entry as returned from the underlying backend.
7841
9458
  *
@@ -7985,6 +9602,26 @@ export interface JsSubWorkflowResponse {
7985
9602
  error?: string
7986
9603
  }
7987
9604
 
9605
+ /**
9606
+ * A registered target voice descriptor.
9607
+ *
9608
+ * Returned by [`crate::vc::JsRvcBackend::list_target_voices`] /
9609
+ * [`crate::vc::JsVcModel::list_target_voices`] and accepted by the
9610
+ * matching `convertVoice` / `streamConvertPcm` calls (the `id` field
9611
+ * is the lookup key).
9612
+ */
9613
+ export interface JsTargetVoice {
9614
+ /**
9615
+ * Backend-scoped identifier for this voice. Passed to
9616
+ * `convertVoice` / `streamConvertPcm`.
9617
+ */
9618
+ id: string
9619
+ /** Optional human-readable display name for UIs. */
9620
+ label?: string
9621
+ /** Native sample rate the backend renders this voice at, in Hz. */
9622
+ sampleRateHz: number
9623
+ }
9624
+
7988
9625
  export interface JsThreeDRequest {
7989
9626
  prompt?: string
7990
9627
  imageUrl?: string
@@ -8082,6 +9719,101 @@ export interface JsTractResponse {
8082
9719
  model: string
8083
9720
  }
8084
9721
 
9722
+ /** Full configuration for one training run. */
9723
+ export interface JsTrainConfig {
9724
+ /** HuggingFace repo id of the base model. */
9725
+ baseModelRepo: string
9726
+ /** Filesystem directory where the trained adapter and checkpoints land. */
9727
+ outputDir: string
9728
+ lora?: JsLoraConfig
9729
+ optim?: JsOptimConfig
9730
+ scheduler?: JsSchedulerConfig
9731
+ /** Total optimizer steps to run. Default `1000`. */
9732
+ maxSteps?: number
9733
+ /** Micro-batch size (per forward pass). Default `4`. */
9734
+ batchSize?: number
9735
+ /** Micro-batches accumulated before each optimizer step. Default `1`. */
9736
+ gradientAccumulationSteps?: number
9737
+ /** Maximum tokenized sequence length per example. Default `2048`. */
9738
+ maxSeqLen?: number
9739
+ /** Run evaluation every N steps when set. */
9740
+ evalSteps?: number
9741
+ /** Write a checkpoint every N steps when set. */
9742
+ saveSteps?: number
9743
+ /** RNG seed (dataset shuffling + LoRA `A` init). Default `42`. */
9744
+ seed?: bigint
9745
+ /** Mixed-precision mode for forward / backward. Default `Bf16`. */
9746
+ mixedPrecision?: JsMixedPrecision
9747
+ /** Device string forwarded to the trainer (`"cpu"`, `"cuda:0"`, `"metal"`). */
9748
+ device?: string
9749
+ }
9750
+
9751
+ /**
9752
+ * Shared training hyperparameters for DPO/ORPO/SimPO/KTO and full
9753
+ * fine-tune. Mirrors [`blazen_train::TrainCoreConfig`].
9754
+ */
9755
+ export interface JsTrainCoreConfig {
9756
+ /** HuggingFace repo id of the base model. */
9757
+ baseModelRepo: string
9758
+ /** Optional revision (branch / tag / commit) for the base model. */
9759
+ baseModelRevision?: string
9760
+ /** Filesystem directory for trained weights and checkpoints. */
9761
+ outputDir: string
9762
+ /** Total optimizer steps to run. Default `1000`. */
9763
+ maxSteps?: number
9764
+ /** Micro-batch size (per forward pass). Default `1`. */
9765
+ batchSize?: number
9766
+ /** Micro-batches accumulated before each optimizer step. Default `8`. */
9767
+ gradientAccumulationSteps?: number
9768
+ /** Maximum tokenized sequence length per example. Default `1024`. */
9769
+ maxSeqLen?: number
9770
+ /** Run evaluation every N steps when set. */
9771
+ evalSteps?: number
9772
+ /** Write a checkpoint every N steps when set. */
9773
+ saveSteps?: number
9774
+ /** RNG seed. Default `42`. */
9775
+ seed?: bigint
9776
+ /** Mixed-precision mode for forward / backward. Default `Bf16`. */
9777
+ mixedPrecision?: JsMixedPrecision
9778
+ /** Device string forwarded to the trainer (`"cpu"`, `"cuda:0"`, `"metal"`). */
9779
+ device?: string
9780
+ /** Optimizer hyperparameters (AdamW). */
9781
+ optim?: JsOptimConfig
9782
+ /** Learning-rate schedule. */
9783
+ scheduler?: JsSchedulerConfig
9784
+ }
9785
+
9786
+ /** Result of a completed training run. */
9787
+ export interface JsTrainedAdapter {
9788
+ /** Directory the PEFT-format adapter was written to. */
9789
+ adapterDir: string
9790
+ /** Final training loss. */
9791
+ finalLoss: number
9792
+ /** Total optimizer steps executed. */
9793
+ totalSteps: bigint
9794
+ }
9795
+
9796
+ /**
9797
+ * One observable event emitted during a training run.
9798
+ *
9799
+ * Switch on `kind` (`"started"` / `"stepCompleted"` / `"evaluating"` /
9800
+ * `"evalCompleted"` / `"checkpointSaved"` / `"finished"`); other fields
9801
+ * carry the per-variant payload and are absent for variants that do not
9802
+ * populate them.
9803
+ */
9804
+ export interface JsTrainingEvent {
9805
+ kind: string
9806
+ step?: bigint
9807
+ loss?: number
9808
+ learningRate?: number
9809
+ elapsedMs?: number
9810
+ totalSteps?: bigint
9811
+ evalLoss?: number
9812
+ checkpointPath?: string
9813
+ adapterDir?: string
9814
+ finalLoss?: number
9815
+ }
9816
+
8085
9817
  export interface JsTranscriptionRequest {
8086
9818
  audioUrl: string
8087
9819
  language?: string
@@ -8108,6 +9840,45 @@ export interface JsTranscriptionSegment {
8108
9840
  speaker?: string
8109
9841
  }
8110
9842
 
9843
+ /**
9844
+ * Which underlying TTS model to load. Maps onto
9845
+ * [`blazen_llm::TtsModel`].
9846
+ */
9847
+ export declare const enum JsTtsModel {
9848
+ /** Kokoro-82M (default; small, CPU-friendly). */
9849
+ Kokoro82m = 'Kokoro82m',
9850
+ /** VibeVoice-1.5B (Microsoft). */
9851
+ VibeVoice = 'VibeVoice',
9852
+ /** Qwen3-TTS-12Hz-1.7B (`CustomVoice` variant). */
9853
+ Qwen3Tts = 'Qwen3Tts'
9854
+ }
9855
+
9856
+ /**
9857
+ * Options for the local TTS backend.
9858
+ *
9859
+ * All fields are optional. `model` selects the backend (defaults to
9860
+ * Kokoro-82M); `voice` selects the speaker preset.
9861
+ *
9862
+ * ```javascript
9863
+ * const provider = TtsProvider.create({
9864
+ * model: "Kokoro82m",
9865
+ * voice: "af_bella",
9866
+ * });
9867
+ * ```
9868
+ */
9869
+ export interface JsTtsOptions {
9870
+ /** TTS model to load. Defaults to `"Kokoro82m"`. */
9871
+ model?: JsTtsModel
9872
+ /** Voice / speaker preset name. */
9873
+ voice?: string
9874
+ /** Language ISO 639-1 code (e.g. `"en"`, `"ja"`). */
9875
+ language?: string
9876
+ /** Output audio sample rate in Hz. */
9877
+ sampleRate?: number
9878
+ /** Path to cache downloaded model weights. */
9879
+ cacheDir?: string
9880
+ }
9881
+
8111
9882
  export interface JsUpscaleRequest {
8112
9883
  imageUrl: string
8113
9884
  scale: number
@@ -8115,6 +9886,56 @@ export interface JsUpscaleRequest {
8115
9886
  parameters?: any
8116
9887
  }
8117
9888
 
9889
+ /**
9890
+ * One emission from a streaming voice-conversion backend.
9891
+ *
9892
+ * Carries a `Float32Array` slice of 32-bit float PCM samples in `[-1, 1]`
9893
+ * at the target voice's native sample rate (typically 32 kHz or 40 kHz
9894
+ * for RVC-family backends), an `isFinal` flag, and an optional measured
9895
+ * per-chunk latency in seconds.
9896
+ */
9897
+ export interface JsVcChunk {
9898
+ /**
9899
+ * 32-bit float PCM samples in `[-1, 1]` at the target voice's
9900
+ * native sample rate (mono).
9901
+ */
9902
+ samples: Float32Array
9903
+ /**
9904
+ * `true` when this is the final chunk emitted for the conversion
9905
+ * call; `false` for intermediate chunks.
9906
+ */
9907
+ isFinal: boolean
9908
+ /**
9909
+ * Optional measured latency-from-call-start for this chunk, in
9910
+ * seconds. Absent (`undefined`) when the backend does not surface a timestamp
9911
+ * (RVC backends today do not).
9912
+ */
9913
+ latencySeconds?: number
9914
+ }
9915
+
9916
+ /**
9917
+ * Fully-rendered voice-conversion result returned by the non-streaming
9918
+ * `convertVoice` entry point.
9919
+ *
9920
+ * `bytes` carries a self-describing WAV (RIFF/`fmt `/`data`) container
9921
+ * holding 16-bit signed little-endian PCM samples at the target voice's
9922
+ * native sample rate. `sampleRate` and `durationSeconds` are parsed out
9923
+ * of the WAV header so callers don't need to re-sniff the payload to
9924
+ * route it to a player.
9925
+ */
9926
+ export interface JsVcResult {
9927
+ /** Encoded WAV bytes (16-bit signed little-endian PCM). */
9928
+ bytes: Uint8Array
9929
+ /** Sample rate in hertz, parsed from the WAV `fmt ` chunk. */
9930
+ sampleRate: number
9931
+ /**
9932
+ * Duration of the clip in seconds, derived from the WAV `data`
9933
+ * chunk size + frame stride. Absent (`undefined`) if the WAV header could not be
9934
+ * parsed (in which case `sampleRate` falls back to `0`).
9935
+ */
9936
+ durationSeconds?: number
9937
+ }
9938
+
8118
9939
  /** Video content for multimodal messages. */
8119
9940
  export interface JsVideoContent {
8120
9941
  source: JsImageSource
@@ -8367,6 +10188,20 @@ export declare function lookupPricing(modelId: string): JsModelPricing | null
8367
10188
  /** `true` when a step builder is registered under `stepId`. */
8368
10189
  export declare function lookupStepBuilder(stepId: string): boolean
8369
10190
 
10191
+ /**
10192
+ * The decision returned by a loop stage's `until` predicate after each round.
10193
+ *
10194
+ * - `Continue`: run the inner stage again (subject to the `maxIterations`
10195
+ * cap).
10196
+ * - `Done`: stop looping cleanly; the loop stage succeeds.
10197
+ * - `Abort`: stop looping with an error.
10198
+ */
10199
+ export declare const enum LoopDecision {
10200
+ Continue = 'Continue',
10201
+ Done = 'Done',
10202
+ Abort = 'Abort'
10203
+ }
10204
+
8370
10205
  /**
8371
10206
  * Tagged-union mirror of [`blazen_llm::types::MessageContent`].
8372
10207
  *
@@ -8440,6 +10275,37 @@ export interface ModelCapabilities {
8440
10275
  threeDGeneration: boolean
8441
10276
  }
8442
10277
 
10278
+ /**
10279
+ * Configuration for subclassed `Model` instances.
10280
+ *
10281
+ * When extending `Model` from JavaScript/TypeScript, pass this
10282
+ * to `super()` so the base class can report `modelId` and other metadata
10283
+ * without a concrete provider.
10284
+ *
10285
+ * ```javascript
10286
+ * class MyLLM extends Model {
10287
+ * constructor() {
10288
+ * super({ modelId: "my-custom-model", contextLength: 8192 });
10289
+ * }
10290
+ * }
10291
+ * ```
10292
+ */
10293
+ export interface ModelConfig {
10294
+ /** Model identifier (e.g. `"my-org/custom-llama"`). */
10295
+ modelId?: string
10296
+ /** Maximum context window in tokens. */
10297
+ contextLength?: number
10298
+ /** Base URL for HTTP-based providers. */
10299
+ baseUrl?: string
10300
+ /**
10301
+ * Estimated memory footprint in bytes when loaded (host RAM if
10302
+ * the provider targets the CPU, GPU VRAM otherwise).
10303
+ */
10304
+ memoryEstimateBytes?: number
10305
+ /** Maximum output tokens the model supports. */
10306
+ maxOutputTokens?: number
10307
+ }
10308
+
8443
10309
  /**
8444
10310
  * Information about a model offered by a provider.
8445
10311
  *
@@ -8481,6 +10347,47 @@ export interface ModelManagerConfig {
8481
10347
  poolBudgets?: Record<string, bigint>
8482
10348
  }
8483
10349
 
10350
+ /**
10351
+ * Provider-agnostic request for a chat completion.
10352
+ *
10353
+ * Mirrors [`blazen_llm::ModelRequest`]. Most callers reach for the
10354
+ * [`crate::providers::JsModel`] factory + per-call options
10355
+ * path; this typed shape exists for callers who need to build a request
10356
+ * envelope explicitly (e.g. forwarding the same request through multiple
10357
+ * middleware layers).
10358
+ */
10359
+ export interface ModelRequest {
10360
+ /**
10361
+ * The conversation history as JSON-serialized `ChatMessage` values.
10362
+ *
10363
+ * Each entry must round-trip through `serde_json` into a Rust
10364
+ * [`blazen_llm::ChatMessage`]. Use the `ChatMessage` class to build
10365
+ * these in JS.
10366
+ */
10367
+ messages: Array<any>
10368
+ /** Tools available for the model to invoke. */
10369
+ tools?: Array<JsToolDefinition>
10370
+ /** Sampling temperature. */
10371
+ temperature?: number
10372
+ /** Maximum number of tokens to generate. */
10373
+ maxTokens?: number
10374
+ /** Nucleus sampling parameter. */
10375
+ topP?: number
10376
+ /**
10377
+ * JSON-encoded response format hint (raw, matching the `OpenAI` shape
10378
+ * or the typed [`crate::types::JsResponseFormat`] when serialized).
10379
+ */
10380
+ responseFormat?: any
10381
+ /** Override the provider's default model for this request. */
10382
+ model?: string
10383
+ /** Output modalities (e.g., `["text"]`, `["image", "text"]`). */
10384
+ modalities?: Array<string>
10385
+ /** Image generation configuration (model-specific). */
10386
+ imageConfig?: any
10387
+ /** Audio output configuration (voice, format, etc.). */
10388
+ audioConfig?: any
10389
+ }
10390
+
8484
10391
  /** Build an empty [`JsRetryStack`] with every scope set to `null`. */
8485
10392
  export declare function newRetryStack(): RetryStack
8486
10393
 
@@ -8496,30 +10403,48 @@ export declare function newUsageEvent(provider: string, model: string, runId: st
8496
10403
  *
8497
10404
  * ```javascript
8498
10405
  * initOtlp({
8499
- * endpoint: "http://localhost:4317",
10406
+ * endpoint: "https://otel.example.com/v1/traces",
8500
10407
  * serviceName: "my-service",
8501
- * serviceVersion: "1.0.0",
8502
- * headers: { "x-api-key": "secret" },
10408
+ * protocol: "HttpProto",
10409
+ * headers: { Authorization: "Bearer xxx" },
8503
10410
  * });
8504
10411
  * ```
8505
10412
  */
8506
10413
  export interface OtlpConfig {
8507
- /** The OTLP endpoint URL (e.g. `"http://localhost:4317"`). */
10414
+ /**
10415
+ * The OTLP endpoint URL.
10416
+ *
10417
+ * For gRPC: `"http://localhost:4317"`.
10418
+ * For HTTP: `"https://collector/v1/traces"`.
10419
+ */
8508
10420
  endpoint: string
8509
10421
  /** The service name reported to the backend. */
8510
10422
  serviceName: string
10423
+ /** Wire-level transport. Defaults to `HttpProto`. */
10424
+ protocol?: OtlpProtocol
8511
10425
  /**
8512
- * Service version reported to the backend (recorded for forward
8513
- * compatibility; not yet forwarded by the underlying exporter).
10426
+ * Service version (recorded for forward compatibility; not yet attached
10427
+ * as a resource attribute by the underlying exporter).
8514
10428
  */
8515
10429
  serviceVersion?: string
8516
10430
  /**
8517
- * Additional headers to attach to OTLP requests (recorded for forward
8518
- * compatibility; not yet forwarded by the underlying exporter).
10431
+ * Auth / routing headers attached to OTLP requests. Honored on HTTP;
10432
+ * dropped with a warning on gRPC.
8519
10433
  */
8520
10434
  headers?: Record<string, string>
8521
10435
  }
8522
10436
 
10437
+ /** OTLP wire-level transport. */
10438
+ export declare const enum OtlpProtocol {
10439
+ /** gRPC over tonic. Requires the `otlp` Cargo feature. */
10440
+ Grpc = 'Grpc',
10441
+ /**
10442
+ * HTTP with binary protobuf payload. Requires the `otlp-http` Cargo
10443
+ * feature.
10444
+ */
10445
+ HttpProto = 'HttpProto'
10446
+ }
10447
+
8523
10448
  /**
8524
10449
  * Why a workflow was paused.
8525
10450
  *
@@ -8706,6 +10631,27 @@ export declare const enum ProviderId {
8706
10631
  Fal = 'fal'
8707
10632
  }
8708
10633
 
10634
+ /**
10635
+ * Static metadata describing a provider instance. Mirrors
10636
+ * [`blazen_llm::providers::ProviderMetadata`].
10637
+ */
10638
+ export interface ProviderMetadata {
10639
+ /**
10640
+ * Canonical provider identifier — stable across binding surfaces
10641
+ * (e.g. `"openai"`, `"fal"`, `"spark-tts"`).
10642
+ */
10643
+ providerId: string
10644
+ /** What this provider does. */
10645
+ capability: CapabilityKind
10646
+ /**
10647
+ * Optional human-readable name shown in UIs / logs. Defaults to
10648
+ * `providerId` when unset.
10649
+ */
10650
+ displayName?: string
10651
+ /** Optional version pin — typically the model id / weights revision. */
10652
+ version?: string
10653
+ }
10654
+
8709
10655
  /**
8710
10656
  * Optional hints attached to a `put` call.
8711
10657
  *
@@ -8828,6 +10774,17 @@ export declare function registerEventDeserializer(name: string, deserializer: De
8828
10774
  */
8829
10775
  export declare function registerFromModelInfo(info: any): void
8830
10776
 
10777
+ /**
10778
+ * Register the process-wide native-event serializer hook.
10779
+ *
10780
+ * Installs the Node serializer (see [`node_native_to_json`]) used by
10781
+ * [`DynamicEvent.toJson`](blazen_events::DynamicEvent) to lazily materialize
10782
+ * native-backed events. Idempotent — the first registration wins. This is
10783
+ * invoked automatically at module load, but is exposed for API parity with
10784
+ * the other language bindings.
10785
+ */
10786
+ export declare function registerNativeSerializer(): void
10787
+
8831
10788
  /**
8832
10789
  * Register (or overwrite) pricing for a model.
8833
10790
  *
@@ -8958,9 +10915,9 @@ export interface RetryStack {
8958
10915
  * that resolves to one).
8959
10916
  *
8960
10917
  * ```typescript
8961
- * import { CompletionModel, ChatMessage, runAgent } from 'blazen';
10918
+ * import { Model, ChatMessage, runAgent } from 'blazen';
8962
10919
  *
8963
- * const model = CompletionModel.openai({ apiKey: "sk-..." });
10920
+ * const model = Model.openai({ apiKey: "sk-..." });
8964
10921
  *
8965
10922
  * const result = await runAgent(
8966
10923
  * model,
@@ -8974,7 +10931,7 @@ export interface RetryStack {
8974
10931
  * );
8975
10932
  * ```
8976
10933
  */
8977
- export declare function runAgent(model: JsCompletionModel, messages: Array<JsChatMessage>, tools: Array<JsToolDef>, toolHandler: ToolHandlerTsfn, options?: JsAgentRunOptions | undefined | null): Promise<AgentResult>
10934
+ export declare function runAgent(model: JsModel, messages: Array<JsChatMessage>, tools: Array<JsToolDef>, toolHandler: ToolHandlerTsfn, options?: JsAgentRunOptions | undefined | null): Promise<AgentResult>
8978
10935
 
8979
10936
  /**
8980
10937
  * Run an agent loop with an event-observer callback.
@@ -8985,9 +10942,9 @@ export declare function runAgent(model: JsCompletionModel, messages: Array<JsCha
8985
10942
  * abort the loop.
8986
10943
  *
8987
10944
  * ```typescript
8988
- * import { CompletionModel, ChatMessage, runAgentWithCallback } from 'blazen';
10945
+ * import { Model, ChatMessage, runAgentWithCallback } from 'blazen';
8989
10946
  *
8990
- * const model = CompletionModel.openai({ apiKey: "sk-..." });
10947
+ * const model = Model.openai({ apiKey: "sk-..." });
8991
10948
  *
8992
10949
  * const result = await runAgentWithCallback(
8993
10950
  * model,
@@ -9001,7 +10958,7 @@ export declare function runAgent(model: JsCompletionModel, messages: Array<JsCha
9001
10958
  * );
9002
10959
  * ```
9003
10960
  */
9004
- export declare function runAgentWithCallback(model: JsCompletionModel, messages: Array<JsChatMessage>, tools: Array<JsToolDef>, toolHandler: ToolHandlerTsfn, onEvent: AgentEventCallbackTsfn, options?: JsAgentRunOptions | undefined | null): Promise<AgentResult>
10961
+ export declare function runAgentWithCallback(model: JsModel, messages: Array<JsChatMessage>, tools: Array<JsToolDef>, toolHandler: ToolHandlerTsfn, onEvent: AgentEventCallbackTsfn, options?: JsAgentRunOptions | undefined | null): Promise<AgentResult>
9005
10962
 
9006
10963
  /**
9007
10964
  * Payload returned by [`JsContext::get_session_ref_serializable`].
@@ -9085,6 +11042,25 @@ export declare function simhashFromHex(hex: string): string
9085
11042
  */
9086
11043
  export declare function simhashToHex(value: string): string
9087
11044
 
11045
+ /**
11046
+ * Configuration for the Spark-TTS backend.
11047
+ *
11048
+ * Mirrors [`blazen_llm::SparkTtsConfig`]. All fields are optional; unset
11049
+ * fields fall back to the upstream defaults
11050
+ * (`SparkAudio/Spark-TTS-0.5B`, HF download on first use).
11051
+ */
11052
+ export interface SparkTtsConfig {
11053
+ /** Hugging Face repo id for the Spark-TTS bundle. */
11054
+ modelId?: string
11055
+ /**
11056
+ * Pre-resolved bundle directory. When unset, the bundle is
11057
+ * downloaded and cached on first synthesis.
11058
+ */
11059
+ modelDir?: string
11060
+ /** Optional revision (branch / tag / commit SHA) to pin against. */
11061
+ revision?: string
11062
+ }
11063
+
9088
11064
  /** Options for constructing a [`JsStartEventClass`] from JavaScript. */
9089
11065
  export interface StartEventOptions {
9090
11066
  /** Arbitrary payload passed into the workflow at start. */
@@ -9264,6 +11240,27 @@ export interface ToolOutput {
9264
11240
  llmOverride?: LlmPayload
9265
11241
  }
9266
11242
 
11243
+ /**
11244
+ * Runtime configuration for the tracing wrapper installed by
11245
+ * [`JsModel::with_tracing`](JsModel::with_tracing).
11246
+ *
11247
+ * Defaults are privacy-safe: token counts, model id, provider, and finish
11248
+ * reason are always recorded; the raw prompt + completion message text is
11249
+ * captured only when `captureMessages` is `true`.
11250
+ *
11251
+ * ```javascript
11252
+ * const traced = Model.openai({ apiKey }).withTracingConfig({ captureMessages: true });
11253
+ * ```
11254
+ */
11255
+ export interface TracingConfig {
11256
+ /**
11257
+ * Capture raw prompt + completion message text as span attributes
11258
+ * (`llm.input_messages` / `llm.output_messages`). Defaults to `false`.
11259
+ * Leave off for privacy-sensitive deployments.
11260
+ */
11261
+ captureMessages?: boolean
11262
+ }
11263
+
9267
11264
  /**
9268
11265
  * Attempt to deserialize an event payload using the registry.
9269
11266
  *
@@ -9418,6 +11415,30 @@ export interface ContentHint {
9418
11415
  byteSize?: number | null
9419
11416
  }
9420
11417
 
11418
+ // --- post-build: Event interface (workflow step surface) ---
11419
+ /**
11420
+ * An event flowing through a {@link Workflow}. Every event is a plain
11421
+ * object whose `type` string routes it to the steps that declared it in
11422
+ * their `eventTypes`. All other fields are arbitrary user payload.
11423
+ *
11424
+ * Returned from / passed to step handlers (`addStep`), emitted via
11425
+ * `ctx.sendEvent` / `ctx.writeEventToStream`, wrapped by
11426
+ * `StepOutput.single` / `StepOutput.multiple`, and delivered to the
11427
+ * `streamEvents` / `runStreaming` callbacks.
11428
+ *
11429
+ * Object identity is preserved across step hops: the exact object a
11430
+ * handler returns is the same object the next matching handler
11431
+ * receives (methods, class prototype, and non-JSON fields included).
11432
+ */
11433
+ export interface Event {
11434
+ /** Event type discriminant used for step routing, e.g. `"blazen::StartEvent"`. */
11435
+ type: string
11436
+ /** Result payload carried by `blazen::StopEvent`. */
11437
+ result?: any
11438
+ /** Arbitrary user payload fields. */
11439
+ [key: string]: any
11440
+ }
11441
+
9421
11442
  // --- post-build: typed error classes ---
9422
11443
  export class BlazenError extends Error {}
9423
11444
  export class AuthError extends BlazenError {}
@@ -9441,11 +11462,13 @@ export class LlamaCppInvalidOptionsError extends LlamaCppError {}
9441
11462
  export class LlamaCppModelLoadError extends LlamaCppError {}
9442
11463
  export class LlamaCppInferenceError extends LlamaCppError {}
9443
11464
  export class LlamaCppEngineNotAvailableError extends LlamaCppError {}
11465
+ export class LlamaCppAdapterFailedError extends LlamaCppError {}
9444
11466
  export class CandleLlmError extends ProviderError {}
9445
11467
  export class CandleLlmInvalidOptionsError extends CandleLlmError {}
9446
11468
  export class CandleLlmModelLoadError extends CandleLlmError {}
9447
11469
  export class CandleLlmInferenceError extends CandleLlmError {}
9448
11470
  export class CandleLlmEngineNotAvailableError extends CandleLlmError {}
11471
+ export class CandleLlmUnsupportedError extends CandleLlmError {}
9449
11472
  export class CandleEmbedError extends ProviderError {}
9450
11473
  export class CandleEmbedInvalidOptionsError extends CandleEmbedError {}
9451
11474
  export class CandleEmbedModelLoadError extends CandleEmbedError {}
@@ -9457,6 +11480,7 @@ export class MistralRsInvalidOptionsError extends MistralRsError {}
9457
11480
  export class MistralRsInitError extends MistralRsError {}
9458
11481
  export class MistralRsInferenceError extends MistralRsError {}
9459
11482
  export class MistralRsEngineNotAvailableError extends MistralRsError {}
11483
+ export class MistralRsAdapterFailedError extends MistralRsError {}
9460
11484
  export class WhisperError extends ProviderError {}
9461
11485
  export class WhisperInvalidOptionsError extends WhisperError {}
9462
11486
  export class WhisperModelLoadError extends WhisperError {}
@@ -9472,6 +11496,7 @@ export class DiffusionError extends ProviderError {}
9472
11496
  export class DiffusionInvalidOptionsError extends DiffusionError {}
9473
11497
  export class DiffusionModelLoadError extends DiffusionError {}
9474
11498
  export class DiffusionGenerationError extends DiffusionError {}
11499
+ export class DiffusionEngineNotAvailableError extends DiffusionError {}
9475
11500
  export class FastEmbedError extends ProviderError {}
9476
11501
  export class EmbedUnknownModelError extends FastEmbedError {}
9477
11502
  export class EmbedInitError extends FastEmbedError {}
@@ -9506,4 +11531,17 @@ export class CacheError extends BlazenError {}
9506
11531
  export class DownloadError extends CacheError {}
9507
11532
  export class CacheDirError extends CacheError {}
9508
11533
  export class IoError extends CacheError {}
9509
- export declare function enrichError(err: unknown): unknown
11534
+ export class MusicError extends BlazenError {}
11535
+ export class MusicEngineNotAvailableError extends MusicError {}
11536
+ export class MusicNotYetImplementedError extends MusicError {}
11537
+ export class MusicHfHubError extends MusicError {}
11538
+ export class MusicIoError extends MusicError {}
11539
+ export class MusicCandleError extends MusicError {}
11540
+ export class MusicInvalidInputError extends MusicError {}
11541
+ export class VcError extends BlazenError {}
11542
+ export class VcEngineNotAvailableError extends VcError {}
11543
+ export class VcModelLoadError extends VcError {}
11544
+ export class VcConversionError extends VcError {}
11545
+ export class VcVoiceNotFoundError extends VcError {}
11546
+ export class VcUnsupportedError extends VcError {}
11547
+ export class VcIoError extends VcError {}