blazen 0.5.1 → 0.5.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/index.d.ts +194 -86
- package/package.json +8 -8
package/index.d.ts
CHANGED
|
@@ -153,8 +153,11 @@ export declare class BackgroundRemovalProvider {
|
|
|
153
153
|
get providerId(): string | null
|
|
154
154
|
/** The base URL, if set. */
|
|
155
155
|
get baseUrl(): string | null
|
|
156
|
-
/**
|
|
157
|
-
|
|
156
|
+
/**
|
|
157
|
+
* Estimated memory footprint in bytes (host RAM if the
|
|
158
|
+
* provider targets the CPU, GPU VRAM otherwise), if set.
|
|
159
|
+
*/
|
|
160
|
+
get memoryEstimateBytes(): number | null
|
|
158
161
|
/** r" Remove the background from an image. */
|
|
159
162
|
removeBackground(request: any): Promise<any>
|
|
160
163
|
}
|
|
@@ -647,8 +650,11 @@ export declare class CandleLlmProvider {
|
|
|
647
650
|
unload(): Promise<void>
|
|
648
651
|
/** Whether the model is currently loaded in memory / `VRAM`. */
|
|
649
652
|
isLoaded(): Promise<boolean>
|
|
650
|
-
/**
|
|
651
|
-
|
|
653
|
+
/**
|
|
654
|
+
* Approximate memory footprint in bytes (host RAM if the model
|
|
655
|
+
* targets the CPU, GPU VRAM otherwise).
|
|
656
|
+
*/
|
|
657
|
+
memoryBytes(): Promise<number | null>
|
|
652
658
|
}
|
|
653
659
|
export type JsCandleLlmProvider = CandleLlmProvider
|
|
654
660
|
|
|
@@ -1126,16 +1132,19 @@ export declare class CompletionModel {
|
|
|
1126
1132
|
*/
|
|
1127
1133
|
isLoaded(): Promise<boolean>
|
|
1128
1134
|
/**
|
|
1129
|
-
* Approximate
|
|
1130
|
-
*
|
|
1131
|
-
*
|
|
1135
|
+
* Approximate memory footprint in bytes (host RAM if the
|
|
1136
|
+
* provider targets the CPU, GPU VRAM otherwise), if the
|
|
1137
|
+
* implementation can report it. Returns `null` for remote
|
|
1138
|
+
* providers or for local providers that do not expose memory
|
|
1139
|
+
* usage.
|
|
1132
1140
|
*
|
|
1133
1141
|
* Note: napi-rs exposes this as a JS `number`. The underlying
|
|
1134
|
-
* [`blazen_llm::LocalModel::
|
|
1135
|
-
* `i64::MAX` (~9.2 exabytes) when surfacing through
|
|
1136
|
-
* types, which is effectively lossless for any
|
|
1142
|
+
* [`blazen_llm::LocalModel::memory_bytes`] returns `u64`; we clamp
|
|
1143
|
+
* to `i64::MAX` (~9.2 exabytes) when surfacing through
|
|
1144
|
+
* `JSON`-compatible types, which is effectively lossless for any
|
|
1145
|
+
* realistic footprint.
|
|
1137
1146
|
*/
|
|
1138
|
-
|
|
1147
|
+
memoryBytes(): Promise<number | null>
|
|
1139
1148
|
/**
|
|
1140
1149
|
* Create a local mistral.rs completion model.
|
|
1141
1150
|
*
|
|
@@ -2311,8 +2320,11 @@ export declare class ImageProvider {
|
|
|
2311
2320
|
get providerId(): string | null
|
|
2312
2321
|
/** The base URL, if set. */
|
|
2313
2322
|
get baseUrl(): string | null
|
|
2314
|
-
/**
|
|
2315
|
-
|
|
2323
|
+
/**
|
|
2324
|
+
* Estimated memory footprint in bytes (host RAM if the
|
|
2325
|
+
* provider targets the CPU, GPU VRAM otherwise), if set.
|
|
2326
|
+
*/
|
|
2327
|
+
get memoryEstimateBytes(): number | null
|
|
2316
2328
|
/** r" Generate an image from a prompt. */
|
|
2317
2329
|
generateImage(request: any): Promise<any>
|
|
2318
2330
|
/** r" Upscale an existing image. */
|
|
@@ -2685,8 +2697,11 @@ export declare class LlamaCppProvider {
|
|
|
2685
2697
|
unload(): Promise<void>
|
|
2686
2698
|
/** Whether the model is currently loaded in memory / `VRAM`. */
|
|
2687
2699
|
isLoaded(): Promise<boolean>
|
|
2688
|
-
/**
|
|
2689
|
-
|
|
2700
|
+
/**
|
|
2701
|
+
* Approximate memory footprint in bytes (host RAM if the model
|
|
2702
|
+
* targets the CPU, GPU VRAM otherwise).
|
|
2703
|
+
*/
|
|
2704
|
+
memoryBytes(): Promise<number | null>
|
|
2690
2705
|
}
|
|
2691
2706
|
export type JsLlamaCppProvider = LlamaCppProvider
|
|
2692
2707
|
|
|
@@ -2695,14 +2710,15 @@ export type JsLlamaCppProvider = LlamaCppProvider
|
|
|
2695
2710
|
* memory / VRAM.
|
|
2696
2711
|
*
|
|
2697
2712
|
* Mirrors [`blazen_llm::traits::LocalModel`]. Subclasses must override
|
|
2698
|
-
* `load`, `unload`, and `
|
|
2699
|
-
* `
|
|
2713
|
+
* `load`, `unload`, `isLoaded`, and `device` (and may optionally
|
|
2714
|
+
* override `memoryBytes`).
|
|
2700
2715
|
*
|
|
2701
2716
|
* ```javascript
|
|
2702
2717
|
* class MyLocalModel extends LocalModel {
|
|
2703
2718
|
* async load() { /* ... *\/ }
|
|
2704
2719
|
* async unload() { /* ... *\/ }
|
|
2705
2720
|
* async isLoaded() { return false; }
|
|
2721
|
+
* device() { return "cpu"; }
|
|
2706
2722
|
* }
|
|
2707
2723
|
* ```
|
|
2708
2724
|
*/
|
|
@@ -2719,10 +2735,15 @@ export declare class LocalModel {
|
|
|
2719
2735
|
/** Whether the model is currently loaded. Subclasses **must** override. */
|
|
2720
2736
|
isLoaded(): Promise<boolean>
|
|
2721
2737
|
/**
|
|
2722
|
-
*
|
|
2723
|
-
*
|
|
2738
|
+
* Return the device this model targets: `'cpu'`, `'cuda:0'`,
|
|
2739
|
+
* `'metal'`, etc.
|
|
2740
|
+
*/
|
|
2741
|
+
device(): string
|
|
2742
|
+
/**
|
|
2743
|
+
* Approximate memory footprint in bytes (host RAM if `device()`
|
|
2744
|
+
* returns `'cpu'`, GPU VRAM otherwise). Default returns `null`.
|
|
2724
2745
|
*/
|
|
2725
|
-
|
|
2746
|
+
memoryBytes(): Promise<number | null>
|
|
2726
2747
|
}
|
|
2727
2748
|
export type JsLocalModel = LocalModel
|
|
2728
2749
|
|
|
@@ -3061,8 +3082,11 @@ export declare class MistralRsProvider {
|
|
|
3061
3082
|
unload(): Promise<void>
|
|
3062
3083
|
/** Whether the model is currently loaded in memory / `VRAM`. */
|
|
3063
3084
|
isLoaded(): Promise<boolean>
|
|
3064
|
-
/**
|
|
3065
|
-
|
|
3085
|
+
/**
|
|
3086
|
+
* Approximate memory footprint in bytes (host RAM if the model
|
|
3087
|
+
* targets the CPU, GPU VRAM otherwise).
|
|
3088
|
+
*/
|
|
3089
|
+
memoryBytes(): Promise<number | null>
|
|
3066
3090
|
}
|
|
3067
3091
|
export type JsMistralRsProvider = MistralRsProvider
|
|
3068
3092
|
|
|
@@ -3124,55 +3148,77 @@ export declare class ModelCache {
|
|
|
3124
3148
|
export type JsModelCache = ModelCache
|
|
3125
3149
|
|
|
3126
3150
|
/**
|
|
3127
|
-
*
|
|
3151
|
+
* Memory-budget-aware model manager with per-pool LRU eviction.
|
|
3128
3152
|
*
|
|
3129
|
-
* Tracks registered local models and their estimated
|
|
3130
|
-
* When loading a model that would exceed
|
|
3131
|
-
* loaded model is unloaded first.
|
|
3153
|
+
* Tracks registered local models and their estimated memory footprint.
|
|
3154
|
+
* When loading a model that would exceed its pool's budget, the
|
|
3155
|
+
* least-recently-used loaded model in the same pool is unloaded first.
|
|
3132
3156
|
*
|
|
3133
3157
|
* ```javascript
|
|
3134
|
-
*
|
|
3135
|
-
*
|
|
3158
|
+
* // Single-GPU desktop layout:
|
|
3159
|
+
* const manager = new ModelManager({ cpuRamGb: 100, gpuVramGb: 24 });
|
|
3160
|
+
*
|
|
3161
|
+
* // Multi-pool layout via explicit BigInt budgets:
|
|
3162
|
+
* const manager = new ModelManager({
|
|
3163
|
+
* poolBudgets: { "cpu": 100_000_000_000n, "gpu:0": 24_000_000_000n },
|
|
3164
|
+
* });
|
|
3165
|
+
*
|
|
3166
|
+
* // No arguments: both `cpu` and `gpu:0` default to the unlimited sentinel.
|
|
3167
|
+
* const manager = new ModelManager();
|
|
3168
|
+
*
|
|
3169
|
+
* await manager.register("llama-7b", model, 4_000_000_000n);
|
|
3136
3170
|
* await manager.load("llama-7b");
|
|
3137
3171
|
* ```
|
|
3138
3172
|
*/
|
|
3139
3173
|
export declare class ModelManager {
|
|
3140
3174
|
/**
|
|
3141
|
-
* Create a new model manager with
|
|
3175
|
+
* Create a new model manager with per-pool memory budgets.
|
|
3176
|
+
*
|
|
3177
|
+
* If `poolBudgets` is provided, it is used verbatim (pool labels are
|
|
3178
|
+
* parsed by [`parse_pool_label`]). Otherwise the manager uses
|
|
3179
|
+
* `cpuRamGb` (default `0`) for `Pool::Cpu` and `gpuVramGb`
|
|
3180
|
+
* (default `0`) for `Pool::Gpu(0)`.
|
|
3142
3181
|
*
|
|
3143
|
-
*
|
|
3144
|
-
* (
|
|
3182
|
+
* When **all** fields are omitted, both `Pool::Cpu` and
|
|
3183
|
+
* `Pool::Gpu(0)` default to `u64::MAX` — the unlimited-budget
|
|
3184
|
+
* sentinel used by integration tests that don't want to think
|
|
3185
|
+
* about capacity.
|
|
3145
3186
|
*/
|
|
3146
|
-
constructor(config
|
|
3187
|
+
constructor(config?: ModelManagerConfig | undefined | null)
|
|
3147
3188
|
/**
|
|
3148
3189
|
* Register a `CompletionModel`-backed local model with the manager.
|
|
3149
3190
|
*
|
|
3150
|
-
* The model starts in the unloaded state.
|
|
3151
|
-
* `
|
|
3191
|
+
* The model starts in the unloaded state. An optional
|
|
3192
|
+
* `memoryEstimateBytes` overrides the model's self-reported
|
|
3193
|
+
* estimate.
|
|
3152
3194
|
*
|
|
3153
|
-
* Only local in-process providers (mistral.rs, llama.cpp, candle)
|
|
3154
|
-
* registered
|
|
3155
|
-
* arbitrary JS-managed resource (embedding model,
|
|
3156
|
-
* runtime, …), use
|
|
3195
|
+
* Only local in-process providers (mistral.rs, llama.cpp, candle)
|
|
3196
|
+
* can be registered — remote HTTP providers will throw. To
|
|
3197
|
+
* register an arbitrary JS-managed resource (embedding model,
|
|
3198
|
+
* tokenizer, custom runtime, …), use
|
|
3199
|
+
* [`Self::register_local_model`] instead.
|
|
3157
3200
|
*/
|
|
3158
|
-
register(id: string, model: JsCompletionModel,
|
|
3201
|
+
register(id: string, model: JsCompletionModel, memoryEstimateBytes?: bigint | undefined | null): Promise<void>
|
|
3159
3202
|
/**
|
|
3160
3203
|
* Register an arbitrary JS-managed local model with the manager.
|
|
3161
3204
|
*
|
|
3162
|
-
* Unlike [`Self::register`]
|
|
3163
|
-
* backed by an in-process provider
|
|
3164
|
-
* lifecycle callbacks. The manager will invoke `load()` when the
|
|
3165
|
-
* is brought into
|
|
3166
|
-
* `unload()` when it is evicted or explicitly released.
|
|
3205
|
+
* Unlike [`Self::register`] — which expects a [`JsCompletionModel`]
|
|
3206
|
+
* backed by an in-process provider — this entrypoint takes raw
|
|
3207
|
+
* lifecycle callbacks. The manager will invoke `load()` when the
|
|
3208
|
+
* model is brought into memory (potentially after evicting an LRU
|
|
3209
|
+
* peer) and `unload()` when it is evicted or explicitly released.
|
|
3167
3210
|
*
|
|
3168
|
-
* Both callbacks must return a `Promise<void>` (or be `async`).
|
|
3169
|
-
* rejection from `load()` aborts the load operation; a rejection
|
|
3170
|
-
* `unload()` is propagated as a manager error.
|
|
3211
|
+
* Both callbacks must return a `Promise<void>` (or be `async`).
|
|
3212
|
+
* A rejection from `load()` aborts the load operation; a rejection
|
|
3213
|
+
* from `unload()` is propagated as a manager error.
|
|
3171
3214
|
*
|
|
3172
3215
|
* `isLoaded()` is optional: when omitted, the manager's own
|
|
3173
3216
|
* loaded-flag bookkeeping is the source of truth.
|
|
3174
|
-
* `
|
|
3175
|
-
* can enforce the
|
|
3217
|
+
* `memoryEstimateBytes` reports the model's footprint so the
|
|
3218
|
+
* manager can enforce the per-pool budget; defaults to `0` when
|
|
3219
|
+
* not provided. `device` selects which pool the model targets
|
|
3220
|
+
* (`"cpu"`, `"cuda:0"`, `"metal"`, …); defaults to `"cpu"` when
|
|
3221
|
+
* omitted.
|
|
3176
3222
|
*
|
|
3177
3223
|
* ```javascript
|
|
3178
3224
|
* let loaded = false;
|
|
@@ -3182,22 +3228,24 @@ export declare class ModelManager {
|
|
|
3182
3228
|
* async () => { /* release *\/ loaded = false; },
|
|
3183
3229
|
* async () => loaded,
|
|
3184
3230
|
* 2_000_000_000n,
|
|
3231
|
+
* "cuda:0",
|
|
3185
3232
|
* );
|
|
3186
3233
|
* ```
|
|
3187
3234
|
*
|
|
3188
|
-
* `isLoaded
|
|
3189
|
-
* `
|
|
3235
|
+
* `isLoaded`, `memoryEstimateBytes`, and `device` are all
|
|
3236
|
+
* nullable / optional (pass `null` or `undefined` to omit).
|
|
3190
3237
|
*/
|
|
3191
|
-
registerLocalModel(id: string, load: LifecycleTsfn, unload: LifecycleTsfn, isLoaded?: IsLoadedTsfn | undefined | null,
|
|
3238
|
+
registerLocalModel(id: string, load: LifecycleTsfn, unload: LifecycleTsfn, isLoaded?: IsLoadedTsfn | undefined | null, memoryEstimateBytes?: bigint | undefined | null, device?: string | undefined | null): Promise<void>
|
|
3192
3239
|
/**
|
|
3193
|
-
* Load a model, evicting LRU
|
|
3240
|
+
* Load a model, evicting LRU peers in the same pool if the budget
|
|
3241
|
+
* would be exceeded.
|
|
3194
3242
|
*
|
|
3195
|
-
* Throws if the model is not registered or its
|
|
3196
|
-
* total budget.
|
|
3243
|
+
* Throws if the model is not registered or its memory estimate
|
|
3244
|
+
* exceeds the pool's total budget.
|
|
3197
3245
|
*/
|
|
3198
3246
|
load(id: string): Promise<void>
|
|
3199
3247
|
/**
|
|
3200
|
-
* Unload a model, freeing its
|
|
3248
|
+
* Unload a model, freeing its slice of the pool budget.
|
|
3201
3249
|
*
|
|
3202
3250
|
* Idempotent: unloading an already-unloaded model is a no-op.
|
|
3203
3251
|
*/
|
|
@@ -3207,14 +3255,28 @@ export declare class ModelManager {
|
|
|
3207
3255
|
/**
|
|
3208
3256
|
* Ensure a model is loaded.
|
|
3209
3257
|
*
|
|
3210
|
-
* If already loaded, updates the LRU timestamp. If not loaded,
|
|
3211
|
-
* (potentially evicting other models).
|
|
3258
|
+
* If already loaded, updates the LRU timestamp. If not loaded,
|
|
3259
|
+
* loads it (potentially evicting other models in the same pool).
|
|
3212
3260
|
*/
|
|
3213
3261
|
ensureLoaded(id: string): Promise<void>
|
|
3214
|
-
/**
|
|
3215
|
-
|
|
3216
|
-
|
|
3217
|
-
|
|
3262
|
+
/**
|
|
3263
|
+
* Total memory currently used by loaded models in the given pool,
|
|
3264
|
+
* in bytes. Defaults to `"cpu"` when no pool label is provided.
|
|
3265
|
+
*/
|
|
3266
|
+
usedBytes(pool?: string | undefined | null): Promise<bigint>
|
|
3267
|
+
/**
|
|
3268
|
+
* Available memory within the given pool's budget, in bytes.
|
|
3269
|
+
* Defaults to `"cpu"` when no pool label is provided.
|
|
3270
|
+
*/
|
|
3271
|
+
availableBytes(pool?: string | undefined | null): Promise<bigint>
|
|
3272
|
+
/**
|
|
3273
|
+
* Snapshot of the configured per-pool budgets.
|
|
3274
|
+
*
|
|
3275
|
+
* Returns one entry per pool the manager knows about; each entry
|
|
3276
|
+
* carries the pool label (`"cpu"` or `"gpu:N"`) and its budget in
|
|
3277
|
+
* bytes.
|
|
3278
|
+
*/
|
|
3279
|
+
pools(): Array<JsPoolBudget>
|
|
3218
3280
|
/** Status of all registered models. */
|
|
3219
3281
|
status(): Promise<Array<JsModelStatus>>
|
|
3220
3282
|
}
|
|
@@ -3265,8 +3327,11 @@ export declare class MusicProvider {
|
|
|
3265
3327
|
get providerId(): string | null
|
|
3266
3328
|
/** The base URL, if set. */
|
|
3267
3329
|
get baseUrl(): string | null
|
|
3268
|
-
/**
|
|
3269
|
-
|
|
3330
|
+
/**
|
|
3331
|
+
* Estimated memory footprint in bytes (host RAM if the
|
|
3332
|
+
* provider targets the CPU, GPU VRAM otherwise), if set.
|
|
3333
|
+
*/
|
|
3334
|
+
get memoryEstimateBytes(): number | null
|
|
3270
3335
|
/** r" Generate music from a prompt. */
|
|
3271
3336
|
generateMusic(request: any): Promise<any>
|
|
3272
3337
|
/** r" Generate a sound effect from a prompt. */
|
|
@@ -4553,8 +4618,11 @@ export declare class ThreeDProvider {
|
|
|
4553
4618
|
get providerId(): string | null
|
|
4554
4619
|
/** The base URL, if set. */
|
|
4555
4620
|
get baseUrl(): string | null
|
|
4556
|
-
/**
|
|
4557
|
-
|
|
4621
|
+
/**
|
|
4622
|
+
* Estimated memory footprint in bytes (host RAM if the
|
|
4623
|
+
* provider targets the CPU, GPU VRAM otherwise), if set.
|
|
4624
|
+
*/
|
|
4625
|
+
get memoryEstimateBytes(): number | null
|
|
4558
4626
|
/** r" Generate a 3D model from a prompt or image. */
|
|
4559
4627
|
generate3d(request: any): Promise<any>
|
|
4560
4628
|
}
|
|
@@ -4898,8 +4966,11 @@ export declare class TTSProvider {
|
|
|
4898
4966
|
get providerId(): string | null
|
|
4899
4967
|
/** The base URL, if set. */
|
|
4900
4968
|
get baseUrl(): string | null
|
|
4901
|
-
/**
|
|
4902
|
-
|
|
4969
|
+
/**
|
|
4970
|
+
* Estimated memory footprint in bytes (host RAM if the
|
|
4971
|
+
* provider targets the CPU, GPU VRAM otherwise), if set.
|
|
4972
|
+
*/
|
|
4973
|
+
get memoryEstimateBytes(): number | null
|
|
4903
4974
|
/** r" Synthesize speech from text. */
|
|
4904
4975
|
textToSpeech(request: any): Promise<any>
|
|
4905
4976
|
}
|
|
@@ -5115,8 +5186,11 @@ export declare class VideoProvider {
|
|
|
5115
5186
|
get providerId(): string | null
|
|
5116
5187
|
/** The base URL, if set. */
|
|
5117
5188
|
get baseUrl(): string | null
|
|
5118
|
-
/**
|
|
5119
|
-
|
|
5189
|
+
/**
|
|
5190
|
+
* Estimated memory footprint in bytes (host RAM if the
|
|
5191
|
+
* provider targets the CPU, GPU VRAM otherwise), if set.
|
|
5192
|
+
*/
|
|
5193
|
+
get memoryEstimateBytes(): number | null
|
|
5120
5194
|
/** r" Generate a video from a text prompt. */
|
|
5121
5195
|
textToVideo(request: any): Promise<any>
|
|
5122
5196
|
/** r" Generate a video from an image (image-to-video). */
|
|
@@ -5156,8 +5230,11 @@ export declare class VoiceProvider {
|
|
|
5156
5230
|
get providerId(): string | null
|
|
5157
5231
|
/** The base URL, if set. */
|
|
5158
5232
|
get baseUrl(): string | null
|
|
5159
|
-
/**
|
|
5160
|
-
|
|
5233
|
+
/**
|
|
5234
|
+
* Estimated memory footprint in bytes (host RAM if the
|
|
5235
|
+
* provider targets the CPU, GPU VRAM otherwise), if set.
|
|
5236
|
+
*/
|
|
5237
|
+
get memoryEstimateBytes(): number | null
|
|
5161
5238
|
/** r" Clone a voice from audio samples. */
|
|
5162
5239
|
cloneVoice(request: any): Promise<any>
|
|
5163
5240
|
/** r" List all available voices. */
|
|
@@ -5823,8 +5900,11 @@ export interface CapabilityProviderConfig {
|
|
|
5823
5900
|
providerId: string
|
|
5824
5901
|
/** Optional base URL for HTTP-based providers. */
|
|
5825
5902
|
baseUrl?: string
|
|
5826
|
-
/**
|
|
5827
|
-
|
|
5903
|
+
/**
|
|
5904
|
+
* Optional estimated memory footprint in bytes when loaded
|
|
5905
|
+
* (host RAM if the provider targets the CPU, GPU VRAM otherwise).
|
|
5906
|
+
*/
|
|
5907
|
+
memoryEstimateBytes?: number
|
|
5828
5908
|
}
|
|
5829
5909
|
|
|
5830
5910
|
/** Options for creating a `ChatMessage`. */
|
|
@@ -5929,8 +6009,11 @@ export interface CompletionModelConfig {
|
|
|
5929
6009
|
contextLength?: number
|
|
5930
6010
|
/** Base URL for HTTP-based providers. */
|
|
5931
6011
|
baseUrl?: string
|
|
5932
|
-
/**
|
|
5933
|
-
|
|
6012
|
+
/**
|
|
6013
|
+
* Estimated memory footprint in bytes when loaded (host RAM if
|
|
6014
|
+
* the provider targets the CPU, GPU VRAM otherwise).
|
|
6015
|
+
*/
|
|
6016
|
+
memoryEstimateBytes?: number
|
|
5934
6017
|
/** Maximum output tokens the model supports. */
|
|
5935
6018
|
maxOutputTokens?: number
|
|
5936
6019
|
}
|
|
@@ -7237,10 +7320,15 @@ export interface JsModelPricing {
|
|
|
7237
7320
|
export interface JsModelStatus {
|
|
7238
7321
|
/** Model identifier. */
|
|
7239
7322
|
id: string
|
|
7240
|
-
/** Whether the model is currently loaded into
|
|
7323
|
+
/** Whether the model is currently loaded into its pool. */
|
|
7241
7324
|
loaded: boolean
|
|
7242
|
-
/**
|
|
7243
|
-
|
|
7325
|
+
/**
|
|
7326
|
+
* Estimated memory footprint in bytes (host RAM if `pool` is
|
|
7327
|
+
* `"cpu"`, GPU VRAM otherwise).
|
|
7328
|
+
*/
|
|
7329
|
+
memoryEstimateBytes: bigint
|
|
7330
|
+
/** Pool label this model targets (`"cpu"` or `"gpu:N"`). */
|
|
7331
|
+
pool: string
|
|
7244
7332
|
}
|
|
7245
7333
|
|
|
7246
7334
|
export interface JsMusicRequest {
|
|
@@ -7326,6 +7414,14 @@ export interface JsPiperOptions {
|
|
|
7326
7414
|
cacheDir?: string
|
|
7327
7415
|
}
|
|
7328
7416
|
|
|
7417
|
+
/** Reported per-pool budget pair returned by [`JsModelManager::pools`]. */
|
|
7418
|
+
export interface JsPoolBudget {
|
|
7419
|
+
/** Pool label (`"cpu"` or `"gpu:N"`). */
|
|
7420
|
+
pool: string
|
|
7421
|
+
/** Configured budget for the pool in bytes. */
|
|
7422
|
+
budgetBytes: bigint
|
|
7423
|
+
}
|
|
7424
|
+
|
|
7329
7425
|
/** A single `(provider, envVar)` pair returned by [`provider_env_vars`]. */
|
|
7330
7426
|
export interface JsProviderEnvVar {
|
|
7331
7427
|
/** Canonical provider name (e.g. `"openai"`, `"anthropic"`). */
|
|
@@ -8041,13 +8137,22 @@ export interface ModelInfo {
|
|
|
8041
8137
|
/**
|
|
8042
8138
|
* Configuration for creating a [`JsModelManager`].
|
|
8043
8139
|
*
|
|
8044
|
-
*
|
|
8140
|
+
* Pass either the convenience pair (`cpuRamGb` / `gpuVramGb`) for the
|
|
8141
|
+
* common single-GPU desktop layout, or a fully explicit `poolBudgets`
|
|
8142
|
+
* map for multi-GPU or custom topologies. Omit everything to receive
|
|
8143
|
+
* the unlimited-budget defaults (useful for tests).
|
|
8045
8144
|
*/
|
|
8046
8145
|
export interface ModelManagerConfig {
|
|
8047
|
-
/**
|
|
8048
|
-
|
|
8049
|
-
/** VRAM budget in
|
|
8050
|
-
|
|
8146
|
+
/** Host RAM budget in gigabytes for `Pool::Cpu`. */
|
|
8147
|
+
cpuRamGb?: number
|
|
8148
|
+
/** GPU VRAM budget in gigabytes for `Pool::Gpu(0)`. */
|
|
8149
|
+
gpuVramGb?: number
|
|
8150
|
+
/**
|
|
8151
|
+
* Explicit per-pool budget map. Keys: `"cpu"`, `"gpu"`, `"gpu:N"`.
|
|
8152
|
+
* Values: bytes as `BigInt`. When provided, overrides
|
|
8153
|
+
* `cpuRamGb` / `gpuVramGb`.
|
|
8154
|
+
*/
|
|
8155
|
+
poolBudgets?: Record<string, bigint>
|
|
8051
8156
|
}
|
|
8052
8157
|
|
|
8053
8158
|
/** Build an empty [`JsRetryStack`] with every scope set to `null`. */
|
|
@@ -8241,8 +8346,11 @@ export interface ProviderConfig {
|
|
|
8241
8346
|
contextLength?: number
|
|
8242
8347
|
/** Maximum output tokens the model supports. */
|
|
8243
8348
|
maxOutputTokens?: number
|
|
8244
|
-
/**
|
|
8245
|
-
|
|
8349
|
+
/**
|
|
8350
|
+
* Estimated memory footprint in bytes when loaded (host RAM if on CPU,
|
|
8351
|
+
* GPU VRAM otherwise).
|
|
8352
|
+
*/
|
|
8353
|
+
memoryEstimateBytes?: number
|
|
8246
8354
|
/** Pricing information for automatic cost tracking. */
|
|
8247
8355
|
pricing?: JsModelPricing
|
|
8248
8356
|
/** Capability flags. */
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "blazen",
|
|
3
|
-
"version": "0.5.
|
|
3
|
+
"version": "0.5.2",
|
|
4
4
|
"description": "Blazen - Event-driven AI workflow framework for Node.js/TypeScript",
|
|
5
5
|
"main": "index.js",
|
|
6
6
|
"types": "index.d.ts",
|
|
@@ -88,13 +88,13 @@
|
|
|
88
88
|
"verbose": true
|
|
89
89
|
},
|
|
90
90
|
"optionalDependencies": {
|
|
91
|
-
"@blazen-dev/blazen-linux-x64-gnu": "0.5.
|
|
92
|
-
"@blazen-dev/blazen-linux-x64-musl": "0.5.
|
|
93
|
-
"@blazen-dev/blazen-linux-arm64-gnu": "0.5.
|
|
94
|
-
"@blazen-dev/blazen-linux-arm64-musl": "0.5.
|
|
95
|
-
"@blazen-dev/blazen-darwin-arm64": "0.5.
|
|
96
|
-
"@blazen-dev/blazen-win32-x64-msvc": "0.5.
|
|
97
|
-
"@blazen-dev/blazen-wasm32-wasi": "0.5.
|
|
91
|
+
"@blazen-dev/blazen-linux-x64-gnu": "0.5.2",
|
|
92
|
+
"@blazen-dev/blazen-linux-x64-musl": "0.5.2",
|
|
93
|
+
"@blazen-dev/blazen-linux-arm64-gnu": "0.5.2",
|
|
94
|
+
"@blazen-dev/blazen-linux-arm64-musl": "0.5.2",
|
|
95
|
+
"@blazen-dev/blazen-darwin-arm64": "0.5.2",
|
|
96
|
+
"@blazen-dev/blazen-win32-x64-msvc": "0.5.2",
|
|
97
|
+
"@blazen-dev/blazen-wasm32-wasi": "0.5.2"
|
|
98
98
|
},
|
|
99
99
|
"scripts": {
|
|
100
100
|
"build": "napi build --release --platform --features local-all,langfuse --js index.js && node scripts/post-build.mjs",
|