blazen 0.5.1 → 0.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. package/index.d.ts +194 -86
  2. package/package.json +8 -8
package/index.d.ts CHANGED
@@ -153,8 +153,11 @@ export declare class BackgroundRemovalProvider {
153
153
  get providerId(): string | null
154
154
  /** The base URL, if set. */
155
155
  get baseUrl(): string | null
156
- /** Estimated VRAM footprint in bytes, if set. */
157
- get vramEstimateBytes(): number | null
156
+ /**
157
+ * Estimated memory footprint in bytes (host RAM if the
158
+ * provider targets the CPU, GPU VRAM otherwise), if set.
159
+ */
160
+ get memoryEstimateBytes(): number | null
158
161
  /** r" Remove the background from an image. */
159
162
  removeBackground(request: any): Promise<any>
160
163
  }
@@ -647,8 +650,11 @@ export declare class CandleLlmProvider {
647
650
  unload(): Promise<void>
648
651
  /** Whether the model is currently loaded in memory / `VRAM`. */
649
652
  isLoaded(): Promise<boolean>
650
- /** Approximate `VRAM` footprint in bytes. */
651
- vramBytes(): Promise<number | null>
653
+ /**
654
+ * Approximate memory footprint in bytes (host RAM if the model
655
+ * targets the CPU, GPU VRAM otherwise).
656
+ */
657
+ memoryBytes(): Promise<number | null>
652
658
  }
653
659
  export type JsCandleLlmProvider = CandleLlmProvider
654
660
 
@@ -1126,16 +1132,19 @@ export declare class CompletionModel {
1126
1132
  */
1127
1133
  isLoaded(): Promise<boolean>
1128
1134
  /**
1129
- * Approximate `VRAM` footprint in bytes, if the implementation can
1130
- * report it. Returns `null` for remote providers or for local
1131
- * providers that do not expose memory usage.
1135
+ * Approximate memory footprint in bytes (host RAM if the
1136
+ * provider targets the CPU, GPU VRAM otherwise), if the
1137
+ * implementation can report it. Returns `null` for remote
1138
+ * providers or for local providers that do not expose memory
1139
+ * usage.
1132
1140
  *
1133
1141
  * Note: napi-rs exposes this as a JS `number`. The underlying
1134
- * [`blazen_llm::LocalModel::vram_bytes`] returns `u64`; we clamp to
1135
- * `i64::MAX` (~9.2 exabytes) when surfacing through `JSON`-compatible
1136
- * types, which is effectively lossless for any realistic `VRAM` size.
1142
+ * [`blazen_llm::LocalModel::memory_bytes`] returns `u64`; we clamp
1143
+ * to `i64::MAX` (~9.2 exabytes) when surfacing through
1144
+ * `JSON`-compatible types, which is effectively lossless for any
1145
+ * realistic footprint.
1137
1146
  */
1138
- vramBytes(): Promise<number | null>
1147
+ memoryBytes(): Promise<number | null>
1139
1148
  /**
1140
1149
  * Create a local mistral.rs completion model.
1141
1150
  *
@@ -2311,8 +2320,11 @@ export declare class ImageProvider {
2311
2320
  get providerId(): string | null
2312
2321
  /** The base URL, if set. */
2313
2322
  get baseUrl(): string | null
2314
- /** Estimated VRAM footprint in bytes, if set. */
2315
- get vramEstimateBytes(): number | null
2323
+ /**
2324
+ * Estimated memory footprint in bytes (host RAM if the
2325
+ * provider targets the CPU, GPU VRAM otherwise), if set.
2326
+ */
2327
+ get memoryEstimateBytes(): number | null
2316
2328
  /** r" Generate an image from a prompt. */
2317
2329
  generateImage(request: any): Promise<any>
2318
2330
  /** r" Upscale an existing image. */
@@ -2685,8 +2697,11 @@ export declare class LlamaCppProvider {
2685
2697
  unload(): Promise<void>
2686
2698
  /** Whether the model is currently loaded in memory / `VRAM`. */
2687
2699
  isLoaded(): Promise<boolean>
2688
- /** Approximate `VRAM` footprint in bytes. */
2689
- vramBytes(): Promise<number | null>
2700
+ /**
2701
+ * Approximate memory footprint in bytes (host RAM if the model
2702
+ * targets the CPU, GPU VRAM otherwise).
2703
+ */
2704
+ memoryBytes(): Promise<number | null>
2690
2705
  }
2691
2706
  export type JsLlamaCppProvider = LlamaCppProvider
2692
2707
 
@@ -2695,14 +2710,15 @@ export type JsLlamaCppProvider = LlamaCppProvider
2695
2710
  * memory / VRAM.
2696
2711
  *
2697
2712
  * Mirrors [`blazen_llm::traits::LocalModel`]. Subclasses must override
2698
- * `load`, `unload`, and `isLoaded` (and may optionally override
2699
- * `vramBytes`).
2713
+ * `load`, `unload`, `isLoaded`, and `device` (and may optionally
2714
+ * override `memoryBytes`).
2700
2715
  *
2701
2716
  * ```javascript
2702
2717
  * class MyLocalModel extends LocalModel {
2703
2718
  * async load() { /* ... *\/ }
2704
2719
  * async unload() { /* ... *\/ }
2705
2720
  * async isLoaded() { return false; }
2721
+ * device() { return "cpu"; }
2706
2722
  * }
2707
2723
  * ```
2708
2724
  */
@@ -2719,10 +2735,15 @@ export declare class LocalModel {
2719
2735
  /** Whether the model is currently loaded. Subclasses **must** override. */
2720
2736
  isLoaded(): Promise<boolean>
2721
2737
  /**
2722
- * Approximate memory footprint in bytes. Default implementation
2723
- * returns `null`.
2738
+ * Return the device this model targets: `'cpu'`, `'cuda:0'`,
2739
+ * `'metal'`, etc.
2740
+ */
2741
+ device(): string
2742
+ /**
2743
+ * Approximate memory footprint in bytes (host RAM if `device()`
2744
+ * returns `'cpu'`, GPU VRAM otherwise). Default returns `null`.
2724
2745
  */
2725
- vramBytes(): Promise<number | null>
2746
+ memoryBytes(): Promise<number | null>
2726
2747
  }
2727
2748
  export type JsLocalModel = LocalModel
2728
2749
 
@@ -3061,8 +3082,11 @@ export declare class MistralRsProvider {
3061
3082
  unload(): Promise<void>
3062
3083
  /** Whether the model is currently loaded in memory / `VRAM`. */
3063
3084
  isLoaded(): Promise<boolean>
3064
- /** Approximate `VRAM` footprint in bytes. */
3065
- vramBytes(): Promise<number | null>
3085
+ /**
3086
+ * Approximate memory footprint in bytes (host RAM if the model
3087
+ * targets the CPU, GPU VRAM otherwise).
3088
+ */
3089
+ memoryBytes(): Promise<number | null>
3066
3090
  }
3067
3091
  export type JsMistralRsProvider = MistralRsProvider
3068
3092
 
@@ -3124,55 +3148,77 @@ export declare class ModelCache {
3124
3148
  export type JsModelCache = ModelCache
3125
3149
 
3126
3150
  /**
3127
- * VRAM budget-aware model manager with LRU eviction.
3151
+ * Memory-budget-aware model manager with per-pool LRU eviction.
3128
3152
  *
3129
- * Tracks registered local models and their estimated VRAM footprint.
3130
- * When loading a model that would exceed the budget, the least-recently-used
3131
- * loaded model is unloaded first.
3153
+ * Tracks registered local models and their estimated memory footprint.
3154
+ * When loading a model that would exceed its pool's budget, the
3155
+ * least-recently-used loaded model in the same pool is unloaded first.
3132
3156
  *
3133
3157
  * ```javascript
3134
- * const manager = new ModelManager({ budgetGb: 8.0 });
3135
- * await manager.register("llama-7b", model, 4_000_000_000n); // BigInt
3158
+ * // Single-GPU desktop layout:
3159
+ * const manager = new ModelManager({ cpuRamGb: 100, gpuVramGb: 24 });
3160
+ *
3161
+ * // Multi-pool layout via explicit BigInt budgets:
3162
+ * const manager = new ModelManager({
3163
+ * poolBudgets: { "cpu": 100_000_000_000n, "gpu:0": 24_000_000_000n },
3164
+ * });
3165
+ *
3166
+ * // No arguments: both `cpu` and `gpu:0` default to the unlimited sentinel.
3167
+ * const manager = new ModelManager();
3168
+ *
3169
+ * await manager.register("llama-7b", model, 4_000_000_000n);
3136
3170
  * await manager.load("llama-7b");
3137
3171
  * ```
3138
3172
  */
3139
3173
  export declare class ModelManager {
3140
3174
  /**
3141
- * Create a new model manager with the given VRAM budget.
3175
+ * Create a new model manager with per-pool memory budgets.
3176
+ *
3177
+ * If `poolBudgets` is provided, it is used verbatim (pool labels are
3178
+ * parsed by [`parse_pool_label`]). Otherwise the manager uses
3179
+ * `cpuRamGb` (default `0`) for `Pool::Cpu` and `gpuVramGb`
3180
+ * (default `0`) for `Pool::Gpu(0)`.
3142
3181
  *
3143
- * Provide either `budgetGb` (gigabytes as a float) or `budgetBytes`
3144
- * (exact byte count). If both are given, `budgetGb` takes precedence.
3182
+ * When **all** fields are omitted, both `Pool::Cpu` and
3183
+ * `Pool::Gpu(0)` default to `u64::MAX` — the unlimited-budget
3184
+ * sentinel used by integration tests that don't want to think
3185
+ * about capacity.
3145
3186
  */
3146
- constructor(config: ModelManagerConfig)
3187
+ constructor(config?: ModelManagerConfig | undefined | null)
3147
3188
  /**
3148
3189
  * Register a `CompletionModel`-backed local model with the manager.
3149
3190
  *
3150
- * The model starts in the unloaded state. An optional
3151
- * `vramEstimateBytes` overrides the model's self-reported estimate.
3191
+ * The model starts in the unloaded state. An optional
3192
+ * `memoryEstimateBytes` overrides the model's self-reported
3193
+ * estimate.
3152
3194
  *
3153
- * Only local in-process providers (mistral.rs, llama.cpp, candle) can be
3154
- * registered -- remote HTTP providers will throw. To register an
3155
- * arbitrary JS-managed resource (embedding model, tokenizer, custom
3156
- * runtime, …), use [`Self::register_local_model`] instead.
3195
+ * Only local in-process providers (mistral.rs, llama.cpp, candle)
3196
+ * can be registered — remote HTTP providers will throw. To
3197
+ * register an arbitrary JS-managed resource (embedding model,
3198
+ * tokenizer, custom runtime, …), use
3199
+ * [`Self::register_local_model`] instead.
3157
3200
  */
3158
- register(id: string, model: JsCompletionModel, vramEstimateBytes?: bigint | undefined | null): Promise<void>
3201
+ register(id: string, model: JsCompletionModel, memoryEstimateBytes?: bigint | undefined | null): Promise<void>
3159
3202
  /**
3160
3203
  * Register an arbitrary JS-managed local model with the manager.
3161
3204
  *
3162
- * Unlike [`Self::register`] -- which expects a [`JsCompletionModel`]
3163
- * backed by an in-process provider -- this entrypoint takes raw
3164
- * lifecycle callbacks. The manager will invoke `load()` when the model
3165
- * is brought into VRAM (potentially after evicting an LRU peer) and
3166
- * `unload()` when it is evicted or explicitly released.
3205
+ * Unlike [`Self::register`] — which expects a [`JsCompletionModel`]
3206
+ * backed by an in-process provider — this entrypoint takes raw
3207
+ * lifecycle callbacks. The manager will invoke `load()` when the
3208
+ * model is brought into memory (potentially after evicting an LRU
3209
+ * peer) and `unload()` when it is evicted or explicitly released.
3167
3210
  *
3168
- * Both callbacks must return a `Promise<void>` (or be `async`). A
3169
- * rejection from `load()` aborts the load operation; a rejection from
3170
- * `unload()` is propagated as a manager error.
3211
+ * Both callbacks must return a `Promise<void>` (or be `async`).
3212
+ * A rejection from `load()` aborts the load operation; a rejection
3213
+ * from `unload()` is propagated as a manager error.
3171
3214
  *
3172
3215
  * `isLoaded()` is optional: when omitted, the manager's own
3173
3216
  * loaded-flag bookkeeping is the source of truth.
3174
- * `vramEstimateBytes` reports the model's footprint so the manager
3175
- * can enforce the global budget; defaults to `0` when not provided.
3217
+ * `memoryEstimateBytes` reports the model's footprint so the
3218
+ * manager can enforce the per-pool budget; defaults to `0` when
3219
+ * not provided. `device` selects which pool the model targets
3220
+ * (`"cpu"`, `"cuda:0"`, `"metal"`, …); defaults to `"cpu"` when
3221
+ * omitted.
3176
3222
  *
3177
3223
  * ```javascript
3178
3224
  * let loaded = false;
@@ -3182,22 +3228,24 @@ export declare class ModelManager {
3182
3228
  * async () => { /* release *\/ loaded = false; },
3183
3229
  * async () => loaded,
3184
3230
  * 2_000_000_000n,
3231
+ * "cuda:0",
3185
3232
  * );
3186
3233
  * ```
3187
3234
  *
3188
- * `isLoaded` is `null`-able (pass `null` or `undefined` to omit) and
3189
- * `vramEstimateBytes` may also be omitted.
3235
+ * `isLoaded`, `memoryEstimateBytes`, and `device` are all
3236
+ * nullable / optional (pass `null` or `undefined` to omit).
3190
3237
  */
3191
- registerLocalModel(id: string, load: LifecycleTsfn, unload: LifecycleTsfn, isLoaded?: IsLoadedTsfn | undefined | null, vramEstimateBytes?: bigint | undefined | null): Promise<void>
3238
+ registerLocalModel(id: string, load: LifecycleTsfn, unload: LifecycleTsfn, isLoaded?: IsLoadedTsfn | undefined | null, memoryEstimateBytes?: bigint | undefined | null, device?: string | undefined | null): Promise<void>
3192
3239
  /**
3193
- * Load a model, evicting LRU models if the budget would be exceeded.
3240
+ * Load a model, evicting LRU peers in the same pool if the budget
3241
+ * would be exceeded.
3194
3242
  *
3195
- * Throws if the model is not registered or its VRAM estimate exceeds the
3196
- * total budget.
3243
+ * Throws if the model is not registered or its memory estimate
3244
+ * exceeds the pool's total budget.
3197
3245
  */
3198
3246
  load(id: string): Promise<void>
3199
3247
  /**
3200
- * Unload a model, freeing its VRAM budget.
3248
+ * Unload a model, freeing its slice of the pool budget.
3201
3249
  *
3202
3250
  * Idempotent: unloading an already-unloaded model is a no-op.
3203
3251
  */
@@ -3207,14 +3255,28 @@ export declare class ModelManager {
3207
3255
  /**
3208
3256
  * Ensure a model is loaded.
3209
3257
  *
3210
- * If already loaded, updates the LRU timestamp. If not loaded, loads it
3211
- * (potentially evicting other models).
3258
+ * If already loaded, updates the LRU timestamp. If not loaded,
3259
+ * loads it (potentially evicting other models in the same pool).
3212
3260
  */
3213
3261
  ensureLoaded(id: string): Promise<void>
3214
- /** Total VRAM currently used by loaded models (in bytes). */
3215
- usedBytes(): Promise<bigint>
3216
- /** Available VRAM within the budget (in bytes). */
3217
- availableBytes(): Promise<bigint>
3262
+ /**
3263
+ * Total memory currently used by loaded models in the given pool,
3264
+ * in bytes. Defaults to `"cpu"` when no pool label is provided.
3265
+ */
3266
+ usedBytes(pool?: string | undefined | null): Promise<bigint>
3267
+ /**
3268
+ * Available memory within the given pool's budget, in bytes.
3269
+ * Defaults to `"cpu"` when no pool label is provided.
3270
+ */
3271
+ availableBytes(pool?: string | undefined | null): Promise<bigint>
3272
+ /**
3273
+ * Snapshot of the configured per-pool budgets.
3274
+ *
3275
+ * Returns one entry per pool the manager knows about; each entry
3276
+ * carries the pool label (`"cpu"` or `"gpu:N"`) and its budget in
3277
+ * bytes.
3278
+ */
3279
+ pools(): Array<JsPoolBudget>
3218
3280
  /** Status of all registered models. */
3219
3281
  status(): Promise<Array<JsModelStatus>>
3220
3282
  }
@@ -3265,8 +3327,11 @@ export declare class MusicProvider {
3265
3327
  get providerId(): string | null
3266
3328
  /** The base URL, if set. */
3267
3329
  get baseUrl(): string | null
3268
- /** Estimated VRAM footprint in bytes, if set. */
3269
- get vramEstimateBytes(): number | null
3330
+ /**
3331
+ * Estimated memory footprint in bytes (host RAM if the
3332
+ * provider targets the CPU, GPU VRAM otherwise), if set.
3333
+ */
3334
+ get memoryEstimateBytes(): number | null
3270
3335
  /** r" Generate music from a prompt. */
3271
3336
  generateMusic(request: any): Promise<any>
3272
3337
  /** r" Generate a sound effect from a prompt. */
@@ -4553,8 +4618,11 @@ export declare class ThreeDProvider {
4553
4618
  get providerId(): string | null
4554
4619
  /** The base URL, if set. */
4555
4620
  get baseUrl(): string | null
4556
- /** Estimated VRAM footprint in bytes, if set. */
4557
- get vramEstimateBytes(): number | null
4621
+ /**
4622
+ * Estimated memory footprint in bytes (host RAM if the
4623
+ * provider targets the CPU, GPU VRAM otherwise), if set.
4624
+ */
4625
+ get memoryEstimateBytes(): number | null
4558
4626
  /** r" Generate a 3D model from a prompt or image. */
4559
4627
  generate3d(request: any): Promise<any>
4560
4628
  }
@@ -4898,8 +4966,11 @@ export declare class TTSProvider {
4898
4966
  get providerId(): string | null
4899
4967
  /** The base URL, if set. */
4900
4968
  get baseUrl(): string | null
4901
- /** Estimated VRAM footprint in bytes, if set. */
4902
- get vramEstimateBytes(): number | null
4969
+ /**
4970
+ * Estimated memory footprint in bytes (host RAM if the
4971
+ * provider targets the CPU, GPU VRAM otherwise), if set.
4972
+ */
4973
+ get memoryEstimateBytes(): number | null
4903
4974
  /** r" Synthesize speech from text. */
4904
4975
  textToSpeech(request: any): Promise<any>
4905
4976
  }
@@ -5115,8 +5186,11 @@ export declare class VideoProvider {
5115
5186
  get providerId(): string | null
5116
5187
  /** The base URL, if set. */
5117
5188
  get baseUrl(): string | null
5118
- /** Estimated VRAM footprint in bytes, if set. */
5119
- get vramEstimateBytes(): number | null
5189
+ /**
5190
+ * Estimated memory footprint in bytes (host RAM if the
5191
+ * provider targets the CPU, GPU VRAM otherwise), if set.
5192
+ */
5193
+ get memoryEstimateBytes(): number | null
5120
5194
  /** r" Generate a video from a text prompt. */
5121
5195
  textToVideo(request: any): Promise<any>
5122
5196
  /** r" Generate a video from an image (image-to-video). */
@@ -5156,8 +5230,11 @@ export declare class VoiceProvider {
5156
5230
  get providerId(): string | null
5157
5231
  /** The base URL, if set. */
5158
5232
  get baseUrl(): string | null
5159
- /** Estimated VRAM footprint in bytes, if set. */
5160
- get vramEstimateBytes(): number | null
5233
+ /**
5234
+ * Estimated memory footprint in bytes (host RAM if the
5235
+ * provider targets the CPU, GPU VRAM otherwise), if set.
5236
+ */
5237
+ get memoryEstimateBytes(): number | null
5161
5238
  /** r" Clone a voice from audio samples. */
5162
5239
  cloneVoice(request: any): Promise<any>
5163
5240
  /** r" List all available voices. */
@@ -5823,8 +5900,11 @@ export interface CapabilityProviderConfig {
5823
5900
  providerId: string
5824
5901
  /** Optional base URL for HTTP-based providers. */
5825
5902
  baseUrl?: string
5826
- /** Optional estimated VRAM footprint in bytes when loaded. */
5827
- vramEstimateBytes?: number
5903
+ /**
5904
+ * Optional estimated memory footprint in bytes when loaded
5905
+ * (host RAM if the provider targets the CPU, GPU VRAM otherwise).
5906
+ */
5907
+ memoryEstimateBytes?: number
5828
5908
  }
5829
5909
 
5830
5910
  /** Options for creating a `ChatMessage`. */
@@ -5929,8 +6009,11 @@ export interface CompletionModelConfig {
5929
6009
  contextLength?: number
5930
6010
  /** Base URL for HTTP-based providers. */
5931
6011
  baseUrl?: string
5932
- /** Estimated VRAM footprint in bytes when loaded. */
5933
- vramEstimateBytes?: number
6012
+ /**
6013
+ * Estimated memory footprint in bytes when loaded (host RAM if
6014
+ * the provider targets the CPU, GPU VRAM otherwise).
6015
+ */
6016
+ memoryEstimateBytes?: number
5934
6017
  /** Maximum output tokens the model supports. */
5935
6018
  maxOutputTokens?: number
5936
6019
  }
@@ -7237,10 +7320,15 @@ export interface JsModelPricing {
7237
7320
  export interface JsModelStatus {
7238
7321
  /** Model identifier. */
7239
7322
  id: string
7240
- /** Whether the model is currently loaded into VRAM. */
7323
+ /** Whether the model is currently loaded into its pool. */
7241
7324
  loaded: boolean
7242
- /** Estimated VRAM footprint in bytes. */
7243
- vramEstimate: bigint
7325
+ /**
7326
+ * Estimated memory footprint in bytes (host RAM if `pool` is
7327
+ * `"cpu"`, GPU VRAM otherwise).
7328
+ */
7329
+ memoryEstimateBytes: bigint
7330
+ /** Pool label this model targets (`"cpu"` or `"gpu:N"`). */
7331
+ pool: string
7244
7332
  }
7245
7333
 
7246
7334
  export interface JsMusicRequest {
@@ -7326,6 +7414,14 @@ export interface JsPiperOptions {
7326
7414
  cacheDir?: string
7327
7415
  }
7328
7416
 
7417
+ /** Reported per-pool budget pair returned by [`JsModelManager::pools`]. */
7418
+ export interface JsPoolBudget {
7419
+ /** Pool label (`"cpu"` or `"gpu:N"`). */
7420
+ pool: string
7421
+ /** Configured budget for the pool in bytes. */
7422
+ budgetBytes: bigint
7423
+ }
7424
+
7329
7425
  /** A single `(provider, envVar)` pair returned by [`provider_env_vars`]. */
7330
7426
  export interface JsProviderEnvVar {
7331
7427
  /** Canonical provider name (e.g. `"openai"`, `"anthropic"`). */
@@ -8041,13 +8137,22 @@ export interface ModelInfo {
8041
8137
  /**
8042
8138
  * Configuration for creating a [`JsModelManager`].
8043
8139
  *
8044
- * Exactly one of `budgetGb` or `budgetBytes` must be provided.
8140
+ * Pass either the convenience pair (`cpuRamGb` / `gpuVramGb`) for the
8141
+ * common single-GPU desktop layout, or a fully explicit `poolBudgets`
8142
+ * map for multi-GPU or custom topologies. Omit everything to receive
8143
+ * the unlimited-budget defaults (useful for tests).
8045
8144
  */
8046
8145
  export interface ModelManagerConfig {
8047
- /** VRAM budget in gigabytes (e.g. `8.0` for 8 GiB). */
8048
- budgetGb?: number
8049
- /** VRAM budget in bytes (pass as JS `BigInt` to support values >4 GiB). */
8050
- budgetBytes?: bigint
8146
+ /** Host RAM budget in gigabytes for `Pool::Cpu`. */
8147
+ cpuRamGb?: number
8148
+ /** GPU VRAM budget in gigabytes for `Pool::Gpu(0)`. */
8149
+ gpuVramGb?: number
8150
+ /**
8151
+ * Explicit per-pool budget map. Keys: `"cpu"`, `"gpu"`, `"gpu:N"`.
8152
+ * Values: bytes as `BigInt`. When provided, overrides
8153
+ * `cpuRamGb` / `gpuVramGb`.
8154
+ */
8155
+ poolBudgets?: Record<string, bigint>
8051
8156
  }
8052
8157
 
8053
8158
  /** Build an empty [`JsRetryStack`] with every scope set to `null`. */
@@ -8241,8 +8346,11 @@ export interface ProviderConfig {
8241
8346
  contextLength?: number
8242
8347
  /** Maximum output tokens the model supports. */
8243
8348
  maxOutputTokens?: number
8244
- /** Estimated VRAM footprint in bytes when loaded. */
8245
- vramEstimateBytes?: number
8349
+ /**
8350
+ * Estimated memory footprint in bytes when loaded (host RAM if on CPU,
8351
+ * GPU VRAM otherwise).
8352
+ */
8353
+ memoryEstimateBytes?: number
8246
8354
  /** Pricing information for automatic cost tracking. */
8247
8355
  pricing?: JsModelPricing
8248
8356
  /** Capability flags. */
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "blazen",
3
- "version": "0.5.1",
3
+ "version": "0.5.2",
4
4
  "description": "Blazen - Event-driven AI workflow framework for Node.js/TypeScript",
5
5
  "main": "index.js",
6
6
  "types": "index.d.ts",
@@ -88,13 +88,13 @@
88
88
  "verbose": true
89
89
  },
90
90
  "optionalDependencies": {
91
- "@blazen-dev/blazen-linux-x64-gnu": "0.5.1",
92
- "@blazen-dev/blazen-linux-x64-musl": "0.5.1",
93
- "@blazen-dev/blazen-linux-arm64-gnu": "0.5.1",
94
- "@blazen-dev/blazen-linux-arm64-musl": "0.5.1",
95
- "@blazen-dev/blazen-darwin-arm64": "0.5.1",
96
- "@blazen-dev/blazen-win32-x64-msvc": "0.5.1",
97
- "@blazen-dev/blazen-wasm32-wasi": "0.5.1"
91
+ "@blazen-dev/blazen-linux-x64-gnu": "0.5.2",
92
+ "@blazen-dev/blazen-linux-x64-musl": "0.5.2",
93
+ "@blazen-dev/blazen-linux-arm64-gnu": "0.5.2",
94
+ "@blazen-dev/blazen-linux-arm64-musl": "0.5.2",
95
+ "@blazen-dev/blazen-darwin-arm64": "0.5.2",
96
+ "@blazen-dev/blazen-win32-x64-msvc": "0.5.2",
97
+ "@blazen-dev/blazen-wasm32-wasi": "0.5.2"
98
98
  },
99
99
  "scripts": {
100
100
  "build": "napi build --release --platform --features local-all,langfuse --js index.js && node scripts/post-build.mjs",