react-native-nitro-mlx 0.1.1 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31) hide show
  1. package/ios/Sources/HybridLLM.swift +115 -5
  2. package/lib/module/index.js +1 -1
  3. package/lib/module/index.js.map +1 -1
  4. package/lib/module/llm.js +23 -3
  5. package/lib/module/llm.js.map +1 -1
  6. package/lib/module/models.js +227 -0
  7. package/lib/module/models.js.map +1 -1
  8. package/lib/typescript/src/index.d.ts +3 -3
  9. package/lib/typescript/src/index.d.ts.map +1 -1
  10. package/lib/typescript/src/llm.d.ts +21 -3
  11. package/lib/typescript/src/llm.d.ts.map +1 -1
  12. package/lib/typescript/src/models.d.ts +27 -0
  13. package/lib/typescript/src/models.d.ts.map +1 -1
  14. package/lib/typescript/src/specs/LLM.nitro.d.ts +29 -2
  15. package/lib/typescript/src/specs/LLM.nitro.d.ts.map +1 -1
  16. package/nitrogen/generated/ios/MLXReactNative-Swift-Cxx-Bridge.hpp +87 -0
  17. package/nitrogen/generated/ios/MLXReactNative-Swift-Cxx-Umbrella.hpp +7 -0
  18. package/nitrogen/generated/ios/c++/HybridLLMSpecSwift.hpp +30 -2
  19. package/nitrogen/generated/ios/swift/HybridLLMSpec.swift +4 -1
  20. package/nitrogen/generated/ios/swift/HybridLLMSpec_cxx.swift +42 -7
  21. package/nitrogen/generated/ios/swift/LLMLoadOptions.swift +138 -0
  22. package/nitrogen/generated/ios/swift/LLMMessage.swift +47 -0
  23. package/nitrogen/generated/shared/c++/HybridLLMSpec.cpp +3 -0
  24. package/nitrogen/generated/shared/c++/HybridLLMSpec.hpp +12 -1
  25. package/nitrogen/generated/shared/c++/LLMLoadOptions.hpp +87 -0
  26. package/nitrogen/generated/shared/c++/LLMMessage.hpp +79 -0
  27. package/package.json +1 -9
  28. package/src/index.ts +10 -3
  29. package/src/llm.ts +32 -4
  30. package/src/models.ts +267 -0
  31. package/src/specs/LLM.nitro.ts +34 -2
@@ -0,0 +1,87 @@
1
+ ///
2
+ /// LLMLoadOptions.hpp
3
+ /// This file was generated by nitrogen. DO NOT MODIFY THIS FILE.
4
+ /// https://github.com/mrousavy/nitro
5
+ /// Copyright © 2025 Marc Rousavy @ Margelo
6
+ ///
7
+
8
+ #pragma once
9
+
10
+ #if __has_include(<NitroModules/JSIConverter.hpp>)
11
+ #include <NitroModules/JSIConverter.hpp>
12
+ #else
13
+ #error NitroModules cannot be found! Are you sure you installed NitroModules properly?
14
+ #endif
15
+ #if __has_include(<NitroModules/NitroDefines.hpp>)
16
+ #include <NitroModules/NitroDefines.hpp>
17
+ #else
18
+ #error NitroModules cannot be found! Are you sure you installed NitroModules properly?
19
+ #endif
20
+ #if __has_include(<NitroModules/JSIHelpers.hpp>)
21
+ #include <NitroModules/JSIHelpers.hpp>
22
+ #else
23
+ #error NitroModules cannot be found! Are you sure you installed NitroModules properly?
24
+ #endif
25
+
26
+ // Forward declaration of `LLMMessage` to properly resolve imports.
27
+ namespace margelo::nitro::mlxreactnative { struct LLMMessage; }
28
+
29
+ #include <functional>
30
+ #include <optional>
31
+ #include "LLMMessage.hpp"
32
+ #include <vector>
33
+
34
+ namespace margelo::nitro::mlxreactnative {
35
+
36
+ /**
37
+ * A struct which can be represented as a JavaScript object (LLMLoadOptions).
38
+ */
39
+ struct LLMLoadOptions {
40
+ public:
41
+ std::optional<std::function<void(double /* progress */)>> onProgress SWIFT_PRIVATE;
42
+ std::optional<std::vector<LLMMessage>> additionalContext SWIFT_PRIVATE;
43
+ std::optional<bool> manageHistory SWIFT_PRIVATE;
44
+
45
+ public:
46
+ LLMLoadOptions() = default;
47
+ explicit LLMLoadOptions(std::optional<std::function<void(double /* progress */)>> onProgress, std::optional<std::vector<LLMMessage>> additionalContext, std::optional<bool> manageHistory): onProgress(onProgress), additionalContext(additionalContext), manageHistory(manageHistory) {}
48
+ };
49
+
50
+ } // namespace margelo::nitro::mlxreactnative
51
+
52
+ namespace margelo::nitro {
53
+
54
+ // C++ LLMLoadOptions <> JS LLMLoadOptions (object)
55
+ template <>
56
+ struct JSIConverter<margelo::nitro::mlxreactnative::LLMLoadOptions> final {
57
+ static inline margelo::nitro::mlxreactnative::LLMLoadOptions fromJSI(jsi::Runtime& runtime, const jsi::Value& arg) {
58
+ jsi::Object obj = arg.asObject(runtime);
59
+ return margelo::nitro::mlxreactnative::LLMLoadOptions(
60
+ JSIConverter<std::optional<std::function<void(double)>>>::fromJSI(runtime, obj.getProperty(runtime, "onProgress")),
61
+ JSIConverter<std::optional<std::vector<margelo::nitro::mlxreactnative::LLMMessage>>>::fromJSI(runtime, obj.getProperty(runtime, "additionalContext")),
62
+ JSIConverter<std::optional<bool>>::fromJSI(runtime, obj.getProperty(runtime, "manageHistory"))
63
+ );
64
+ }
65
+ static inline jsi::Value toJSI(jsi::Runtime& runtime, const margelo::nitro::mlxreactnative::LLMLoadOptions& arg) {
66
+ jsi::Object obj(runtime);
67
+ obj.setProperty(runtime, "onProgress", JSIConverter<std::optional<std::function<void(double)>>>::toJSI(runtime, arg.onProgress));
68
+ obj.setProperty(runtime, "additionalContext", JSIConverter<std::optional<std::vector<margelo::nitro::mlxreactnative::LLMMessage>>>::toJSI(runtime, arg.additionalContext));
69
+ obj.setProperty(runtime, "manageHistory", JSIConverter<std::optional<bool>>::toJSI(runtime, arg.manageHistory));
70
+ return obj;
71
+ }
72
+ static inline bool canConvert(jsi::Runtime& runtime, const jsi::Value& value) {
73
+ if (!value.isObject()) {
74
+ return false;
75
+ }
76
+ jsi::Object obj = value.getObject(runtime);
77
+ if (!nitro::isPlainObject(runtime, obj)) {
78
+ return false;
79
+ }
80
+ if (!JSIConverter<std::optional<std::function<void(double)>>>::canConvert(runtime, obj.getProperty(runtime, "onProgress"))) return false;
81
+ if (!JSIConverter<std::optional<std::vector<margelo::nitro::mlxreactnative::LLMMessage>>>::canConvert(runtime, obj.getProperty(runtime, "additionalContext"))) return false;
82
+ if (!JSIConverter<std::optional<bool>>::canConvert(runtime, obj.getProperty(runtime, "manageHistory"))) return false;
83
+ return true;
84
+ }
85
+ };
86
+
87
+ } // namespace margelo::nitro
@@ -0,0 +1,79 @@
1
+ ///
2
+ /// LLMMessage.hpp
3
+ /// This file was generated by nitrogen. DO NOT MODIFY THIS FILE.
4
+ /// https://github.com/mrousavy/nitro
5
+ /// Copyright © 2025 Marc Rousavy @ Margelo
6
+ ///
7
+
8
+ #pragma once
9
+
10
+ #if __has_include(<NitroModules/JSIConverter.hpp>)
11
+ #include <NitroModules/JSIConverter.hpp>
12
+ #else
13
+ #error NitroModules cannot be found! Are you sure you installed NitroModules properly?
14
+ #endif
15
+ #if __has_include(<NitroModules/NitroDefines.hpp>)
16
+ #include <NitroModules/NitroDefines.hpp>
17
+ #else
18
+ #error NitroModules cannot be found! Are you sure you installed NitroModules properly?
19
+ #endif
20
+ #if __has_include(<NitroModules/JSIHelpers.hpp>)
21
+ #include <NitroModules/JSIHelpers.hpp>
22
+ #else
23
+ #error NitroModules cannot be found! Are you sure you installed NitroModules properly?
24
+ #endif
25
+
26
+
27
+
28
+ #include <string>
29
+
30
+ namespace margelo::nitro::mlxreactnative {
31
+
32
+ /**
33
+ * A struct which can be represented as a JavaScript object (LLMMessage).
34
+ */
35
+ struct LLMMessage {
36
+ public:
37
+ std::string role SWIFT_PRIVATE;
38
+ std::string content SWIFT_PRIVATE;
39
+
40
+ public:
41
+ LLMMessage() = default;
42
+ explicit LLMMessage(std::string role, std::string content): role(role), content(content) {}
43
+ };
44
+
45
+ } // namespace margelo::nitro::mlxreactnative
46
+
47
+ namespace margelo::nitro {
48
+
49
+ // C++ LLMMessage <> JS LLMMessage (object)
50
+ template <>
51
+ struct JSIConverter<margelo::nitro::mlxreactnative::LLMMessage> final {
52
+ static inline margelo::nitro::mlxreactnative::LLMMessage fromJSI(jsi::Runtime& runtime, const jsi::Value& arg) {
53
+ jsi::Object obj = arg.asObject(runtime);
54
+ return margelo::nitro::mlxreactnative::LLMMessage(
55
+ JSIConverter<std::string>::fromJSI(runtime, obj.getProperty(runtime, "role")),
56
+ JSIConverter<std::string>::fromJSI(runtime, obj.getProperty(runtime, "content"))
57
+ );
58
+ }
59
+ static inline jsi::Value toJSI(jsi::Runtime& runtime, const margelo::nitro::mlxreactnative::LLMMessage& arg) {
60
+ jsi::Object obj(runtime);
61
+ obj.setProperty(runtime, "role", JSIConverter<std::string>::toJSI(runtime, arg.role));
62
+ obj.setProperty(runtime, "content", JSIConverter<std::string>::toJSI(runtime, arg.content));
63
+ return obj;
64
+ }
65
+ static inline bool canConvert(jsi::Runtime& runtime, const jsi::Value& value) {
66
+ if (!value.isObject()) {
67
+ return false;
68
+ }
69
+ jsi::Object obj = value.getObject(runtime);
70
+ if (!nitro::isPlainObject(runtime, obj)) {
71
+ return false;
72
+ }
73
+ if (!JSIConverter<std::string>::canConvert(runtime, obj.getProperty(runtime, "role"))) return false;
74
+ if (!JSIConverter<std::string>::canConvert(runtime, obj.getProperty(runtime, "content"))) return false;
75
+ return true;
76
+ }
77
+ };
78
+
79
+ } // namespace margelo::nitro
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "react-native-nitro-mlx",
3
- "version": "0.1.1",
3
+ "version": "0.2.1",
4
4
  "description": "Nitro module package",
5
5
  "main": "./lib/module/index.js",
6
6
  "module": "./lib/module/index.js",
@@ -89,14 +89,6 @@
89
89
  "requireCleanWorkingDir": false
90
90
  },
91
91
  "plugins": {
92
- "@release-it/bumper": {
93
- "out": [
94
- {
95
- "file": "package.json",
96
- "path": "version"
97
- }
98
- ]
99
- },
100
92
  "@release-it/conventional-changelog": {
101
93
  "preset": {
102
94
  "name": "conventionalcommits",
package/src/index.ts CHANGED
@@ -1,6 +1,13 @@
1
- export { LLM } from './llm'
1
+ export { LLM, type Message } from './llm'
2
2
  export { ModelManager } from './modelManager'
3
- export { MLXModel } from './models'
3
+ export {
4
+ MLXModel,
5
+ MLXModels,
6
+ ModelFamily,
7
+ type ModelInfo,
8
+ ModelProvider,
9
+ type ModelQuantization,
10
+ } from './models'
4
11
 
5
- export type { GenerationStats, LLM as LLMSpec } from './specs/LLM.nitro'
12
+ export type { GenerationStats, LLM as LLMSpec, LLMLoadOptions } from './specs/LLM.nitro'
6
13
  export type { ModelManager as ModelManagerSpec } from './specs/ModelManager.nitro'
package/src/llm.ts CHANGED
@@ -1,8 +1,13 @@
1
1
  import { NitroModules } from 'react-native-nitro-modules'
2
- import type { GenerationStats, LLM as LLMSpec } from './specs/LLM.nitro'
2
+ import type { GenerationStats, LLMLoadOptions, LLM as LLMSpec } from './specs/LLM.nitro'
3
3
 
4
4
  let instance: LLMSpec | null = null
5
5
 
6
+ export type Message = {
7
+ role: 'user' | 'assistant' | 'system'
8
+ content: string
9
+ }
10
+
6
11
  function getInstance(): LLMSpec {
7
12
  if (!instance) {
8
13
  instance = NitroModules.createHybridObject<LLMSpec>('LLM')
@@ -36,10 +41,10 @@ export const LLM = {
36
41
  /**
37
42
  * Load a model into memory. Downloads the model from HuggingFace if not already cached.
38
43
  * @param modelId - HuggingFace model ID (e.g., 'mlx-community/Qwen3-0.6B-4bit')
39
- * @param onProgress - Callback invoked with loading progress (0-1)
44
+ * @param options - Load options: optional progress callback (0-1), additional context messages, and history-management flag
40
45
  */
41
- load(modelId: string, onProgress: (progress: number) => void): Promise<void> {
42
- return getInstance().load(modelId, onProgress)
46
+ load(modelId: string, options?: LLMLoadOptions): Promise<void> {
47
+ return getInstance().load(modelId, options)
43
48
  },
44
49
 
45
50
  /**
@@ -69,6 +74,14 @@ export const LLM = {
69
74
  getInstance().stop()
70
75
  },
71
76
 
77
+ /**
78
+ * Unload the current model and release memory.
79
+ * Call this when you're done with the model to free up memory.
80
+ */
81
+ unload(): void {
82
+ getInstance().unload()
83
+ },
84
+
72
85
  /**
73
86
  * Get statistics from the last generation.
74
87
  * @returns Statistics including token count, tokens/sec, TTFT, and total time
@@ -77,6 +90,21 @@ export const LLM = {
77
90
  return getInstance().getLastGenerationStats()
78
91
  },
79
92
 
93
+ /**
94
+ * Get the message history if management is enabled.
95
+ * @returns Array of messages in the history
96
+ */
97
+ getHistory(): Message[] {
98
+ return getInstance().getHistory() as Message[]
99
+ },
100
+
101
+ /**
102
+ * Clear the message history.
103
+ */
104
+ clearHistory(): void {
105
+ getInstance().clearHistory()
106
+ },
107
+
80
108
  /** Whether a model is currently loaded and ready for generation */
81
109
  get isLoaded(): boolean {
82
110
  return getInstance().isLoaded
package/src/models.ts CHANGED
@@ -1,3 +1,33 @@
1
+ export enum ModelFamily {
2
+ Llama = 'Llama',
3
+ Qwen = 'Qwen',
4
+ Gemma = 'Gemma',
5
+ Phi = 'Phi',
6
+ SmolLM = 'SmolLM',
7
+ OpenELM = 'OpenELM',
8
+ }
9
+
10
+ export enum ModelProvider {
11
+ Meta = 'Meta',
12
+ Alibaba = 'Alibaba',
13
+ Google = 'Google',
14
+ Microsoft = 'Microsoft',
15
+ HuggingFace = 'HuggingFace',
16
+ Apple = 'Apple',
17
+ }
18
+
19
+ export type ModelQuantization = '4bit' | '8bit'
20
+
21
+ export interface ModelInfo {
22
+ id: MLXModel
23
+ family: ModelFamily
24
+ provider: ModelProvider
25
+ parameters: string
26
+ quantization: ModelQuantization
27
+ displayName: string
28
+ downloadSize: number
29
+ }
30
+
1
31
  export enum MLXModel {
2
32
  // Llama 3.2 (Meta) - 1B and 3B variants
3
33
  Llama_3_2_1B_Instruct_4bit = 'mlx-community/Llama-3.2-1B-Instruct-4bit',
@@ -43,3 +73,240 @@ export enum MLXModel {
43
73
  OpenELM_3B_4bit = 'mlx-community/OpenELM-3B-4bit',
44
74
  OpenELM_3B_8bit = 'mlx-community/OpenELM-3B-8bit',
45
75
  }
76
+
77
+ export const MLXModels: ModelInfo[] = [
78
+ {
79
+ id: MLXModel.Llama_3_2_1B_Instruct_4bit,
80
+ family: ModelFamily.Llama,
81
+ provider: ModelProvider.Meta,
82
+ parameters: '1B',
83
+ quantization: '4bit',
84
+ displayName: 'Llama 3.2 1B Instruct (4-bit)',
85
+ downloadSize: 1407777762,
86
+ },
87
+ {
88
+ id: MLXModel.Llama_3_2_1B_Instruct_8bit,
89
+ family: ModelFamily.Llama,
90
+ provider: ModelProvider.Meta,
91
+ parameters: '1B',
92
+ quantization: '8bit',
93
+ displayName: 'Llama 3.2 1B Instruct (8-bit)',
94
+ downloadSize: 1313157436,
95
+ },
96
+ {
97
+ id: MLXModel.Llama_3_2_3B_Instruct_4bit,
98
+ family: ModelFamily.Llama,
99
+ provider: ModelProvider.Meta,
100
+ parameters: '3B',
101
+ quantization: '4bit',
102
+ displayName: 'Llama 3.2 3B Instruct (4-bit)',
103
+ downloadSize: 2019397474,
104
+ },
105
+ {
106
+ id: MLXModel.Llama_3_2_3B_Instruct_8bit,
107
+ family: ModelFamily.Llama,
108
+ provider: ModelProvider.Meta,
109
+ parameters: '3B',
110
+ quantization: '8bit',
111
+ displayName: 'Llama 3.2 3B Instruct (8-bit)',
112
+ downloadSize: 3413784042,
113
+ },
114
+ {
115
+ id: MLXModel.Qwen2_5_0_5B_Instruct_4bit,
116
+ family: ModelFamily.Qwen,
117
+ provider: ModelProvider.Alibaba,
118
+ parameters: '0.5B',
119
+ quantization: '4bit',
120
+ displayName: 'Qwen 2.5 0.5B Instruct (4-bit)',
121
+ downloadSize: 278064920,
122
+ },
123
+ {
124
+ id: MLXModel.Qwen2_5_0_5B_Instruct_8bit,
125
+ family: ModelFamily.Qwen,
126
+ provider: ModelProvider.Alibaba,
127
+ parameters: '0.5B',
128
+ quantization: '8bit',
129
+ displayName: 'Qwen 2.5 0.5B Instruct (8-bit)',
130
+ downloadSize: 525045902,
131
+ },
132
+ {
133
+ id: MLXModel.Qwen2_5_1_5B_Instruct_4bit,
134
+ family: ModelFamily.Qwen,
135
+ provider: ModelProvider.Alibaba,
136
+ parameters: '1.5B',
137
+ quantization: '4bit',
138
+ displayName: 'Qwen 2.5 1.5B Instruct (4-bit)',
139
+ downloadSize: 868628559,
140
+ },
141
+ {
142
+ id: MLXModel.Qwen2_5_1_5B_Instruct_8bit,
143
+ family: ModelFamily.Qwen,
144
+ provider: ModelProvider.Alibaba,
145
+ parameters: '1.5B',
146
+ quantization: '8bit',
147
+ displayName: 'Qwen 2.5 1.5B Instruct (8-bit)',
148
+ downloadSize: 1640414038,
149
+ },
150
+ {
151
+ id: MLXModel.Qwen2_5_3B_Instruct_4bit,
152
+ family: ModelFamily.Qwen,
153
+ provider: ModelProvider.Alibaba,
154
+ parameters: '3B',
155
+ quantization: '4bit',
156
+ displayName: 'Qwen 2.5 3B Instruct (4-bit)',
157
+ downloadSize: 1736293090,
158
+ },
159
+ {
160
+ id: MLXModel.Qwen2_5_3B_Instruct_8bit,
161
+ family: ModelFamily.Qwen,
162
+ provider: ModelProvider.Alibaba,
163
+ parameters: '3B',
164
+ quantization: '8bit',
165
+ displayName: 'Qwen 2.5 3B Instruct (8-bit)',
166
+ downloadSize: 3279142142,
167
+ },
168
+ {
169
+ id: MLXModel.Qwen3_1_7B_4bit,
170
+ family: ModelFamily.Qwen,
171
+ provider: ModelProvider.Alibaba,
172
+ parameters: '1.7B',
173
+ quantization: '4bit',
174
+ displayName: 'Qwen 3 1.7B (4-bit)',
175
+ downloadSize: 979502864,
176
+ },
177
+ {
178
+ id: MLXModel.Qwen3_1_7B_8bit,
179
+ family: ModelFamily.Qwen,
180
+ provider: ModelProvider.Alibaba,
181
+ parameters: '1.7B',
182
+ quantization: '8bit',
183
+ displayName: 'Qwen 3 1.7B (8-bit)',
184
+ downloadSize: 1839729195,
185
+ },
186
+ {
187
+ id: MLXModel.Gemma_3_1B_IT_4bit,
188
+ family: ModelFamily.Gemma,
189
+ provider: ModelProvider.Google,
190
+ parameters: '1B',
191
+ quantization: '4bit',
192
+ displayName: 'Gemma 3 1B IT (4-bit)',
193
+ downloadSize: 770650946,
194
+ },
195
+ {
196
+ id: MLXModel.Gemma_3_1B_IT_8bit,
197
+ family: ModelFamily.Gemma,
198
+ provider: ModelProvider.Google,
199
+ parameters: '1B',
200
+ quantization: '8bit',
201
+ displayName: 'Gemma 3 1B IT (8-bit)',
202
+ downloadSize: 1421522471,
203
+ },
204
+ {
205
+ id: MLXModel.Phi_3_5_Mini_Instruct_4bit,
206
+ family: ModelFamily.Phi,
207
+ provider: ModelProvider.Microsoft,
208
+ parameters: '3.8B',
209
+ quantization: '4bit',
210
+ displayName: 'Phi 3.5 Mini Instruct (4-bit)',
211
+ downloadSize: 2150195856,
212
+ },
213
+ {
214
+ id: MLXModel.Phi_3_5_Mini_Instruct_8bit,
215
+ family: ModelFamily.Phi,
216
+ provider: ModelProvider.Microsoft,
217
+ parameters: '3.8B',
218
+ quantization: '8bit',
219
+ displayName: 'Phi 3.5 Mini Instruct (8-bit)',
220
+ downloadSize: 4060636056,
221
+ },
222
+ {
223
+ id: MLXModel.Phi_4_Mini_Instruct_4bit,
224
+ family: ModelFamily.Phi,
225
+ provider: ModelProvider.Microsoft,
226
+ parameters: '3.8B',
227
+ quantization: '4bit',
228
+ displayName: 'Phi 4 Mini Instruct (4-bit)',
229
+ downloadSize: 2173624891,
230
+ },
231
+ {
232
+ id: MLXModel.Phi_4_Mini_Instruct_8bit,
233
+ family: ModelFamily.Phi,
234
+ provider: ModelProvider.Microsoft,
235
+ parameters: '3.8B',
236
+ quantization: '8bit',
237
+ displayName: 'Phi 4 Mini Instruct (8-bit)',
238
+ downloadSize: 4091536167,
239
+ },
240
+ {
241
+ id: MLXModel.SmolLM_1_7B_Instruct_4bit,
242
+ family: ModelFamily.SmolLM,
243
+ provider: ModelProvider.HuggingFace,
244
+ parameters: '1.7B',
245
+ quantization: '4bit',
246
+ displayName: 'SmolLM 1.7B Instruct (4-bit)',
247
+ downloadSize: 962855374,
248
+ },
249
+ {
250
+ id: MLXModel.SmolLM_1_7B_Instruct_8bit,
251
+ family: ModelFamily.SmolLM,
252
+ provider: ModelProvider.HuggingFace,
253
+ parameters: '1.7B',
254
+ quantization: '8bit',
255
+ displayName: 'SmolLM 1.7B Instruct (8-bit)',
256
+ downloadSize: 1818493993,
257
+ },
258
+ {
259
+ id: MLXModel.SmolLM2_1_7B_Instruct_4bit,
260
+ family: ModelFamily.SmolLM,
261
+ provider: ModelProvider.HuggingFace,
262
+ parameters: '1.7B',
263
+ quantization: '4bit',
264
+ displayName: 'SmolLM2 1.7B Instruct (4-bit)',
265
+ downloadSize: 980000000,
266
+ },
267
+ {
268
+ id: MLXModel.SmolLM2_1_7B_Instruct_8bit,
269
+ family: ModelFamily.SmolLM,
270
+ provider: ModelProvider.HuggingFace,
271
+ parameters: '1.7B',
272
+ quantization: '8bit',
273
+ displayName: 'SmolLM2 1.7B Instruct (8-bit)',
274
+ downloadSize: 1850000000,
275
+ },
276
+ {
277
+ id: MLXModel.OpenELM_1_1B_4bit,
278
+ family: ModelFamily.OpenELM,
279
+ provider: ModelProvider.Apple,
280
+ parameters: '1.1B',
281
+ quantization: '4bit',
282
+ displayName: 'OpenELM 1.1B (4-bit)',
283
+ downloadSize: 608162655,
284
+ },
285
+ {
286
+ id: MLXModel.OpenELM_1_1B_8bit,
287
+ family: ModelFamily.OpenELM,
288
+ provider: ModelProvider.Apple,
289
+ parameters: '1.1B',
290
+ quantization: '8bit',
291
+ displayName: 'OpenELM 1.1B (8-bit)',
292
+ downloadSize: 1148048397,
293
+ },
294
+ {
295
+ id: MLXModel.OpenELM_3B_4bit,
296
+ family: ModelFamily.OpenELM,
297
+ provider: ModelProvider.Apple,
298
+ parameters: '3B',
299
+ quantization: '4bit',
300
+ displayName: 'OpenELM 3B (4-bit)',
301
+ downloadSize: 1650000000,
302
+ },
303
+ {
304
+ id: MLXModel.OpenELM_3B_8bit,
305
+ family: ModelFamily.OpenELM,
306
+ provider: ModelProvider.Apple,
307
+ parameters: '3B',
308
+ quantization: '8bit',
309
+ displayName: 'OpenELM 3B (8-bit)',
310
+ downloadSize: 3100000000,
311
+ },
312
+ ]
@@ -14,6 +14,22 @@ export interface GenerationStats {
14
14
  totalTime: number
15
15
  }
16
16
 
17
+ export interface LLMMessage {
18
+ role: string
19
+ content: string
20
+ }
21
+
22
+ /** Options for loading a model.
23
+ */
24
+ export interface LLMLoadOptions {
25
+ /** Callback invoked with loading progress (0-1) */
26
+ onProgress?: (progress: number) => void
27
+ /** Additional context to provide to the model */
28
+ additionalContext?: LLMMessage[]
29
+ /** Whether to automatically manage message history */
30
+ manageHistory?: boolean
31
+ }
32
+
17
33
  /**
18
34
  * Low-level LLM interface for text generation using MLX.
19
35
  * @internal Use the `LLM` export from `react-native-nitro-mlx` instead.
@@ -22,9 +38,9 @@ export interface LLM extends HybridObject<{ ios: 'swift' }> {
22
38
  /**
23
39
  * Load a model into memory. Downloads from HuggingFace if not already cached.
24
40
  * @param modelId - HuggingFace model ID (e.g., 'mlx-community/Qwen3-0.6B-4bit')
25
- * @param onProgress - Callback invoked with loading progress (0-1)
41
+ * @param options - Load options: optional progress callback (0-1), additional context messages, and history-management flag
26
42
  */
27
- load(modelId: string, onProgress: (progress: number) => void): Promise<void>
43
+ load(modelId: string, options?: LLMLoadOptions): Promise<void>
28
44
 
29
45
  /**
30
46
  * Generate a complete response for a prompt.
@@ -46,12 +62,28 @@ export interface LLM extends HybridObject<{ ios: 'swift' }> {
46
62
  */
47
63
  stop(): void
48
64
 
65
+ /**
66
+ * Unload the current model and release memory.
67
+ */
68
+ unload(): void
69
+
49
70
  /**
50
71
  * Get statistics from the last generation.
51
72
  * @returns Statistics including token count, speed, and timing
52
73
  */
53
74
  getLastGenerationStats(): GenerationStats
54
75
 
76
+ /**
77
+ * Get the message history if management is enabled.
78
+ * @returns Array of messages in the history
79
+ */
80
+ getHistory(): LLMMessage[]
81
+
82
+ /**
83
+ * Clear the message history.
84
+ */
85
+ clearHistory(): void
86
+
55
87
  /** Whether a model is currently loaded */
56
88
  readonly isLoaded: boolean
57
89
  /** Whether text is currently being generated */