react-native-litert-lm 0.2.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. package/README.md +331 -150
  2. package/android/build.gradle +1 -1
  3. package/android/src/main/java/com/margelo/nitro/dev/litert/litertlm/HybridLiteRTLM.kt +140 -37
  4. package/app.plugin.js +33 -0
  5. package/cpp/HybridLiteRTLM.cpp +577 -378
  6. package/cpp/HybridLiteRTLM.hpp +66 -23
  7. package/cpp/IOSDownloadHelper.h +24 -0
  8. package/cpp/cpp-adapter.cpp +10 -2
  9. package/cpp/include/litert_lm_engine.h +502 -0
  10. package/ios/IOSDownloadHelper.mm +129 -0
  11. package/ios/LiteRTLMAutolinking.mm +30 -0
  12. package/lib/hooks.d.ts +33 -3
  13. package/lib/hooks.js +54 -23
  14. package/lib/index.d.ts +4 -1
  15. package/lib/index.js +6 -6
  16. package/lib/memoryTracker.d.ts +128 -0
  17. package/lib/memoryTracker.js +155 -0
  18. package/lib/modelFactory.d.ts +21 -2
  19. package/lib/modelFactory.js +78 -11
  20. package/lib/specs/LiteRTLM.nitro.d.ts +19 -0
  21. package/nitrogen/generated/android/LiteRTLMOnLoad.cpp +28 -18
  22. package/nitrogen/generated/android/LiteRTLMOnLoad.hpp +13 -4
  23. package/nitrogen/generated/android/c++/JHybridLiteRTLMSpec.cpp +39 -36
  24. package/nitrogen/generated/android/c++/JHybridLiteRTLMSpec.hpp +20 -22
  25. package/nitrogen/generated/android/c++/JMemoryUsage.hpp +69 -0
  26. package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/HybridLiteRTLMSpec.kt +19 -18
  27. package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/MemoryUsage.kt +47 -0
  28. package/nitrogen/generated/shared/c++/HybridLiteRTLMSpec.cpp +1 -0
  29. package/nitrogen/generated/shared/c++/HybridLiteRTLMSpec.hpp +4 -0
  30. package/nitrogen/generated/shared/c++/MemoryUsage.hpp +95 -0
  31. package/package.json +12 -5
  32. package/react-native-litert-lm.podspec +20 -7
  33. package/scripts/build-ios-engine.sh +283 -0
  34. package/scripts/download-ios-frameworks.sh +72 -0
  35. package/scripts/postinstall.js +116 -0
  36. package/scripts/stubs/cxx_bridge_stubs.cc +224 -0
  37. package/scripts/stubs/gemma_model_constraint_provider.cc +46 -0
  38. package/scripts/stubs/llguidance_stubs.c +101 -0
  39. package/src/hooks.ts +107 -41
  40. package/src/index.ts +13 -6
  41. package/src/memoryTracker.ts +268 -0
  42. package/src/modelFactory.ts +107 -11
  43. package/src/specs/LiteRTLM.nitro.ts +21 -0
@@ -0,0 +1,224 @@
1
+ /**
2
+ * cxx_bridge_stubs.cc
3
+ *
4
+ * Provides stub implementations for the Rust CXX bridge runtime.
5
+ *
6
+ * ARCHITECTURE NOTES:
7
+ * The CXX bridge generates two halves:
8
+ * 1. Rust .o files define C++ mangled wrappers (e.g., litert::lm::new_minijinja_template)
9
+ * that call extern "C" shims (e.g., _litert$lm$cxxbridge1$new_minijinja_template)
10
+ * 2. Generated C++ code defines those extern "C" shims, which call the actual Rust FFI entry points
11
+ *
12
+ * Since we only have half #1 (the Rust .o), we provide stub extern "C" shims.
13
+ *
14
+ * CRITICAL: These stubs must NOT forward (via __asm__) to the C++ mangled functions
15
+ * in the Rust .o files. Those functions call RIGHT BACK to these extern "C" stubs,
16
+ * creating infinite recursion → stack overflow → SIGBUS.
17
+ *
18
+ * ALLOCATOR CONSISTENCY: All stubs use calloc/free. Since our stub creates the objects
19
+ * (not real Rust code), our drop/dealloc stubs using free() are perfectly consistent.
20
+ * If the real Rust code created the objects, there would be a mismatch; that never happens
21
+ * because these stubs intercept the calls before any Rust code can run.
22
+ */
23
+
24
+ #include <cstddef>
25
+ #include <cstdlib>
26
+ #include <cstring>
27
+ #include <string>
28
+
29
+ // ============================================================================
30
+ // Part 1: CXX Runtime Types
31
+ // ============================================================================
32
+
33
+ namespace rust {
34
+ namespace cxxbridge1 {
35
+
36
class String;

/**
 * Stub of the CXX runtime's borrowed string slice: a non-owning
 * (pointer, length) view. The viewed bytes are never copied or freed by Str,
 * so they must outlive it.
 */
class Str {
public:
  const char* ptr;
  size_t len;
  Str(const char* s);
  Str(const String& s);
  const char* data() const;
  size_t size() const;
  operator std::string() const;
};

/**
 * Stub of the CXX runtime's owned string.
 *
 * Storage comes from malloc() and is released with free() in the destructor.
 * The representation is three words (pointer, length, capacity); an all-zero
 * object is a valid empty string, which is what the extern "C" shims in this
 * file produce when they zero-fill a result.
 *
 * Member functions are deliberately defined out of line (not inline) so that
 * their symbols are emitted by this translation unit.
 */
class String {
public:
  struct Repr { char* ptr; size_t len; size_t cap; };
  Repr repr_;
  String(String&& o) noexcept;
  String(const std::string& s);
  ~String();
  const char* data() const;
  size_t size() const;
  operator std::string() const;
};

// Views a NUL-terminated C string; a null pointer yields an empty view.
Str::Str(const char* s) : ptr(s), len(s ? strlen(s) : 0) {}
// Views the bytes currently owned by `s`; no copy is made.
Str::Str(const String& s) : ptr(s.data()), len(s.size()) {}
const char* Str::data() const { return ptr; }
size_t Str::size() const { return len; }
// Copies the viewed bytes. An empty view never touches `ptr`, which may be null.
Str::operator std::string() const { return len ? std::string(ptr, len) : std::string(); }

// Takes over `o`'s buffer and leaves `o` as a valid empty string.
String::String(String&& o) noexcept : repr_(o.repr_) { o.repr_ = {nullptr, 0, 0}; }

// Copies `s`, including its terminating NUL, into a fresh malloc() buffer.
String::String(const std::string& s) {
  repr_.len = s.size();
  repr_.cap = s.size() + 1;
  repr_.ptr = static_cast<char*>(malloc(repr_.cap));
  if (repr_.ptr) {
    memcpy(repr_.ptr, s.c_str(), repr_.cap);
  } else {
    // Allocation failed: degrade to an empty string. Leaving len/cap set
    // would make size() describe bytes that data() cannot provide.
    repr_.len = 0;
    repr_.cap = 0;
  }
}

String::~String() { if (repr_.ptr) free(repr_.ptr); }
// Never returns null: an empty/moved-from string reads as "".
const char* String::data() const { return repr_.ptr ? repr_.ptr : ""; }
size_t String::size() const { return repr_.len; }
String::operator std::string() const { return std::string(data(), size()); }
78
+
79
+ template<typename T> class Vec { // Stub of the CXX runtime's Vec<T>: a raw (pointer, length, capacity) triple.
80
+ public:
81
+ T* data_; size_t len_; size_t cap_;
82
+ Vec(); // starts empty: null data, zero length and capacity
83
+ const T* data() const; // returns the stored buffer pointer (may be null)
84
+ size_t size() const; // returns the stored length
85
+ void drop(); // free()s the buffer and resets to empty; element destructors are not run
86
+ };
87
+
88
+ template<typename T> Vec<T>::Vec() : data_(nullptr), len_(0), cap_(0) {}
89
+ template<typename T> const T* Vec<T>::data() const { return data_; }
90
+ template<typename T> size_t Vec<T>::size() const { return len_; }
91
+ template<typename T> void Vec<T>::drop() { if (data_) { free(data_); data_ = nullptr; } len_ = cap_ = 0; }
92
+
93
+ template class Vec<String>; // explicit instantiation: emits Vec<String>'s members in this object file
94
+
95
+ void sliceInit(void* s, const void* p, size_t l) { // treats s as two pointer-sized words: stores pointer p in word 0 and length l in word 1
96
+ auto* a = static_cast<const void**>(s);
97
+ a[0] = p; *reinterpret_cast<size_t*>(&a[1]) = l;
98
+ }
99
+ size_t sliceLen(const void* s) { // reads the length that sliceInit stored in the second word of the slice at s
100
+ return *reinterpret_cast<const size_t*>(static_cast<const void* const*>(s)+1);
101
+ }
102
+ const void* slicePtr(const void* s) { // reads the data pointer that sliceInit stored in the first word of the slice at s
103
+ return static_cast<const void* const*>(s)[0];
104
+ }
105
+
106
+ } // namespace cxxbridge1
107
+ } // namespace rust
108
+
109
+
110
+ // ============================================================================
111
+ // Part 2: extern "C" stubs
112
+ //
113
+ // All stubs use calloc/free for allocation. NO __asm__ forwarding to the C++
114
+ // mangled Rust functions, which would create mutual recursion.
115
+ // ============================================================================
116
+ extern "C" {
117
+
118
+ // --- rust_vec for JsonValue: stubbed as a permanently empty vector (null data, zero length and capacity); set_len/truncate/reserve_total/drop do nothing ---
119
+ void* cxxbridge1$rust_vec$litert$lm$JsonValue$new() { return nullptr; }
120
+ const void* cxxbridge1$rust_vec$litert$lm$JsonValue$data(const void*) { return nullptr; }
121
+ size_t cxxbridge1$rust_vec$litert$lm$JsonValue$len(const void*) { return 0; }
122
+ size_t cxxbridge1$rust_vec$litert$lm$JsonValue$capacity(const void*) { return 0; }
123
+ void cxxbridge1$rust_vec$litert$lm$JsonValue$set_len(void*, size_t) {}
124
+ void cxxbridge1$rust_vec$litert$lm$JsonValue$truncate(void*, size_t) {}
125
+ void cxxbridge1$rust_vec$litert$lm$JsonValue$reserve_total(void*, size_t) {}
126
+ void cxxbridge1$rust_vec$litert$lm$JsonValue$drop(void*) {}
127
+
128
// --- Box<MinijinjaTemplate> ---
// Boxed values live in zeroed 64-byte calloc() blocks (the size reported by
// the MinijinjaTemplate sizeof shim) and are released with free().

// Allocates zeroed storage for one boxed value; returns nullptr if calloc() fails.
void* cxxbridge1$box$litert$lm$MinijinjaTemplate$alloc() {
  return calloc(1, 64);
}

// Releases storage obtained from alloc(). `p` is the allocation itself;
// nullptr is ignored.
void cxxbridge1$box$litert$lm$MinijinjaTemplate$dealloc(void* p) {
  free(p);
}

// Destroys a box. The C++ code generated by CXX implements Box<T>::drop() as
// a call to this symbol with `this`, so `p` is the address of the box (a
// single T* slot), not the heap block it owns. Freeing `p` itself would hand
// free() the box object rather than the calloc() block. Free the pointer
// stored in the slot and clear it so a repeated drop is harmless.
void cxxbridge1$box$litert$lm$MinijinjaTemplate$drop(void* p) {
  if (!p) return;
  void** slot = static_cast<void**>(p);
  free(*slot);
  *slot = nullptr;
}
139
+
140
// --- Box<JsonValue> ---
// Boxed values live in zeroed 64-byte calloc() blocks (the size reported by
// the JsonValue sizeof shim) and are released with free().

// Allocates zeroed storage for one boxed value; returns nullptr if calloc() fails.
void* cxxbridge1$box$litert$lm$JsonValue$alloc() {
  return calloc(1, 64);
}

// Releases storage obtained from alloc(). `p` is the allocation itself;
// nullptr is ignored.
void cxxbridge1$box$litert$lm$JsonValue$dealloc(void* p) {
  free(p);
}

// Destroys a box. The C++ code generated by CXX implements Box<T>::drop() as
// a call to this symbol with `this`, so `p` is the address of the box (a
// single T* slot), not the heap block it owns. Freeing `p` itself would hand
// free() the box object rather than the calloc() block. Free the pointer
// stored in the slot and clear it so a repeated drop is harmless.
void cxxbridge1$box$litert$lm$JsonValue$drop(void* p) {
  if (!p) return;
  void** slot = static_cast<void**>(p);
  free(*slot);
  *slot = nullptr;
}
150
+
151
+ // --- MinijinjaTemplate FFI shims: every shim ignores the template it is given and produces a fixed empty/zeroed result ---
152
+ // These are the Rust-side FFI entry points. The C++ mangled wrappers in the
153
+ // Rust .o call INTO these. We must NOT forward back to those wrappers.
154
+ size_t litert$lm$cxxbridge1$MinijinjaTemplate$operator$sizeof() { return 64; } // same 64 bytes as the calloc(1, 64) blocks handed out in this file
155
+ size_t litert$lm$cxxbridge1$MinijinjaTemplate$operator$alignof() { return 8; }
156
+ void litert$lm$cxxbridge1$MinijinjaTemplate$source(const void*, void* out) {
157
+ // Zero the 24-byte result at out: an all-zero rust::cxxbridge1::String (null ptr, len 0, cap 0) reads as empty
158
+ if (out) memset(out, 0, 24);
159
+ }
160
+ void litert$lm$cxxbridge1$MinijinjaTemplate$apply(const void*, void* input, void* out) {
161
+ // Ignores the template and input; zeroes the 24-byte result at out (an empty string)
162
+ if (out) memset(out, 0, 24);
163
+ }
164
+ void litert$lm$cxxbridge1$MinijinjaTemplate$clone_template(const void*, void* out) {
165
+ // Store the address of a fresh zeroed 64-byte calloc() block in out (nullptr if calloc fails); the source template is not copied
166
+ if (out) {
167
+ void* cloned = calloc(1, 64);
168
+ memcpy(out, &cloned, sizeof(void*));
169
+ }
170
+ }
171
+ void litert$lm$cxxbridge1$MinijinjaTemplate$get_capabilities(const void*, void* out) { // zero-fills 24 bytes at out; NOTE(review): assumes the capabilities result is 24 bytes - confirm
172
+ if (out) memset(out, 0, 24);
173
+ }
174
+ void litert$lm$cxxbridge1$MinijinjaTemplate$get_error(const void*, void* out) { // zero-fills 24 bytes at out; NOTE(review): presumably an empty error String - confirm
175
+ if (out) memset(out, 0, 24);
176
+ }
177
+
178
+ // --- new_minijinja_template: stub constructor; `input` is ignored ---
179
+ // Creates a Box<MinijinjaTemplate>: writes the address of a zeroed 64-byte calloc() block
180
+ // to `out` (nullptr if calloc() fails), so the box owns memory that free() can release.
181
+ void litert$lm$cxxbridge1$new_minijinja_template(void* input, void* out) {
182
+ if (out) {
183
+ void* tmpl = calloc(1, 64);
184
+ memcpy(out, &tmpl, sizeof(void*));
185
+ }
186
+ }
187
+
188
+ // --- JsonValue FFI shims: every value reports itself as null; scalar getters return 0/false, and the void shims write nothing to their pointer arguments ---
189
+ size_t litert$lm$cxxbridge1$JsonValue$operator$sizeof() { return 64; } // same 64 bytes as the calloc(1, 64) blocks handed out in this file
190
+ size_t litert$lm$cxxbridge1$JsonValue$operator$alignof() { return 8; }
191
+ void litert$lm$cxxbridge1$JsonValue$object_get(const void*, void*, void*) {} // NOTE(review): any out-parameter is left untouched; a caller reading it would see uninitialized memory - confirm this path is unreachable
192
+ int litert$lm$cxxbridge1$JsonValue$is_null(const void*) { return 1; }
193
+ int litert$lm$cxxbridge1$JsonValue$is_bool(const void*) { return 0; }
194
+ int litert$lm$cxxbridge1$JsonValue$is_number(const void*) { return 0; }
195
+ int litert$lm$cxxbridge1$JsonValue$is_string(const void*) { return 0; }
196
+ int litert$lm$cxxbridge1$JsonValue$is_array(const void*) { return 0; }
197
+ int litert$lm$cxxbridge1$JsonValue$is_object(const void*) { return 0; }
198
+ int litert$lm$cxxbridge1$JsonValue$get_bool(const void*) { return 0; }
199
+ double litert$lm$cxxbridge1$JsonValue$get_number(const void*) { return 0.0; }
200
+ void litert$lm$cxxbridge1$JsonValue$get_string(const void*, void*) {} // NOTE(review): writes nothing, unlike the MinijinjaTemplate string shims that zero-fill their result - confirm callers never read it
201
+ size_t litert$lm$cxxbridge1$JsonValue$array_len(const void*) { return 0; }
202
+ void litert$lm$cxxbridge1$JsonValue$array_get(const void*, size_t, void*) {} // writes nothing; array_len() is always 0, so no index is valid
203
+ int litert$lm$cxxbridge1$JsonValue$object_has_key(const void*, void*) { return 0; }
204
+ void litert$lm$cxxbridge1$JsonValue$object_keys(const void*, void*) {} // NOTE(review): writes nothing to its pointer argument - confirm callers never read it
205
+
206
+ // --- Parser/template FFI shims: no-ops that write nothing to either argument; NOTE(review): confirm callers never consume a result from these ---
207
+ void litert$lm$cxxbridge1$parse_fc_expression(void*, void*) {}
208
+ void litert$lm$cxxbridge1$parse_json_expression(void*, void*) {}
209
+ void litert$lm$cxxbridge1$parse_python_expression(void*, void*) {}
210
+
211
+ // --- Tokenizers FFI stubs: constructors and lookups return nullptr or 0; the free functions are no-ops ---
212
+ void* byte_level_bpe_tokenizers_new_from_str(void*, void*, void*) { return nullptr; }
213
+ void* tokenizers_decode(void*, void*, size_t, int) { return nullptr; }
214
+ void* tokenizers_encode(void*, void*, size_t) { return nullptr; }
215
+ void* tokenizers_encode_batch(void*, void**, size_t, size_t) { return nullptr; }
216
+ void tokenizers_free(void*) {}
217
+ void tokenizers_free_encode_results(void*, size_t) {}
218
+ void* tokenizers_get_decode_str(void*) { return nullptr; }
219
+ int tokenizers_get_vocab_size(void*, int) { return 0; }
220
+ void* tokenizers_id_to_token(void*, int) { return nullptr; }
221
+ void* tokenizers_new_from_str(void*, size_t) { return nullptr; }
222
+ int tokenizers_token_to_id(void*, void*, size_t) { return 0; }
223
+
224
+ } // extern "C"
@@ -0,0 +1,46 @@
1
+ // Copyright 2026 Google LLC.
2
+ //
3
+ // Licensed under the Apache License, Version 2.0 (the "License");
4
+ // you may not use this file except in compliance with the License.
5
+ // You may obtain a copy of the License at
6
+ //
7
+ // http://www.apache.org/licenses/LICENSE-2.0
8
+ //
9
+ // Unless required by applicable law or agreed to in writing, software
10
+ // distributed under the License is distributed on an "AS IS" BASIS,
11
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ // See the License for the specific language governing permissions and
13
+ // limitations under the License.
14
+
15
+
16
+ #include <cstdio>
17
+
18
+ struct LiteRtLmGemmaModelConstraintProvider { // Placeholder type: gives the stub Create/Destroy pair something to allocate and delete.
19
+ int dummy; // always initialized to 0 by Create; never read in this file
20
+ };
21
+
22
+ extern "C" {
23
+
24
+ void* LiteRtLmGemmaModelConstraintProvider_Create( // Stub: ignores every argument, prints a warning to stderr on each call, and returns a heap-allocated placeholder.
25
+ const char* serialized_sp_model_proto, size_t serialized_sp_model_proto_len,
26
+ const int** stop_token_ids, const size_t* stop_token_lengths,
27
+ size_t num_stop_lists) {
28
+ fprintf(stderr, "\n[LiteRT-LM WARN] Gemma Constraint Provider is"
29
+ " STUBBED/DISABLED.\n");
30
+ fprintf(stderr, "[LiteRT-LM WARN] Any requests for grammar-constrained"
31
+ " decoding will be ignored or fail.\n\n");
32
+ // Return a non-null placeholder so that engine creation doesn't fail; release it with LiteRtLmGemmaModelConstraintProvider_Destroy.
33
+ return new LiteRtLmGemmaModelConstraintProvider{0};
34
+ }
35
+
36
+ void LiteRtLmGemmaModelConstraintProvider_Destroy(void* provider) { // Deletes a placeholder returned by LiteRtLmGemmaModelConstraintProvider_Create; nullptr is ignored.
37
+ if (provider) {
38
+ delete static_cast<LiteRtLmGemmaModelConstraintProvider*>(provider);
39
+ }
40
+ }
41
+
42
+ void* LiteRtLmGemmaModelConstraintProvider_CreateConstraintFromTools( // Stub: ignores all arguments and always returns nullptr (no constraint is built).
43
+ void* provider, void* tools, void* options) {
44
+ return nullptr;
45
+ }
46
+ } // extern "C"
@@ -0,0 +1,101 @@
1
+ /**
2
+ * llguidance_stubs.c
3
+ *
4
+ * Stub implementations for the llguidance C API used by LiteRT-LM's
5
+ * constrained decoding subsystem (llg_constraint, llg_constraint_provider).
6
+ * The real implementation is a Rust library (llguidance) which is not
7
+ * available in the iOS Bazel build.
8
+ *
9
+ * These stubs return error/null values so that constrained decoding
10
+ * gracefully fails at runtime. Basic LLM inference is unaffected.
11
+ */
12
+
13
+ #include <stddef.h>
14
+ #include <stdint.h>
15
+
16
+ /* Opaque types */
17
+ typedef struct LlgTokenizer LlgTokenizer;
18
+ typedef struct LlgConstraint LlgConstraint;
19
+ typedef struct LlgConstraintInit LlgConstraintInit;
20
+
21
+ /* llg_new_tokenizer: stub that always fails - sets *out_tokenizer to NULL (when out_tokenizer is non-NULL) and returns -1; `init` is ignored. NOTE(review): confirm this parameter list and int return match the llguidance.h the engine was built against. */
22
+ int llg_new_tokenizer(void* init, void** out_tokenizer) {
23
+ if (out_tokenizer) *out_tokenizer = NULL;
24
+ return -1; /* error */
25
+ }
26
+
27
+ /* llg_free_tokenizer: no-op - the stub llg_new_tokenizer never hands out a tokenizer, so there is nothing to release */
28
+ void llg_free_tokenizer(void* tokenizer) {
29
+ (void)tokenizer;
30
+ }
31
+
32
+ /* llg_constraint_init_set_defaults: no-op - leaves *init exactly as the caller passed it (nothing is written) */
33
+ void llg_constraint_init_set_defaults(void* init, void* tokenizer) {
34
+ (void)init;
35
+ (void)tokenizer;
36
+ }
37
+
38
+ /* llg_new_constraint: stub for grammar constraint creation - ignores both arguments and always returns NULL */
39
+ void* llg_new_constraint(void* init, void* grammar) {
40
+ (void)init;
41
+ (void)grammar;
42
+ return NULL;
43
+ }
44
+
45
+ /* llg_new_constraint_json: stub for JSON-schema constraint creation - ignores both arguments and always returns NULL */
46
+ void* llg_new_constraint_json(void* init, const char* json_schema) {
47
+ (void)init;
48
+ (void)json_schema;
49
+ return NULL;
50
+ }
51
+
52
+ /* llg_new_constraint_regex: stub for regex constraint creation - ignores both arguments and always returns NULL */
53
+ void* llg_new_constraint_regex(void* init, const char* regex) {
54
+ (void)init;
55
+ (void)regex;
56
+ return NULL;
57
+ }
58
+
59
+ /* llg_new_constraint_lark: stub for Lark-grammar constraint creation - ignores both arguments and always returns NULL */
60
+ void* llg_new_constraint_lark(void* init, const char* lark_grammar) {
61
+ (void)init;
62
+ (void)lark_grammar;
63
+ return NULL;
64
+ }
65
+
66
+ /* llg_clone_constraint: stub - never duplicates anything; always returns NULL regardless of the input */
67
+ void* llg_clone_constraint(void* constraint) {
68
+ (void)constraint;
69
+ return NULL;
70
+ }
71
+
72
+ /* llg_compute_mask: stub for next-token mask computation - writes nothing to `result` and always returns -1 */
73
+ int llg_compute_mask(void* constraint, void* result) {
74
+ (void)constraint;
75
+ (void)result;
76
+ return -1; /* error */
77
+ }
78
+
79
+ /* llg_commit_token: stub for committing the selected token - ignores both arguments and always returns -1 */
80
+ int llg_commit_token(void* constraint, int32_t token) {
81
+ (void)constraint;
82
+ (void)token;
83
+ return -1; /* error */
84
+ }
85
+
86
+ /* llg_is_stopped: stub - always returns 1, i.e. every constraint is reported as already stopped */
87
+ int llg_is_stopped(void* constraint) {
88
+ (void)constraint;
89
+ return 1; /* stopped (nothing to do) */
90
+ }
91
+
92
+ /* llg_get_error: stub - always returns the same static message (a string literal; callers must not free or modify it) */
93
+ const char* llg_get_error(void* constraint) {
94
+ (void)constraint;
95
+ return "llguidance not available on iOS";
96
+ }
97
+
98
+ /* llg_free_constraint: no-op - the stub constructors in this file only ever return NULL, so there is nothing to release */
99
+ void llg_free_constraint(void* constraint) {
100
+ (void)constraint;
101
+ }
package/src/hooks.ts CHANGED
@@ -1,51 +1,122 @@
1
- import { useState, useEffect, useRef, useCallback } from "react";
2
- import { LiteRTLM, LLMConfig } from "./index";
1
+ import { useState, useEffect, useRef, useCallback, useMemo } from "react";
2
+ import { LLMConfig } from "./index";
3
3
  import { createLLM } from "./modelFactory";
4
+ import type { LiteRTLMInstance } from "./modelFactory";
5
+ import type { MemoryTracker, MemoryTrackerSummary } from "./memoryTracker";
4
6
 
5
7
  export interface UseModelConfig extends LLMConfig {
6
8
  autoLoad?: boolean; // defaults to true inside useModel; presumably triggers load() without an explicit call - confirm
9
+ /**
10
+ * Enable memory tracking using native ArrayBuffers (NitroModules v0.35+).
11
+ * When enabled, memory usage is tracked after each inference call
12
+ * using `NitroModules.createNativeArrayBuffer()` for zero-copy storage. Changing this value after mount closes and re-creates the underlying model instance.
13
+ * @default false
14
+ */
15
+ enableMemoryTracking?: boolean;
16
+ /**
17
+ * Maximum number of memory snapshots to store.
18
+ * Each snapshot uses 32 bytes of native memory. Changing this value after mount closes and re-creates the underlying model instance.
19
+ * @default 256
20
+ */
21
+ maxMemorySnapshots?: number;
7
22
  }
8
23
 
9
24
  export interface UseModelResult {
10
- model: LiteRTLM | null;
25
+ model: LiteRTLMInstance | null;
11
26
  isReady: boolean;
12
27
  isGenerating: boolean;
13
28
  downloadProgress: number;
14
29
  error: string | null;
15
30
  generate: (prompt: string) => Promise<string>;
16
31
  reset: () => void;
17
- deleteModel: (fileName: string) => Promise<void>;
32
+ /**
33
+ * Delete the model file. If no fileName is provided, derives it from
34
+ * the URL/path passed to useModel (the text after its last "/", or "model.bin" when that is empty). Also resets isReady and downloadProgress.
35
+ */
36
+ deleteModel: (fileName?: string) => Promise<void>;
18
37
  load: () => Promise<void>;
38
+ /**
39
+ * Memory tracker instance (available when enableMemoryTracking is true).
40
+ * Uses native ArrayBuffers allocated via `NitroModules.createNativeArrayBuffer()`
41
+ * for efficient, zero-copy memory usage tracking. Null when the model instance has no tracker.
42
+ */
43
+ memoryTracker: MemoryTracker | null;
44
+ /**
45
+ * Current memory tracking summary (null if tracking is disabled).
46
+ * Refreshed from the tracker when a `generate` call reports its final token; null until the first such refresh.
47
+ */
48
+ memorySummary: MemoryTrackerSummary | null;
49
+ }
50
+
51
+ /**
52
+ * Extract a filename from a URL or file path: the text after the last "/", or "model.bin" when that text is empty (for example, when the input ends with "/"). Query strings and fragments are not stripped.
53
+ */
54
+ function extractFileName(pathOrUrl: string): string {
55
+ return pathOrUrl.split("/").pop() || "model.bin";
19
56
  }
20
57
 
21
58
  export function useModel(
22
59
  pathOrUrl: string,
23
60
  config?: UseModelConfig,
24
61
  ): UseModelResult {
25
- const modelRef = useRef<LiteRTLM | null>(null);
62
+ const modelRef = useRef<LiteRTLMInstance | null>(null);
26
63
  const [isReady, setIsReady] = useState(false);
27
64
  const [isGenerating, setIsGenerating] = useState(false);
28
65
  const [downloadProgress, setDownloadProgress] = useState(0);
29
66
  const [error, setError] = useState<string | null>(null);
67
+ const [memorySummary, setMemorySummary] = useState<MemoryTrackerSummary | null>(null);
30
68
 
31
- // Extract autoLoad (default true)
69
+ // Destructure config into primitive values for stable dependency arrays.
70
+ // This prevents infinite re-render loops when consumers pass inline config
71
+ // objects (e.g. useModel(url, { backend: 'cpu' })) without useMemo.
32
72
  const autoLoad = config?.autoLoad ?? true;
73
+ const enableMemoryTracking = config?.enableMemoryTracking ?? false;
74
+ const maxMemorySnapshots = config?.maxMemorySnapshots ?? 256;
75
+ const backend = config?.backend;
76
+ const systemPrompt = config?.systemPrompt;
77
+ const maxTokens = config?.maxTokens;
78
+ const temperature = config?.temperature;
79
+ const topK = config?.topK;
80
+ const topP = config?.topP;
81
+
82
+ // Build a stable config object from the destructured primitives
83
+ const nativeConfig = useMemo<LLMConfig>(
84
+ () => ({
85
+ ...(backend !== undefined && { backend }),
86
+ ...(systemPrompt !== undefined && { systemPrompt }),
87
+ ...(maxTokens !== undefined && { maxTokens }),
88
+ ...(temperature !== undefined && { temperature }),
89
+ ...(topK !== undefined && { topK }),
90
+ ...(topP !== undefined && { topP }),
91
+ }),
92
+ [backend, systemPrompt, maxTokens, temperature, topK, topP],
93
+ );
94
+
95
+ /**
96
+ * Refresh memory summary from the tracker's native buffer.
97
+ */
98
+ const refreshMemorySummary = useCallback(() => {
99
+ if (modelRef.current?.memoryTracker) {
100
+ setMemorySummary(modelRef.current.memoryTracker.getSummary());
101
+ }
102
+ }, []);
33
103
 
34
104
  // Initialize the model instance
35
105
  useEffect(() => {
36
- modelRef.current = createLLM();
37
- let isMounted = true;
106
+ modelRef.current = createLLM({
107
+ enableMemoryTracking,
108
+ maxMemorySnapshots,
109
+ });
38
110
 
39
111
  // Cleanup on unmount
40
112
  return () => {
41
- isMounted = false;
42
113
  try {
43
114
  modelRef.current?.close();
44
115
  } catch (e) {
45
116
  console.warn("Failed to close model", e);
46
117
  }
47
118
  };
48
- }, []);
119
+ }, [enableMemoryTracking, maxMemorySnapshots]);
49
120
 
50
121
  const load = useCallback(async () => {
51
122
  setIsReady(false);
@@ -53,36 +124,24 @@ export function useModel(
53
124
  setDownloadProgress(0);
54
125
 
55
126
  try {
56
- let modelPath = pathOrUrl;
57
-
58
- // Handle URL download manually to capture progress
59
- if (pathOrUrl.startsWith("http://") || pathOrUrl.startsWith("https://")) {
60
- const fileName = pathOrUrl.split("/").pop() || "model.bin";
61
-
62
- if (modelRef.current) {
63
- modelPath = await modelRef.current.downloadModel(
64
- pathOrUrl,
65
- fileName,
66
- (progress) => {
67
- setDownloadProgress(progress);
68
- },
69
- );
70
- }
71
- }
72
-
73
127
  if (modelRef.current) {
74
- // Create a clean config object for native loadModel (excluding autoLoad)
75
- const nativeConfig: LLMConfig = { ...config };
76
- delete (nativeConfig as any).autoLoad;
77
-
78
- await modelRef.current.loadModel(modelPath, nativeConfig);
128
+ // Delegate URL handling + download to the factory's loadModel,
129
+ // passing our progress setter as the callback (eliminates
130
+ // duplicate download logic that was previously in this hook).
131
+ await modelRef.current.loadModel(
132
+ pathOrUrl,
133
+ nativeConfig,
134
+ (progress) => {
135
+ setDownloadProgress(progress);
136
+ },
137
+ );
79
138
  setIsReady(true);
80
139
  }
81
140
  } catch (e: any) {
82
141
  setError(e.message || "Failed to load model");
83
142
  console.error(e);
84
143
  }
85
- }, [pathOrUrl, config]);
144
+ }, [pathOrUrl, nativeConfig]);
86
145
 
87
146
  useEffect(() => {
88
147
  if (autoLoad) {
@@ -106,6 +165,7 @@ export function useModel(
106
165
  (token: string, done: boolean) => {
107
166
  fullResponse += token;
108
167
  if (done) {
168
+ refreshMemorySummary();
109
169
  resolve(fullResponse);
110
170
  }
111
171
  },
@@ -121,7 +181,7 @@ export function useModel(
121
181
  setIsGenerating(false);
122
182
  }
123
183
  },
124
- [isReady],
184
+ [isReady, refreshMemorySummary],
125
185
  );
126
186
 
127
187
  const reset = useCallback(() => {
@@ -130,13 +190,17 @@ export function useModel(
130
190
  }
131
191
  }, []);
132
192
 
133
- const deleteModel = useCallback(async (fileName: string): Promise<void> => {
134
- if (modelRef.current) {
135
- await modelRef.current.deleteModel(fileName);
136
- setIsReady(false);
137
- setDownloadProgress(0);
138
- }
139
- }, []);
193
+ const deleteModel = useCallback(
194
+ async (fileName?: string): Promise<void> => {
195
+ if (modelRef.current) {
196
+ const resolvedName = fileName ?? extractFileName(pathOrUrl);
197
+ await modelRef.current.deleteModel(resolvedName);
198
+ setIsReady(false);
199
+ setDownloadProgress(0);
200
+ }
201
+ },
202
+ [pathOrUrl],
203
+ );
140
204
 
141
205
  return {
142
206
  model: modelRef.current,
@@ -148,5 +212,7 @@ export function useModel(
148
212
  reset,
149
213
  deleteModel,
150
214
  load,
215
+ memoryTracker: modelRef.current?.memoryTracker ?? null,
216
+ memorySummary,
151
217
  };
152
218
  }
package/src/index.ts CHANGED
@@ -7,6 +7,7 @@ import type {
7
7
  Backend,
8
8
  Role,
9
9
  GenerationStats,
10
+ MemoryUsage,
10
11
  } from "./specs/LiteRTLM.nitro";
11
12
 
12
13
  export type {
@@ -16,6 +17,7 @@ export type {
16
17
  Backend,
17
18
  Role,
18
19
  GenerationStats,
20
+ MemoryUsage,
19
21
  } from "./specs/LiteRTLM.nitro";
20
22
 
21
23
  // Re-export template utilities
@@ -26,6 +28,15 @@ export {
26
28
  applyLlamaTemplate,
27
29
  } from "./templates";
28
30
 
31
+ // Re-export memory tracking utilities (uses NitroModules.createNativeArrayBuffer v0.35+)
32
+ export type {
33
+ MemorySnapshot,
34
+ MemoryTracker,
35
+ MemoryTrackerSummary,
36
+ } from "./memoryTracker";
37
+ export { createMemoryTracker, createNativeBuffer } from "./memoryTracker";
38
+
39
+ export type { LiteRTLMInstance } from "./modelFactory";
29
40
  export * from "./hooks";
30
41
 
31
42
  /**
@@ -121,14 +132,10 @@ export function checkBackendSupport(backend: Backend): string | undefined {
121
132
  return "NPU backend requires compatible hardware (Qualcomm Hexagon, MediaTek APU, etc.). Will fall back to GPU if unavailable.";
122
133
  }
123
134
  if (Platform.OS === "ios") {
124
- return "NPU/CoreML is not yet supported on iOS. LiteRT-LM iOS support is pending.";
135
+ return "NPU (Neural Engine) is not yet supported on iOS. Use 'gpu' (Metal) or 'cpu' instead.";
125
136
  }
126
137
  }
127
138
 
128
- if (Platform.OS === "ios" && backend !== "cpu") {
129
- return "LiteRT-LM iOS is not yet released. Only CPU backend may work via fallback.";
130
- }
131
-
132
139
  return undefined;
133
140
  }
134
141
 
@@ -151,7 +158,7 @@ export function checkBackendSupport(backend: Backend): string | undefined {
151
158
  */
152
159
  export function checkMultimodalSupport(): string | undefined {
153
160
  if (Platform.OS === "ios") {
154
- return "Multimodal (image/audio) is not yet supported on iOS. LiteRT-LM iOS SDK is pending.";
161
+ return "Multimodal (image/audio) is experimental on iOS. Vision and audio executors may not be available in the current build.";
155
162
  }
156
163
  return undefined;
157
164
  }