react-native-litert-lm 0.2.1 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +331 -150
- package/android/build.gradle +1 -1
- package/android/src/main/java/com/margelo/nitro/dev/litert/litertlm/HybridLiteRTLM.kt +140 -37
- package/app.plugin.js +33 -0
- package/cpp/HybridLiteRTLM.cpp +577 -378
- package/cpp/HybridLiteRTLM.hpp +66 -23
- package/cpp/IOSDownloadHelper.h +24 -0
- package/cpp/cpp-adapter.cpp +10 -2
- package/cpp/include/litert_lm_engine.h +502 -0
- package/ios/IOSDownloadHelper.mm +129 -0
- package/ios/LiteRTLMAutolinking.mm +30 -0
- package/lib/hooks.d.ts +33 -3
- package/lib/hooks.js +54 -23
- package/lib/index.d.ts +4 -1
- package/lib/index.js +6 -6
- package/lib/memoryTracker.d.ts +128 -0
- package/lib/memoryTracker.js +155 -0
- package/lib/modelFactory.d.ts +21 -2
- package/lib/modelFactory.js +78 -11
- package/lib/specs/LiteRTLM.nitro.d.ts +19 -0
- package/nitrogen/generated/android/LiteRTLMOnLoad.cpp +28 -18
- package/nitrogen/generated/android/LiteRTLMOnLoad.hpp +13 -4
- package/nitrogen/generated/android/c++/JHybridLiteRTLMSpec.cpp +39 -36
- package/nitrogen/generated/android/c++/JHybridLiteRTLMSpec.hpp +20 -22
- package/nitrogen/generated/android/c++/JMemoryUsage.hpp +69 -0
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/HybridLiteRTLMSpec.kt +19 -18
- package/nitrogen/generated/android/kotlin/com/margelo/nitro/dev/litert/litertlm/MemoryUsage.kt +47 -0
- package/nitrogen/generated/shared/c++/HybridLiteRTLMSpec.cpp +1 -0
- package/nitrogen/generated/shared/c++/HybridLiteRTLMSpec.hpp +4 -0
- package/nitrogen/generated/shared/c++/MemoryUsage.hpp +95 -0
- package/package.json +12 -5
- package/react-native-litert-lm.podspec +20 -7
- package/scripts/build-ios-engine.sh +283 -0
- package/scripts/download-ios-frameworks.sh +72 -0
- package/scripts/postinstall.js +116 -0
- package/scripts/stubs/cxx_bridge_stubs.cc +224 -0
- package/scripts/stubs/gemma_model_constraint_provider.cc +46 -0
- package/scripts/stubs/llguidance_stubs.c +101 -0
- package/src/hooks.ts +107 -41
- package/src/index.ts +13 -6
- package/src/memoryTracker.ts +268 -0
- package/src/modelFactory.ts +107 -11
- package/src/specs/LiteRTLM.nitro.ts +21 -0
|
@@ -0,0 +1,224 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* cxx_bridge_stubs.cc
|
|
3
|
+
*
|
|
4
|
+
* Provides stub implementations for the Rust CXX bridge runtime.
|
|
5
|
+
*
|
|
6
|
+
* ARCHITECTURE NOTES:
|
|
7
|
+
* The CXX bridge generates two halves:
|
|
8
|
+
* 1. Rust .o files define C++ mangled wrappers (e.g., litert::lm::new_minijinja_template)
|
|
9
|
+
* that call extern "C" shims (e.g., _litert$lm$cxxbridge1$new_minijinja_template)
|
|
10
|
+
* 2. Generated C++ code defines those extern "C" shims, which call the actual Rust FFI entry points
|
|
11
|
+
*
|
|
12
|
+
* Since we only have half #1 (the Rust .o), we provide stub extern "C" shims.
|
|
13
|
+
*
|
|
14
|
+
* CRITICAL: These stubs must NOT forward (via __asm__) to the C++ mangled functions
|
|
15
|
+
* in the Rust .o files. Those functions call RIGHT BACK to these extern "C" stubs,
|
|
16
|
+
* creating infinite recursion → stack overflow → SIGBUS.
|
|
17
|
+
*
|
|
18
|
+
* ALLOCATOR CONSISTENCY: All stubs use calloc/free. Since our stub creates the objects
|
|
19
|
+
* (not real Rust code), our drop/dealloc stubs using free() are perfectly consistent.
|
|
20
|
+
* If the real Rust code created the objects, there would be a mismatch, but it doesn't
|
|
21
|
+
* because these stubs intercept the calls before Rust can run.
|
|
22
|
+
*/
|
|
23
|
+
|
|
24
|
+
#include <cstddef>
|
|
25
|
+
#include <cstdlib>
|
|
26
|
+
#include <cstring>
|
|
27
|
+
#include <string>
|
|
28
|
+
|
|
29
|
+
// ============================================================================
|
|
30
|
+
// Part 1: CXX Runtime Types
|
|
31
|
+
// ============================================================================
|
|
32
|
+
|
|
33
|
+
namespace rust {
namespace cxxbridge1 {

class String;

// Borrowed, non-owning view of a byte range (pointer + length).
// The members are public and laid out as {ptr, len}; nothing here owns memory.
class Str {
public:
  const char* ptr;
  size_t len;
  Str(const char* s);
  Str(const String& s);
  const char* data() const;
  size_t size() const;
  operator std::string() const;
};

// Owning string stub backed by a malloc'd, NUL-terminated buffer.
// Layout is three machine words: {ptr, len, cap}. Move-only: declaring the
// move constructor suppresses the implicit copy operations.
class String {
public:
  struct Repr { char* ptr; size_t len; size_t cap; };
  Repr repr_;
  String(String&& o) noexcept;
  String(const std::string& s);
  ~String();
  const char* data() const;
  size_t size() const;
  operator std::string() const;
};

// A null C string is treated as an empty view rather than passed to strlen.
Str::Str(const char* s) : ptr(s), len(s ? strlen(s) : 0) {}
Str::Str(const String& s) : ptr(s.data()), len(s.size()) {}
const char* Str::data() const { return ptr; }
size_t Str::size() const { return len; }
// Guard the null-pointer view so we never hand a null pointer to std::string.
Str::operator std::string() const {
  return ptr ? std::string(ptr, len) : std::string();
}

// Steal the buffer and leave the source empty so its destructor is a no-op.
String::String(String&& o) noexcept : repr_(o.repr_) { o.repr_ = {nullptr, 0, 0}; }

// Copy `s` (including its terminating NUL) into a fresh malloc'd buffer.
// If the allocation fails the string is left consistently empty: previously
// len/cap were recorded before the malloc check, so data() returned "" while
// size() still reported the requested length.
String::String(const std::string& s) {
  const size_t n = s.size();
  repr_.ptr = static_cast<char*>(malloc(n + 1));
  if (repr_.ptr) {
    memcpy(repr_.ptr, s.c_str(), n + 1);
    repr_.len = n;
    repr_.cap = n + 1;
  } else {
    repr_.len = 0;
    repr_.cap = 0;
  }
}
String::~String() { if (repr_.ptr) free(repr_.ptr); }
// Never returns null: an empty/moved-from string yields "".
const char* String::data() const { return repr_.ptr ? repr_.ptr : ""; }
size_t String::size() const { return repr_.len; }
String::operator std::string() const { return std::string(data(), size()); }

// Minimal vector stub: {data_, len_, cap_}. Only default construction is
// provided, so a Vec created here is always empty.
template<typename T> class Vec {
public:
  T* data_; size_t len_; size_t cap_;
  Vec();
  const T* data() const;
  size_t size() const;
  void drop();
};

template<typename T> Vec<T>::Vec() : data_(nullptr), len_(0), cap_(0) {}
template<typename T> const T* Vec<T>::data() const { return data_; }
template<typename T> size_t Vec<T>::size() const { return len_; }
// Releases the buffer with free() and resets to empty. Element destructors
// are not run; this file never stores elements in a Vec.
template<typename T> void Vec<T>::drop() {
  if (data_) { free(data_); data_ = nullptr; }
  len_ = cap_ = 0;
}

// Emit the out-of-line Vec<String> member definitions in this object file.
template class Vec<String>;

// Slice helpers: a slice is stored as two consecutive words, {pointer, length}.
void sliceInit(void* s, const void* p, size_t l) {
  auto* words = static_cast<const void**>(s);
  words[0] = p;
  *reinterpret_cast<size_t*>(&words[1]) = l;
}
size_t sliceLen(const void* s) {
  return *reinterpret_cast<const size_t*>(static_cast<const void* const*>(s) + 1);
}
const void* slicePtr(const void* s) {
  return static_cast<const void* const*>(s)[0];
}

} // namespace cxxbridge1
} // namespace rust
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
// ============================================================================
|
|
111
|
+
// Part 2: extern "C" stubs
|
|
112
|
+
//
|
|
113
|
+
// All stubs use calloc/free for allocation. NO __asm__ forwarding to the C++
|
|
114
|
+
// mangled Rust functions, which would create mutual recursion.
|
|
115
|
+
// ============================================================================
|
|
116
|
+
extern "C" {
|
|
117
|
+
|
|
118
|
+
// --- rust_vec for JsonValue ---
|
|
119
|
+
// Vec<JsonValue> "new" stub: takes no arguments, touches nothing, returns null.
// NOTE(review): upstream CXX appears to declare this shim as receiving a pointer
// to the Vec it should initialise — confirm that leaving the caller's storage
// untouched is acceptable given the data/len/capacity stubs ignore it too.
void* cxxbridge1$rust_vec$litert$lm$JsonValue$new() { return nullptr; }
|
|
120
|
+
const void* cxxbridge1$rust_vec$litert$lm$JsonValue$data(const void*) { return nullptr; }
|
|
121
|
+
size_t cxxbridge1$rust_vec$litert$lm$JsonValue$len(const void*) { return 0; }
|
|
122
|
+
size_t cxxbridge1$rust_vec$litert$lm$JsonValue$capacity(const void*) { return 0; }
|
|
123
|
+
void cxxbridge1$rust_vec$litert$lm$JsonValue$set_len(void*, size_t) {}
|
|
124
|
+
void cxxbridge1$rust_vec$litert$lm$JsonValue$truncate(void*, size_t) {}
|
|
125
|
+
void cxxbridge1$rust_vec$litert$lm$JsonValue$reserve_total(void*, size_t) {}
|
|
126
|
+
void cxxbridge1$rust_vec$litert$lm$JsonValue$drop(void*) {}
|
|
127
|
+
|
|
128
|
+
// --- Box<MinijinjaTemplate> ---
|
|
129
|
+
// Allocate zeroed memory; consistent with free() in drop/dealloc.
|
|
130
|
+
// Allocate zeroed storage for one MinijinjaTemplate (64 bytes, the size the
// sizeof shim in this file reports). Returns null if calloc fails.
void* cxxbridge1$box$litert$lm$MinijinjaTemplate$alloc() {
  return calloc(1, 64);
}
// Release storage obtained from alloc(). `p` is the boxed object itself;
// free(nullptr) is a no-op, so no guard is needed.
void cxxbridge1$box$litert$lm$MinijinjaTemplate$dealloc(void* p) {
  free(p);
}
// Destroy a Box in place. CXX passes the address of the rust::Box (i.e. the
// slot that holds the object pointer), not the object pointer. The previous
// stub called free() on the slot itself, which is not a heap block and leaked
// the real allocation. Free the pointee and clear the slot instead.
void cxxbridge1$box$litert$lm$MinijinjaTemplate$drop(void* p) {
  if (!p) return;
  void** slot = static_cast<void**>(p);
  free(*slot);
  *slot = nullptr;
}
|
|
139
|
+
|
|
140
|
+
// --- Box<JsonValue> ---
|
|
141
|
+
// Allocate zeroed storage for one JsonValue (64 bytes, the size the sizeof
// shim in this file reports). Returns null if calloc fails.
void* cxxbridge1$box$litert$lm$JsonValue$alloc() {
  return calloc(1, 64);
}
// Release storage obtained from alloc(). `p` is the boxed object itself;
// free(nullptr) is a no-op, so no guard is needed.
void cxxbridge1$box$litert$lm$JsonValue$dealloc(void* p) {
  free(p);
}
// Destroy a Box in place. CXX passes the address of the rust::Box (the slot
// holding the object pointer), so free the pointee and clear the slot rather
// than calling free() on the slot's own address.
void cxxbridge1$box$litert$lm$JsonValue$drop(void* p) {
  if (!p) return;
  void** slot = static_cast<void**>(p);
  free(*slot);
  *slot = nullptr;
}
|
|
150
|
+
|
|
151
|
+
// --- MinijinjaTemplate FFI shims ---
|
|
152
|
+
// These are the Rust-side FFI entry points. The C++ mangled wrappers in the
|
|
153
|
+
// Rust .o call INTO these. We must NOT forward back to those wrappers.
|
|
154
|
+
// Reported size of the opaque MinijinjaTemplate. Must stay equal to the 64
// bytes that the Box alloc stub and the template-creating shims calloc.
size_t litert$lm$cxxbridge1$MinijinjaTemplate$operator$sizeof() { return 64; }
|
|
155
|
+
size_t litert$lm$cxxbridge1$MinijinjaTemplate$operator$alignof() { return 8; }
|
|
156
|
+
void litert$lm$cxxbridge1$MinijinjaTemplate$source(const void*, void* out) {
|
|
157
|
+
// Write an empty rust::cxxbridge1::String to out
|
|
158
|
+
if (out) memset(out, 0, 24);
|
|
159
|
+
}
|
|
160
|
+
void litert$lm$cxxbridge1$MinijinjaTemplate$apply(const void*, void* input, void* out) {
|
|
161
|
+
// Return an empty string result
|
|
162
|
+
if (out) memset(out, 0, 24);
|
|
163
|
+
}
|
|
164
|
+
void litert$lm$cxxbridge1$MinijinjaTemplate$clone_template(const void*, void* out) {
|
|
165
|
+
// Write a valid calloc'd pointer so drop() can safely free() it
|
|
166
|
+
if (out) {
|
|
167
|
+
void* cloned = calloc(1, 64);
|
|
168
|
+
memcpy(out, &cloned, sizeof(void*));
|
|
169
|
+
}
|
|
170
|
+
}
|
|
171
|
+
void litert$lm$cxxbridge1$MinijinjaTemplate$get_capabilities(const void*, void* out) {
|
|
172
|
+
if (out) memset(out, 0, 24);
|
|
173
|
+
}
|
|
174
|
+
void litert$lm$cxxbridge1$MinijinjaTemplate$get_error(const void*, void* out) {
|
|
175
|
+
if (out) memset(out, 0, 24);
|
|
176
|
+
}
|
|
177
|
+
|
|
178
|
+
// --- new_minijinja_template ---
|
|
179
|
+
// Creates a Box<MinijinjaTemplate>. Writes a valid calloc'd pointer to `out`
|
|
180
|
+
// so that later Box::drop() can free() it without issues.
|
|
181
|
+
void litert$lm$cxxbridge1$new_minijinja_template(void* input, void* out) {
|
|
182
|
+
if (out) {
|
|
183
|
+
void* tmpl = calloc(1, 64);
|
|
184
|
+
memcpy(out, &tmpl, sizeof(void*));
|
|
185
|
+
}
|
|
186
|
+
}
|
|
187
|
+
|
|
188
|
+
// --- JsonValue FFI shims ---
|
|
189
|
+
// Reported size of the opaque JsonValue. Must stay equal to the 64 bytes
// that the Box<JsonValue> alloc stub callocs.
size_t litert$lm$cxxbridge1$JsonValue$operator$sizeof() { return 64; }
|
|
190
|
+
size_t litert$lm$cxxbridge1$JsonValue$operator$alignof() { return 8; }
|
|
191
|
+
void litert$lm$cxxbridge1$JsonValue$object_get(const void*, void*, void*) {}
|
|
192
|
+
int litert$lm$cxxbridge1$JsonValue$is_null(const void*) { return 1; }
|
|
193
|
+
int litert$lm$cxxbridge1$JsonValue$is_bool(const void*) { return 0; }
|
|
194
|
+
int litert$lm$cxxbridge1$JsonValue$is_number(const void*) { return 0; }
|
|
195
|
+
int litert$lm$cxxbridge1$JsonValue$is_string(const void*) { return 0; }
|
|
196
|
+
int litert$lm$cxxbridge1$JsonValue$is_array(const void*) { return 0; }
|
|
197
|
+
int litert$lm$cxxbridge1$JsonValue$is_object(const void*) { return 0; }
|
|
198
|
+
int litert$lm$cxxbridge1$JsonValue$get_bool(const void*) { return 0; }
|
|
199
|
+
double litert$lm$cxxbridge1$JsonValue$get_number(const void*) { return 0.0; }
|
|
200
|
+
void litert$lm$cxxbridge1$JsonValue$get_string(const void*, void*) {}
|
|
201
|
+
size_t litert$lm$cxxbridge1$JsonValue$array_len(const void*) { return 0; }
|
|
202
|
+
void litert$lm$cxxbridge1$JsonValue$array_get(const void*, size_t, void*) {}
|
|
203
|
+
int litert$lm$cxxbridge1$JsonValue$object_has_key(const void*, void*) { return 0; }
|
|
204
|
+
void litert$lm$cxxbridge1$JsonValue$object_keys(const void*, void*) {}
|
|
205
|
+
|
|
206
|
+
// --- Parser/template FFI shims ---
|
|
207
|
+
void litert$lm$cxxbridge1$parse_fc_expression(void*, void*) {}
|
|
208
|
+
void litert$lm$cxxbridge1$parse_json_expression(void*, void*) {}
|
|
209
|
+
void litert$lm$cxxbridge1$parse_python_expression(void*, void*) {}
|
|
210
|
+
|
|
211
|
+
// --- Tokenizers FFI stubs ---
|
|
212
|
+
void* byte_level_bpe_tokenizers_new_from_str(void*, void*, void*) { return nullptr; }
|
|
213
|
+
void* tokenizers_decode(void*, void*, size_t, int) { return nullptr; }
|
|
214
|
+
void* tokenizers_encode(void*, void*, size_t) { return nullptr; }
|
|
215
|
+
void* tokenizers_encode_batch(void*, void**, size_t, size_t) { return nullptr; }
|
|
216
|
+
void tokenizers_free(void*) {}
|
|
217
|
+
void tokenizers_free_encode_results(void*, size_t) {}
|
|
218
|
+
void* tokenizers_get_decode_str(void*) { return nullptr; }
|
|
219
|
+
int tokenizers_get_vocab_size(void*, int) { return 0; }
|
|
220
|
+
void* tokenizers_id_to_token(void*, int) { return nullptr; }
|
|
221
|
+
void* tokenizers_new_from_str(void*, size_t) { return nullptr; }
|
|
222
|
+
int tokenizers_token_to_id(void*, void*, size_t) { return 0; }
|
|
223
|
+
|
|
224
|
+
} // extern "C"
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
// Copyright 2026 Google LLC.
|
|
2
|
+
//
|
|
3
|
+
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
// you may not use this file except in compliance with the License.
|
|
5
|
+
// You may obtain a copy of the License at
|
|
6
|
+
//
|
|
7
|
+
// http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
//
|
|
9
|
+
// Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
// See the License for the specific language governing permissions and
|
|
13
|
+
// limitations under the License.
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
#include <cstdio>
|
|
17
|
+
|
|
18
|
+
struct LiteRtLmGemmaModelConstraintProvider {
|
|
19
|
+
int dummy;
|
|
20
|
+
};
|
|
21
|
+
|
|
22
|
+
extern "C" {
|
|
23
|
+
|
|
24
|
+
void* LiteRtLmGemmaModelConstraintProvider_Create(
|
|
25
|
+
const char* serialized_sp_model_proto, size_t serialized_sp_model_proto_len,
|
|
26
|
+
const int** stop_token_ids, const size_t* stop_token_lengths,
|
|
27
|
+
size_t num_stop_lists) {
|
|
28
|
+
fprintf(stderr, "\n[LiteRT-LM WARN] Gemma Constraint Provider is"
|
|
29
|
+
" STUBBED/DISABLED.\n");
|
|
30
|
+
fprintf(stderr, "[LiteRT-LM WARN] Any requests for grammar-constrained"
|
|
31
|
+
" decoding will be ignored or fail.\n\n");
|
|
32
|
+
// Return a dummy pointer so that engine creation doesn't fail!
|
|
33
|
+
return new LiteRtLmGemmaModelConstraintProvider{0};
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
void LiteRtLmGemmaModelConstraintProvider_Destroy(void* provider) {
|
|
37
|
+
if (provider) {
|
|
38
|
+
delete static_cast<LiteRtLmGemmaModelConstraintProvider*>(provider);
|
|
39
|
+
}
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
void* LiteRtLmGemmaModelConstraintProvider_CreateConstraintFromTools(
|
|
43
|
+
void* provider, void* tools, void* options) {
|
|
44
|
+
return nullptr;
|
|
45
|
+
}
|
|
46
|
+
} // extern "C"
|
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* llguidance_stubs.c
|
|
3
|
+
*
|
|
4
|
+
* Stub implementations for the llguidance C API used by LiteRT-LM's
|
|
5
|
+
* constrained decoding subsystem (llg_constraint, llg_constraint_provider).
|
|
6
|
+
* The real implementation is a Rust library (llguidance) which is not
|
|
7
|
+
* available in the iOS Bazel build.
|
|
8
|
+
*
|
|
9
|
+
* These stubs return error/null values so that constrained decoding
|
|
10
|
+
* gracefully fails at runtime. Basic LLM inference is unaffected.
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
#include <stddef.h>
|
|
14
|
+
#include <stdint.h>
|
|
15
|
+
|
|
16
|
+
/* Opaque types */
|
|
17
|
+
typedef struct LlgTokenizer LlgTokenizer;
|
|
18
|
+
typedef struct LlgConstraint LlgConstraint;
|
|
19
|
+
typedef struct LlgConstraintInit LlgConstraintInit;
|
|
20
|
+
|
|
21
|
+
/* llg_new_tokenizer: stub that never creates a tokenizer; it sets
 * *out_tokenizer to NULL (when out_tokenizer is non-NULL) and returns -1. */
|
|
22
|
+
int llg_new_tokenizer(void* init, void** out_tokenizer) {
|
|
23
|
+
if (out_tokenizer) *out_tokenizer = NULL;
|
|
24
|
+
return -1; /* error */
|
|
25
|
+
}
|
|
26
|
+
|
|
27
|
+
/* llg_free_tokenizer: release tokenizer */
|
|
28
|
+
void llg_free_tokenizer(void* tokenizer) {
|
|
29
|
+
(void)tokenizer;
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
/* llg_constraint_init_set_defaults: initialize constraint config */
|
|
33
|
+
void llg_constraint_init_set_defaults(void* init, void* tokenizer) {
|
|
34
|
+
(void)init;
|
|
35
|
+
(void)tokenizer;
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
/* llg_new_constraint: create grammar constraint */
|
|
39
|
+
void* llg_new_constraint(void* init, void* grammar) {
|
|
40
|
+
(void)init;
|
|
41
|
+
(void)grammar;
|
|
42
|
+
return NULL;
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
/* llg_new_constraint_json: create JSON schema constraint */
|
|
46
|
+
void* llg_new_constraint_json(void* init, const char* json_schema) {
|
|
47
|
+
(void)init;
|
|
48
|
+
(void)json_schema;
|
|
49
|
+
return NULL;
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
/* llg_new_constraint_regex: create regex constraint */
|
|
53
|
+
void* llg_new_constraint_regex(void* init, const char* regex) {
|
|
54
|
+
(void)init;
|
|
55
|
+
(void)regex;
|
|
56
|
+
return NULL;
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
/* llg_new_constraint_lark: create Lark grammar constraint */
|
|
60
|
+
void* llg_new_constraint_lark(void* init, const char* lark_grammar) {
|
|
61
|
+
(void)init;
|
|
62
|
+
(void)lark_grammar;
|
|
63
|
+
return NULL;
|
|
64
|
+
}
|
|
65
|
+
|
|
66
|
+
/* llg_clone_constraint: duplicate a constraint */
|
|
67
|
+
void* llg_clone_constraint(void* constraint) {
|
|
68
|
+
(void)constraint;
|
|
69
|
+
return NULL;
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
/* llg_compute_mask: compute next token mask */
|
|
73
|
+
int llg_compute_mask(void* constraint, void* result) {
|
|
74
|
+
(void)constraint;
|
|
75
|
+
(void)result;
|
|
76
|
+
return -1; /* error */
|
|
77
|
+
}
|
|
78
|
+
|
|
79
|
+
/* llg_commit_token: commit selected token */
|
|
80
|
+
int llg_commit_token(void* constraint, int32_t token) {
|
|
81
|
+
(void)constraint;
|
|
82
|
+
(void)token;
|
|
83
|
+
return -1; /* error */
|
|
84
|
+
}
|
|
85
|
+
|
|
86
|
+
/* llg_is_stopped: stub that always reports the constraint as stopped (returns 1) */
|
|
87
|
+
int llg_is_stopped(void* constraint) {
|
|
88
|
+
(void)constraint;
|
|
89
|
+
return 1; /* stopped (nothing to do) */
|
|
90
|
+
}
|
|
91
|
+
|
|
92
|
+
/* llg_get_error: get last error message */
|
|
93
|
+
const char* llg_get_error(void* constraint) {
|
|
94
|
+
(void)constraint;
|
|
95
|
+
return "llguidance not available on iOS";
|
|
96
|
+
}
|
|
97
|
+
|
|
98
|
+
/* llg_free_constraint: release constraint */
|
|
99
|
+
void llg_free_constraint(void* constraint) {
|
|
100
|
+
(void)constraint;
|
|
101
|
+
}
|
package/src/hooks.ts
CHANGED
|
@@ -1,51 +1,122 @@
|
|
|
1
|
-
import { useState, useEffect, useRef, useCallback } from "react";
|
|
2
|
-
import {
|
|
1
|
+
import { useState, useEffect, useRef, useCallback, useMemo } from "react";
|
|
2
|
+
import { LLMConfig } from "./index";
|
|
3
3
|
import { createLLM } from "./modelFactory";
|
|
4
|
+
import type { LiteRTLMInstance } from "./modelFactory";
|
|
5
|
+
import type { MemoryTracker, MemoryTrackerSummary } from "./memoryTracker";
|
|
4
6
|
|
|
5
7
|
export interface UseModelConfig extends LLMConfig {
|
|
6
8
|
autoLoad?: boolean;
|
|
9
|
+
/**
|
|
10
|
+
* Enable memory tracking using native ArrayBuffers (v0.35+).
|
|
11
|
+
* When enabled, memory usage is tracked after each inference call
|
|
12
|
+
* using `NitroModules.createNativeArrayBuffer()` for zero-copy storage.
|
|
13
|
+
* @default false
|
|
14
|
+
*/
|
|
15
|
+
enableMemoryTracking?: boolean;
|
|
16
|
+
/**
|
|
17
|
+
* Maximum number of memory snapshots to store.
|
|
18
|
+
* Each snapshot uses 32 bytes of native memory.
|
|
19
|
+
* @default 256
|
|
20
|
+
*/
|
|
21
|
+
maxMemorySnapshots?: number;
|
|
7
22
|
}
|
|
8
23
|
|
|
9
24
|
export interface UseModelResult {
|
|
10
|
-
model:
|
|
25
|
+
model: LiteRTLMInstance | null;
|
|
11
26
|
isReady: boolean;
|
|
12
27
|
isGenerating: boolean;
|
|
13
28
|
downloadProgress: number;
|
|
14
29
|
error: string | null;
|
|
15
30
|
generate: (prompt: string) => Promise<string>;
|
|
16
31
|
reset: () => void;
|
|
17
|
-
|
|
32
|
+
/**
|
|
33
|
+
* Delete the model file. If no fileName is provided, derives it from
|
|
34
|
+
* the URL/path passed to useModel.
|
|
35
|
+
*/
|
|
36
|
+
deleteModel: (fileName?: string) => Promise<void>;
|
|
18
37
|
load: () => Promise<void>;
|
|
38
|
+
/**
|
|
39
|
+
* Memory tracker instance (available when enableMemoryTracking is true).
|
|
40
|
+
* Uses native ArrayBuffers allocated via `NitroModules.createNativeArrayBuffer()`
|
|
41
|
+
* for efficient, zero-copy memory usage tracking.
|
|
42
|
+
*/
|
|
43
|
+
memoryTracker: MemoryTracker | null;
|
|
44
|
+
/**
|
|
45
|
+
* Current memory tracking summary (null if tracking is disabled).
|
|
46
|
+
* Updates automatically after each inference call.
|
|
47
|
+
*/
|
|
48
|
+
memorySummary: MemoryTrackerSummary | null;
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
/**
|
|
52
|
+
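* Extract the last "/"-separated segment of a URL or file path to use as the
* model file name. Falls back to "model.bin" when that segment is empty (for
* example when the input ends in "/"). Query strings and fragments are kept.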
|
|
53
|
+
*/
|
|
54
|
+
function extractFileName(pathOrUrl: string): string {
|
|
55
|
+
return pathOrUrl.split("/").pop() || "model.bin";
|
|
19
56
|
}
|
|
20
57
|
|
|
21
58
|
export function useModel(
|
|
22
59
|
pathOrUrl: string,
|
|
23
60
|
config?: UseModelConfig,
|
|
24
61
|
): UseModelResult {
|
|
25
|
-
const modelRef = useRef<
|
|
62
|
+
const modelRef = useRef<LiteRTLMInstance | null>(null);
|
|
26
63
|
const [isReady, setIsReady] = useState(false);
|
|
27
64
|
const [isGenerating, setIsGenerating] = useState(false);
|
|
28
65
|
const [downloadProgress, setDownloadProgress] = useState(0);
|
|
29
66
|
const [error, setError] = useState<string | null>(null);
|
|
67
|
+
const [memorySummary, setMemorySummary] = useState<MemoryTrackerSummary | null>(null);
|
|
30
68
|
|
|
31
|
-
//
|
|
69
|
+
// Destructure config into primitive values for stable dependency arrays.
|
|
70
|
+
// This prevents infinite re-render loops when consumers pass inline config
|
|
71
|
+
// objects (e.g. useModel(url, { backend: 'cpu' })) without useMemo.
|
|
32
72
|
const autoLoad = config?.autoLoad ?? true;
|
|
73
|
+
const enableMemoryTracking = config?.enableMemoryTracking ?? false;
|
|
74
|
+
const maxMemorySnapshots = config?.maxMemorySnapshots ?? 256;
|
|
75
|
+
const backend = config?.backend;
|
|
76
|
+
const systemPrompt = config?.systemPrompt;
|
|
77
|
+
const maxTokens = config?.maxTokens;
|
|
78
|
+
const temperature = config?.temperature;
|
|
79
|
+
const topK = config?.topK;
|
|
80
|
+
const topP = config?.topP;
|
|
81
|
+
|
|
82
|
+
// Build a stable config object from the destructured primitives
|
|
83
|
+
const nativeConfig = useMemo<LLMConfig>(
|
|
84
|
+
() => ({
|
|
85
|
+
...(backend !== undefined && { backend }),
|
|
86
|
+
...(systemPrompt !== undefined && { systemPrompt }),
|
|
87
|
+
...(maxTokens !== undefined && { maxTokens }),
|
|
88
|
+
...(temperature !== undefined && { temperature }),
|
|
89
|
+
...(topK !== undefined && { topK }),
|
|
90
|
+
...(topP !== undefined && { topP }),
|
|
91
|
+
}),
|
|
92
|
+
[backend, systemPrompt, maxTokens, temperature, topK, topP],
|
|
93
|
+
);
|
|
94
|
+
|
|
95
|
+
/**
|
|
96
|
+
* Refresh memory summary from the tracker's native buffer.
|
|
97
|
+
*/
|
|
98
|
+
const refreshMemorySummary = useCallback(() => {
|
|
99
|
+
if (modelRef.current?.memoryTracker) {
|
|
100
|
+
setMemorySummary(modelRef.current.memoryTracker.getSummary());
|
|
101
|
+
}
|
|
102
|
+
}, []);
|
|
33
103
|
|
|
34
104
|
// Initialize the model instance
|
|
35
105
|
useEffect(() => {
|
|
36
|
-
modelRef.current = createLLM(
|
|
37
|
-
|
|
106
|
+
modelRef.current = createLLM({
|
|
107
|
+
enableMemoryTracking,
|
|
108
|
+
maxMemorySnapshots,
|
|
109
|
+
});
|
|
38
110
|
|
|
39
111
|
// Cleanup on unmount
|
|
40
112
|
return () => {
|
|
41
|
-
isMounted = false;
|
|
42
113
|
try {
|
|
43
114
|
modelRef.current?.close();
|
|
44
115
|
} catch (e) {
|
|
45
116
|
console.warn("Failed to close model", e);
|
|
46
117
|
}
|
|
47
118
|
};
|
|
48
|
-
}, []);
|
|
119
|
+
}, [enableMemoryTracking, maxMemorySnapshots]);
|
|
49
120
|
|
|
50
121
|
const load = useCallback(async () => {
|
|
51
122
|
setIsReady(false);
|
|
@@ -53,36 +124,24 @@ export function useModel(
|
|
|
53
124
|
setDownloadProgress(0);
|
|
54
125
|
|
|
55
126
|
try {
|
|
56
|
-
let modelPath = pathOrUrl;
|
|
57
|
-
|
|
58
|
-
// Handle URL download manually to capture progress
|
|
59
|
-
if (pathOrUrl.startsWith("http://") || pathOrUrl.startsWith("https://")) {
|
|
60
|
-
const fileName = pathOrUrl.split("/").pop() || "model.bin";
|
|
61
|
-
|
|
62
|
-
if (modelRef.current) {
|
|
63
|
-
modelPath = await modelRef.current.downloadModel(
|
|
64
|
-
pathOrUrl,
|
|
65
|
-
fileName,
|
|
66
|
-
(progress) => {
|
|
67
|
-
setDownloadProgress(progress);
|
|
68
|
-
},
|
|
69
|
-
);
|
|
70
|
-
}
|
|
71
|
-
}
|
|
72
|
-
|
|
73
127
|
if (modelRef.current) {
|
|
74
|
-
//
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
128
|
+
// Delegate URL handling + download to the factory's loadModel,
|
|
129
|
+
// passing our progress setter as the callback (eliminates
|
|
130
|
+
// duplicate download logic that was previously in this hook).
|
|
131
|
+
await modelRef.current.loadModel(
|
|
132
|
+
pathOrUrl,
|
|
133
|
+
nativeConfig,
|
|
134
|
+
(progress) => {
|
|
135
|
+
setDownloadProgress(progress);
|
|
136
|
+
},
|
|
137
|
+
);
|
|
79
138
|
setIsReady(true);
|
|
80
139
|
}
|
|
81
140
|
} catch (e: any) {
|
|
82
141
|
setError(e.message || "Failed to load model");
|
|
83
142
|
console.error(e);
|
|
84
143
|
}
|
|
85
|
-
}, [pathOrUrl,
|
|
144
|
+
}, [pathOrUrl, nativeConfig]);
|
|
86
145
|
|
|
87
146
|
useEffect(() => {
|
|
88
147
|
if (autoLoad) {
|
|
@@ -106,6 +165,7 @@ export function useModel(
|
|
|
106
165
|
(token: string, done: boolean) => {
|
|
107
166
|
fullResponse += token;
|
|
108
167
|
if (done) {
|
|
168
|
+
refreshMemorySummary();
|
|
109
169
|
resolve(fullResponse);
|
|
110
170
|
}
|
|
111
171
|
},
|
|
@@ -121,7 +181,7 @@ export function useModel(
|
|
|
121
181
|
setIsGenerating(false);
|
|
122
182
|
}
|
|
123
183
|
},
|
|
124
|
-
[isReady],
|
|
184
|
+
[isReady, refreshMemorySummary],
|
|
125
185
|
);
|
|
126
186
|
|
|
127
187
|
const reset = useCallback(() => {
|
|
@@ -130,13 +190,17 @@ export function useModel(
|
|
|
130
190
|
}
|
|
131
191
|
}, []);
|
|
132
192
|
|
|
133
|
-
const deleteModel = useCallback(
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
193
|
+
const deleteModel = useCallback(
|
|
194
|
+
async (fileName?: string): Promise<void> => {
|
|
195
|
+
if (modelRef.current) {
|
|
196
|
+
const resolvedName = fileName ?? extractFileName(pathOrUrl);
|
|
197
|
+
await modelRef.current.deleteModel(resolvedName);
|
|
198
|
+
setIsReady(false);
|
|
199
|
+
setDownloadProgress(0);
|
|
200
|
+
}
|
|
201
|
+
},
|
|
202
|
+
[pathOrUrl],
|
|
203
|
+
);
|
|
140
204
|
|
|
141
205
|
return {
|
|
142
206
|
model: modelRef.current,
|
|
@@ -148,5 +212,7 @@ export function useModel(
|
|
|
148
212
|
reset,
|
|
149
213
|
deleteModel,
|
|
150
214
|
load,
|
|
215
|
+
memoryTracker: modelRef.current?.memoryTracker ?? null,
|
|
216
|
+
memorySummary,
|
|
151
217
|
};
|
|
152
218
|
}
|
package/src/index.ts
CHANGED
|
@@ -7,6 +7,7 @@ import type {
|
|
|
7
7
|
Backend,
|
|
8
8
|
Role,
|
|
9
9
|
GenerationStats,
|
|
10
|
+
MemoryUsage,
|
|
10
11
|
} from "./specs/LiteRTLM.nitro";
|
|
11
12
|
|
|
12
13
|
export type {
|
|
@@ -16,6 +17,7 @@ export type {
|
|
|
16
17
|
Backend,
|
|
17
18
|
Role,
|
|
18
19
|
GenerationStats,
|
|
20
|
+
MemoryUsage,
|
|
19
21
|
} from "./specs/LiteRTLM.nitro";
|
|
20
22
|
|
|
21
23
|
// Re-export template utilities
|
|
@@ -26,6 +28,15 @@ export {
|
|
|
26
28
|
applyLlamaTemplate,
|
|
27
29
|
} from "./templates";
|
|
28
30
|
|
|
31
|
+
// Re-export memory tracking utilities (uses NitroModules.createNativeArrayBuffer v0.35+)
|
|
32
|
+
export type {
|
|
33
|
+
MemorySnapshot,
|
|
34
|
+
MemoryTracker,
|
|
35
|
+
MemoryTrackerSummary,
|
|
36
|
+
} from "./memoryTracker";
|
|
37
|
+
export { createMemoryTracker, createNativeBuffer } from "./memoryTracker";
|
|
38
|
+
|
|
39
|
+
export type { LiteRTLMInstance } from "./modelFactory";
|
|
29
40
|
export * from "./hooks";
|
|
30
41
|
|
|
31
42
|
/**
|
|
@@ -121,14 +132,10 @@ export function checkBackendSupport(backend: Backend): string | undefined {
|
|
|
121
132
|
return "NPU backend requires compatible hardware (Qualcomm Hexagon, MediaTek APU, etc.). Will fall back to GPU if unavailable.";
|
|
122
133
|
}
|
|
123
134
|
if (Platform.OS === "ios") {
|
|
124
|
-
return "NPU
|
|
135
|
+
return "NPU (Neural Engine) is not yet supported on iOS. Use 'gpu' (Metal) or 'cpu' instead.";
|
|
125
136
|
}
|
|
126
137
|
}
|
|
127
138
|
|
|
128
|
-
if (Platform.OS === "ios" && backend !== "cpu") {
|
|
129
|
-
return "LiteRT-LM iOS is not yet released. Only CPU backend may work via fallback.";
|
|
130
|
-
}
|
|
131
|
-
|
|
132
139
|
return undefined;
|
|
133
140
|
}
|
|
134
141
|
|
|
@@ -151,7 +158,7 @@ export function checkBackendSupport(backend: Backend): string | undefined {
|
|
|
151
158
|
*/
|
|
152
159
|
export function checkMultimodalSupport(): string | undefined {
|
|
153
160
|
if (Platform.OS === "ios") {
|
|
154
|
-
return "Multimodal (image/audio) is
|
|
161
|
+
return "Multimodal (image/audio) is experimental on iOS. Vision and audio executors may not be available in the current build.";
|
|
155
162
|
}
|
|
156
163
|
return undefined;
|
|
157
164
|
}
|