react-native-executorch 0.5.6 → 0.5.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (139) hide show
  1. package/android/libs/classes.jar +0 -0
  2. package/android/src/main/cpp/CMakeLists.txt +23 -14
  3. package/common/rnexecutorch/RnExecutorchInstaller.cpp +4 -21
  4. package/common/rnexecutorch/host_objects/ModelHostObject.h +67 -51
  5. package/common/rnexecutorch/models/llm/LLM.cpp +24 -1
  6. package/common/rnexecutorch/models/llm/LLM.h +4 -1
  7. package/common/rnexecutorch/threads/GlobalThreadPool.h +79 -0
  8. package/common/rnexecutorch/threads/HighPerformanceThreadPool.h +364 -0
  9. package/common/rnexecutorch/threads/utils/ThreadUtils.h +29 -0
  10. package/common/runner/runner.cpp +9 -3
  11. package/common/runner/runner.h +4 -3
  12. package/common/runner/text_token_generator.h +28 -10
  13. package/lib/module/controllers/LLMController.js +21 -2
  14. package/lib/module/controllers/LLMController.js.map +1 -1
  15. package/lib/module/hooks/natural_language_processing/useLLM.js +6 -2
  16. package/lib/module/hooks/natural_language_processing/useLLM.js.map +1 -1
  17. package/lib/module/modules/natural_language_processing/LLMModule.js +4 -2
  18. package/lib/module/modules/natural_language_processing/LLMModule.js.map +1 -1
  19. package/lib/module/types/llm.js.map +1 -1
  20. package/lib/typescript/controllers/LLMController.d.ts +4 -2
  21. package/lib/typescript/controllers/LLMController.d.ts.map +1 -1
  22. package/lib/typescript/hooks/natural_language_processing/useLLM.d.ts.map +1 -1
  23. package/lib/typescript/modules/natural_language_processing/LLMModule.d.ts +3 -2
  24. package/lib/typescript/modules/natural_language_processing/LLMModule.d.ts.map +1 -1
  25. package/lib/typescript/types/llm.d.ts +7 -1
  26. package/lib/typescript/types/llm.d.ts.map +1 -1
  27. package/package.json +3 -1
  28. package/react-native-executorch.podspec +12 -31
  29. package/src/controllers/LLMController.ts +29 -5
  30. package/src/hooks/natural_language_processing/useLLM.ts +15 -1
  31. package/src/modules/natural_language_processing/LLMModule.ts +10 -2
  32. package/src/types/llm.ts +8 -0
  33. package/third-party/android/libs/cpuinfo/arm64-v8a/libcpuinfo.so +0 -0
  34. package/third-party/android/libs/executorch/arm64-v8a/libexecutorch.so +0 -0
  35. package/third-party/android/libs/executorch/x86_64/libexecutorch.so +0 -0
  36. package/third-party/android/libs/pthreadpool/arm64-v8a/libpthreadpool.so +0 -0
  37. package/third-party/android/libs/tokenizers-cpp/arm64-v8a/libsentencepiece.a +0 -0
  38. package/third-party/android/libs/tokenizers-cpp/arm64-v8a/libtokenizers_c.a +0 -0
  39. package/third-party/android/libs/tokenizers-cpp/arm64-v8a/libtokenizers_cpp.a +0 -0
  40. package/third-party/android/libs/tokenizers-cpp/x86_64/libsentencepiece.a +0 -0
  41. package/third-party/android/libs/tokenizers-cpp/x86_64/libtokenizers_c.a +0 -0
  42. package/third-party/android/libs/tokenizers-cpp/x86_64/libtokenizers_cpp.a +0 -0
  43. package/third-party/include/c10/macros/Export.h +2 -86
  44. package/third-party/include/c10/macros/Macros.h +28 -5
  45. package/third-party/include/c10/util/BFloat16-inl.h +1 -4
  46. package/third-party/include/c10/util/BFloat16.h +5 -8
  47. package/third-party/include/c10/util/Half.h +5 -0
  48. package/third-party/include/c10/util/bit_cast.h +1 -1
  49. package/third-party/include/c10/util/complex.h +639 -0
  50. package/third-party/include/c10/util/complex_math.h +399 -0
  51. package/third-party/include/c10/util/complex_utils.h +41 -0
  52. package/third-party/include/c10/util/irange.h +2 -2
  53. package/third-party/include/c10/util/overflows.h +95 -0
  54. package/third-party/include/executorch/ExecuTorchError.h +75 -0
  55. package/third-party/include/executorch/ExecuTorchModule.h +115 -11
  56. package/third-party/include/executorch/ExecuTorchTensor.h +731 -51
  57. package/third-party/include/executorch/ExecuTorchValue.h +61 -9
  58. package/third-party/include/executorch/extension/kernel_util/make_boxed_from_unboxed_functor.h +181 -0
  59. package/third-party/include/executorch/extension/kernel_util/meta_programming.h +108 -0
  60. package/third-party/include/executorch/extension/kernel_util/type_list.h +137 -0
  61. package/third-party/include/executorch/extension/module/bundled_module.h +131 -0
  62. package/third-party/include/executorch/extension/module/module.h +46 -20
  63. package/third-party/include/executorch/extension/threadpool/cpuinfo_utils.h +1 -3
  64. package/third-party/include/executorch/extension/threadpool/threadpool.h +1 -3
  65. package/third-party/include/executorch/extension/threadpool/threadpool_guard.h +35 -0
  66. package/third-party/include/executorch/runtime/backend/backend_execution_context.h +3 -3
  67. package/third-party/include/executorch/runtime/backend/backend_init_context.h +12 -6
  68. package/third-party/include/executorch/runtime/backend/backend_option_context.h +34 -0
  69. package/third-party/include/executorch/runtime/backend/interface.h +70 -9
  70. package/third-party/include/executorch/runtime/backend/options.h +206 -0
  71. package/third-party/include/executorch/runtime/core/evalue.h +19 -25
  72. package/third-party/include/executorch/runtime/core/event_tracer.h +32 -17
  73. package/third-party/include/executorch/runtime/core/event_tracer_hooks.h +23 -14
  74. package/third-party/include/executorch/runtime/core/exec_aten/exec_aten.h +32 -9
  75. package/third-party/include/executorch/runtime/core/exec_aten/util/dim_order_util.h +3 -2
  76. package/third-party/include/executorch/runtime/core/exec_aten/util/scalar_type_util.h +43 -75
  77. package/third-party/include/executorch/runtime/core/exec_aten/util/tensor_util.h +88 -87
  78. package/third-party/include/executorch/runtime/core/function_ref.h +100 -0
  79. package/third-party/include/executorch/runtime/core/named_data_map.h +14 -14
  80. package/third-party/include/executorch/runtime/core/portable_type/c10/c10/macros/Export.h +2 -86
  81. package/third-party/include/executorch/runtime/core/portable_type/c10/c10/macros/Macros.h +28 -5
  82. package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/BFloat16-inl.h +1 -4
  83. package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/BFloat16.h +5 -8
  84. package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/Half.h +5 -0
  85. package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/bit_cast.h +1 -1
  86. package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/complex.h +639 -0
  87. package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/complex_math.h +399 -0
  88. package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/complex_utils.h +41 -0
  89. package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/irange.h +2 -2
  90. package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/overflows.h +95 -0
  91. package/third-party/include/executorch/runtime/core/portable_type/c10/torch/headeronly/macros/Export.h +88 -0
  92. package/third-party/include/executorch/runtime/core/portable_type/complex.h +6 -29
  93. package/third-party/include/executorch/runtime/core/portable_type/tensor_impl.h +20 -0
  94. package/third-party/include/executorch/runtime/core/span.h +4 -0
  95. package/third-party/include/executorch/runtime/core/tag.h +19 -0
  96. package/third-party/include/executorch/runtime/core/tensor_layout.h +2 -2
  97. package/third-party/include/executorch/runtime/executor/method.h +15 -3
  98. package/third-party/include/executorch/runtime/executor/method_meta.h +34 -5
  99. package/third-party/include/executorch/runtime/executor/program.h +3 -4
  100. package/third-party/include/executorch/runtime/executor/pte_data_map.h +9 -8
  101. package/third-party/include/executorch/runtime/executor/tensor_parser.h +14 -13
  102. package/third-party/include/executorch/runtime/kernel/kernel_runtime_context.h +5 -5
  103. package/third-party/include/executorch/runtime/kernel/operator_registry.h +21 -19
  104. package/third-party/include/executorch/runtime/platform/compiler.h +8 -0
  105. package/third-party/include/executorch/runtime/platform/platform.h +126 -0
  106. package/third-party/include/headeronly/macros/Export.h +88 -0
  107. package/third-party/include/tokenizers-cpp/tokenizers_c.h +61 -0
  108. package/third-party/include/torch/headeronly/macros/Export.h +88 -0
  109. package/third-party/ios/ExecutorchLib.xcframework/Info.plist +43 -0
  110. package/third-party/ios/ExecutorchLib.xcframework/ios-arm64/ExecutorchLib.framework/ExecutorchLib +0 -0
  111. package/third-party/ios/ExecutorchLib.xcframework/ios-arm64/ExecutorchLib.framework/Info.plist +0 -0
  112. package/third-party/ios/ExecutorchLib.xcframework/ios-arm64-simulator/ExecutorchLib.framework/ExecutorchLib +0 -0
  113. package/third-party/ios/ExecutorchLib.xcframework/ios-arm64-simulator/ExecutorchLib.framework/Info.plist +0 -0
  114. package/third-party/ios/libs/cpuinfo/libcpuinfo.a +0 -0
  115. package/third-party/ios/libs/pthreadpool/physical-arm64-release/libpthreadpool.a +0 -0
  116. package/third-party/ios/libs/pthreadpool/simulator-arm64-debug/libpthreadpool.a +0 -0
  117. package/ios/libs/executorch/libbackend_coreml_ios.a +0 -0
  118. package/ios/libs/executorch/libbackend_coreml_simulator.a +0 -0
  119. package/ios/libs/executorch/libbackend_mps_ios.a +0 -0
  120. package/ios/libs/executorch/libbackend_mps_simulator.a +0 -0
  121. package/ios/libs/executorch/libbackend_xnnpack_ios.a +0 -0
  122. package/ios/libs/executorch/libbackend_xnnpack_simulator.a +0 -0
  123. package/ios/libs/executorch/libexecutorch_ios.a +0 -0
  124. package/ios/libs/executorch/libexecutorch_simulator.a +0 -0
  125. package/ios/libs/executorch/libkernels_custom_ios.a +0 -0
  126. package/ios/libs/executorch/libkernels_custom_simulator.a +0 -0
  127. package/ios/libs/executorch/libkernels_optimized_ios.a +0 -0
  128. package/ios/libs/executorch/libkernels_optimized_simulator.a +0 -0
  129. package/ios/libs/executorch/libkernels_portable_ios.a +0 -0
  130. package/ios/libs/executorch/libkernels_portable_simulator.a +0 -0
  131. package/ios/libs/executorch/libkernels_quantized_ios.a +0 -0
  132. package/ios/libs/executorch/libkernels_quantized_simulator.a +0 -0
  133. package/third-party/ios/ios.toolchain.cmake +0 -1122
  134. /package/{ios → third-party/ios}/libs/tokenizers-cpp/physical-arm64-release/libsentencepiece.a +0 -0
  135. /package/{ios → third-party/ios}/libs/tokenizers-cpp/physical-arm64-release/libtokenizers_c.a +0 -0
  136. /package/{ios → third-party/ios}/libs/tokenizers-cpp/physical-arm64-release/libtokenizers_cpp.a +0 -0
  137. /package/{ios → third-party/ios}/libs/tokenizers-cpp/simulator-arm64-debug/libsentencepiece.a +0 -0
  138. /package/{ios → third-party/ios}/libs/tokenizers-cpp/simulator-arm64-debug/libtokenizers_c.a +0 -0
  139. /package/{ios → third-party/ios}/libs/tokenizers-cpp/simulator-arm64-debug/libtokenizers_cpp.a +0 -0
@@ -0,0 +1,364 @@
1
+ // HighPerformanceThreadPool.h
2
+ #pragma once
3
+
4
+ #include <algorithm>
5
+ #include <atomic>
6
+ #include <chrono>
7
+ #include <condition_variable>
8
+ #include <fstream>
9
+ #include <functional>
10
+ #include <future>
11
+ #include <memory>
12
+ #include <mutex>
13
+ #include <pthread.h>
14
+ #include <queue>
15
+ #include <ranges>
16
+ #include <sched.h>
17
+ #include <sys/resource.h>
18
+ #include <thread>
19
+ #include <unistd.h>
20
+ #include <vector>
21
+
22
+ #include <executorch/extension/threadpool/cpuinfo_utils.h>
23
+ #include <rnexecutorch/Log.h>
24
+
25
+ #ifdef __APPLE__
26
+ #include <sys/syscall.h>
27
+ #endif
28
+
29
+ #ifdef __ANDROID__
30
+ #include <sys/types.h>
31
+ #endif
32
+
33
+ namespace rnexecutorch::threads {
34
+
35
+ enum class Priority { LOW, NORMAL, HIGH, REALTIME };
36
+
37
+ struct ThreadConfig {
38
+ bool pinToPerformanceCores{true};
39
+ std::string namePrefix{"RN_ET_Worker"};
40
+ };
41
+
42
+ class HighPerformanceThreadPool {
43
+ public:
44
+ explicit HighPerformanceThreadPool(size_t numThreads = 1,
45
+ ThreadConfig cfg = ThreadConfig())
46
+ : config(std::move(cfg)) {
47
+
48
+ #ifdef __ANDROID__
49
+ detectCPUTopology();
50
+ numThreads = std::min(numThreads, performanceCores.size());
51
+ #endif
52
+
53
+ for (size_t i = 0; i < numThreads; i++) {
54
+ workers.emplace_back(&HighPerformanceThreadPool::workerThread, this, i);
55
+ }
56
+
57
+ log(LOG_LEVEL::Debug, "Thread pool initialized with", numThreads,
58
+ "workers.");
59
+ }
60
+
61
+ ~HighPerformanceThreadPool() { shutdown(); }
62
+
63
+ // Submit a task and get a future for the result
64
+ template <typename Func, typename... Args>
65
+ auto submit(Func &&func, Args &&...args)
66
+ -> std::future<decltype(func(args...))> {
67
+ return submitWithPriority(Priority::NORMAL, std::forward<Func>(func),
68
+ std::forward<Args>(args)...);
69
+ }
70
+
71
+ // Submit a task with specific priority
72
+ template <typename Func, typename... Args>
73
+ auto submitWithPriority(Priority priority, Func &&func, Args &&...args)
74
+ -> std::future<decltype(func(args...))> {
75
+
76
+ using ReturnType = decltype(func(args...));
77
+
78
+ // Create a packaged task
79
+ auto boundFunc =
80
+ std::bind(std::forward<Func>(func), std::forward<Args>(args)...);
81
+ auto task = std::make_unique<Task<decltype(boundFunc), ReturnType>>(
82
+ std::move(boundFunc));
83
+ auto future = task->getFuture();
84
+
85
+ // Add to queue
86
+ {
87
+ std::scoped_lock lock(queueMutex);
88
+
89
+ if (!running) {
90
+ throw std::runtime_error("Thread pool is shutting down");
91
+ }
92
+
93
+ WorkItem item(std::move(task), priority,
94
+ std::chrono::steady_clock::now());
95
+
96
+ taskQueue.push(std::move(item));
97
+ }
98
+
99
+ condition.notify_one();
100
+ return future;
101
+ }
102
+
103
+ // Execute a task and wait for result
104
+ template <typename Func, typename... Args>
105
+ auto execute(Func &&func, Args &&...args) -> decltype(func(args...)) {
106
+ auto future = submit(std::forward<Func>(func), std::forward<Args>(args)...);
107
+ return future.get();
108
+ }
109
+
110
+ // Fire and forget task
111
+ template <typename Func, typename... Args>
112
+ void submitDetached(Func &&func, Args &&...args) {
113
+ submit(std::forward<Func>(func), std::forward<Args>(args)...);
114
+ // Future is destroyed, task still runs
115
+ }
116
+
117
+ void shutdown() {
118
+ if (!running.exchange(false)) {
119
+ return;
120
+ }
121
+
122
+ condition.notify_all();
123
+
124
+ for (auto &worker : workers) {
125
+ if (worker.joinable()) {
126
+ worker.join();
127
+ }
128
+ }
129
+ }
130
+
131
+ private:
132
+ // Task wrapper that can hold any callable
133
+ class ITask {
134
+ public:
135
+ virtual ~ITask() = default;
136
+ virtual void execute() = 0;
137
+ };
138
+
139
+ template <typename Func, typename Result> class Task : public ITask {
140
+ public:
141
+ Task(Func &&f) : func(std::forward<Func>(f)) {}
142
+
143
+ void execute() override {
144
+ try {
145
+ if constexpr (std::is_void_v<Result>) {
146
+ func();
147
+ promise.set_value();
148
+ } else {
149
+ promise.set_value(func());
150
+ }
151
+ } catch (...) {
152
+ promise.set_exception(std::current_exception());
153
+ }
154
+ }
155
+
156
+ std::future<Result> getFuture() { return promise.get_future(); }
157
+
158
+ private:
159
+ Func func;
160
+ std::promise<Result> promise;
161
+ };
162
+
163
+ class WorkItem {
164
+ public:
165
+ WorkItem() = default;
166
+ WorkItem(std::unique_ptr<ITask> task, Priority priority,
167
+ std::chrono::steady_clock::time_point enqueueTime)
168
+ : task(std::move(task)), priority(priority), enqueueTime(enqueueTime) {}
169
+
170
+ std::unique_ptr<ITask> task;
171
+
172
+ bool operator<(const WorkItem &other) const {
173
+ return priority != other.priority ? priority < other.priority
174
+ : enqueueTime > other.enqueueTime;
175
+ }
176
+
177
+ private:
178
+ Priority priority;
179
+ std::chrono::steady_clock::time_point enqueueTime;
180
+ };
181
+
182
+ // Thread pool state
183
+ std::vector<std::thread> workers;
184
+ std::priority_queue<WorkItem> taskQueue;
185
+ std::mutex queueMutex;
186
+ std::condition_variable condition;
187
+ std::atomic<bool> running{true};
188
+ std::atomic<size_t> activeWorkers{0};
189
+ std::atomic<size_t> totalTasksProcessed{0};
190
+
191
+ #ifdef __ANDROID__
192
+ // Performance cores
193
+ std::vector<int32_t> performanceCores;
194
+ std::vector<int32_t> efficiencyCores;
195
+ #endif
196
+
197
+ // Configuration
198
+ ThreadConfig config;
199
+
200
+ void detectCPUTopology() {
201
+ #ifdef __ANDROID__
202
+ struct CoreInfo {
203
+ int32_t id;
204
+ int64_t maxFreq;
205
+ };
206
+
207
+ std::vector<CoreInfo> cores;
208
+ const auto numOfCores = std::thread::hardware_concurrency();
209
+
210
+ for (int32_t i = 0; std::cmp_less(i, numOfCores); ++i) {
211
+ std::string path = "/sys/devices/system/cpu/cpu" + std::to_string(i) +
212
+ "/cpufreq/cpuinfo_max_freq";
213
+ std::ifstream file(path);
214
+ if (!file.good()) {
215
+ break;
216
+ }
217
+
218
+ CoreInfo info;
219
+ info.id = i;
220
+ file >> info.maxFreq;
221
+ cores.push_back(info);
222
+ }
223
+
224
+ if (cores.empty()) {
225
+ log(LOG_LEVEL::Debug, "Could not detect CPU topology");
226
+ return;
227
+ }
228
+
229
+ // Sort by frequency
230
+ std::ranges::sort(cores, [](const CoreInfo &a, const CoreInfo &b) {
231
+ return a.maxFreq > b.maxFreq;
232
+ });
233
+
234
+ // Classify cores
235
+ const auto numOfPerfCores =
236
+ ::executorch::extension::cpuinfo::get_num_performant_cores();
237
+
238
+ constexpr float kKiloToGigaRatio = 1e6;
239
+ for (int32_t i = 0; i < cores.size(); ++i) {
240
+ if (i < numOfPerfCores) {
241
+ performanceCores.push_back(cores[i].id);
242
+ log(LOG_LEVEL::Debug, "Performance core:", cores[i].id, "(",
243
+ cores[i].maxFreq / kKiloToGigaRatio, "GHz)");
244
+ } else {
245
+ efficiencyCores.push_back(cores[i].id);
246
+ log(LOG_LEVEL::Debug, "Efficiency core:", cores[i].id, "(",
247
+ cores[i].maxFreq / kKiloToGigaRatio, "GHz)");
248
+ }
249
+ }
250
+ #endif
251
+ }
252
+
253
+ #ifdef __ANDROID__
254
+ inline uint64_t getCurrentThreadId() { return gettid(); }
255
+ #endif
256
+
257
+ inline void setCurrentThreadName(const std::string &name) {
258
+ #ifdef __ANDROID__
259
+ pthread_setname_np(pthread_self(), name.c_str());
260
+ #elif defined(__APPLE__)
261
+ pthread_setname_np(name.c_str());
262
+ #endif
263
+ }
264
+
265
+ void configureThread(uint32_t workerIndex) {
266
+ std::string threadName = config.namePrefix + std::to_string(workerIndex);
267
+ setCurrentThreadName(threadName.c_str());
268
+
269
+ #ifdef __ANDROID__
270
+ if (config.pinToPerformanceCores && !performanceCores.empty()) {
271
+ setCPUAffinity();
272
+ }
273
+ #endif
274
+
275
+ setThreadPriority();
276
+
277
+ log(LOG_LEVEL::Debug, "Worker", workerIndex,
278
+ "configured:", threadName.c_str());
279
+ }
280
+
281
+ void setCPUAffinity() {
282
+ // AFAIK it is not possible on iOS
283
+ #ifdef __ANDROID__
284
+ if (performanceCores.empty()) {
285
+ log(LOG_LEVEL::Error, "No cores specified for affinity setting");
286
+ return;
287
+ }
288
+
289
+ cpu_set_t cpuset;
290
+ CPU_ZERO(&cpuset);
291
+
292
+ for (int32_t core : performanceCores) {
293
+ CPU_SET(core, &cpuset);
294
+ }
295
+
296
+ pid_t tid = getCurrentThreadId();
297
+ log(LOG_LEVEL::Debug, "Thread id", tid);
298
+ if (sched_setaffinity(tid, sizeof(cpuset), &cpuset) == 0) {
299
+ log(LOG_LEVEL::Debug, "Thread pinned to cores:", performanceCores);
300
+ } else {
301
+ log(LOG_LEVEL::Debug, "Failed to set CPU affinity (error:", errno,
302
+ "). Continuing without affinity.");
303
+ }
304
+ #endif
305
+ }
306
+
307
+ void setThreadPriority() {
308
+ // pthread_setschedparam doesn't work on android because permissions reasons
309
+ // and in general does not provide visible improvements on iOS
310
+
311
+ // Set nice value as fallback or additional priority boost
312
+ constexpr int nice_value = 0;
313
+ if (setpriority(PRIO_PROCESS, 0, nice_value) != 0) {
314
+ log(LOG_LEVEL::Debug, "Failed to set nice value");
315
+ } else {
316
+ log(LOG_LEVEL::Debug, "Set nice value", nice_value);
317
+ }
318
+ }
319
+
320
+ void processTask(const WorkItem &item) {
321
+ activeWorkers++;
322
+
323
+ try {
324
+ item.task->execute();
325
+ } catch (const std::exception &e) {
326
+ log(LOG_LEVEL::Error, "Task failed:", e.what());
327
+ activeWorkers--;
328
+ throw;
329
+ }
330
+
331
+ activeWorkers--;
332
+ totalTasksProcessed++;
333
+ }
334
+
335
+ void workerThread(int workerIndex) {
336
+ configureThread(workerIndex);
337
+
338
+ while (running) {
339
+ WorkItem item;
340
+
341
+ {
342
+ std::unique_lock<std::mutex> lock(queueMutex);
343
+ condition.wait(lock, [this] { return !taskQueue.empty() || !running; });
344
+
345
+ if (!running && taskQueue.empty()) {
346
+ break;
347
+ }
348
+
349
+ if (!taskQueue.empty()) {
350
+ item = std::move(const_cast<WorkItem &>(taskQueue.top()));
351
+ taskQueue.pop();
352
+ } else {
353
+ continue;
354
+ }
355
+ }
356
+
357
+ processTask(item);
358
+ }
359
+
360
+ log(LOG_LEVEL::Debug, "Worker", workerIndex, "shutting down");
361
+ }
362
+ };
363
+
364
+ } // namespace rnexecutorch::threads
@@ -0,0 +1,29 @@
1
+ #pragma once
2
+
3
+ #include <executorch/extension/threadpool/cpuinfo_utils.h>
4
+ #include <executorch/extension/threadpool/threadpool.h>
5
+ #include <rnexecutorch/Log.h>
6
+
7
+ namespace rnexecutorch::threads::utils {
8
+
9
+ void unsafeSetupThreadPool(uint32_t num_of_cores = 0) {
10
+ auto num_of_perf_cores =
11
+ ::executorch::extension::cpuinfo::get_num_performant_cores();
12
+ log(LOG_LEVEL::Info, "Detected ", num_of_perf_cores, " performant cores");
13
+ // setting num_of_cores to floor(num_of_perf_cores / 2) + 1) because
14
+ // depending on cpu arch as when possible we want to leave at least 2
15
+ // performant cores for other tasks (setting more actually results in drop
16
+ // of performance). For older devices (i.e. samsung s22) resolves to 3
17
+ // cores, and for newer ones (like OnePlus 12) resolves to 4, which when
18
+ // benchmarked gives highest throughput. For iPhones they usually have 2
19
+ // performance cores
20
+ auto _num_of_cores = num_of_cores
21
+ ? num_of_cores
22
+ : static_cast<uint32_t>(num_of_perf_cores / 2) + 1;
23
+ const auto threadpool = ::executorch::extension::threadpool::get_threadpool();
24
+ threadpool->_unsafe_reset_threadpool(_num_of_cores);
25
+ log(LOG_LEVEL::Info, "Configuring xnnpack for",
26
+ threadpool->get_thread_count(), "threads");
27
+ }
28
+
29
+ } // namespace rnexecutorch::threads::utils
@@ -222,9 +222,6 @@ Error Runner::generate(const std::string &prompt,
222
222
  RUNNER_ET_LOG(warmup, "RSS after prompt prefill: %f MiB (0 if unsupported)",
223
223
  llm::get_rss_bytes() / 1024.0 / 1024.0);
224
224
 
225
- if (cur_decoded != "�") {
226
- wrapped_callback(cur_decoded);
227
- }
228
225
  // start the main loop
229
226
  prompt_tokens_uint64.push_back(cur_token);
230
227
  int64_t num_generated_tokens = ET_UNWRAP(text_token_generator_->generate(
@@ -275,4 +272,13 @@ void Runner::stop() {
275
272
  ET_LOG(Error, "Token generator is not loaded, cannot stop");
276
273
  }
277
274
  }
275
+
276
+ void Runner::set_count_interval(size_t count_interval) {
277
+ text_token_generator_->set_count_interval(count_interval);
278
+ }
279
+
280
+ void Runner::set_time_interval(size_t time_interval) {
281
+ text_token_generator_->set_time_interval(time_interval);
282
+ }
283
+
278
284
  } // namespace example
@@ -43,8 +43,12 @@ public:
43
43
  stats_callback = {},
44
44
  bool echo = true, bool warming = false);
45
45
  ::executorch::runtime::Error warmup(const std::string &prompt);
46
+ void set_count_interval(size_t count_interval);
47
+ void set_time_interval(size_t time_interval);
46
48
  void stop();
47
49
 
50
+ ::executorch::extension::llm::Stats stats_;
51
+
48
52
  private:
49
53
  float temperature_;
50
54
  bool shouldStop_{false};
@@ -59,9 +63,6 @@ private:
59
63
  std::unique_ptr<::executorch::extension::llm::TextPrefiller> text_prefiller_;
60
64
  std::unique_ptr<::executorch::extension::llm::TextTokenGenerator>
61
65
  text_token_generator_;
62
-
63
- // stats
64
- ::executorch::extension::llm::Stats stats_;
65
66
  };
66
67
 
67
68
  } // namespace example
@@ -11,6 +11,7 @@
11
11
 
12
12
  #include "stats.h"
13
13
  #include "text_decoder_runner.h"
14
+ #include <chrono>
14
15
  #include <executorch/extension/tensor/tensor.h>
15
16
  #include <iostream>
16
17
  #include <tokenizers-cpp/tokenizers_cpp.h>
@@ -27,7 +28,7 @@ public:
27
28
  Stats *stats)
28
29
  : tokenizer_(tokenizer), text_decoder_runner_(text_decoder_runner),
29
30
  eos_ids_(std::move(eos_ids)), use_kv_cache_(use_kv_cache),
30
- stats_(stats) {}
31
+ stats_(stats), timestamp_(std::chrono::high_resolution_clock::now()) {}
31
32
 
32
33
  /**
33
34
  * Token generation loop.
@@ -55,12 +56,8 @@ public:
55
56
  uint64_t prev_token;
56
57
  // cache to keep tokens if they were decoded into illegal character
57
58
  std::vector<int32_t> token_cache;
58
- // if first token after prefill was part of multi-token character we need to
59
- // add this to cache here
60
- if (tokenizer_->Decode(
61
- std::vector<int32_t>{static_cast<int32_t>(cur_token)}) == "�") {
62
- token_cache.push_back(static_cast<int32_t>(cur_token));
63
- }
59
+ // add first token after prefill to cache here
60
+ token_cache.push_back(static_cast<int32_t>(cur_token));
64
61
 
65
62
  if (use_kv_cache_) {
66
63
  // hard code these to size 1 as kv cache is locked to static size right
@@ -79,7 +76,7 @@ public:
79
76
  from_blob(&pos, {1}, executorch::aten::ScalarType::Long);
80
77
 
81
78
  should_stop_ = false;
82
-
79
+ timestamp_ = std::chrono::high_resolution_clock::now();
83
80
  // Generate our tokens
84
81
  while (pos < seq_len - 1) {
85
82
  // Run the model
@@ -112,9 +109,19 @@ public:
112
109
  token_cache.push_back(static_cast<int32_t>(cur_token));
113
110
  const std::string cache_decoded = tokenizer_->Decode(token_cache);
114
111
 
115
- if (cache_decoded != "�" && cache_decoded != " �") {
112
+ const auto timeIntervalElapsed =
113
+ std::chrono::duration_cast<std::chrono::milliseconds>(
114
+ std::chrono::high_resolution_clock::now() - timestamp_) >
115
+ time_interval_;
116
+ const auto countIntervalElapsed = token_cache.size() > count_interval_;
117
+ const auto eos_reached = eos_ids_->contains(cur_token);
118
+
119
+ if (!cache_decoded.ends_with("�") &&
120
+ (countIntervalElapsed || timeIntervalElapsed || should_stop_ ||
121
+ eos_reached)) {
116
122
  token_callback(cache_decoded);
117
123
  token_cache.clear();
124
+ timestamp_ = std::chrono::high_resolution_clock::now();
118
125
  }
119
126
 
120
127
  if (should_stop_) {
@@ -122,7 +129,7 @@ public:
122
129
  }
123
130
 
124
131
  // data-dependent terminating condition: we have n_eos_ number of EOS
125
- if (eos_ids_->find(cur_token) != eos_ids_->end()) {
132
+ if (eos_reached) {
126
133
  printf("\n");
127
134
  ET_LOG(Info, "\nReached to the end of generation");
128
135
  break;
@@ -136,11 +143,22 @@ public:
136
143
  */
137
144
  inline void stop() { should_stop_ = true; }
138
145
 
146
+ void set_count_interval(size_t count_interval) {
147
+ count_interval_ = count_interval;
148
+ }
149
+
150
+ void set_time_interval(size_t time_interval) {
151
+ time_interval_ = std::chrono::milliseconds(time_interval);
152
+ }
153
+
139
154
  private:
140
155
  tokenizers::Tokenizer *tokenizer_;
141
156
  TextDecoderRunner *text_decoder_runner_;
142
157
  std::unique_ptr<std::unordered_set<uint64_t>> eos_ids_;
143
158
  bool use_kv_cache_;
159
+ size_t count_interval_{10};
160
+ std::chrono::milliseconds time_interval_{120};
161
+ std::chrono::high_resolution_clock::time_point timestamp_;
144
162
 
145
163
  // state machine
146
164
  bool should_stop_ = false;
@@ -84,7 +84,16 @@ export class LLMController {
84
84
  this.nativeModule = global.loadLLM(modelPath, tokenizerPath);
85
85
  this.isReadyCallback(true);
86
86
  this.onToken = data => {
87
- if (!data || SPECIAL_TOKENS.EOS_TOKEN in this.tokenizerConfig && data === this.tokenizerConfig.eos_token || SPECIAL_TOKENS.PAD_TOKEN in this.tokenizerConfig && data === this.tokenizerConfig.pad_token) {
87
+ if (!data) {
88
+ return;
89
+ }
90
+ if (SPECIAL_TOKENS.EOS_TOKEN in this.tokenizerConfig && data.indexOf(this.tokenizerConfig.eos_token) >= 0) {
91
+ data = data.replaceAll(this.tokenizerConfig.eos_token, '');
92
+ }
93
+ if (SPECIAL_TOKENS.PAD_TOKEN in this.tokenizerConfig && data.indexOf(this.tokenizerConfig.pad_token) >= 0) {
94
+ data = data.replaceAll(this.tokenizerConfig.pad_token, '');
95
+ }
96
+ if (data.length === 0) {
88
97
  return;
89
98
  }
90
99
  this.tokenCallback(data);
@@ -100,13 +109,20 @@ export class LLMController {
100
109
  }
101
110
  configure({
102
111
  chatConfig,
103
- toolsConfig
112
+ toolsConfig,
113
+ generationConfig
104
114
  }) {
105
115
  this.chatConfig = {
106
116
  ...DEFAULT_CHAT_CONFIG,
107
117
  ...chatConfig
108
118
  };
109
119
  this.toolsConfig = toolsConfig;
120
+ if (generationConfig?.outputTokenBatchSize) {
121
+ this.nativeModule.setCountInterval(generationConfig.outputTokenBatchSize);
122
+ }
123
+ if (generationConfig?.batchTimeInterval) {
124
+ this.nativeModule.setTimeInterval(generationConfig.batchTimeInterval);
125
+ }
110
126
 
111
127
  // reset inner state when loading new configuration
112
128
  this.responseCallback('');
@@ -142,6 +158,9 @@ export class LLMController {
142
158
  interrupt() {
143
159
  this.nativeModule.interrupt();
144
160
  }
161
+ getGeneratedTokenCount() {
162
+ return this.nativeModule.getGeneratedTokenCount();
163
+ }
145
164
  async generate(messages, tools) {
146
165
  if (!this._isReady) {
147
166
  throw new Error(getError(ETError.ModuleNotLoaded));
@@ -1 +1 @@
1
- {"version":3,"names":["ResourceFetcher","ETError","getError","Template","DEFAULT_CHAT_CONFIG","readAsStringAsync","SPECIAL_TOKENS","parseToolCall","Logger","LLMController","chatConfig","_response","_isReady","_isGenerating","_messageHistory","constructor","tokenCallback","responseCallback","messageHistoryCallback","isReadyCallback","isGeneratingCallback","undefined","warn","token","response","messageHistory","isReady","isGenerating","load","modelSource","tokenizerSource","tokenizerConfigSource","onDownloadProgressCallback","initialMessageHistory","tokenizersPromise","fetch","modelPromise","tokenizersResults","modelResult","Promise","all","tokenizerPath","tokenizerConfigPath","modelPath","Error","tokenizerConfig","JSON","parse","nativeModule","global","loadLLM","onToken","data","EOS_TOKEN","eos_token","PAD_TOKEN","pad_token","e","setTokenCallback","configure","toolsConfig","delete","ModelGenerating","unload","forward","input","ModuleNotLoaded","generate","interrupt","messages","tools","length","role","renderedChat","applyChatTemplate","tools_in_user_message","add_generation_prompt","sendMessage","message","content","messageHistoryWithPrompt","systemPrompt","slice","contextWindowLength","displayToolCalls","toolCalls","toolCall","executeToolCallback","then","toolResponse","deleteMessage","index","newMessageHistory","templateFlags","chat_template","template","specialTokens","Object","fromEntries","keys","filter","key","map","result","render"],"sourceRoot":"../../../src","sources":["controllers/LLMController.ts"],"mappings":";;AACA,SAASA,eAAe,QAAQ,0BAA0B;AAC1D,SAASC,OAAO,EAAEC,QAAQ,QAAQ,UAAU;AAC5C,SAASC,QAAQ,QAAQ,oBAAoB;AAC7C,SAASC,mBAAmB,QAAQ,0BAA0B;AAC9D,SAASC,iBAAiB,QAAQ,kBAAkB;AACpD,SAIEC,cAAc,QAET,cAAc;AACrB,SAASC,aAAa,QAAQ,cAAc;AAC5C,SAASC,MAAM,QAAQ,kBAAkB;AAEzC,OAAO,MAAMC,aAAa,CAAC;EAEjBC,UAAU,GAAeN,mBAAmB;EAI5CO,SAAS,GAAG,EAAE;EACdC,QAAQ,GAAG,KAAK;EAChBC,aAAa,GAAG,KAAK;EACrBC,eAAe,GAAc,EAAE;;EAEvC;;EAOAC,WAAWA,CAAC;IACVC,aAAa;IACbC,gBAAgB;IAChBC,sBAAsB;IACtBC,eAAe;IACfC;EAOF,CAAC,EAAE;IACD,IAAIH,gBAAgB,KAAKI,SAAS,EAAE;MAClCb,MAAM,CAACc,IAAI,CACT,sEACF,CAAC;IACH;IACA,IAAI,CAACN,aAAa,GAAIO,KAAK,IAAK;MAC9BP,aAAa,GAAGO,KAAK,CAAC;IACxB,CAAC;IACD,IAAI,CAACN,gBAAgB,GAAIO,QAAQ,IAAK;MACpC,IAAI,CAACb,SAAS,GAAGa,QAAQ;MACzBP,gBAAgB,GAAGO,QAAQ,CAAC;IAC9B,CAAC;IACD,IAAI,CAACN,sBAAsB,GAAIO,cAAc,IAAK;MAChD,IAAI,CAACX,eAAe,GAAGW,cAAc;MACrCP,sBAAsB,GAAGO,cAAc,CAAC;IAC1C,CAAC;IACD,IAAI,CAACN,eAAe,GAAIO,OAAO,IAAK;MAClC,IAAI,CAACd,QAAQ,GAAGc,OAAO;MACvBP,eAAe,GAAGO,OAAO,CAAC;IAC5B,CAAC;IACD,IAAI,CAACN,oBAAoB,GAAIO,YAAY,IAAK;MAC5C,IAAI,CAACd,aAAa,GAAGc,YAAY;MACjCP,oBAAoB,GAAGO,YAAY,CAAC;IACtC,CAAC;EACH;EAEA,IAAWH,QAAQA,CAAA,EAAG;IACpB,OAAO,IAAI,CAACb,SAAS;EACvB;EACA,IAAWe,OAAOA,CAAA,EAAG;IACnB,OAAO,IAAI,CAACd,QAAQ;EACtB;EACA,IAAWe,YAAYA,CAAA,EAAG;IACxB,OAAO,IAAI,CAACd,aAAa;EAC3B;EACA,IAAWY,cAAcA,CAAA,EAAG;IAC1B,OAAO,IAAI,CAACX,eAAe;EAC7B;EAEA,MAAac,IAAIA,CAAC;IAChBC,WAAW;IACXC,eAAe;IACfC,qBAAqB;IACrBC;EAMF,CAAC,EAAE;IACD;IACA,IAAI,CAACf,gBAAgB,CAAC,EAAE,CAAC;IACzB,IAAI,CAACC,sBAAsB,CAAC,IAAI,CAACR,UAAU,CAACuB,qBAAqB,CAAC;IAClE,IAAI,CAACb,oBAAoB,CAAC,KAAK,CAAC;IAChC,IAAI,CAACD,eAAe,CAAC,KAAK,CAAC;IAE3B,IAAI;MACF,MAAMe,iBAAiB,GAAGlC,eAAe,CAACmC,KAAK,CAC7Cd,SAAS,EACTS,eAAe,EACfC,qBACF,CAAC;MAED,MAAMK,YAAY,GAAGpC,eAAe,CAACmC,KAAK,CACxCH,0BAA0B,EAC1BH,WACF,CAAC;MAED,MAAM,CAACQ,iBAAiB,EAAEC,WAAW,CAAC,GAAG,MAAMC,OAAO,CAACC,GAAG,CAAC,CACzDN,iBAAiB,EACjBE,YAAY,CACb,CAAC;MAEF,MAAMK,aAAa,GAAGJ,iBAAiB,GAAG,CAAC,CAAC;MAC5C,MAAMK,mBAAmB,GAAGL,iBAAiB,GAAG,CAAC,CAAC;MAClD,MAAMM,SAAS,GAAGL,WAAW,GAAG,CAAC,CAAC;MAElC,IAAI,CAACG,aAAa,IAAI,CAACC,mBAAmB,IAAI,CAACC,SAAS,EAAE;QACxD,MAAM,IAAIC,KAAK,CAAC,uBAAuB,CAAC;MAC1C;MAEA,IAAI,CAACC,eAAe,GAAGC,IAAI,CAACC,KAAK,CAC/B,MAAM1C,iBAAiB,CAAC,SAAS,GAAGqC,mBAAoB,CAC1D,CAAC;MACD,IAAI,CAACM,YAAY,GAAGC,MAAM,CAACC,OAAO,CAACP,SAAS,EAAEF,aAAa,CAAC;MAC5D,IAAI,CAACtB,eAAe,CAAC,IAAI,CAAC;MAC1B,IAAI,CAACgC,OAAO,GAAIC,IAAY,IAAK;QAC/B,IACE,CAACA,IAAI,IACJ9C,cAAc,CAAC+C,SAAS,IAAI,IAAI,CAACR,eAAe,IAC/CO,IAAI,KAAK,IAAI,CAACP,eAAe,CAACS,SAAU,IACzChD,cAAc,CAACiD,SAAS,IAAI,IAAI,CAACV,eAAe,IAC/CO,IAAI,KAAK,IAAI,CAACP,eAAe,CAACW,SAAU,EAC1C;UACA;QACF;QAEA,IAAI,CAACxC,aAAa,CAACoC,IAAI,CAAC;QACxB,IAAI,CAACnC,gBAAgB,CAAC,IAAI,CAACN,SAAS,GAAGyC,IAAI,CAAC;MAC9C,CAAC;IACH,CAAC,CAAC,OAAOK,CAAC,EAAE;MACV,IAAI,CAACtC,eAAe,CAAC,KAAK,CAAC;MAC3B,MAAM,IAAIyB,KAAK,CAAC1C,QAAQ,CAACuD,CAAC,CAAC,CAAC;IAC9B;EACF;EAEOC,gBAAgBA,CAAC1C,aAAsC,EAAE;IAC9D,IAAI,CAACA,aAAa,GAAGA,aAAa;EACpC;EAEO2C,SAASA,CAAC;IACfjD,UAAU;IACVkD;EAIF,CAAC,EAAE;IACD,IAAI,CAAClD,UAAU,GAAG;MAAE,GAAGN,mBAAmB;MAAE,GAAGM;IAAW,CAAC;IAC3D,IAAI,CAACkD,WAAW,GAAGA,WAAW;;IAE9B;IACA,IAAI,CAAC3C,gBAAgB,CAAC,EAAE,CAAC;IACzB,IAAI,CAACC,sBAAsB,CAAC,IAAI,CAACR,UAAU,CAACuB,qBAAqB,CAAC;IAClE,IAAI,CAACb,oBAAoB,CAAC,KAAK,CAAC;EAClC;EAEOyC,MAAMA,CAAA,EAAG;IACd,IAAI,IAAI,CAAChD,aAAa,EAAE;MACtB,MAAM,IAAI+B,KAAK,CACb1C,QAAQ,CAACD,OAAO,CAAC6D,eAAe,CAAC,GAC/B,+DACJ,CAAC;IACH;IACA,IAAI,CAACX,OAAO,GAAG,MAAM,CAAC,CAAC;IACvB,IAAI,CAACH,YAAY,CAACe,MAAM,CAAC,CAAC;IAC1B,IAAI,CAAC5C,eAAe,CAAC,KAAK,CAAC;IAC3B,IAAI,CAACC,oBAAoB,CAAC,KAAK,CAAC;EAClC;EAEA,MAAa4C,OAAOA,CAACC,KAAa,EAAE;IAClC,IAAI,CAAC,IAAI,CAACrD,QAAQ,EAAE;MAClB,MAAM,IAAIgC,KAAK,CAAC1C,QAAQ,CAACD,OAAO,CAACiE,eAAe,CAAC,CAAC;IACpD;IACA,IAAI,IAAI,CAACrD,aAAa,EAAE;MACtB,MAAM,IAAI+B,KAAK,CAAC1C,QAAQ,CAACD,OAAO,CAAC6D,eAAe,CAAC,CAAC;IACpD;IACA,IAAI;MACF,IAAI,CAAC7C,gBAAgB,CAAC,EAAE,CAAC;MACzB,IAAI,CAACG,oBAAoB,CAAC,IAAI,CAAC;MAC/B,MAAM,IAAI,CAAC4B,YAAY,CAACmB,QAAQ,CAACF,KAAK,EAAE,IAAI,CAACd,OAAO,CAAC;IACvD,CAAC,CAAC,OAAOM,CAAC,EAAE;MACV,MAAM,IAAIb,KAAK,CAAC1C,QAAQ,CAACuD,CAAC,CAAC,CAAC;IAC9B,CAAC,SAAS;MACR,IAAI,CAACrC,oBAAoB,CAAC,KAAK,CAAC;IAClC;EACF;EAEOgD,SAASA,CAAA,EAAG;IACjB,IAAI,CAACpB,YAAY,CAACoB,SAAS,CAAC,CAAC;EAC/B;EAEA,MAAaD,QAAQA,CAACE,QAAmB,EAAEC,KAAiB,EAAE;IAC5D,IAAI,CAAC,IAAI,CAAC1D,QAAQ,EAAE;MAClB,MAAM,IAAIgC,KAAK,CAAC1C,QAAQ,CAACD,OAAO,CAACiE,eAAe,CAAC,CAAC;IACpD;IACA,IAAIG,QAAQ,CAACE,MAAM,KAAK,CAAC,EAAE;MACzB,MAAM,IAAI3B,KAAK,CAAC,yBAAyB,CAAC;IAC5C;IACA,IAAIyB,QAAQ,CAAC,CAAC,CAAC,IAAIA,QAAQ,CAAC,CAAC,CAAC,CAACG,IAAI,KAAK,QAAQ,EAAE;MAChDhE,MAAM,CAACc,IAAI,CACT,0LACF,CAAC;IACH;IAEA,MAAMmD,YAAoB,GAAG,IAAI,CAACC,iBAAiB,CACjDL,QAAQ,EACR,IAAI,CAACxB,eAAe,EACpByB,KAAK;IACL;IACA;MAAEK,qBAAqB,EAAE,KAAK;MAAEC,qBAAqB,EAAE;IAAK,CAC9D,CAAC;IAED,MAAM,IAAI,CAACZ,OAAO,CAACS,YAAY,CAAC;EAClC;EAEA,MAAaI,WAAWA,CAACC,OAAe,EAAE;IACxC,IAAI,CAAC5D,sBAAsB,CAAC,CAC1B,GAAG,IAAI,CAACJ,eAAe,EACvB;MAAEiE,OAAO,EAAED,OAAO;MAAEN,IAAI,EAAE;IAAO,CAAC,CACnC,CAAC;IAEF,MAAMQ,wBAAmC,GAAG,CAC1C;MAAED,OAAO,EAAE,IAAI,CAACrE,UAAU,CAACuE,YAAY;MAAET,IAAI,EAAE;IAAS,CAAC,EACzD,GAAG,IAAI,CAAC1D,eAAe,CAACoE,KAAK,CAAC,CAAC,IAAI,CAACxE,UAAU,CAACyE,mBAAmB,CAAC,CACpE;IAED,MAAM,IAAI,CAAChB,QAAQ,CAACa,wBAAwB,EAAE,IAAI,CAACpB,WAAW,EAAEU,KAAK,CAAC;IAEtE,IAAI,CAAC,IAAI,CAACV,WAAW,IAAI,IAAI,CAACA,WAAW,CAACwB,gBAAgB,EAAE;MAC1D,IAAI,CAAClE,sBAAsB,CAAC,CAC1B,GAAG,IAAI,CAACJ,eAAe,EACvB;QAAEiE,OAAO,EAAE,IAAI,CAACpE,SAAS;QAAE6D,IAAI,EAAE;MAAY,CAAC,CAC/C,CAAC;IACJ;IACA,IAAI,CAAC,IAAI,CAACZ,WAAW,EAAE;MACrB;IACF;IAEA,MAAMyB,SAAS,GAAG9E,aAAa,CAAC,IAAI,CAACI,SAAS,CAAC;IAE/C,KAAK,MAAM2E,QAAQ,IAAID,SAAS,EAAE;MAChC,IAAI,CAACzB,WAAW,CACb2B,mBAAmB,CAACD,QAAQ,CAAC,CAC7BE,IAAI,CAAEC,YAA2B,IAAK;QACrC,IAAIA,YAAY,EAAE;UAChB,IAAI,CAACvE,sBAAsB,CAAC,CAC1B,GAAG,IAAI,CAACJ,eAAe,EACvB;YAAEiE,OAAO,EAAEU,YAAY;YAAEjB,IAAI,EAAE;UAAY,CAAC,CAC7C,CAAC;QACJ;MACF,CAAC,CAAC;IACN;EACF;EAEOkB,aAAaA,CAACC,KAAa,EAAE;IAClC;IACA;IACA,MAAMC,iBAAiB,GAAG,IAAI,CAAC9E,eAAe,CAACoE,KAAK,CAAC,CAAC,EAAES,KAAK,CAAC;IAE9D,IAAI,CAACzE,sBAAsB,CAAC0E,iBAAiB,CAAC;EAChD;EAEQlB,iBAAiBA,CACvBL,QAAmB,EACnBxB,eAAoB,EACpByB,KAAiB,EACjBuB,aAAsB,EACd;IACR,IAAI,CAAChD,eAAe,CAACiD,aAAa,EAAE;MAClC,MAAMlD,KAAK,CAAC,gDAAgD,CAAC;IAC/D;IACA,MAAMmD,QAAQ,GAAG,IAAI5F,QAAQ,CAAC0C,eAAe,CAACiD,aAAa,CAAC;IAE5D,MAAME,aAAa,GAAGC,MAAM,CAACC,WAAW,CACtCD,MAAM,CAACE,IAAI,CAAC7F,cAAc,CAAC,CACxB8F,MAAM,CAAEC,GAAG,IAAKA,GAAG,IAAIxD,eAAe,CAAC,CACvCyD,GAAG,CAAED,GAAG,IAAK,CAACA,GAAG,EAAExD,eAAe,CAACwD,GAAG,CAAC,CAAC,CAC7C,CAAC;IAED,MAAME,MAAM,GAAGR,QAAQ,CAACS,MAAM,CAAC;MAC7BnC,QAAQ;MACRC,KAAK;MACL,GAAGuB,aAAa;MAChB,GAAGG;IACL,CAAC,CAAC;IACF,OAAOO,MAAM;EACf;AACF","ignoreList":[]}
1
+ {"version":3,"names":["ResourceFetcher","ETError","getError","Template","DEFAULT_CHAT_CONFIG","readAsStringAsync","SPECIAL_TOKENS","parseToolCall","Logger","LLMController","chatConfig","_response","_isReady","_isGenerating","_messageHistory","constructor","tokenCallback","responseCallback","messageHistoryCallback","isReadyCallback","isGeneratingCallback","undefined","warn","token","response","messageHistory","isReady","isGenerating","load","modelSource","tokenizerSource","tokenizerConfigSource","onDownloadProgressCallback","initialMessageHistory","tokenizersPromise","fetch","modelPromise","tokenizersResults","modelResult","Promise","all","tokenizerPath","tokenizerConfigPath","modelPath","Error","tokenizerConfig","JSON","parse","nativeModule","global","loadLLM","onToken","data","EOS_TOKEN","indexOf","eos_token","replaceAll","PAD_TOKEN","pad_token","length","e","setTokenCallback","configure","toolsConfig","generationConfig","outputTokenBatchSize","setCountInterval","batchTimeInterval","setTimeInterval","delete","ModelGenerating","unload","forward","input","ModuleNotLoaded","generate","interrupt","getGeneratedTokenCount","messages","tools","role","renderedChat","applyChatTemplate","tools_in_user_message","add_generation_prompt","sendMessage","message","content","messageHistoryWithPrompt","systemPrompt","slice","contextWindowLength","displayToolCalls","toolCalls","toolCall","executeToolCallback","then","toolResponse","deleteMessage","index","newMessageHistory","templateFlags","chat_template","template","specialTokens","Object","fromEntries","keys","filter","key","map","result","render"],"sourceRoot":"../../../src","sources":["controllers/LLMController.ts"],"mappings":";;AACA,SAASA,eAAe,QAAQ,0BAA0B;AAC1D,SAASC,OAAO,EAAEC,QAAQ,QAAQ,UAAU;AAC5C,SAASC,QAAQ,QAAQ,oBAAoB;AAC7C,SAASC,mBAAmB,QAAQ,0BAA0B;AAC9D,SAASC,iBAAiB,QAAQ,kBAAkB;AACpD,SAKEC,cAAc,QAET,cAAc;AACrB,SAASC,aAAa,QAAQ,cAAc;AAC5C,SAASC,MAAM,QAAQ,kBAAkB;AAEzC,OAAO,MAAMC,aAAa,CAAC;EAEjBC,UAAU,GAAeN,mBAAmB;EAI5CO,SAAS,GAAG,EAAE;EACdC,QAAQ,GAAG,KAAK;EAChBC,aAAa,GAAG,KAAK;EACrBC,eAAe,GAAc,EAAE;;EAEvC;;EAOAC,WAAWA,CAAC;IACVC,aAAa;IACbC,gBAAgB;IAChBC,sBAAsB;IACtBC,eAAe;IACfC;EAOF,CAAC,EAAE;IACD,IAAIH,gBAAgB,KAAKI,SAAS,EAAE;MAClCb,MAAM,CAACc,IAAI,CACT,sEACF,CAAC;IACH;IACA,IAAI,CAACN,aAAa,GAAIO,KAAK,IAAK;MAC9BP,aAAa,GAAGO,KAAK,CAAC;IACxB,CAAC;IACD,IAAI,CAACN,gBAAgB,GAAIO,QAAQ,IAAK;MACpC,IAAI,CAACb,SAAS,GAAGa,QAAQ;MACzBP,gBAAgB,GAAGO,QAAQ,CAAC;IAC9B,CAAC;IACD,IAAI,CAACN,sBAAsB,GAAIO,cAAc,IAAK;MAChD,IAAI,CAACX,eAAe,GAAGW,cAAc;MACrCP,sBAAsB,GAAGO,cAAc,CAAC;IAC1C,CAAC;IACD,IAAI,CAACN,eAAe,GAAIO,OAAO,IAAK;MAClC,IAAI,CAACd,QAAQ,GAAGc,OAAO;MACvBP,eAAe,GAAGO,OAAO,CAAC;IAC5B,CAAC;IACD,IAAI,CAACN,oBAAoB,GAAIO,YAAY,IAAK;MAC5C,IAAI,CAACd,aAAa,GAAGc,YAAY;MACjCP,oBAAoB,GAAGO,YAAY,CAAC;IACtC,CAAC;EACH;EAEA,IAAWH,QAAQA,CAAA,EAAG;IACpB,OAAO,IAAI,CAACb,SAAS;EACvB;EACA,IAAWe,OAAOA,CAAA,EAAG;IACnB,OAAO,IAAI,CAACd,QAAQ;EACtB;EACA,IAAWe,YAAYA,CAAA,EAAG;IACxB,OAAO,IAAI,CAACd,aAAa;EAC3B;EACA,IAAWY,cAAcA,CAAA,EAAG;IAC1B,OAAO,IAAI,CAACX,eAAe;EAC7B;EAEA,MAAac,IAAIA,CAAC;IAChBC,WAAW;IACXC,eAAe;IACfC,qBAAqB;IACrBC;EAMF,CAAC,EAAE;IACD;IACA,IAAI,CAACf,gBAAgB,CAAC,EAAE,CAAC;IACzB,IAAI,CAACC,sBAAsB,CAAC,IAAI,CAACR,UAAU,CAACuB,qBAAqB,CAAC;IAClE,IAAI,CAACb,oBAAoB,CAAC,KAAK,CAAC;IAChC,IAAI,CAACD,eAAe,CAAC,KAAK,CAAC;IAE3B,IAAI;MACF,MAAMe,iBAAiB,GAAGlC,eAAe,CAACmC,KAAK,CAC7Cd,SAAS,EACTS,eAAe,EACfC,qBACF,CAAC;MAED,MAAMK,YAAY,GAAGpC,eAAe,CAACmC,KAAK,CACxCH,0BAA0B,EAC1BH,WACF,CAAC;MAED,MAAM,CAACQ,iBAAiB,EAAEC,WAAW,CAAC,GAAG,MAAMC,OAAO,CAACC,GAAG,CAAC,CACzDN,iBAAiB,EACjBE,YAAY,CACb,CAAC;MAEF,MAAMK,aAAa,GAAGJ,iBAAiB,GAAG,CAAC,CAAC;MAC5C,MAAMK,mBAAmB,GAAGL,iBAAiB,GAAG,CAAC,CAAC;MAClD,MAAMM,SAAS,GAAGL,WAAW,GAAG,CAAC,CAAC;MAElC,IAAI,CAACG,aAAa,IAAI,CAACC,mBAAmB,IAAI,CAACC,SAAS,EAAE;QACxD,MAAM,IAAIC,KAAK,CAAC,uBAAuB,CAAC;MAC1C;MAEA,IAAI,CAACC,eAAe,GAAGC,IAAI,CAACC,KAAK,CAC/B,MAAM1C,iBAAiB,CAAC,SAAS,GAAGqC,mBAAoB,CAC1D,CAAC;MACD,IAAI,CAACM,YAAY,GAAGC,MAAM,CAACC,OAAO,CAACP,SAAS,EAAEF,aAAa,CAAC;MAC5D,IAAI,CAACtB,eAAe,CAAC,IAAI,CAAC;MAC1B,IAAI,CAACgC,OAAO,GAAIC,IAAY,IAAK;QAC/B,IAAI,CAACA,IAAI,EAAE;UACT;QACF;QAEA,IACE9C,cAAc,CAAC+C,SAAS,IAAI,IAAI,CAACR,eAAe,IAChDO,IAAI,CAACE,OAAO,CAAC,IAAI,CAACT,eAAe,CAACU,SAAS,CAAC,IAAI,CAAC,EACjD;UACAH,IAAI,GAAGA,IAAI,CAACI,UAAU,CAAC,IAAI,CAACX,eAAe,CAACU,SAAS,EAAE,EAAE,CAAC;QAC5D;QACA,IACEjD,cAAc,CAACmD,SAAS,IAAI,IAAI,CAACZ,eAAe,IAChDO,IAAI,CAACE,OAAO,CAAC,IAAI,CAACT,eAAe,CAACa,SAAS,CAAC,IAAI,CAAC,EACjD;UACAN,IAAI,GAAGA,IAAI,CAACI,UAAU,CAAC,IAAI,CAACX,eAAe,CAACa,SAAS,EAAE,EAAE,CAAC;QAC5D;QACA,IAAIN,IAAI,CAACO,MAAM,KAAK,CAAC,EAAE;UACrB;QACF;QAEA,IAAI,CAAC3C,aAAa,CAACoC,IAAI,CAAC;QACxB,IAAI,CAACnC,gBAAgB,CAAC,IAAI,CAACN,SAAS,GAAGyC,IAAI,CAAC;MAC9C,CAAC;IACH,CAAC,CAAC,OAAOQ,CAAC,EAAE;MACV,IAAI,CAACzC,eAAe,CAAC,KAAK,CAAC;MAC3B,MAAM,IAAIyB,KAAK,CAAC1C,QAAQ,CAAC0D,CAAC,CAAC,CAAC;IAC9B;EACF;EAEOC,gBAAgBA,CAAC7C,aAAsC,EAAE;IAC9D,IAAI,CAACA,aAAa,GAAGA,aAAa;EACpC;EAEO8C,SAASA,CAAC;IACfpD,UAAU;IACVqD,WAAW;IACXC;EAKF,CAAC,EAAE;IACD,IAAI,CAACtD,UAAU,GAAG;MAAE,GAAGN,mBAAmB;MAAE,GAAGM;IAAW,CAAC;IAC3D,IAAI,CAACqD,WAAW,GAAGA,WAAW;IAE9B,IAAIC,gBAAgB,EAAEC,oBAAoB,EAAE;MAC1C,IAAI,CAACjB,YAAY,CAACkB,gBAAgB,CAACF,gBAAgB,CAACC,oBAAoB,CAAC;IAC3E;IACA,IAAID,gBAAgB,EAAEG,iBAAiB,EAAE;MACvC,IAAI,CAACnB,YAAY,CAACoB,eAAe,CAACJ,gBAAgB,CAACG,iBAAiB,CAAC;IACvE;;IAEA;IACA,IAAI,CAAClD,gBAAgB,CAAC,EAAE,CAAC;IACzB,IAAI,CAACC,sBAAsB,CAAC,IAAI,CAACR,UAAU,CAACuB,qBAAqB,CAAC;IAClE,IAAI,CAACb,oBAAoB,CAAC,KAAK,CAAC;EAClC;EAEOiD,MAAMA,CAAA,EAAG;IACd,IAAI,IAAI,CAACxD,aAAa,EAAE;MACtB,MAAM,IAAI+B,KAAK,CACb1C,QAAQ,CAACD,OAAO,CAACqE,eAAe,CAAC,GAC/B,+DACJ,CAAC;IACH;IACA,IAAI,CAACnB,OAAO,GAAG,MAAM,CAAC,CAAC;IACvB,IAAI,CAACH,YAAY,CAACuB,MAAM,CAAC,CAAC;IAC1B,IAAI,CAACpD,eAAe,CAAC,KAAK,CAAC;IAC3B,IAAI,CAACC,oBAAoB,CAAC,KAAK,CAAC;EAClC;EAEA,MAAaoD,OAAOA,CAACC,KAAa,EAAE;IAClC,IAAI,CAAC,IAAI,CAAC7D,QAAQ,EAAE;MAClB,MAAM,IAAIgC,KAAK,CAAC1C,QAAQ,CAACD,OAAO,CAACyE,eAAe,CAAC,CAAC;IACpD;IACA,IAAI,IAAI,CAAC7D,aAAa,EAAE;MACtB,MAAM,IAAI+B,KAAK,CAAC1C,QAAQ,CAACD,OAAO,CAACqE,eAAe,CAAC,CAAC;IACpD;IACA,IAAI;MACF,IAAI,CAACrD,gBAAgB,CAAC,EAAE,CAAC;MACzB,IAAI,CAACG,oBAAoB,CAAC,IAAI,CAAC;MAC/B,MAAM,IAAI,CAAC4B,YAAY,CAAC2B,QAAQ,CAACF,KAAK,EAAE,IAAI,CAACtB,OAAO,CAAC;IACvD,CAAC,CAAC,OAAOS,CAAC,EAAE;MACV,MAAM,IAAIhB,KAAK,CAAC1C,QAAQ,CAAC0D,CAAC,CAAC,CAAC;IAC9B,CAAC,SAAS;MACR,IAAI,CAACxC,oBAAoB,CAAC,KAAK,CAAC;IAClC;EACF;EAEOwD,SAASA,CAAA,EAAG;IACjB,IAAI,CAAC5B,YAAY,CAAC4B,SAAS,CAAC,CAAC;EAC/B;EAEOC,sBAAsBA,CAAA,EAAW;IACtC,OAAO,IAAI,CAAC7B,YAAY,CAAC6B,sBAAsB,CAAC,CAAC;EACnD;EAEA,MAAaF,QAAQA,CAACG,QAAmB,EAAEC,KAAiB,EAAE;IAC5D,IAAI,CAAC,IAAI,CAACnE,QAAQ,EAAE;MAClB,MAAM,IAAIgC,KAAK,CAAC1C,QAAQ,CAACD,OAAO,CAACyE,eAAe,CAAC,CAAC;IACpD;IACA,IAAII,QAAQ,CAACnB,MAAM,KAAK,CAAC,EAAE;MACzB,MAAM,IAAIf,KAAK,CAAC,yBAAyB,CAAC;IAC5C;IACA,IAAIkC,QAAQ,CAAC,CAAC,CAAC,IAAIA,QAAQ,CAAC,CAAC,CAAC,CAACE,IAAI,KAAK,QAAQ,EAAE;MAChDxE,MAAM,CAACc,IAAI,CACT,0LACF,CAAC;IACH;IAEA,MAAM2D,YAAoB,GAAG,IAAI,CAACC,iBAAiB,CACjDJ,QAAQ,EACR,IAAI,CAACjC,eAAe,EACpBkC,KAAK;IACL;IACA;MAAEI,qBAAqB,EAAE,KAAK;MAAEC,qBAAqB,EAAE;IAAK,CAC9D,CAAC;IAED,MAAM,IAAI,CAACZ,OAAO,CAACS,YAAY,CAAC;EAClC;EAEA,MAAaI,WAAWA,CAACC,OAAe,EAAE;IACxC,IAAI,CAACpE,sBAAsB,CAAC,CAC1B,GAAG,IAAI,CAACJ,eAAe,EACvB;MAAEyE,OAAO,EAAED,OAAO;MAAEN,IAAI,EAAE;IAAO,CAAC,CACnC,CAAC;IAEF,MAAMQ,wBAAmC,GAAG,CAC1C;MAAED,OAAO,EAAE,IAAI,CAAC7E,UAAU,CAAC+E,YAAY;MAAET,IAAI,EAAE;IAAS,CAAC,EACzD,GAAG,IAAI,CAAClE,eAAe,CAAC4E,KAAK,CAAC,CAAC,IAAI,CAAChF,UAAU,CAACiF,mBAAmB,CAAC,CACpE;IAED,MAAM,IAAI,CAAChB,QAAQ,CAACa,wBAAwB,EAAE,IAAI,CAACzB,WAAW,EAAEgB,KAAK,CAAC;IAEtE,IAAI,CAAC,IAAI,CAAChB,WAAW,IAAI,IAAI,CAACA,WAAW,CAAC6B,gBAAgB,EAAE;MAC1D,IAAI,CAAC1E,sBAAsB,CAAC,CAC1B,GAAG,IAAI,CAACJ,eAAe,EACvB;QAAEyE,OAAO,EAAE,IAAI,CAAC5E,SAAS;QAAEqE,IAAI,EAAE;MAAY,CAAC,CAC/C,CAAC;IACJ;IACA,IAAI,CAAC,IAAI,CAACjB,WAAW,EAAE;MACrB;IACF;IAEA,MAAM8B,SAAS,GAAGtF,aAAa,CAAC,IAAI,CAACI,SAAS,CAAC;IAE/C,KAAK,MAAMmF,QAAQ,IAAID,SAAS,EAAE;MAChC,IAAI,CAAC9B,WAAW,CACbgC,mBAAmB,CAACD,QAAQ,CAAC,CAC7BE,IAAI,CAAEC,YAA2B,IAAK;QACrC,IAAIA,YAAY,EAAE;UAChB,IAAI,CAAC/E,sBAAsB,CAAC,CAC1B,GAAG,IAAI,CAACJ,eAAe,EACvB;YAAEyE,OAAO,EAAEU,YAAY;YAAEjB,IAAI,EAAE;UAAY,CAAC,CAC7C,CAAC;QACJ;MACF,CAAC,CAAC;IACN;EACF;EAEOkB,aAAaA,CAACC,KAAa,EAAE;IAClC;IACA;IACA,MAAMC,iBAAiB,GAAG,IAAI,CAACtF,eAAe,CAAC4E,KAAK,CAAC,CAAC,EAAES,KAAK,CAAC;IAE9D,IAAI,CAACjF,sBAAsB,CAACkF,iBAAiB,CAAC;EAChD;EAEQlB,iBAAiBA,CACvBJ,QAAmB,EACnBjC,eAAoB,EACpBkC,KAAiB,EACjBsB,aAAsB,EACd;IACR,IAAI,CAACxD,eAAe,CAACyD,aAAa,EAAE;MAClC,MAAM1D,KAAK,CAAC,gDAAgD,CAAC;IAC/D;IACA,MAAM2D,QAAQ,GAAG,IAAIpG,QAAQ,CAAC0C,eAAe,CAACyD,aAAa,CAAC;IAE5D,MAAME,aAAa,GAAGC,MAAM,CAACC,WAAW,CACtCD,MAAM,CAACE,IAAI,CAACrG,cAAc,CAAC,CACxBsG,MAAM,CAAEC,GAAG,IAAKA,GAAG,IAAIhE,eAAe,CAAC,CACvCiE,GAAG,CAAED,GAAG,IAAK,CAACA,GAAG,EAAEhE,eAAe,CAACgE,GAAG,CAAC,CAAC,CAC7C,CAAC;IAED,MAAME,MAAM,GAAGR,QAAQ,CAACS,MAAM,CAAC;MAC7BlC,QAAQ;MACRC,KAAK;MACL,GAAGsB,aAAa;MAChB,GAAGG;IACL,CAAC,CAAC;IACF,OAAOO,MAAM;EACf;AACF","ignoreList":[]}
@@ -51,10 +51,12 @@ export const useLLM = ({
51
51
  // memoization of returned functions
52
52
  const configure = useCallback(({
53
53
  chatConfig,
54
- toolsConfig
54
+ toolsConfig,
55
+ generationConfig
55
56
  }) => controllerInstance.configure({
56
57
  chatConfig,
57
- toolsConfig
58
+ toolsConfig,
59
+ generationConfig
58
60
  }), [controllerInstance]);
59
61
  const generate = useCallback((messages, tools) => {
60
62
  setResponse('');
@@ -66,6 +68,7 @@ export const useLLM = ({
66
68
  }, [controllerInstance]);
67
69
  const deleteMessage = useCallback(index => controllerInstance.deleteMessage(index), [controllerInstance]);
68
70
  const interrupt = useCallback(() => controllerInstance.interrupt(), [controllerInstance]);
71
+ const getGeneratedTokenCount = useCallback(() => controllerInstance.getGeneratedTokenCount(), [controllerInstance]);
69
72
  return {
70
73
  messageHistory,
71
74
  response,
@@ -74,6 +77,7 @@ export const useLLM = ({
74
77
  isGenerating,
75
78
  downloadProgress,
76
79
  error,
80
+ getGeneratedTokenCount: getGeneratedTokenCount,
77
81
  configure: configure,
78
82
  generate: generate,
79
83
  sendMessage: sendMessage,