react-native-executorch 0.5.5 → 0.5.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/android/libs/classes.jar +0 -0
- package/android/src/main/cpp/CMakeLists.txt +23 -14
- package/common/rnexecutorch/RnExecutorchInstaller.cpp +4 -21
- package/common/rnexecutorch/host_objects/ModelHostObject.h +67 -51
- package/common/rnexecutorch/models/llm/LLM.cpp +24 -1
- package/common/rnexecutorch/models/llm/LLM.h +4 -1
- package/common/rnexecutorch/models/speech_to_text/SpeechToText.cpp +2 -5
- package/common/rnexecutorch/models/speech_to_text/SpeechToText.h +1 -1
- package/common/rnexecutorch/threads/GlobalThreadPool.h +79 -0
- package/common/rnexecutorch/threads/HighPerformanceThreadPool.h +364 -0
- package/common/rnexecutorch/threads/utils/ThreadUtils.h +29 -0
- package/common/runner/runner.cpp +9 -3
- package/common/runner/runner.h +4 -3
- package/common/runner/text_token_generator.h +28 -10
- package/lib/module/controllers/LLMController.js +21 -2
- package/lib/module/controllers/LLMController.js.map +1 -1
- package/lib/module/hooks/natural_language_processing/useLLM.js +6 -2
- package/lib/module/hooks/natural_language_processing/useLLM.js.map +1 -1
- package/lib/module/modules/natural_language_processing/LLMModule.js +4 -2
- package/lib/module/modules/natural_language_processing/LLMModule.js.map +1 -1
- package/lib/module/types/llm.js.map +1 -1
- package/lib/typescript/controllers/LLMController.d.ts +4 -2
- package/lib/typescript/controllers/LLMController.d.ts.map +1 -1
- package/lib/typescript/hooks/natural_language_processing/useLLM.d.ts.map +1 -1
- package/lib/typescript/modules/natural_language_processing/LLMModule.d.ts +3 -2
- package/lib/typescript/modules/natural_language_processing/LLMModule.d.ts.map +1 -1
- package/lib/typescript/types/llm.d.ts +7 -1
- package/lib/typescript/types/llm.d.ts.map +1 -1
- package/package.json +3 -1
- package/react-native-executorch.podspec +12 -31
- package/src/controllers/LLMController.ts +29 -5
- package/src/hooks/natural_language_processing/useLLM.ts +15 -1
- package/src/modules/natural_language_processing/LLMModule.ts +10 -2
- package/src/types/llm.ts +8 -0
- package/third-party/android/libs/cpuinfo/arm64-v8a/libcpuinfo.so +0 -0
- package/third-party/android/libs/executorch/arm64-v8a/libexecutorch.so +0 -0
- package/third-party/android/libs/executorch/x86_64/libexecutorch.so +0 -0
- package/third-party/android/libs/pthreadpool/arm64-v8a/libpthreadpool.so +0 -0
- package/third-party/android/libs/tokenizers-cpp/arm64-v8a/libsentencepiece.a +0 -0
- package/third-party/android/libs/tokenizers-cpp/arm64-v8a/libtokenizers_c.a +0 -0
- package/third-party/android/libs/tokenizers-cpp/arm64-v8a/libtokenizers_cpp.a +0 -0
- package/third-party/android/libs/tokenizers-cpp/x86_64/libsentencepiece.a +0 -0
- package/third-party/android/libs/tokenizers-cpp/x86_64/libtokenizers_c.a +0 -0
- package/third-party/android/libs/tokenizers-cpp/x86_64/libtokenizers_cpp.a +0 -0
- package/third-party/include/c10/macros/Export.h +2 -86
- package/third-party/include/c10/macros/Macros.h +28 -5
- package/third-party/include/c10/util/BFloat16-inl.h +1 -4
- package/third-party/include/c10/util/BFloat16.h +5 -8
- package/third-party/include/c10/util/Half.h +5 -0
- package/third-party/include/c10/util/bit_cast.h +1 -1
- package/third-party/include/c10/util/complex.h +639 -0
- package/third-party/include/c10/util/complex_math.h +399 -0
- package/third-party/include/c10/util/complex_utils.h +41 -0
- package/third-party/include/c10/util/irange.h +2 -2
- package/third-party/include/c10/util/overflows.h +95 -0
- package/third-party/include/executorch/ExecuTorchError.h +75 -0
- package/third-party/include/executorch/ExecuTorchModule.h +115 -11
- package/third-party/include/executorch/ExecuTorchTensor.h +731 -51
- package/third-party/include/executorch/ExecuTorchValue.h +61 -9
- package/third-party/include/executorch/extension/kernel_util/make_boxed_from_unboxed_functor.h +181 -0
- package/third-party/include/executorch/extension/kernel_util/meta_programming.h +108 -0
- package/third-party/include/executorch/extension/kernel_util/type_list.h +137 -0
- package/third-party/include/executorch/extension/module/bundled_module.h +131 -0
- package/third-party/include/executorch/extension/module/module.h +46 -20
- package/third-party/include/executorch/extension/threadpool/cpuinfo_utils.h +1 -3
- package/third-party/include/executorch/extension/threadpool/threadpool.h +1 -3
- package/third-party/include/executorch/extension/threadpool/threadpool_guard.h +35 -0
- package/third-party/include/executorch/runtime/backend/backend_execution_context.h +3 -3
- package/third-party/include/executorch/runtime/backend/backend_init_context.h +12 -6
- package/third-party/include/executorch/runtime/backend/backend_option_context.h +34 -0
- package/third-party/include/executorch/runtime/backend/interface.h +70 -9
- package/third-party/include/executorch/runtime/backend/options.h +206 -0
- package/third-party/include/executorch/runtime/core/evalue.h +19 -25
- package/third-party/include/executorch/runtime/core/event_tracer.h +32 -17
- package/third-party/include/executorch/runtime/core/event_tracer_hooks.h +23 -14
- package/third-party/include/executorch/runtime/core/exec_aten/exec_aten.h +32 -9
- package/third-party/include/executorch/runtime/core/exec_aten/util/dim_order_util.h +3 -2
- package/third-party/include/executorch/runtime/core/exec_aten/util/scalar_type_util.h +43 -75
- package/third-party/include/executorch/runtime/core/exec_aten/util/tensor_util.h +88 -87
- package/third-party/include/executorch/runtime/core/function_ref.h +100 -0
- package/third-party/include/executorch/runtime/core/named_data_map.h +14 -14
- package/third-party/include/executorch/runtime/core/portable_type/c10/c10/macros/Export.h +2 -86
- package/third-party/include/executorch/runtime/core/portable_type/c10/c10/macros/Macros.h +28 -5
- package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/BFloat16-inl.h +1 -4
- package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/BFloat16.h +5 -8
- package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/Half.h +5 -0
- package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/bit_cast.h +1 -1
- package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/complex.h +639 -0
- package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/complex_math.h +399 -0
- package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/complex_utils.h +41 -0
- package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/irange.h +2 -2
- package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/overflows.h +95 -0
- package/third-party/include/executorch/runtime/core/portable_type/c10/torch/headeronly/macros/Export.h +88 -0
- package/third-party/include/executorch/runtime/core/portable_type/complex.h +6 -29
- package/third-party/include/executorch/runtime/core/portable_type/tensor_impl.h +20 -0
- package/third-party/include/executorch/runtime/core/span.h +4 -0
- package/third-party/include/executorch/runtime/core/tag.h +19 -0
- package/third-party/include/executorch/runtime/core/tensor_layout.h +2 -2
- package/third-party/include/executorch/runtime/executor/method.h +15 -3
- package/third-party/include/executorch/runtime/executor/method_meta.h +34 -5
- package/third-party/include/executorch/runtime/executor/program.h +3 -4
- package/third-party/include/executorch/runtime/executor/pte_data_map.h +9 -8
- package/third-party/include/executorch/runtime/executor/tensor_parser.h +14 -13
- package/third-party/include/executorch/runtime/kernel/kernel_runtime_context.h +5 -5
- package/third-party/include/executorch/runtime/kernel/operator_registry.h +21 -19
- package/third-party/include/executorch/runtime/platform/compiler.h +8 -0
- package/third-party/include/executorch/runtime/platform/platform.h +126 -0
- package/third-party/include/headeronly/macros/Export.h +88 -0
- package/third-party/include/tokenizers-cpp/tokenizers_c.h +61 -0
- package/third-party/include/torch/headeronly/macros/Export.h +88 -0
- package/third-party/ios/ExecutorchLib.xcframework/Info.plist +43 -0
- package/third-party/ios/ExecutorchLib.xcframework/ios-arm64/ExecutorchLib.framework/ExecutorchLib +0 -0
- package/third-party/ios/ExecutorchLib.xcframework/ios-arm64/ExecutorchLib.framework/Info.plist +0 -0
- package/third-party/ios/ExecutorchLib.xcframework/ios-arm64-simulator/ExecutorchLib.framework/ExecutorchLib +0 -0
- package/third-party/ios/ExecutorchLib.xcframework/ios-arm64-simulator/ExecutorchLib.framework/Info.plist +0 -0
- package/third-party/ios/libs/cpuinfo/libcpuinfo.a +0 -0
- package/third-party/ios/libs/pthreadpool/physical-arm64-release/libpthreadpool.a +0 -0
- package/third-party/ios/libs/pthreadpool/simulator-arm64-debug/libpthreadpool.a +0 -0
- package/ios/libs/executorch/libbackend_coreml_ios.a +0 -0
- package/ios/libs/executorch/libbackend_coreml_simulator.a +0 -0
- package/ios/libs/executorch/libbackend_mps_ios.a +0 -0
- package/ios/libs/executorch/libbackend_mps_simulator.a +0 -0
- package/ios/libs/executorch/libbackend_xnnpack_ios.a +0 -0
- package/ios/libs/executorch/libbackend_xnnpack_simulator.a +0 -0
- package/ios/libs/executorch/libexecutorch_ios.a +0 -0
- package/ios/libs/executorch/libexecutorch_simulator.a +0 -0
- package/ios/libs/executorch/libkernels_custom_ios.a +0 -0
- package/ios/libs/executorch/libkernels_custom_simulator.a +0 -0
- package/ios/libs/executorch/libkernels_optimized_ios.a +0 -0
- package/ios/libs/executorch/libkernels_optimized_simulator.a +0 -0
- package/ios/libs/executorch/libkernels_portable_ios.a +0 -0
- package/ios/libs/executorch/libkernels_portable_simulator.a +0 -0
- package/ios/libs/executorch/libkernels_quantized_ios.a +0 -0
- package/ios/libs/executorch/libkernels_quantized_simulator.a +0 -0
- package/third-party/ios/ios.toolchain.cmake +0 -1122
- /package/{ios → third-party/ios}/libs/tokenizers-cpp/physical-arm64-release/libsentencepiece.a +0 -0
- /package/{ios → third-party/ios}/libs/tokenizers-cpp/physical-arm64-release/libtokenizers_c.a +0 -0
- /package/{ios → third-party/ios}/libs/tokenizers-cpp/physical-arm64-release/libtokenizers_cpp.a +0 -0
- /package/{ios → third-party/ios}/libs/tokenizers-cpp/simulator-arm64-debug/libsentencepiece.a +0 -0
- /package/{ios → third-party/ios}/libs/tokenizers-cpp/simulator-arm64-debug/libtokenizers_c.a +0 -0
- /package/{ios → third-party/ios}/libs/tokenizers-cpp/simulator-arm64-debug/libtokenizers_cpp.a +0 -0
|
@@ -0,0 +1,364 @@
|
|
|
1
|
+
// HighPerformanceThreadPool.h
|
|
2
|
+
#pragma once
|
|
3
|
+
|
|
4
|
+
#include <algorithm>
|
|
5
|
+
#include <atomic>
|
|
6
|
+
#include <chrono>
|
|
7
|
+
#include <condition_variable>
|
|
8
|
+
#include <fstream>
|
|
9
|
+
#include <functional>
|
|
10
|
+
#include <future>
|
|
11
|
+
#include <memory>
|
|
12
|
+
#include <mutex>
|
|
13
|
+
#include <pthread.h>
|
|
14
|
+
#include <queue>
|
|
15
|
+
#include <ranges>
|
|
16
|
+
#include <sched.h>
|
|
17
|
+
#include <sys/resource.h>
|
|
18
|
+
#include <thread>
|
|
19
|
+
#include <unistd.h>
|
|
20
|
+
#include <vector>
|
|
21
|
+
|
|
22
|
+
#include <executorch/extension/threadpool/cpuinfo_utils.h>
|
|
23
|
+
#include <rnexecutorch/Log.h>
|
|
24
|
+
|
|
25
|
+
#ifdef __APPLE__
|
|
26
|
+
#include <sys/syscall.h>
|
|
27
|
+
#endif
|
|
28
|
+
|
|
29
|
+
#ifdef __ANDROID__
|
|
30
|
+
#include <sys/types.h>
|
|
31
|
+
#endif
|
|
32
|
+
|
|
33
|
+
namespace rnexecutorch::threads {
|
|
34
|
+
|
|
35
|
+
enum class Priority { LOW, NORMAL, HIGH, REALTIME };
|
|
36
|
+
|
|
37
|
+
struct ThreadConfig {
|
|
38
|
+
bool pinToPerformanceCores{true};
|
|
39
|
+
std::string namePrefix{"RN_ET_Worker"};
|
|
40
|
+
};
|
|
41
|
+
|
|
42
|
+
class HighPerformanceThreadPool {
|
|
43
|
+
public:
|
|
44
|
+
explicit HighPerformanceThreadPool(size_t numThreads = 1,
|
|
45
|
+
ThreadConfig cfg = ThreadConfig())
|
|
46
|
+
: config(std::move(cfg)) {
|
|
47
|
+
|
|
48
|
+
#ifdef __ANDROID__
|
|
49
|
+
detectCPUTopology();
|
|
50
|
+
numThreads = std::min(numThreads, performanceCores.size());
|
|
51
|
+
#endif
|
|
52
|
+
|
|
53
|
+
for (size_t i = 0; i < numThreads; i++) {
|
|
54
|
+
workers.emplace_back(&HighPerformanceThreadPool::workerThread, this, i);
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
log(LOG_LEVEL::Debug, "Thread pool initialized with", numThreads,
|
|
58
|
+
"workers.");
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
~HighPerformanceThreadPool() { shutdown(); }
|
|
62
|
+
|
|
63
|
+
// Submit a task and get a future for the result
|
|
64
|
+
template <typename Func, typename... Args>
|
|
65
|
+
auto submit(Func &&func, Args &&...args)
|
|
66
|
+
-> std::future<decltype(func(args...))> {
|
|
67
|
+
return submitWithPriority(Priority::NORMAL, std::forward<Func>(func),
|
|
68
|
+
std::forward<Args>(args)...);
|
|
69
|
+
}
|
|
70
|
+
|
|
71
|
+
// Submit a task with specific priority
|
|
72
|
+
template <typename Func, typename... Args>
|
|
73
|
+
auto submitWithPriority(Priority priority, Func &&func, Args &&...args)
|
|
74
|
+
-> std::future<decltype(func(args...))> {
|
|
75
|
+
|
|
76
|
+
using ReturnType = decltype(func(args...));
|
|
77
|
+
|
|
78
|
+
// Create a packaged task
|
|
79
|
+
auto boundFunc =
|
|
80
|
+
std::bind(std::forward<Func>(func), std::forward<Args>(args)...);
|
|
81
|
+
auto task = std::make_unique<Task<decltype(boundFunc), ReturnType>>(
|
|
82
|
+
std::move(boundFunc));
|
|
83
|
+
auto future = task->getFuture();
|
|
84
|
+
|
|
85
|
+
// Add to queue
|
|
86
|
+
{
|
|
87
|
+
std::scoped_lock lock(queueMutex);
|
|
88
|
+
|
|
89
|
+
if (!running) {
|
|
90
|
+
throw std::runtime_error("Thread pool is shutting down");
|
|
91
|
+
}
|
|
92
|
+
|
|
93
|
+
WorkItem item(std::move(task), priority,
|
|
94
|
+
std::chrono::steady_clock::now());
|
|
95
|
+
|
|
96
|
+
taskQueue.push(std::move(item));
|
|
97
|
+
}
|
|
98
|
+
|
|
99
|
+
condition.notify_one();
|
|
100
|
+
return future;
|
|
101
|
+
}
|
|
102
|
+
|
|
103
|
+
// Execute a task and wait for result
|
|
104
|
+
template <typename Func, typename... Args>
|
|
105
|
+
auto execute(Func &&func, Args &&...args) -> decltype(func(args...)) {
|
|
106
|
+
auto future = submit(std::forward<Func>(func), std::forward<Args>(args)...);
|
|
107
|
+
return future.get();
|
|
108
|
+
}
|
|
109
|
+
|
|
110
|
+
// Fire and forget task
|
|
111
|
+
template <typename Func, typename... Args>
|
|
112
|
+
void submitDetached(Func &&func, Args &&...args) {
|
|
113
|
+
submit(std::forward<Func>(func), std::forward<Args>(args)...);
|
|
114
|
+
// Future is destroyed, task still runs
|
|
115
|
+
}
|
|
116
|
+
|
|
117
|
+
void shutdown() {
|
|
118
|
+
if (!running.exchange(false)) {
|
|
119
|
+
return;
|
|
120
|
+
}
|
|
121
|
+
|
|
122
|
+
condition.notify_all();
|
|
123
|
+
|
|
124
|
+
for (auto &worker : workers) {
|
|
125
|
+
if (worker.joinable()) {
|
|
126
|
+
worker.join();
|
|
127
|
+
}
|
|
128
|
+
}
|
|
129
|
+
}
|
|
130
|
+
|
|
131
|
+
private:
|
|
132
|
+
// Task wrapper that can hold any callable
|
|
133
|
+
class ITask {
|
|
134
|
+
public:
|
|
135
|
+
virtual ~ITask() = default;
|
|
136
|
+
virtual void execute() = 0;
|
|
137
|
+
};
|
|
138
|
+
|
|
139
|
+
template <typename Func, typename Result> class Task : public ITask {
|
|
140
|
+
public:
|
|
141
|
+
Task(Func &&f) : func(std::forward<Func>(f)) {}
|
|
142
|
+
|
|
143
|
+
void execute() override {
|
|
144
|
+
try {
|
|
145
|
+
if constexpr (std::is_void_v<Result>) {
|
|
146
|
+
func();
|
|
147
|
+
promise.set_value();
|
|
148
|
+
} else {
|
|
149
|
+
promise.set_value(func());
|
|
150
|
+
}
|
|
151
|
+
} catch (...) {
|
|
152
|
+
promise.set_exception(std::current_exception());
|
|
153
|
+
}
|
|
154
|
+
}
|
|
155
|
+
|
|
156
|
+
std::future<Result> getFuture() { return promise.get_future(); }
|
|
157
|
+
|
|
158
|
+
private:
|
|
159
|
+
Func func;
|
|
160
|
+
std::promise<Result> promise;
|
|
161
|
+
};
|
|
162
|
+
|
|
163
|
+
class WorkItem {
|
|
164
|
+
public:
|
|
165
|
+
WorkItem() = default;
|
|
166
|
+
WorkItem(std::unique_ptr<ITask> task, Priority priority,
|
|
167
|
+
std::chrono::steady_clock::time_point enqueueTime)
|
|
168
|
+
: task(std::move(task)), priority(priority), enqueueTime(enqueueTime) {}
|
|
169
|
+
|
|
170
|
+
std::unique_ptr<ITask> task;
|
|
171
|
+
|
|
172
|
+
bool operator<(const WorkItem &other) const {
|
|
173
|
+
return priority != other.priority ? priority < other.priority
|
|
174
|
+
: enqueueTime > other.enqueueTime;
|
|
175
|
+
}
|
|
176
|
+
|
|
177
|
+
private:
|
|
178
|
+
Priority priority;
|
|
179
|
+
std::chrono::steady_clock::time_point enqueueTime;
|
|
180
|
+
};
|
|
181
|
+
|
|
182
|
+
// Thread pool state
|
|
183
|
+
std::vector<std::thread> workers;
|
|
184
|
+
std::priority_queue<WorkItem> taskQueue;
|
|
185
|
+
std::mutex queueMutex;
|
|
186
|
+
std::condition_variable condition;
|
|
187
|
+
std::atomic<bool> running{true};
|
|
188
|
+
std::atomic<size_t> activeWorkers{0};
|
|
189
|
+
std::atomic<size_t> totalTasksProcessed{0};
|
|
190
|
+
|
|
191
|
+
#ifdef __ANDROID__
|
|
192
|
+
// Performance cores
|
|
193
|
+
std::vector<int32_t> performanceCores;
|
|
194
|
+
std::vector<int32_t> efficiencyCores;
|
|
195
|
+
#endif
|
|
196
|
+
|
|
197
|
+
// Configuration
|
|
198
|
+
ThreadConfig config;
|
|
199
|
+
|
|
200
|
+
void detectCPUTopology() {
|
|
201
|
+
#ifdef __ANDROID__
|
|
202
|
+
struct CoreInfo {
|
|
203
|
+
int32_t id;
|
|
204
|
+
int64_t maxFreq;
|
|
205
|
+
};
|
|
206
|
+
|
|
207
|
+
std::vector<CoreInfo> cores;
|
|
208
|
+
const auto numOfCores = std::thread::hardware_concurrency();
|
|
209
|
+
|
|
210
|
+
for (int32_t i = 0; std::cmp_less(i, numOfCores); ++i) {
|
|
211
|
+
std::string path = "/sys/devices/system/cpu/cpu" + std::to_string(i) +
|
|
212
|
+
"/cpufreq/cpuinfo_max_freq";
|
|
213
|
+
std::ifstream file(path);
|
|
214
|
+
if (!file.good()) {
|
|
215
|
+
break;
|
|
216
|
+
}
|
|
217
|
+
|
|
218
|
+
CoreInfo info;
|
|
219
|
+
info.id = i;
|
|
220
|
+
file >> info.maxFreq;
|
|
221
|
+
cores.push_back(info);
|
|
222
|
+
}
|
|
223
|
+
|
|
224
|
+
if (cores.empty()) {
|
|
225
|
+
log(LOG_LEVEL::Debug, "Could not detect CPU topology");
|
|
226
|
+
return;
|
|
227
|
+
}
|
|
228
|
+
|
|
229
|
+
// Sort by frequency
|
|
230
|
+
std::ranges::sort(cores, [](const CoreInfo &a, const CoreInfo &b) {
|
|
231
|
+
return a.maxFreq > b.maxFreq;
|
|
232
|
+
});
|
|
233
|
+
|
|
234
|
+
// Classify cores
|
|
235
|
+
const auto numOfPerfCores =
|
|
236
|
+
::executorch::extension::cpuinfo::get_num_performant_cores();
|
|
237
|
+
|
|
238
|
+
constexpr float kKiloToGigaRatio = 1e6;
|
|
239
|
+
for (int32_t i = 0; i < cores.size(); ++i) {
|
|
240
|
+
if (i < numOfPerfCores) {
|
|
241
|
+
performanceCores.push_back(cores[i].id);
|
|
242
|
+
log(LOG_LEVEL::Debug, "Performance core:", cores[i].id, "(",
|
|
243
|
+
cores[i].maxFreq / kKiloToGigaRatio, "GHz)");
|
|
244
|
+
} else {
|
|
245
|
+
efficiencyCores.push_back(cores[i].id);
|
|
246
|
+
log(LOG_LEVEL::Debug, "Efficiency core:", cores[i].id, "(",
|
|
247
|
+
cores[i].maxFreq / kKiloToGigaRatio, "GHz)");
|
|
248
|
+
}
|
|
249
|
+
}
|
|
250
|
+
#endif
|
|
251
|
+
}
|
|
252
|
+
|
|
253
|
+
#ifdef __ANDROID__
|
|
254
|
+
inline uint64_t getCurrentThreadId() { return gettid(); }
|
|
255
|
+
#endif
|
|
256
|
+
|
|
257
|
+
inline void setCurrentThreadName(const std::string &name) {
|
|
258
|
+
#ifdef __ANDROID__
|
|
259
|
+
pthread_setname_np(pthread_self(), name.c_str());
|
|
260
|
+
#elif defined(__APPLE__)
|
|
261
|
+
pthread_setname_np(name.c_str());
|
|
262
|
+
#endif
|
|
263
|
+
}
|
|
264
|
+
|
|
265
|
+
void configureThread(uint32_t workerIndex) {
|
|
266
|
+
std::string threadName = config.namePrefix + std::to_string(workerIndex);
|
|
267
|
+
setCurrentThreadName(threadName.c_str());
|
|
268
|
+
|
|
269
|
+
#ifdef __ANDROID__
|
|
270
|
+
if (config.pinToPerformanceCores && !performanceCores.empty()) {
|
|
271
|
+
setCPUAffinity();
|
|
272
|
+
}
|
|
273
|
+
#endif
|
|
274
|
+
|
|
275
|
+
setThreadPriority();
|
|
276
|
+
|
|
277
|
+
log(LOG_LEVEL::Debug, "Worker", workerIndex,
|
|
278
|
+
"configured:", threadName.c_str());
|
|
279
|
+
}
|
|
280
|
+
|
|
281
|
+
void setCPUAffinity() {
|
|
282
|
+
// AFAIK it is not possible on iOS
|
|
283
|
+
#ifdef __ANDROID__
|
|
284
|
+
if (performanceCores.empty()) {
|
|
285
|
+
log(LOG_LEVEL::Error, "No cores specified for affinity setting");
|
|
286
|
+
return;
|
|
287
|
+
}
|
|
288
|
+
|
|
289
|
+
cpu_set_t cpuset;
|
|
290
|
+
CPU_ZERO(&cpuset);
|
|
291
|
+
|
|
292
|
+
for (int32_t core : performanceCores) {
|
|
293
|
+
CPU_SET(core, &cpuset);
|
|
294
|
+
}
|
|
295
|
+
|
|
296
|
+
pid_t tid = getCurrentThreadId();
|
|
297
|
+
log(LOG_LEVEL::Debug, "Thread id", tid);
|
|
298
|
+
if (sched_setaffinity(tid, sizeof(cpuset), &cpuset) == 0) {
|
|
299
|
+
log(LOG_LEVEL::Debug, "Thread pinned to cores:", performanceCores);
|
|
300
|
+
} else {
|
|
301
|
+
log(LOG_LEVEL::Debug, "Failed to set CPU affinity (error:", errno,
|
|
302
|
+
"). Continuing without affinity.");
|
|
303
|
+
}
|
|
304
|
+
#endif
|
|
305
|
+
}
|
|
306
|
+
|
|
307
|
+
void setThreadPriority() {
|
|
308
|
+
// pthread_setschedparam doesn't work on android because permissions reasons
|
|
309
|
+
// and in general does not provide visible improvements on iOS
|
|
310
|
+
|
|
311
|
+
// Set nice value as fallback or additional priority boost
|
|
312
|
+
constexpr int nice_value = 0;
|
|
313
|
+
if (setpriority(PRIO_PROCESS, 0, nice_value) != 0) {
|
|
314
|
+
log(LOG_LEVEL::Debug, "Failed to set nice value");
|
|
315
|
+
} else {
|
|
316
|
+
log(LOG_LEVEL::Debug, "Set nice value", nice_value);
|
|
317
|
+
}
|
|
318
|
+
}
|
|
319
|
+
|
|
320
|
+
void processTask(const WorkItem &item) {
|
|
321
|
+
activeWorkers++;
|
|
322
|
+
|
|
323
|
+
try {
|
|
324
|
+
item.task->execute();
|
|
325
|
+
} catch (const std::exception &e) {
|
|
326
|
+
log(LOG_LEVEL::Error, "Task failed:", e.what());
|
|
327
|
+
activeWorkers--;
|
|
328
|
+
throw;
|
|
329
|
+
}
|
|
330
|
+
|
|
331
|
+
activeWorkers--;
|
|
332
|
+
totalTasksProcessed++;
|
|
333
|
+
}
|
|
334
|
+
|
|
335
|
+
void workerThread(int workerIndex) {
|
|
336
|
+
configureThread(workerIndex);
|
|
337
|
+
|
|
338
|
+
while (running) {
|
|
339
|
+
WorkItem item;
|
|
340
|
+
|
|
341
|
+
{
|
|
342
|
+
std::unique_lock<std::mutex> lock(queueMutex);
|
|
343
|
+
condition.wait(lock, [this] { return !taskQueue.empty() || !running; });
|
|
344
|
+
|
|
345
|
+
if (!running && taskQueue.empty()) {
|
|
346
|
+
break;
|
|
347
|
+
}
|
|
348
|
+
|
|
349
|
+
if (!taskQueue.empty()) {
|
|
350
|
+
item = std::move(const_cast<WorkItem &>(taskQueue.top()));
|
|
351
|
+
taskQueue.pop();
|
|
352
|
+
} else {
|
|
353
|
+
continue;
|
|
354
|
+
}
|
|
355
|
+
}
|
|
356
|
+
|
|
357
|
+
processTask(item);
|
|
358
|
+
}
|
|
359
|
+
|
|
360
|
+
log(LOG_LEVEL::Debug, "Worker", workerIndex, "shutting down");
|
|
361
|
+
}
|
|
362
|
+
};
|
|
363
|
+
|
|
364
|
+
} // namespace rnexecutorch::threads
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
#pragma once
|
|
2
|
+
|
|
3
|
+
#include <executorch/extension/threadpool/cpuinfo_utils.h>
|
|
4
|
+
#include <executorch/extension/threadpool/threadpool.h>
|
|
5
|
+
#include <rnexecutorch/Log.h>
|
|
6
|
+
|
|
7
|
+
namespace rnexecutorch::threads::utils {
|
|
8
|
+
|
|
9
|
+
void unsafeSetupThreadPool(uint32_t num_of_cores = 0) {
|
|
10
|
+
auto num_of_perf_cores =
|
|
11
|
+
::executorch::extension::cpuinfo::get_num_performant_cores();
|
|
12
|
+
log(LOG_LEVEL::Info, "Detected ", num_of_perf_cores, " performant cores");
|
|
13
|
+
// setting num_of_cores to floor(num_of_perf_cores / 2) + 1) because
|
|
14
|
+
// depending on cpu arch as when possible we want to leave at least 2
|
|
15
|
+
// performant cores for other tasks (setting more actually results in drop
|
|
16
|
+
// of performance). For older devices (i.e. samsung s22) resolves to 3
|
|
17
|
+
// cores, and for newer ones (like OnePlus 12) resolves to 4, which when
|
|
18
|
+
// benchmarked gives highest throughput. For iPhones they usually have 2
|
|
19
|
+
// performance cores
|
|
20
|
+
auto _num_of_cores = num_of_cores
|
|
21
|
+
? num_of_cores
|
|
22
|
+
: static_cast<uint32_t>(num_of_perf_cores / 2) + 1;
|
|
23
|
+
const auto threadpool = ::executorch::extension::threadpool::get_threadpool();
|
|
24
|
+
threadpool->_unsafe_reset_threadpool(_num_of_cores);
|
|
25
|
+
log(LOG_LEVEL::Info, "Configuring xnnpack for",
|
|
26
|
+
threadpool->get_thread_count(), "threads");
|
|
27
|
+
}
|
|
28
|
+
|
|
29
|
+
} // namespace rnexecutorch::threads::utils
|
package/common/runner/runner.cpp
CHANGED
|
@@ -222,9 +222,6 @@ Error Runner::generate(const std::string &prompt,
|
|
|
222
222
|
RUNNER_ET_LOG(warmup, "RSS after prompt prefill: %f MiB (0 if unsupported)",
|
|
223
223
|
llm::get_rss_bytes() / 1024.0 / 1024.0);
|
|
224
224
|
|
|
225
|
-
if (cur_decoded != "�") {
|
|
226
|
-
wrapped_callback(cur_decoded);
|
|
227
|
-
}
|
|
228
225
|
// start the main loop
|
|
229
226
|
prompt_tokens_uint64.push_back(cur_token);
|
|
230
227
|
int64_t num_generated_tokens = ET_UNWRAP(text_token_generator_->generate(
|
|
@@ -275,4 +272,13 @@ void Runner::stop() {
|
|
|
275
272
|
ET_LOG(Error, "Token generator is not loaded, cannot stop");
|
|
276
273
|
}
|
|
277
274
|
}
|
|
275
|
+
|
|
276
|
+
void Runner::set_count_interval(size_t count_interval) {
|
|
277
|
+
text_token_generator_->set_count_interval(count_interval);
|
|
278
|
+
}
|
|
279
|
+
|
|
280
|
+
void Runner::set_time_interval(size_t time_interval) {
|
|
281
|
+
text_token_generator_->set_time_interval(time_interval);
|
|
282
|
+
}
|
|
283
|
+
|
|
278
284
|
} // namespace example
|
package/common/runner/runner.h
CHANGED
|
@@ -43,8 +43,12 @@ public:
|
|
|
43
43
|
stats_callback = {},
|
|
44
44
|
bool echo = true, bool warming = false);
|
|
45
45
|
::executorch::runtime::Error warmup(const std::string &prompt);
|
|
46
|
+
void set_count_interval(size_t count_interval);
|
|
47
|
+
void set_time_interval(size_t time_interval);
|
|
46
48
|
void stop();
|
|
47
49
|
|
|
50
|
+
::executorch::extension::llm::Stats stats_;
|
|
51
|
+
|
|
48
52
|
private:
|
|
49
53
|
float temperature_;
|
|
50
54
|
bool shouldStop_{false};
|
|
@@ -59,9 +63,6 @@ private:
|
|
|
59
63
|
std::unique_ptr<::executorch::extension::llm::TextPrefiller> text_prefiller_;
|
|
60
64
|
std::unique_ptr<::executorch::extension::llm::TextTokenGenerator>
|
|
61
65
|
text_token_generator_;
|
|
62
|
-
|
|
63
|
-
// stats
|
|
64
|
-
::executorch::extension::llm::Stats stats_;
|
|
65
66
|
};
|
|
66
67
|
|
|
67
68
|
} // namespace example
|
|
@@ -11,6 +11,7 @@
|
|
|
11
11
|
|
|
12
12
|
#include "stats.h"
|
|
13
13
|
#include "text_decoder_runner.h"
|
|
14
|
+
#include <chrono>
|
|
14
15
|
#include <executorch/extension/tensor/tensor.h>
|
|
15
16
|
#include <iostream>
|
|
16
17
|
#include <tokenizers-cpp/tokenizers_cpp.h>
|
|
@@ -27,7 +28,7 @@ public:
|
|
|
27
28
|
Stats *stats)
|
|
28
29
|
: tokenizer_(tokenizer), text_decoder_runner_(text_decoder_runner),
|
|
29
30
|
eos_ids_(std::move(eos_ids)), use_kv_cache_(use_kv_cache),
|
|
30
|
-
stats_(stats) {}
|
|
31
|
+
stats_(stats), timestamp_(std::chrono::high_resolution_clock::now()) {}
|
|
31
32
|
|
|
32
33
|
/**
|
|
33
34
|
* Token generation loop.
|
|
@@ -55,12 +56,8 @@ public:
|
|
|
55
56
|
uint64_t prev_token;
|
|
56
57
|
// cache to keep tokens if they were decoded into illegal character
|
|
57
58
|
std::vector<int32_t> token_cache;
|
|
58
|
-
//
|
|
59
|
-
|
|
60
|
-
if (tokenizer_->Decode(
|
|
61
|
-
std::vector<int32_t>{static_cast<int32_t>(cur_token)}) == "�") {
|
|
62
|
-
token_cache.push_back(static_cast<int32_t>(cur_token));
|
|
63
|
-
}
|
|
59
|
+
// add first token after prefill to cache here
|
|
60
|
+
token_cache.push_back(static_cast<int32_t>(cur_token));
|
|
64
61
|
|
|
65
62
|
if (use_kv_cache_) {
|
|
66
63
|
// hard code these to size 1 as kv cache is locked to static size right
|
|
@@ -79,7 +76,7 @@ public:
|
|
|
79
76
|
from_blob(&pos, {1}, executorch::aten::ScalarType::Long);
|
|
80
77
|
|
|
81
78
|
should_stop_ = false;
|
|
82
|
-
|
|
79
|
+
timestamp_ = std::chrono::high_resolution_clock::now();
|
|
83
80
|
// Generate our tokens
|
|
84
81
|
while (pos < seq_len - 1) {
|
|
85
82
|
// Run the model
|
|
@@ -112,9 +109,19 @@ public:
|
|
|
112
109
|
token_cache.push_back(static_cast<int32_t>(cur_token));
|
|
113
110
|
const std::string cache_decoded = tokenizer_->Decode(token_cache);
|
|
114
111
|
|
|
115
|
-
|
|
112
|
+
const auto timeIntervalElapsed =
|
|
113
|
+
std::chrono::duration_cast<std::chrono::milliseconds>(
|
|
114
|
+
std::chrono::high_resolution_clock::now() - timestamp_) >
|
|
115
|
+
time_interval_;
|
|
116
|
+
const auto countIntervalElapsed = token_cache.size() > count_interval_;
|
|
117
|
+
const auto eos_reached = eos_ids_->contains(cur_token);
|
|
118
|
+
|
|
119
|
+
if (!cache_decoded.ends_with("�") &&
|
|
120
|
+
(countIntervalElapsed || timeIntervalElapsed || should_stop_ ||
|
|
121
|
+
eos_reached)) {
|
|
116
122
|
token_callback(cache_decoded);
|
|
117
123
|
token_cache.clear();
|
|
124
|
+
timestamp_ = std::chrono::high_resolution_clock::now();
|
|
118
125
|
}
|
|
119
126
|
|
|
120
127
|
if (should_stop_) {
|
|
@@ -122,7 +129,7 @@ public:
|
|
|
122
129
|
}
|
|
123
130
|
|
|
124
131
|
// data-dependent terminating condition: we have n_eos_ number of EOS
|
|
125
|
-
if (
|
|
132
|
+
if (eos_reached) {
|
|
126
133
|
printf("\n");
|
|
127
134
|
ET_LOG(Info, "\nReached to the end of generation");
|
|
128
135
|
break;
|
|
@@ -136,11 +143,22 @@ public:
|
|
|
136
143
|
*/
|
|
137
144
|
inline void stop() { should_stop_ = true; }
|
|
138
145
|
|
|
146
|
+
void set_count_interval(size_t count_interval) {
|
|
147
|
+
count_interval_ = count_interval;
|
|
148
|
+
}
|
|
149
|
+
|
|
150
|
+
/**
 * Sets the time threshold for batching. The value is interpreted as a number
 * of milliseconds.
 */
void set_time_interval(size_t time_interval) {
  const std::chrono::milliseconds interval(time_interval);
  time_interval_ = interval;
}
|
|
153
|
+
|
|
139
154
|
private:
|
|
140
155
|
tokenizers::Tokenizer *tokenizer_;
|
|
141
156
|
TextDecoderRunner *text_decoder_runner_;
|
|
142
157
|
std::unique_ptr<std::unordered_set<uint64_t>> eos_ids_;
|
|
143
158
|
bool use_kv_cache_;
|
|
159
|
+
size_t count_interval_{10};
|
|
160
|
+
std::chrono::milliseconds time_interval_{120};
|
|
161
|
+
std::chrono::high_resolution_clock::time_point timestamp_;
|
|
144
162
|
|
|
145
163
|
// state machine
|
|
146
164
|
bool should_stop_ = false;
|
|
@@ -84,7 +84,16 @@ export class LLMController {
|
|
|
84
84
|
this.nativeModule = global.loadLLM(modelPath, tokenizerPath);
|
|
85
85
|
this.isReadyCallback(true);
|
|
86
86
|
this.onToken = data => {
|
|
87
|
-
if (!data
|
|
87
|
+
if (!data) {
|
|
88
|
+
return;
|
|
89
|
+
}
|
|
90
|
+
if (SPECIAL_TOKENS.EOS_TOKEN in this.tokenizerConfig && data.indexOf(this.tokenizerConfig.eos_token) >= 0) {
|
|
91
|
+
data = data.replaceAll(this.tokenizerConfig.eos_token, '');
|
|
92
|
+
}
|
|
93
|
+
if (SPECIAL_TOKENS.PAD_TOKEN in this.tokenizerConfig && data.indexOf(this.tokenizerConfig.pad_token) >= 0) {
|
|
94
|
+
data = data.replaceAll(this.tokenizerConfig.pad_token, '');
|
|
95
|
+
}
|
|
96
|
+
if (data.length === 0) {
|
|
88
97
|
return;
|
|
89
98
|
}
|
|
90
99
|
this.tokenCallback(data);
|
|
@@ -100,13 +109,20 @@ export class LLMController {
|
|
|
100
109
|
}
|
|
101
110
|
configure({
|
|
102
111
|
chatConfig,
|
|
103
|
-
toolsConfig
|
|
112
|
+
toolsConfig,
|
|
113
|
+
generationConfig
|
|
104
114
|
}) {
|
|
105
115
|
this.chatConfig = {
|
|
106
116
|
...DEFAULT_CHAT_CONFIG,
|
|
107
117
|
...chatConfig
|
|
108
118
|
};
|
|
109
119
|
this.toolsConfig = toolsConfig;
|
|
120
|
+
if (generationConfig?.outputTokenBatchSize) {
|
|
121
|
+
this.nativeModule.setCountInterval(generationConfig.outputTokenBatchSize);
|
|
122
|
+
}
|
|
123
|
+
if (generationConfig?.batchTimeInterval) {
|
|
124
|
+
this.nativeModule.setTimeInterval(generationConfig.batchTimeInterval);
|
|
125
|
+
}
|
|
110
126
|
|
|
111
127
|
// reset inner state when loading new configuration
|
|
112
128
|
this.responseCallback('');
|
|
@@ -142,6 +158,9 @@ export class LLMController {
|
|
|
142
158
|
interrupt() {
|
|
143
159
|
this.nativeModule.interrupt();
|
|
144
160
|
}
|
|
161
|
+
getGeneratedTokenCount() {
|
|
162
|
+
return this.nativeModule.getGeneratedTokenCount();
|
|
163
|
+
}
|
|
145
164
|
async generate(messages, tools) {
|
|
146
165
|
if (!this._isReady) {
|
|
147
166
|
throw new Error(getError(ETError.ModuleNotLoaded));
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"names":["ResourceFetcher","ETError","getError","Template","DEFAULT_CHAT_CONFIG","readAsStringAsync","SPECIAL_TOKENS","parseToolCall","Logger","LLMController","chatConfig","_response","_isReady","_isGenerating","_messageHistory","constructor","tokenCallback","responseCallback","messageHistoryCallback","isReadyCallback","isGeneratingCallback","undefined","warn","token","response","messageHistory","isReady","isGenerating","load","modelSource","tokenizerSource","tokenizerConfigSource","onDownloadProgressCallback","initialMessageHistory","tokenizersPromise","fetch","modelPromise","tokenizersResults","modelResult","Promise","all","tokenizerPath","tokenizerConfigPath","modelPath","Error","tokenizerConfig","JSON","parse","nativeModule","global","loadLLM","onToken","data","EOS_TOKEN","eos_token","PAD_TOKEN","pad_token","e","setTokenCallback","configure","toolsConfig","delete","ModelGenerating","unload","forward","input","ModuleNotLoaded","generate","interrupt","
|
|
1
|
+
{"version":3,"names":["ResourceFetcher","ETError","getError","Template","DEFAULT_CHAT_CONFIG","readAsStringAsync","SPECIAL_TOKENS","parseToolCall","Logger","LLMController","chatConfig","_response","_isReady","_isGenerating","_messageHistory","constructor","tokenCallback","responseCallback","messageHistoryCallback","isReadyCallback","isGeneratingCallback","undefined","warn","token","response","messageHistory","isReady","isGenerating","load","modelSource","tokenizerSource","tokenizerConfigSource","onDownloadProgressCallback","initialMessageHistory","tokenizersPromise","fetch","modelPromise","tokenizersResults","modelResult","Promise","all","tokenizerPath","tokenizerConfigPath","modelPath","Error","tokenizerConfig","JSON","parse","nativeModule","global","loadLLM","onToken","data","EOS_TOKEN","indexOf","eos_token","replaceAll","PAD_TOKEN","pad_token","length","e","setTokenCallback","configure","toolsConfig","generationConfig","outputTokenBatchSize","setCountInterval","batchTimeInterval","setTimeInterval","delete","ModelGenerating","unload","forward","input","ModuleNotLoaded","generate","interrupt","getGeneratedTokenCount","messages","tools","role","renderedChat","applyChatTemplate","tools_in_user_message","add_generation_prompt","sendMessage","message","content","messageHistoryWithPrompt","systemPrompt","slice","contextWindowLength","displayToolCalls","toolCalls","toolCall","executeToolCallback","then","toolResponse","deleteMessage","index","newMessageHistory","templateFlags","chat_template","template","specialTokens","Object","fromEntries","keys","filter","key","map","result","render"],"sourceRoot":"../../../src","sources":["controllers/LLMController.ts"],"mappings":";;AACA,SAASA,eAAe,QAAQ,0BAA0B;AAC1D,SAASC,OAAO,EAAEC,QAAQ,QAAQ,UAAU;AAC5C,SAASC,QAAQ,QAAQ,oBAAoB;AAC7C,SAASC,mBAAmB,QAAQ,0BAA0B;AAC9D,SAASC,iBAAiB,QAAQ,kBAAkB;AACpD,SAKEC,cAAc,QAET,cAAc;AACrB,SAASC,aAAa,QAAQ,cAAc;AAC5C,SAASC,MAAM,QAAQ,kBAAkB;AAEzC,OAAO,MAAMC,aAAa,CAAC;EAEjBC,UAAU,GAAeN,mBAAmB;EAI5CO,SAAS,G
AAG,EAAE;EACdC,QAAQ,GAAG,KAAK;EAChBC,aAAa,GAAG,KAAK;EACrBC,eAAe,GAAc,EAAE;;EAEvC;;EAOAC,WAAWA,CAAC;IACVC,aAAa;IACbC,gBAAgB;IAChBC,sBAAsB;IACtBC,eAAe;IACfC;EAOF,CAAC,EAAE;IACD,IAAIH,gBAAgB,KAAKI,SAAS,EAAE;MAClCb,MAAM,CAACc,IAAI,CACT,sEACF,CAAC;IACH;IACA,IAAI,CAACN,aAAa,GAAIO,KAAK,IAAK;MAC9BP,aAAa,GAAGO,KAAK,CAAC;IACxB,CAAC;IACD,IAAI,CAACN,gBAAgB,GAAIO,QAAQ,IAAK;MACpC,IAAI,CAACb,SAAS,GAAGa,QAAQ;MACzBP,gBAAgB,GAAGO,QAAQ,CAAC;IAC9B,CAAC;IACD,IAAI,CAACN,sBAAsB,GAAIO,cAAc,IAAK;MAChD,IAAI,CAACX,eAAe,GAAGW,cAAc;MACrCP,sBAAsB,GAAGO,cAAc,CAAC;IAC1C,CAAC;IACD,IAAI,CAACN,eAAe,GAAIO,OAAO,IAAK;MAClC,IAAI,CAACd,QAAQ,GAAGc,OAAO;MACvBP,eAAe,GAAGO,OAAO,CAAC;IAC5B,CAAC;IACD,IAAI,CAACN,oBAAoB,GAAIO,YAAY,IAAK;MAC5C,IAAI,CAACd,aAAa,GAAGc,YAAY;MACjCP,oBAAoB,GAAGO,YAAY,CAAC;IACtC,CAAC;EACH;EAEA,IAAWH,QAAQA,CAAA,EAAG;IACpB,OAAO,IAAI,CAACb,SAAS;EACvB;EACA,IAAWe,OAAOA,CAAA,EAAG;IACnB,OAAO,IAAI,CAACd,QAAQ;EACtB;EACA,IAAWe,YAAYA,CAAA,EAAG;IACxB,OAAO,IAAI,CAACd,aAAa;EAC3B;EACA,IAAWY,cAAcA,CAAA,EAAG;IAC1B,OAAO,IAAI,CAACX,eAAe;EAC7B;EAEA,MAAac,IAAIA,CAAC;IAChBC,WAAW;IACXC,eAAe;IACfC,qBAAqB;IACrBC;EAMF,CAAC,EAAE;IACD;IACA,IAAI,CAACf,gBAAgB,CAAC,EAAE,CAAC;IACzB,IAAI,CAACC,sBAAsB,CAAC,IAAI,CAACR,UAAU,CAACuB,qBAAqB,CAAC;IAClE,IAAI,CAACb,oBAAoB,CAAC,KAAK,CAAC;IAChC,IAAI,CAACD,eAAe,CAAC,KAAK,CAAC;IAE3B,IAAI;MACF,MAAMe,iBAAiB,GAAGlC,eAAe,CAACmC,KAAK,CAC7Cd,SAAS,EACTS,eAAe,EACfC,qBACF,CAAC;MAED,MAAMK,YAAY,GAAGpC,eAAe,CAACmC,KAAK,CACxCH,0BAA0B,EAC1BH,WACF,CAAC;MAED,MAAM,CAACQ,iBAAiB,EAAEC,WAAW,CAAC,GAAG,MAAMC,OAAO,CAACC,GAAG,CAAC,CACzDN,iBAAiB,EACjBE,YAAY,CACb,CAAC;MAEF,MAAMK,aAAa,GAAGJ,iBAAiB,GAAG,CAAC,CAAC;MAC5C,MAAMK,mBAAmB,GAAGL,iBAAiB,GAAG,CAAC,CAAC;MAClD,MAAMM,SAAS,GAAGL,WAAW,GAAG,CAAC,CAAC;MAElC,IAAI,CAACG,aAAa,IAAI,CAACC,mBAAmB,IAAI,CAACC,SAAS,EAAE;QACxD,MAAM,IAAIC,KAAK,CAAC,uBAAuB,CAAC;MAC1C;MAEA,IAAI,CAACC,eAAe,GAAGC,IAAI,CAACC,KAAK,CAC/B,MAAM1C,iBAAiB,CAAC,SAAS,GAAGqC,mBAAoB,CAC1D,CAAC;MACD,IAAI,CAACM,YAAY,GAAGC,MAAM,CAACC,OAAO,CAACP,SAAS,EAAEF,aAAa,CAAC;MAC5D,IAAI,CAACtB,eAAe,CAAC,IAAI,CAAC;MAC
1B,IAAI,CAACgC,OAAO,GAAIC,IAAY,IAAK;QAC/B,IAAI,CAACA,IAAI,EAAE;UACT;QACF;QAEA,IACE9C,cAAc,CAAC+C,SAAS,IAAI,IAAI,CAACR,eAAe,IAChDO,IAAI,CAACE,OAAO,CAAC,IAAI,CAACT,eAAe,CAACU,SAAS,CAAC,IAAI,CAAC,EACjD;UACAH,IAAI,GAAGA,IAAI,CAACI,UAAU,CAAC,IAAI,CAACX,eAAe,CAACU,SAAS,EAAE,EAAE,CAAC;QAC5D;QACA,IACEjD,cAAc,CAACmD,SAAS,IAAI,IAAI,CAACZ,eAAe,IAChDO,IAAI,CAACE,OAAO,CAAC,IAAI,CAACT,eAAe,CAACa,SAAS,CAAC,IAAI,CAAC,EACjD;UACAN,IAAI,GAAGA,IAAI,CAACI,UAAU,CAAC,IAAI,CAACX,eAAe,CAACa,SAAS,EAAE,EAAE,CAAC;QAC5D;QACA,IAAIN,IAAI,CAACO,MAAM,KAAK,CAAC,EAAE;UACrB;QACF;QAEA,IAAI,CAAC3C,aAAa,CAACoC,IAAI,CAAC;QACxB,IAAI,CAACnC,gBAAgB,CAAC,IAAI,CAACN,SAAS,GAAGyC,IAAI,CAAC;MAC9C,CAAC;IACH,CAAC,CAAC,OAAOQ,CAAC,EAAE;MACV,IAAI,CAACzC,eAAe,CAAC,KAAK,CAAC;MAC3B,MAAM,IAAIyB,KAAK,CAAC1C,QAAQ,CAAC0D,CAAC,CAAC,CAAC;IAC9B;EACF;EAEOC,gBAAgBA,CAAC7C,aAAsC,EAAE;IAC9D,IAAI,CAACA,aAAa,GAAGA,aAAa;EACpC;EAEO8C,SAASA,CAAC;IACfpD,UAAU;IACVqD,WAAW;IACXC;EAKF,CAAC,EAAE;IACD,IAAI,CAACtD,UAAU,GAAG;MAAE,GAAGN,mBAAmB;MAAE,GAAGM;IAAW,CAAC;IAC3D,IAAI,CAACqD,WAAW,GAAGA,WAAW;IAE9B,IAAIC,gBAAgB,EAAEC,oBAAoB,EAAE;MAC1C,IAAI,CAACjB,YAAY,CAACkB,gBAAgB,CAACF,gBAAgB,CAACC,oBAAoB,CAAC;IAC3E;IACA,IAAID,gBAAgB,EAAEG,iBAAiB,EAAE;MACvC,IAAI,CAACnB,YAAY,CAACoB,eAAe,CAACJ,gBAAgB,CAACG,iBAAiB,CAAC;IACvE;;IAEA;IACA,IAAI,CAAClD,gBAAgB,CAAC,EAAE,CAAC;IACzB,IAAI,CAACC,sBAAsB,CAAC,IAAI,CAACR,UAAU,CAACuB,qBAAqB,CAAC;IAClE,IAAI,CAACb,oBAAoB,CAAC,KAAK,CAAC;EAClC;EAEOiD,MAAMA,CAAA,EAAG;IACd,IAAI,IAAI,CAACxD,aAAa,EAAE;MACtB,MAAM,IAAI+B,KAAK,CACb1C,QAAQ,CAACD,OAAO,CAACqE,eAAe,CAAC,GAC/B,+DACJ,CAAC;IACH;IACA,IAAI,CAACnB,OAAO,GAAG,MAAM,CAAC,CAAC;IACvB,IAAI,CAACH,YAAY,CAACuB,MAAM,CAAC,CAAC;IAC1B,IAAI,CAACpD,eAAe,CAAC,KAAK,CAAC;IAC3B,IAAI,CAACC,oBAAoB,CAAC,KAAK,CAAC;EAClC;EAEA,MAAaoD,OAAOA,CAACC,KAAa,EAAE;IAClC,IAAI,CAAC,IAAI,CAAC7D,QAAQ,EAAE;MAClB,MAAM,IAAIgC,KAAK,CAAC1C,QAAQ,CAACD,OAAO,CAACyE,eAAe,CAAC,CAAC;IACpD;IACA,IAAI,IAAI,CAAC7D,aAAa,EAAE;MACtB,MAAM,IAAI+B,KAAK,CAAC1C,QAAQ,CAACD,OAAO,CAACqE,eAAe,CAAC,CAAC;IACpD;IACA,IAAI;MACF,IAAI,CAACrD,gBAAgB
,CAAC,EAAE,CAAC;MACzB,IAAI,CAACG,oBAAoB,CAAC,IAAI,CAAC;MAC/B,MAAM,IAAI,CAAC4B,YAAY,CAAC2B,QAAQ,CAACF,KAAK,EAAE,IAAI,CAACtB,OAAO,CAAC;IACvD,CAAC,CAAC,OAAOS,CAAC,EAAE;MACV,MAAM,IAAIhB,KAAK,CAAC1C,QAAQ,CAAC0D,CAAC,CAAC,CAAC;IAC9B,CAAC,SAAS;MACR,IAAI,CAACxC,oBAAoB,CAAC,KAAK,CAAC;IAClC;EACF;EAEOwD,SAASA,CAAA,EAAG;IACjB,IAAI,CAAC5B,YAAY,CAAC4B,SAAS,CAAC,CAAC;EAC/B;EAEOC,sBAAsBA,CAAA,EAAW;IACtC,OAAO,IAAI,CAAC7B,YAAY,CAAC6B,sBAAsB,CAAC,CAAC;EACnD;EAEA,MAAaF,QAAQA,CAACG,QAAmB,EAAEC,KAAiB,EAAE;IAC5D,IAAI,CAAC,IAAI,CAACnE,QAAQ,EAAE;MAClB,MAAM,IAAIgC,KAAK,CAAC1C,QAAQ,CAACD,OAAO,CAACyE,eAAe,CAAC,CAAC;IACpD;IACA,IAAII,QAAQ,CAACnB,MAAM,KAAK,CAAC,EAAE;MACzB,MAAM,IAAIf,KAAK,CAAC,yBAAyB,CAAC;IAC5C;IACA,IAAIkC,QAAQ,CAAC,CAAC,CAAC,IAAIA,QAAQ,CAAC,CAAC,CAAC,CAACE,IAAI,KAAK,QAAQ,EAAE;MAChDxE,MAAM,CAACc,IAAI,CACT,0LACF,CAAC;IACH;IAEA,MAAM2D,YAAoB,GAAG,IAAI,CAACC,iBAAiB,CACjDJ,QAAQ,EACR,IAAI,CAACjC,eAAe,EACpBkC,KAAK;IACL;IACA;MAAEI,qBAAqB,EAAE,KAAK;MAAEC,qBAAqB,EAAE;IAAK,CAC9D,CAAC;IAED,MAAM,IAAI,CAACZ,OAAO,CAACS,YAAY,CAAC;EAClC;EAEA,MAAaI,WAAWA,CAACC,OAAe,EAAE;IACxC,IAAI,CAACpE,sBAAsB,CAAC,CAC1B,GAAG,IAAI,CAACJ,eAAe,EACvB;MAAEyE,OAAO,EAAED,OAAO;MAAEN,IAAI,EAAE;IAAO,CAAC,CACnC,CAAC;IAEF,MAAMQ,wBAAmC,GAAG,CAC1C;MAAED,OAAO,EAAE,IAAI,CAAC7E,UAAU,CAAC+E,YAAY;MAAET,IAAI,EAAE;IAAS,CAAC,EACzD,GAAG,IAAI,CAAClE,eAAe,CAAC4E,KAAK,CAAC,CAAC,IAAI,CAAChF,UAAU,CAACiF,mBAAmB,CAAC,CACpE;IAED,MAAM,IAAI,CAAChB,QAAQ,CAACa,wBAAwB,EAAE,IAAI,CAACzB,WAAW,EAAEgB,KAAK,CAAC;IAEtE,IAAI,CAAC,IAAI,CAAChB,WAAW,IAAI,IAAI,CAACA,WAAW,CAAC6B,gBAAgB,EAAE;MAC1D,IAAI,CAAC1E,sBAAsB,CAAC,CAC1B,GAAG,IAAI,CAACJ,eAAe,EACvB;QAAEyE,OAAO,EAAE,IAAI,CAAC5E,SAAS;QAAEqE,IAAI,EAAE;MAAY,CAAC,CAC/C,CAAC;IACJ;IACA,IAAI,CAAC,IAAI,CAACjB,WAAW,EAAE;MACrB;IACF;IAEA,MAAM8B,SAAS,GAAGtF,aAAa,CAAC,IAAI,CAACI,SAAS,CAAC;IAE/C,KAAK,MAAMmF,QAAQ,IAAID,SAAS,EAAE;MAChC,IAAI,CAAC9B,WAAW,CACbgC,mBAAmB,CAACD,QAAQ,CAAC,CAC7BE,IAAI,CAAEC,YAA2B,IAAK;QACrC,IAAIA,YAAY,EAAE;UAChB,IAAI,CAAC/E,sBAAsB,CAAC,CAC1B,GAAG,IAAI,CAACJ,eAAe,EACvB;YAAEyE,OAAO,EAAEU,YAAY;YAAEjB,
IAAI,EAAE;UAAY,CAAC,CAC7C,CAAC;QACJ;MACF,CAAC,CAAC;IACN;EACF;EAEOkB,aAAaA,CAACC,KAAa,EAAE;IAClC;IACA;IACA,MAAMC,iBAAiB,GAAG,IAAI,CAACtF,eAAe,CAAC4E,KAAK,CAAC,CAAC,EAAES,KAAK,CAAC;IAE9D,IAAI,CAACjF,sBAAsB,CAACkF,iBAAiB,CAAC;EAChD;EAEQlB,iBAAiBA,CACvBJ,QAAmB,EACnBjC,eAAoB,EACpBkC,KAAiB,EACjBsB,aAAsB,EACd;IACR,IAAI,CAACxD,eAAe,CAACyD,aAAa,EAAE;MAClC,MAAM1D,KAAK,CAAC,gDAAgD,CAAC;IAC/D;IACA,MAAM2D,QAAQ,GAAG,IAAIpG,QAAQ,CAAC0C,eAAe,CAACyD,aAAa,CAAC;IAE5D,MAAME,aAAa,GAAGC,MAAM,CAACC,WAAW,CACtCD,MAAM,CAACE,IAAI,CAACrG,cAAc,CAAC,CACxBsG,MAAM,CAAEC,GAAG,IAAKA,GAAG,IAAIhE,eAAe,CAAC,CACvCiE,GAAG,CAAED,GAAG,IAAK,CAACA,GAAG,EAAEhE,eAAe,CAACgE,GAAG,CAAC,CAAC,CAC7C,CAAC;IAED,MAAME,MAAM,GAAGR,QAAQ,CAACS,MAAM,CAAC;MAC7BlC,QAAQ;MACRC,KAAK;MACL,GAAGsB,aAAa;MAChB,GAAGG;IACL,CAAC,CAAC;IACF,OAAOO,MAAM;EACf;AACF","ignoreList":[]}
|
|
@@ -51,10 +51,12 @@ export const useLLM = ({
|
|
|
51
51
|
// memoization of returned functions
|
|
52
52
|
const configure = useCallback(({
|
|
53
53
|
chatConfig,
|
|
54
|
-
toolsConfig
|
|
54
|
+
toolsConfig,
|
|
55
|
+
generationConfig
|
|
55
56
|
}) => controllerInstance.configure({
|
|
56
57
|
chatConfig,
|
|
57
|
-
toolsConfig
|
|
58
|
+
toolsConfig,
|
|
59
|
+
generationConfig
|
|
58
60
|
}), [controllerInstance]);
|
|
59
61
|
const generate = useCallback((messages, tools) => {
|
|
60
62
|
setResponse('');
|
|
@@ -66,6 +68,7 @@ export const useLLM = ({
|
|
|
66
68
|
}, [controllerInstance]);
|
|
67
69
|
const deleteMessage = useCallback(index => controllerInstance.deleteMessage(index), [controllerInstance]);
|
|
68
70
|
const interrupt = useCallback(() => controllerInstance.interrupt(), [controllerInstance]);
|
|
71
|
+
const getGeneratedTokenCount = useCallback(() => controllerInstance.getGeneratedTokenCount(), [controllerInstance]);
|
|
69
72
|
return {
|
|
70
73
|
messageHistory,
|
|
71
74
|
response,
|
|
@@ -74,6 +77,7 @@ export const useLLM = ({
|
|
|
74
77
|
isGenerating,
|
|
75
78
|
downloadProgress,
|
|
76
79
|
error,
|
|
80
|
+
getGeneratedTokenCount: getGeneratedTokenCount,
|
|
77
81
|
configure: configure,
|
|
78
82
|
generate: generate,
|
|
79
83
|
sendMessage: sendMessage,
|