node-llama-cpp 3.0.0-beta.12 → 3.0.0-beta.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/ChatWrapper.d.ts +1 -0
- package/dist/ChatWrapper.js +2 -1
- package/dist/ChatWrapper.js.map +1 -1
- package/dist/TemplateChatWrapper.d.ts +68 -0
- package/dist/TemplateChatWrapper.js +239 -0
- package/dist/TemplateChatWrapper.js.map +1 -0
- package/dist/bindings/AddonTypes.d.ts +15 -5
- package/dist/bindings/Llama.d.ts +9 -3
- package/dist/bindings/Llama.js +61 -19
- package/dist/bindings/Llama.js.map +1 -1
- package/dist/bindings/consts.d.ts +2 -0
- package/dist/bindings/consts.js +11 -0
- package/dist/bindings/consts.js.map +1 -0
- package/dist/bindings/getLlama.d.ts +14 -18
- package/dist/bindings/getLlama.js +210 -78
- package/dist/bindings/getLlama.js.map +1 -1
- package/dist/bindings/types.d.ts +11 -5
- package/dist/bindings/types.js +22 -0
- package/dist/bindings/types.js.map +1 -1
- package/dist/bindings/utils/asyncEvery.d.ts +5 -0
- package/dist/bindings/utils/asyncEvery.js +15 -0
- package/dist/bindings/utils/asyncEvery.js.map +1 -0
- package/dist/bindings/utils/asyncSome.d.ts +5 -0
- package/dist/bindings/utils/asyncSome.js +27 -0
- package/dist/bindings/utils/asyncSome.js.map +1 -0
- package/dist/bindings/utils/cloneLlamaCppRepo.js +13 -3
- package/dist/bindings/utils/cloneLlamaCppRepo.js.map +1 -1
- package/dist/bindings/utils/compileLLamaCpp.d.ts +4 -1
- package/dist/bindings/utils/compileLLamaCpp.js +136 -74
- package/dist/bindings/utils/compileLLamaCpp.js.map +1 -1
- package/dist/bindings/utils/detectAvailableComputeLayers.d.ts +14 -0
- package/dist/bindings/utils/detectAvailableComputeLayers.js +300 -0
- package/dist/bindings/utils/detectAvailableComputeLayers.js.map +1 -0
- package/dist/bindings/utils/detectGlibc.d.ts +4 -0
- package/dist/bindings/utils/detectGlibc.js +36 -0
- package/dist/bindings/utils/detectGlibc.js.map +1 -0
- package/dist/bindings/utils/getBestComputeLayersAvailable.d.ts +9 -0
- package/dist/bindings/utils/getBestComputeLayersAvailable.js +29 -0
- package/dist/bindings/utils/getBestComputeLayersAvailable.js.map +1 -0
- package/dist/bindings/utils/getBuildFolderNameForBuildOptions.js +12 -6
- package/dist/bindings/utils/getBuildFolderNameForBuildOptions.js.map +1 -1
- package/dist/bindings/utils/getGpuTypesToUseForOption.d.ts +11 -0
- package/dist/bindings/utils/getGpuTypesToUseForOption.js +30 -0
- package/dist/bindings/utils/getGpuTypesToUseForOption.js.map +1 -0
- package/dist/bindings/utils/getLinuxDistroInfo.d.ts +9 -0
- package/dist/bindings/utils/getLinuxDistroInfo.js +46 -0
- package/dist/bindings/utils/getLinuxDistroInfo.js.map +1 -0
- package/dist/bindings/utils/getPlatformInfo.d.ts +5 -0
- package/dist/bindings/utils/getPlatformInfo.js +28 -0
- package/dist/bindings/utils/getPlatformInfo.js.map +1 -0
- package/dist/bindings/utils/hasFileInPath.d.ts +2 -0
- package/dist/bindings/utils/hasFileInPath.js +34 -0
- package/dist/bindings/utils/hasFileInPath.js.map +1 -0
- package/dist/bindings/utils/logBinaryUsageExampleToConsole.d.ts +1 -1
- package/dist/bindings/utils/logBinaryUsageExampleToConsole.js +3 -9
- package/dist/bindings/utils/logBinaryUsageExampleToConsole.js.map +1 -1
- package/dist/bindings/utils/logDistroInstallInstruction.d.ts +14 -0
- package/dist/bindings/utils/logDistroInstallInstruction.js +48 -0
- package/dist/bindings/utils/logDistroInstallInstruction.js.map +1 -0
- package/dist/bindings/utils/resolveChatWrapperBasedOnWrapperTypeName.d.ts +9 -2
- package/dist/bindings/utils/resolveChatWrapperBasedOnWrapperTypeName.js +10 -4
- package/dist/bindings/utils/resolveChatWrapperBasedOnWrapperTypeName.js.map +1 -1
- package/dist/bindings/utils/testBindingBinary.d.ts +1 -0
- package/dist/bindings/utils/testBindingBinary.js +98 -0
- package/dist/bindings/utils/testBindingBinary.js.map +1 -0
- package/dist/bindings/utils/testCmakeBinary.d.ts +5 -0
- package/dist/bindings/utils/testCmakeBinary.js +32 -0
- package/dist/bindings/utils/testCmakeBinary.js.map +1 -0
- package/dist/chatWrappers/ChatMLChatWrapper.js +1 -1
- package/dist/chatWrappers/ChatMLChatWrapper.js.map +1 -1
- package/dist/chatWrappers/GemmaChatWrapper.d.ts +18 -0
- package/dist/chatWrappers/GemmaChatWrapper.js +86 -0
- package/dist/chatWrappers/GemmaChatWrapper.js.map +1 -0
- package/dist/chatWrappers/resolveChatWrapperBasedOnModel.js +3 -0
- package/dist/chatWrappers/resolveChatWrapperBasedOnModel.js.map +1 -1
- package/dist/cli/cli.js +2 -0
- package/dist/cli/cli.js.map +1 -1
- package/dist/cli/commands/BuildCommand.d.ts +5 -5
- package/dist/cli/commands/BuildCommand.js +78 -60
- package/dist/cli/commands/BuildCommand.js.map +1 -1
- package/dist/cli/commands/ChatCommand.js +31 -14
- package/dist/cli/commands/ChatCommand.js.map +1 -1
- package/dist/cli/commands/CompleteCommand.js +30 -13
- package/dist/cli/commands/CompleteCommand.js.map +1 -1
- package/dist/cli/commands/DebugCommand.js +3 -9
- package/dist/cli/commands/DebugCommand.js.map +1 -1
- package/dist/cli/commands/DownloadCommand.d.ts +5 -5
- package/dist/cli/commands/DownloadCommand.js +97 -56
- package/dist/cli/commands/DownloadCommand.js.map +1 -1
- package/dist/cli/commands/InfillCommand.js +30 -13
- package/dist/cli/commands/InfillCommand.js.map +1 -1
- package/dist/cli/commands/InspectCommand.d.ts +7 -0
- package/dist/cli/commands/InspectCommand.js +113 -0
- package/dist/cli/commands/InspectCommand.js.map +1 -0
- package/dist/cli/commands/OnPostInstallCommand.js +2 -0
- package/dist/cli/commands/OnPostInstallCommand.js.map +1 -1
- package/dist/cli/utils/logUsedGpuTypeOption.d.ts +2 -0
- package/dist/cli/utils/logUsedGpuTypeOption.js +9 -0
- package/dist/cli/utils/logUsedGpuTypeOption.js.map +1 -0
- package/dist/config.d.ts +4 -4
- package/dist/config.js +11 -12
- package/dist/config.js.map +1 -1
- package/dist/evaluator/LlamaChatSession/utils/defineChatSessionFunction.d.ts +3 -0
- package/dist/evaluator/LlamaChatSession/utils/defineChatSessionFunction.js +3 -0
- package/dist/evaluator/LlamaChatSession/utils/defineChatSessionFunction.js.map +1 -1
- package/dist/evaluator/LlamaContext/LlamaContext.d.ts +5 -8
- package/dist/evaluator/LlamaContext/LlamaContext.js +111 -65
- package/dist/evaluator/LlamaContext/LlamaContext.js.map +1 -1
- package/dist/evaluator/LlamaContext/types.d.ts +13 -6
- package/dist/evaluator/LlamaEmbeddingContext.d.ts +6 -5
- package/dist/evaluator/LlamaEmbeddingContext.js +32 -22
- package/dist/evaluator/LlamaEmbeddingContext.js.map +1 -1
- package/dist/evaluator/LlamaGrammar.js +1 -0
- package/dist/evaluator/LlamaGrammar.js.map +1 -1
- package/dist/evaluator/LlamaModel.d.ts +16 -16
- package/dist/evaluator/LlamaModel.js +95 -20
- package/dist/evaluator/LlamaModel.js.map +1 -1
- package/dist/gguf/GGUFInsights.d.ts +28 -0
- package/dist/gguf/GGUFInsights.js +58 -0
- package/dist/gguf/GGUFInsights.js.map +1 -0
- package/dist/gguf/GGUFMetadata.d.ts +19 -0
- package/dist/gguf/GGUFMetadata.js +38 -0
- package/dist/gguf/GGUFMetadata.js.map +1 -0
- package/dist/gguf/errors/InvalidGGUFMagicError.d.ts +3 -0
- package/dist/gguf/errors/InvalidGGUFMagicError.js +6 -0
- package/dist/gguf/errors/InvalidGGUFMagicError.js.map +1 -0
- package/dist/gguf/errors/MetadataNotParsedYetError.d.ts +3 -0
- package/dist/gguf/errors/MetadataNotParsedYetError.js +6 -0
- package/dist/gguf/errors/MetadataNotParsedYetError.js.map +1 -0
- package/dist/gguf/errors/MissingNodeLlamaError.d.ts +3 -0
- package/dist/gguf/errors/MissingNodeLlamaError.js +6 -0
- package/dist/gguf/errors/MissingNodeLlamaError.js.map +1 -0
- package/dist/gguf/errors/ModelScore/NotEnoughVRamError.d.ts +5 -0
- package/dist/gguf/errors/ModelScore/NotEnoughVRamError.js +11 -0
- package/dist/gguf/errors/ModelScore/NotEnoughVRamError.js.map +1 -0
- package/dist/gguf/errors/UnsupportedMetadataTypeError.d.ts +4 -0
- package/dist/gguf/errors/UnsupportedMetadataTypeError.js +8 -0
- package/dist/gguf/errors/UnsupportedMetadataTypeError.js.map +1 -0
- package/dist/gguf/ggufParser/GGUFParser.d.ts +18 -0
- package/dist/gguf/ggufParser/GGUFParser.js +123 -0
- package/dist/gguf/ggufParser/GGUFParser.js.map +1 -0
- package/dist/gguf/ggufParser/GGUFTypes.d.ts +257 -0
- package/dist/gguf/ggufParser/GGUFTypes.js +2 -0
- package/dist/gguf/ggufParser/GGUFTypes.js.map +1 -0
- package/dist/gguf/ggufParser/checkArchitecture.d.ts +14 -0
- package/dist/gguf/ggufParser/checkArchitecture.js +74 -0
- package/dist/gguf/ggufParser/checkArchitecture.js.map +1 -0
- package/dist/gguf/ggufParser/stream/GGUFBaseStream.d.ts +38 -0
- package/dist/gguf/ggufParser/stream/GGUFBaseStream.js +83 -0
- package/dist/gguf/ggufParser/stream/GGUFBaseStream.js.map +1 -0
- package/dist/gguf/ggufParser/stream/GGUFFetchStream.d.ts +14 -0
- package/dist/gguf/ggufParser/stream/GGUFFetchStream.js +35 -0
- package/dist/gguf/ggufParser/stream/GGUFFetchStream.js.map +1 -0
- package/dist/gguf/ggufParser/stream/GGUFReadStream.d.ts +15 -0
- package/dist/gguf/ggufParser/stream/GGUFReadStream.js +40 -0
- package/dist/gguf/ggufParser/stream/GGUFReadStream.js.map +1 -0
- package/dist/index.d.ts +3 -1
- package/dist/index.js +3 -1
- package/dist/index.js.map +1 -1
- package/dist/state.d.ts +2 -0
- package/dist/state.js +7 -0
- package/dist/state.js.map +1 -1
- package/dist/utils/DisposeGuard.d.ts +13 -0
- package/dist/utils/DisposeGuard.js +120 -0
- package/dist/utils/DisposeGuard.js.map +1 -0
- package/dist/utils/LlamaText.js +2 -2
- package/dist/utils/LlamaText.js.map +1 -1
- package/dist/utils/cmake.js +23 -10
- package/dist/utils/cmake.js.map +1 -1
- package/dist/utils/getBuildDefaults.d.ts +1 -3
- package/dist/utils/getBuildDefaults.js +2 -4
- package/dist/utils/getBuildDefaults.js.map +1 -1
- package/dist/utils/getConsoleLogPrefix.d.ts +1 -1
- package/dist/utils/getConsoleLogPrefix.js +5 -4
- package/dist/utils/getConsoleLogPrefix.js.map +1 -1
- package/dist/utils/mergeUnionTypes.d.ts +6 -0
- package/dist/utils/mergeUnionTypes.js +2 -0
- package/dist/utils/mergeUnionTypes.js.map +1 -0
- package/dist/utils/parseTextTemplate.d.ts +66 -0
- package/dist/utils/parseTextTemplate.js +116 -0
- package/dist/utils/parseTextTemplate.js.map +1 -0
- package/dist/utils/removeNullFields.d.ts +2 -2
- package/dist/utils/removeNullFields.js.map +1 -1
- package/dist/utils/spawnCommand.d.ts +11 -1
- package/dist/utils/spawnCommand.js +55 -7
- package/dist/utils/spawnCommand.js.map +1 -1
- package/llama/CMakeLists.txt +11 -5
- package/llama/addon.cpp +700 -83
- package/llama/binariesGithubRelease.json +1 -1
- package/llama/gitRelease.bundle +0 -0
- package/llama/grammars/json.gbnf +1 -1
- package/llama/grammars/json_arr.gbnf +1 -1
- package/llama/llama.cpp.info.json +1 -1
- package/llamaBins/linux-arm64/_nlcBuildMetadata.json +1 -0
- package/llamaBins/linux-arm64/llama-addon.node +0 -0
- package/llamaBins/linux-armv7l/_nlcBuildMetadata.json +1 -0
- package/llamaBins/linux-armv7l/llama-addon.node +0 -0
- package/llamaBins/linux-x64/_nlcBuildMetadata.json +1 -0
- package/llamaBins/linux-x64/llama-addon.node +0 -0
- package/llamaBins/linux-x64-cuda/_nlcBuildMetadata.json +1 -0
- package/llamaBins/linux-x64-cuda/llama-addon.node +0 -0
- package/llamaBins/linux-x64-vulkan/_nlcBuildMetadata.json +1 -0
- package/llamaBins/linux-x64-vulkan/llama-addon.node +0 -0
- package/llamaBins/mac-arm64-metal/_nlcBuildMetadata.json +1 -0
- package/llamaBins/mac-arm64-metal/default.metallib +0 -0
- package/llamaBins/mac-arm64-metal/llama-addon.node +0 -0
- package/llamaBins/mac-x64/_nlcBuildMetadata.json +1 -0
- package/llamaBins/mac-x64/llama-addon.node +0 -0
- package/llamaBins/win-x64/_nlcBuildMetadata.json +1 -0
- package/llamaBins/win-x64/llama-addon.exp +0 -0
- package/llamaBins/win-x64/llama-addon.lib +0 -0
- package/llamaBins/win-x64/llama-addon.node +0 -0
- package/llamaBins/win-x64-cuda/_nlcBuildMetadata.json +1 -0
- package/llamaBins/win-x64-cuda/llama-addon.exp +0 -0
- package/llamaBins/win-x64-cuda/llama-addon.lib +0 -0
- package/llamaBins/win-x64-cuda/llama-addon.node +0 -0
- package/llamaBins/win-x64-vulkan/_nlcBuildMetadata.json +1 -0
- package/llamaBins/win-x64-vulkan/llama-addon.exp +0 -0
- package/llamaBins/win-x64-vulkan/llama-addon.lib +0 -0
- package/llamaBins/win-x64-vulkan/llama-addon.node +0 -0
- package/package.json +14 -9
- package/dist/cli/utils/logEnabledComputeLayers.d.ts +0 -8
- package/dist/cli/utils/logEnabledComputeLayers.js +0 -11
- package/dist/cli/utils/logEnabledComputeLayers.js.map +0 -1
- package/llamaBins/linux-arm64/.buildMetadata.json +0 -1
- package/llamaBins/linux-armv7l/.buildMetadata.json +0 -1
- package/llamaBins/linux-x64/.buildMetadata.json +0 -1
- package/llamaBins/linux-x64-cuda/.buildMetadata.json +0 -1
- package/llamaBins/linux-x64-vulkan/.buildMetadata.json +0 -1
- package/llamaBins/mac-arm64-metal/.buildMetadata.json +0 -1
- package/llamaBins/mac-arm64-metal/ggml-metal.metal +0 -7022
- package/llamaBins/mac-x64/.buildMetadata.json +0 -1
- package/llamaBins/win-x64/.buildMetadata.json +0 -1
- package/llamaBins/win-x64-cuda/.buildMetadata.json +0 -1
- package/llamaBins/win-x64-vulkan/.buildMetadata.json +0 -1
package/llama/addon.cpp
CHANGED
|
@@ -35,10 +35,77 @@ void addonCallJsLogCallback(
|
|
|
35
35
|
using AddonThreadSafeLogCallbackFunction =
|
|
36
36
|
Napi::TypedThreadSafeFunction<AddonThreadSafeLogCallbackFunctionContext, addon_logger_log, addonCallJsLogCallback>;
|
|
37
37
|
|
|
38
|
+
|
|
39
|
+
// Payload carried by one progress notification.
// `progress` is immutable after construction; instances are created with
// aggregate initialization (`addon_progress_event { value }`).
struct addon_progress_event {
    const float progress;
};
|
|
43
|
+
|
|
44
|
+
using AddonThreadSafeProgressCallbackFunctionContext = Napi::Reference<Napi::Value>;
|
|
45
|
+
void addonCallJsProgressCallback(
|
|
46
|
+
Napi::Env env, Napi::Function callback, AddonThreadSafeProgressCallbackFunctionContext* context, addon_progress_event* data
|
|
47
|
+
);
|
|
48
|
+
using AddonThreadSafeProgressEventCallbackFunction =
|
|
49
|
+
Napi::TypedThreadSafeFunction<AddonThreadSafeProgressCallbackFunctionContext, addon_progress_event, addonCallJsProgressCallback>;
|
|
50
|
+
|
|
51
|
+
|
|
38
52
|
AddonThreadSafeLogCallbackFunction addonThreadSafeLoggerCallback;
|
|
39
53
|
bool addonJsLoggerCallbackSet = false;
|
|
40
54
|
int addonLoggerLogLevel = 5;
|
|
41
55
|
bool backendInitialized = false;
|
|
56
|
+
bool backendDisposed = false;
|
|
57
|
+
|
|
58
|
+
// Delivers one progress event to the JS `callback` and frees the event.
//
// Parameters:
//   env      - JS environment to create the number argument in; the JS call
//              is skipped when it is null.
//   callback - JS function to invoke with the progress value; the JS call is
//              skipped when it is null.
//   context  - unused here.
//   data     - event to deliver; this function takes ownership and always
//              deletes it, whether or not the JS call was made.
//
// The call is intentionally NOT gated on `addonJsLoggerCallbackSet`: that flag
// tracks the logger callback, and checking it here silently dropped progress
// events whenever no logger was registered.
void addonCallJsProgressCallback(
    Napi::Env env, Napi::Function callback, AddonThreadSafeProgressCallbackFunctionContext* context, addon_progress_event* data
) {
    // `data` is checked up front so a null event is never dereferenced.
    if (env != nullptr && callback != nullptr && data != nullptr) {
        try {
            callback.Call({Napi::Number::New(env, data->progress)});
        } catch (const Napi::Error& e) {
            // Deliberate best-effort: an error thrown by the JS listener is
            // swallowed so it cannot propagate out of this callback.
        }
    }

    // Deleting a null pointer is a no-op, so no guard is needed.
    delete data;
}
|
|
71
|
+
|
|
72
|
+
// Sums the byte sizes of the per-token buffers for a batch with room for
// `n_tokens_alloc` tokens.
//
// Parameters:
//   n_tokens_alloc - number of token slots.
//   embd           - when non-zero, each slot is counted as `embd` floats;
//                    when zero, each slot is counted as one `llama_token`.
//   n_seq_max      - number of `llama_seq_id` entries counted per slot.
//
// Returns the total in bytes.
//
// NOTE(review): the list of buffers presumably mirrors what llama.cpp
// allocates for a batch - confirm against the llama.cpp version in use.
static uint64_t calculateBatchMemorySize(int32_t n_tokens_alloc, int32_t embd, int32_t n_seq_max) {
    // Widen to 64 bits before multiplying. `sizeof` yields `size_t`, which is
    // only 32 bits wide on 32-bit targets, so the products could wrap there;
    // it also keeps `slots + 1` out of signed int32 arithmetic.
    const uint64_t slots = static_cast<uint64_t>(n_tokens_alloc);
    const uint64_t embdWidth = static_cast<uint64_t>(embd);
    const uint64_t seqPerSlot = static_cast<uint64_t>(n_seq_max);

    uint64_t totalSize = 0;

    if (embd) {
        totalSize += static_cast<uint64_t>(sizeof(float)) * slots * embdWidth;
    } else {
        totalSize += static_cast<uint64_t>(sizeof(llama_token)) * slots;
    }

    totalSize += static_cast<uint64_t>(sizeof(llama_pos)) * slots;
    totalSize += static_cast<uint64_t>(sizeof(int32_t)) * slots;

    // One `llama_seq_id*` per slot, plus one extra pointer.
    totalSize += static_cast<uint64_t>(sizeof(llama_seq_id *)) * (slots + 1);

    totalSize += static_cast<uint64_t>(sizeof(llama_seq_id)) * seqPerSlot * slots;

    totalSize += static_cast<uint64_t>(sizeof(int8_t)) * slots;

    return totalSize;
}
|
|
91
|
+
|
|
92
|
+
// Reports `size` additional bytes of externally held memory to the JS engine.
// `AdjustExternalMemory` is called with a signed 64-bit delta, so the unsigned
// total is fed to it in steps that each fit in an int64_t.
static void adjustNapiExternalMemoryAdd(Napi::Env env, uint64_t size) {
    const uint64_t maxStep = std::numeric_limits<int64_t>::max();

    for (uint64_t remaining = size; remaining != 0;) {
        const int64_t step = static_cast<int64_t>(remaining < maxStep ? remaining : maxStep);
        Napi::MemoryManagement::AdjustExternalMemory(env, step);
        remaining -= static_cast<uint64_t>(step);
    }
}
|
|
100
|
+
|
|
101
|
+
// Reports that `size` bytes of externally held memory were released.
// `AdjustExternalMemory` is called with a signed 64-bit delta, so the unsigned
// total is fed to it as negative steps whose magnitude fits in an int64_t.
static void adjustNapiExternalMemorySubtract(Napi::Env env, uint64_t size) {
    const uint64_t maxStep = std::numeric_limits<int64_t>::max();

    for (uint64_t remaining = size; remaining != 0;) {
        const int64_t step = static_cast<int64_t>(remaining < maxStep ? remaining : maxStep);
        Napi::MemoryManagement::AdjustExternalMemory(env, -step);
        remaining -= static_cast<uint64_t>(step);
    }
}
|
|
42
109
|
|
|
43
110
|
std::string addon_model_token_to_piece(const struct llama_model* model, llama_token token) {
|
|
44
111
|
std::vector<char> result(8, 0);
|
|
@@ -107,6 +174,22 @@ Napi::Value getGpuVramInfo(const Napi::CallbackInfo& info) {
|
|
|
107
174
|
return result;
|
|
108
175
|
}
|
|
109
176
|
|
|
177
|
+
// Returns the name of the GPU backend selected at compile time as a JS
// string: "cuda", "vulkan" or "metal". Returns `undefined` when none of the
// GPU_INFO_USE_* macros is defined. If several are defined, the first match
// in the order CUBLAS, VULKAN, METAL wins.
Napi::Value getGpuType(const Napi::CallbackInfo& info) {
#if defined(GPU_INFO_USE_CUBLAS)
    return Napi::String::New(info.Env(), "cuda");
#elif defined(GPU_INFO_USE_VULKAN)
    return Napi::String::New(info.Env(), "vulkan");
#elif defined(GPU_INFO_USE_METAL)
    return Napi::String::New(info.Env(), "metal");
#else
    return info.Env().Undefined();
#endif
}
|
|
192
|
+
|
|
110
193
|
static Napi::Value getNapiToken(const Napi::CallbackInfo& info, llama_model* model, llama_token token) {
|
|
111
194
|
auto tokenType = llama_token_get_type(model, token);
|
|
112
195
|
|
|
@@ -120,28 +203,49 @@ static Napi::Value getNapiToken(const Napi::CallbackInfo& info, llama_model* mod
|
|
|
120
203
|
// Returns `token` as a JS number when the model reports its type as
// LLAMA_TOKEN_TYPE_CONTROL or LLAMA_TOKEN_TYPE_USER_DEFINED; otherwise
// returns -1.
static Napi::Value getNapiControlToken(const Napi::CallbackInfo& info, llama_model* model, llama_token token) {
    const auto kind = llama_token_get_type(model, token);
    const bool accepted = kind == LLAMA_TOKEN_TYPE_CONTROL || kind == LLAMA_TOKEN_TYPE_USER_DEFINED;

    if (accepted) {
        return Napi::Number::From(info.Env(), token);
    }

    return Napi::Number::From(info.Env(), -1);
}
|
|
129
212
|
|
|
213
|
+
static bool llamaModelParamsProgressCallback(float progress, void * user_data);
|
|
214
|
+
|
|
130
215
|
class AddonModel : public Napi::ObjectWrap<AddonModel> {
|
|
131
216
|
public:
|
|
132
217
|
llama_model_params model_params;
|
|
133
218
|
llama_model* model;
|
|
219
|
+
uint64_t loadedModelSize = 0;
|
|
220
|
+
Napi::Reference<Napi::Object> addonExportsRef;
|
|
221
|
+
bool hasAddonExportsRef = false;
|
|
222
|
+
|
|
223
|
+
std::string modelPath;
|
|
224
|
+
bool modelLoaded = false;
|
|
225
|
+
bool abortModelLoad = false;
|
|
226
|
+
bool model_load_stopped = false;
|
|
227
|
+
float rawModelLoadPercentage = 0;
|
|
228
|
+
unsigned modelLoadPercentage = 0;
|
|
229
|
+
AddonThreadSafeProgressEventCallbackFunction addonThreadSafeOnLoadProgressEventCallback;
|
|
230
|
+
bool onLoadProgressEventCallbackSet = false;
|
|
231
|
+
bool hasLoadAbortSignal = false;
|
|
232
|
+
|
|
134
233
|
bool disposed = false;
|
|
135
234
|
|
|
136
235
|
AddonModel(const Napi::CallbackInfo& info) : Napi::ObjectWrap<AddonModel>(info) {
|
|
137
236
|
model_params = llama_model_default_params();
|
|
138
237
|
|
|
139
238
|
// Get the model path
|
|
140
|
-
|
|
239
|
+
modelPath = info[0].As<Napi::String>().Utf8Value();
|
|
141
240
|
|
|
142
241
|
if (info.Length() > 1 && info[1].IsObject()) {
|
|
143
242
|
Napi::Object options = info[1].As<Napi::Object>();
|
|
144
243
|
|
|
244
|
+
if (options.Has("addonExports")) {
|
|
245
|
+
addonExportsRef = Napi::Persistent(options.Get("addonExports").As<Napi::Object>());
|
|
246
|
+
hasAddonExportsRef = true;
|
|
247
|
+
}
|
|
248
|
+
|
|
145
249
|
if (options.Has("gpuLayers")) {
|
|
146
250
|
model_params.n_gpu_layers = options.Get("gpuLayers").As<Napi::Number>().Int32Value();
|
|
147
251
|
}
|
|
@@ -157,13 +261,37 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
|
|
|
157
261
|
if (options.Has("useMlock")) {
|
|
158
262
|
model_params.use_mlock = options.Get("useMlock").As<Napi::Boolean>().Value();
|
|
159
263
|
}
|
|
160
|
-
}
|
|
161
264
|
|
|
162
|
-
|
|
265
|
+
if (options.Has("onLoadProgress")) {
|
|
266
|
+
auto onLoadProgressJSCallback = options.Get("onLoadProgress").As<Napi::Function>();
|
|
267
|
+
if (onLoadProgressJSCallback.IsFunction()) {
|
|
268
|
+
AddonThreadSafeProgressCallbackFunctionContext* context = new Napi::Reference<Napi::Value>(Napi::Persistent(info.This()));
|
|
269
|
+
addonThreadSafeOnLoadProgressEventCallback = AddonThreadSafeProgressEventCallbackFunction::New(
|
|
270
|
+
info.Env(),
|
|
271
|
+
onLoadProgressJSCallback,
|
|
272
|
+
"onLoadProgressCallback",
|
|
273
|
+
0,
|
|
274
|
+
1,
|
|
275
|
+
context,
|
|
276
|
+
[](Napi::Env, AddonModel* addonModel, AddonThreadSafeProgressCallbackFunctionContext* ctx) {
|
|
277
|
+
addonModel->onLoadProgressEventCallbackSet = false;
|
|
278
|
+
|
|
279
|
+
delete ctx;
|
|
280
|
+
},
|
|
281
|
+
this
|
|
282
|
+
);
|
|
283
|
+
onLoadProgressEventCallbackSet = true;
|
|
284
|
+
}
|
|
285
|
+
}
|
|
163
286
|
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
287
|
+
if (options.Has("hasLoadAbortSignal")) {
|
|
288
|
+
hasLoadAbortSignal = options.Get("hasLoadAbortSignal").As<Napi::Boolean>().Value();
|
|
289
|
+
}
|
|
290
|
+
|
|
291
|
+
if (onLoadProgressEventCallbackSet || hasLoadAbortSignal) {
|
|
292
|
+
model_params.progress_callback_user_data = &(*this);
|
|
293
|
+
model_params.progress_callback = llamaModelParamsProgressCallback;
|
|
294
|
+
}
|
|
167
295
|
}
|
|
168
296
|
}
|
|
169
297
|
|
|
@@ -176,23 +304,31 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
|
|
|
176
304
|
return;
|
|
177
305
|
}
|
|
178
306
|
|
|
179
|
-
llama_free_model(model);
|
|
180
307
|
disposed = true;
|
|
181
|
-
|
|
308
|
+
if (modelLoaded) {
|
|
309
|
+
modelLoaded = false;
|
|
310
|
+
llama_free_model(model);
|
|
182
311
|
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
return info.Env().Undefined();
|
|
312
|
+
adjustNapiExternalMemorySubtract(Env(), loadedModelSize);
|
|
313
|
+
loadedModelSize = 0;
|
|
186
314
|
}
|
|
187
315
|
|
|
188
|
-
|
|
316
|
+
if (hasAddonExportsRef) {
|
|
317
|
+
addonExportsRef.Unref();
|
|
318
|
+
hasAddonExportsRef = false;
|
|
319
|
+
}
|
|
320
|
+
}
|
|
189
321
|
|
|
322
|
+
Napi::Value Init(const Napi::CallbackInfo& info);
|
|
323
|
+
Napi::Value AbortActiveModelLoad(const Napi::CallbackInfo& info) {
|
|
324
|
+
abortModelLoad = true;
|
|
190
325
|
return info.Env().Undefined();
|
|
191
326
|
}
|
|
327
|
+
Napi::Value Dispose(const Napi::CallbackInfo& info);
|
|
192
328
|
|
|
193
329
|
Napi::Value Tokenize(const Napi::CallbackInfo& info) {
|
|
194
330
|
if (disposed) {
|
|
195
|
-
Napi::Error::New(info.Env(), "
|
|
331
|
+
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
|
|
196
332
|
return info.Env().Undefined();
|
|
197
333
|
}
|
|
198
334
|
|
|
@@ -210,7 +346,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
|
|
|
210
346
|
}
|
|
211
347
|
Napi::Value Detokenize(const Napi::CallbackInfo& info) {
|
|
212
348
|
if (disposed) {
|
|
213
|
-
Napi::Error::New(info.Env(), "
|
|
349
|
+
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
|
|
214
350
|
return info.Env().Undefined();
|
|
215
351
|
}
|
|
216
352
|
|
|
@@ -235,7 +371,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
|
|
|
235
371
|
|
|
236
372
|
Napi::Value GetTrainContextSize(const Napi::CallbackInfo& info) {
|
|
237
373
|
if (disposed) {
|
|
238
|
-
Napi::Error::New(info.Env(), "
|
|
374
|
+
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
|
|
239
375
|
return info.Env().Undefined();
|
|
240
376
|
}
|
|
241
377
|
|
|
@@ -244,7 +380,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
|
|
|
244
380
|
|
|
245
381
|
Napi::Value GetEmbeddingVectorSize(const Napi::CallbackInfo& info) {
|
|
246
382
|
if (disposed) {
|
|
247
|
-
Napi::Error::New(info.Env(), "
|
|
383
|
+
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
|
|
248
384
|
return info.Env().Undefined();
|
|
249
385
|
}
|
|
250
386
|
|
|
@@ -253,7 +389,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
|
|
|
253
389
|
|
|
254
390
|
Napi::Value GetTotalSize(const Napi::CallbackInfo& info) {
|
|
255
391
|
if (disposed) {
|
|
256
|
-
Napi::Error::New(info.Env(), "
|
|
392
|
+
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
|
|
257
393
|
return info.Env().Undefined();
|
|
258
394
|
}
|
|
259
395
|
|
|
@@ -262,7 +398,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
|
|
|
262
398
|
|
|
263
399
|
Napi::Value GetTotalParameters(const Napi::CallbackInfo& info) {
|
|
264
400
|
if (disposed) {
|
|
265
|
-
Napi::Error::New(info.Env(), "
|
|
401
|
+
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
|
|
266
402
|
return info.Env().Undefined();
|
|
267
403
|
}
|
|
268
404
|
|
|
@@ -271,7 +407,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
|
|
|
271
407
|
|
|
272
408
|
Napi::Value GetModelDescription(const Napi::CallbackInfo& info) {
|
|
273
409
|
if (disposed) {
|
|
274
|
-
Napi::Error::New(info.Env(), "
|
|
410
|
+
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
|
|
275
411
|
return info.Env().Undefined();
|
|
276
412
|
}
|
|
277
413
|
|
|
@@ -283,7 +419,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
|
|
|
283
419
|
|
|
284
420
|
Napi::Value TokenBos(const Napi::CallbackInfo& info) {
|
|
285
421
|
if (disposed) {
|
|
286
|
-
Napi::Error::New(info.Env(), "
|
|
422
|
+
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
|
|
287
423
|
return info.Env().Undefined();
|
|
288
424
|
}
|
|
289
425
|
|
|
@@ -291,7 +427,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
|
|
|
291
427
|
}
|
|
292
428
|
Napi::Value TokenEos(const Napi::CallbackInfo& info) {
|
|
293
429
|
if (disposed) {
|
|
294
|
-
Napi::Error::New(info.Env(), "
|
|
430
|
+
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
|
|
295
431
|
return info.Env().Undefined();
|
|
296
432
|
}
|
|
297
433
|
|
|
@@ -299,7 +435,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
|
|
|
299
435
|
}
|
|
300
436
|
Napi::Value TokenNl(const Napi::CallbackInfo& info) {
|
|
301
437
|
if (disposed) {
|
|
302
|
-
Napi::Error::New(info.Env(), "
|
|
438
|
+
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
|
|
303
439
|
return info.Env().Undefined();
|
|
304
440
|
}
|
|
305
441
|
|
|
@@ -307,7 +443,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
|
|
|
307
443
|
}
|
|
308
444
|
Napi::Value PrefixToken(const Napi::CallbackInfo& info) {
|
|
309
445
|
if (disposed) {
|
|
310
|
-
Napi::Error::New(info.Env(), "
|
|
446
|
+
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
|
|
311
447
|
return info.Env().Undefined();
|
|
312
448
|
}
|
|
313
449
|
|
|
@@ -315,7 +451,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
|
|
|
315
451
|
}
|
|
316
452
|
Napi::Value MiddleToken(const Napi::CallbackInfo& info) {
|
|
317
453
|
if (disposed) {
|
|
318
|
-
Napi::Error::New(info.Env(), "
|
|
454
|
+
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
|
|
319
455
|
return info.Env().Undefined();
|
|
320
456
|
}
|
|
321
457
|
|
|
@@ -323,7 +459,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
|
|
|
323
459
|
}
|
|
324
460
|
Napi::Value SuffixToken(const Napi::CallbackInfo& info) {
|
|
325
461
|
if (disposed) {
|
|
326
|
-
Napi::Error::New(info.Env(), "
|
|
462
|
+
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
|
|
327
463
|
return info.Env().Undefined();
|
|
328
464
|
}
|
|
329
465
|
|
|
@@ -331,7 +467,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
|
|
|
331
467
|
}
|
|
332
468
|
Napi::Value EotToken(const Napi::CallbackInfo& info) {
|
|
333
469
|
if (disposed) {
|
|
334
|
-
Napi::Error::New(info.Env(), "
|
|
470
|
+
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
|
|
335
471
|
return info.Env().Undefined();
|
|
336
472
|
}
|
|
337
473
|
|
|
@@ -339,7 +475,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
|
|
|
339
475
|
}
|
|
340
476
|
Napi::Value GetTokenString(const Napi::CallbackInfo& info) {
|
|
341
477
|
if (disposed) {
|
|
342
|
-
Napi::Error::New(info.Env(), "
|
|
478
|
+
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
|
|
343
479
|
return info.Env().Undefined();
|
|
344
480
|
}
|
|
345
481
|
|
|
@@ -358,7 +494,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
|
|
|
358
494
|
|
|
359
495
|
Napi::Value GetTokenType(const Napi::CallbackInfo& info) {
|
|
360
496
|
if (disposed) {
|
|
361
|
-
Napi::Error::New(info.Env(), "
|
|
497
|
+
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
|
|
362
498
|
return info.Env().Undefined();
|
|
363
499
|
}
|
|
364
500
|
|
|
@@ -386,6 +522,8 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
|
|
|
386
522
|
exports.Env(),
|
|
387
523
|
"AddonModel",
|
|
388
524
|
{
|
|
525
|
+
InstanceMethod("init", &AddonModel::Init),
|
|
526
|
+
InstanceMethod("abortActiveModelLoad", &AddonModel::AbortActiveModelLoad),
|
|
389
527
|
InstanceMethod("tokenize", &AddonModel::Tokenize),
|
|
390
528
|
InstanceMethod("detokenize", &AddonModel::Detokenize),
|
|
391
529
|
InstanceMethod("getTrainContextSize", &AddonModel::GetTrainContextSize),
|
|
@@ -410,9 +548,166 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
|
|
|
410
548
|
}
|
|
411
549
|
};
|
|
412
550
|
|
|
551
|
+
static bool llamaModelParamsProgressCallback(float progress, void * user_data) {
|
|
552
|
+
AddonModel* addonModel = (AddonModel *) user_data;
|
|
553
|
+
unsigned percentage = (unsigned) (100 * progress);
|
|
554
|
+
|
|
555
|
+
if (percentage > addonModel->modelLoadPercentage) {
|
|
556
|
+
addonModel->modelLoadPercentage = percentage;
|
|
557
|
+
|
|
558
|
+
// original llama.cpp logs
|
|
559
|
+
addonLlamaCppLogCallback(GGML_LOG_LEVEL_INFO, ".", nullptr);
|
|
560
|
+
if (percentage >= 100) {
|
|
561
|
+
addonLlamaCppLogCallback(GGML_LOG_LEVEL_INFO, "\n", nullptr);
|
|
562
|
+
}
|
|
563
|
+
}
|
|
564
|
+
|
|
565
|
+
if (progress > addonModel->rawModelLoadPercentage) {
|
|
566
|
+
addonModel->rawModelLoadPercentage = progress;
|
|
567
|
+
|
|
568
|
+
if (addonModel->onLoadProgressEventCallbackSet) {
|
|
569
|
+
addon_progress_event* data = new addon_progress_event {
|
|
570
|
+
progress
|
|
571
|
+
};
|
|
572
|
+
|
|
573
|
+
auto status = addonModel->addonThreadSafeOnLoadProgressEventCallback.NonBlockingCall(data);
|
|
574
|
+
|
|
575
|
+
if (status != napi_ok) {
|
|
576
|
+
delete data;
|
|
577
|
+
}
|
|
578
|
+
}
|
|
579
|
+
}
|
|
580
|
+
|
|
581
|
+
return !(addonModel->abortModelLoad);
|
|
582
|
+
}
|
|
583
|
+
|
|
584
|
+
class AddonModelLoadModelWorker : public Napi::AsyncWorker {
|
|
585
|
+
public:
|
|
586
|
+
AddonModel* model;
|
|
587
|
+
|
|
588
|
+
AddonModelLoadModelWorker(const Napi::Env& env, AddonModel* model)
|
|
589
|
+
: Napi::AsyncWorker(env, "AddonModelLoadModelWorker"),
|
|
590
|
+
model(model),
|
|
591
|
+
deferred(Napi::Promise::Deferred::New(env)) {
|
|
592
|
+
model->Ref();
|
|
593
|
+
}
|
|
594
|
+
~AddonModelLoadModelWorker() {
|
|
595
|
+
model->Unref();
|
|
596
|
+
}
|
|
597
|
+
|
|
598
|
+
Napi::Promise GetPromise() {
|
|
599
|
+
return deferred.Promise();
|
|
600
|
+
}
|
|
601
|
+
|
|
602
|
+
protected:
|
|
603
|
+
Napi::Promise::Deferred deferred;
|
|
604
|
+
|
|
605
|
+
void Execute() {
|
|
606
|
+
try {
|
|
607
|
+
model->model = llama_load_model_from_file(model->modelPath.c_str(), model->model_params);
|
|
608
|
+
|
|
609
|
+
model->modelLoaded = model->model != nullptr && model->model != NULL;
|
|
610
|
+
} catch (const std::exception& e) {
|
|
611
|
+
SetError(e.what());
|
|
612
|
+
} catch(...) {
|
|
613
|
+
SetError("Unknown error when calling \"llama_load_model_from_file\"");
|
|
614
|
+
}
|
|
615
|
+
}
|
|
616
|
+
void OnOK() {
|
|
617
|
+
if (model->modelLoaded) {
|
|
618
|
+
uint64_t modelSize = llama_model_size(model->model);
|
|
619
|
+
adjustNapiExternalMemoryAdd(Env(), modelSize);
|
|
620
|
+
model->loadedModelSize = modelSize;
|
|
621
|
+
}
|
|
622
|
+
|
|
623
|
+
deferred.Resolve(Napi::Boolean::New(Env(), model->modelLoaded));
|
|
624
|
+
if (model->onLoadProgressEventCallbackSet) {
|
|
625
|
+
model->addonThreadSafeOnLoadProgressEventCallback.Release();
|
|
626
|
+
}
|
|
627
|
+
}
|
|
628
|
+
void OnError(const Napi::Error& err) {
|
|
629
|
+
deferred.Reject(err.Value());
|
|
630
|
+
}
|
|
631
|
+
};
|
|
632
|
+
class AddonModelUnloadModelWorker : public Napi::AsyncWorker {
|
|
633
|
+
public:
|
|
634
|
+
AddonModel* model;
|
|
635
|
+
|
|
636
|
+
AddonModelUnloadModelWorker(const Napi::Env& env, AddonModel* model)
|
|
637
|
+
: Napi::AsyncWorker(env, "AddonModelUnloadModelWorker"),
|
|
638
|
+
model(model),
|
|
639
|
+
deferred(Napi::Promise::Deferred::New(env)) {
|
|
640
|
+
model->Ref();
|
|
641
|
+
}
|
|
642
|
+
~AddonModelUnloadModelWorker() {
|
|
643
|
+
model->Unref();
|
|
644
|
+
}
|
|
645
|
+
|
|
646
|
+
Napi::Promise GetPromise() {
|
|
647
|
+
return deferred.Promise();
|
|
648
|
+
}
|
|
649
|
+
|
|
650
|
+
protected:
|
|
651
|
+
Napi::Promise::Deferred deferred;
|
|
652
|
+
|
|
653
|
+
void Execute() {
|
|
654
|
+
try {
|
|
655
|
+
llama_free_model(model->model);
|
|
656
|
+
model->modelLoaded = false;
|
|
657
|
+
|
|
658
|
+
model->dispose();
|
|
659
|
+
} catch (const std::exception& e) {
|
|
660
|
+
SetError(e.what());
|
|
661
|
+
} catch(...) {
|
|
662
|
+
SetError("Unknown error when calling \"llama_free_model\"");
|
|
663
|
+
}
|
|
664
|
+
}
|
|
665
|
+
void OnOK() {
|
|
666
|
+
adjustNapiExternalMemorySubtract(Env(), model->loadedModelSize);
|
|
667
|
+
model->loadedModelSize = 0;
|
|
668
|
+
|
|
669
|
+
deferred.Resolve(Env().Undefined());
|
|
670
|
+
}
|
|
671
|
+
void OnError(const Napi::Error& err) {
|
|
672
|
+
deferred.Reject(err.Value());
|
|
673
|
+
}
|
|
674
|
+
};
|
|
675
|
+
|
|
676
|
+
Napi::Value AddonModel::Init(const Napi::CallbackInfo& info) {
|
|
677
|
+
if (disposed) {
|
|
678
|
+
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
|
|
679
|
+
return info.Env().Undefined();
|
|
680
|
+
}
|
|
681
|
+
|
|
682
|
+
AddonModelLoadModelWorker* worker = new AddonModelLoadModelWorker(this->Env(), this);
|
|
683
|
+
worker->Queue();
|
|
684
|
+
return worker->GetPromise();
|
|
685
|
+
}
|
|
686
|
+
Napi::Value AddonModel::Dispose(const Napi::CallbackInfo& info) {
|
|
687
|
+
if (disposed) {
|
|
688
|
+
return info.Env().Undefined();
|
|
689
|
+
}
|
|
690
|
+
|
|
691
|
+
if (modelLoaded) {
|
|
692
|
+
modelLoaded = false;
|
|
693
|
+
|
|
694
|
+
AddonModelUnloadModelWorker* worker = new AddonModelUnloadModelWorker(this->Env(), this);
|
|
695
|
+
worker->Queue();
|
|
696
|
+
return worker->GetPromise();
|
|
697
|
+
} else {
|
|
698
|
+
dispose();
|
|
699
|
+
|
|
700
|
+
Napi::Promise::Deferred deferred = Napi::Promise::Deferred::New(info.Env());
|
|
701
|
+
deferred.Resolve(info.Env().Undefined());
|
|
702
|
+
return deferred.Promise();
|
|
703
|
+
}
|
|
704
|
+
}
|
|
705
|
+
|
|
413
706
|
class AddonGrammar : public Napi::ObjectWrap<AddonGrammar> {
|
|
414
707
|
public:
|
|
415
708
|
grammar_parser::parse_state parsed_grammar;
|
|
709
|
+
Napi::Reference<Napi::Object> addonExportsRef;
|
|
710
|
+
bool hasAddonExportsRef = false;
|
|
416
711
|
|
|
417
712
|
AddonGrammar(const Napi::CallbackInfo& info) : Napi::ObjectWrap<AddonGrammar>(info) {
|
|
418
713
|
// Get the model path
|
|
@@ -422,6 +717,11 @@ class AddonGrammar : public Napi::ObjectWrap<AddonGrammar> {
|
|
|
422
717
|
if (info.Length() > 1 && info[1].IsObject()) {
|
|
423
718
|
Napi::Object options = info[1].As<Napi::Object>();
|
|
424
719
|
|
|
720
|
+
if (options.Has("addonExports")) {
|
|
721
|
+
addonExportsRef = Napi::Persistent(options.Get("addonExports").As<Napi::Object>());
|
|
722
|
+
hasAddonExportsRef = true;
|
|
723
|
+
}
|
|
724
|
+
|
|
425
725
|
if (options.Has("printGrammar")) {
|
|
426
726
|
should_print_grammar = options.Get("printGrammar").As<Napi::Boolean>().Value();
|
|
427
727
|
}
|
|
@@ -439,6 +739,13 @@ class AddonGrammar : public Napi::ObjectWrap<AddonGrammar> {
|
|
|
439
739
|
}
|
|
440
740
|
}
|
|
441
741
|
|
|
742
|
+
~AddonGrammar() {
|
|
743
|
+
if (hasAddonExportsRef) {
|
|
744
|
+
addonExportsRef.Unref();
|
|
745
|
+
hasAddonExportsRef = false;
|
|
746
|
+
}
|
|
747
|
+
}
|
|
748
|
+
|
|
442
749
|
static void init(Napi::Object exports) {
|
|
443
750
|
exports.Set("AddonGrammar", DefineClass(exports.Env(), "AddonGrammar", {}));
|
|
444
751
|
}
|
|
@@ -477,9 +784,14 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
|
|
|
477
784
|
llama_context_params context_params;
|
|
478
785
|
llama_context* ctx;
|
|
479
786
|
llama_batch batch;
|
|
787
|
+
uint64_t batchMemorySize = 0;
|
|
480
788
|
bool has_batch = false;
|
|
481
789
|
int32_t batch_n_tokens = 0;
|
|
482
790
|
int n_cur = 0;
|
|
791
|
+
|
|
792
|
+
uint64_t loadedContextMemorySize = 0;
|
|
793
|
+
bool contextLoaded = false;
|
|
794
|
+
|
|
483
795
|
bool disposed = false;
|
|
484
796
|
|
|
485
797
|
AddonContext(const Napi::CallbackInfo& info) : Napi::ObjectWrap<AddonContext>(info) {
|
|
@@ -507,10 +819,11 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
|
|
|
507
819
|
|
|
508
820
|
if (options.Has("batchSize")) {
|
|
509
821
|
context_params.n_batch = options.Get("batchSize").As<Napi::Number>().Uint32Value();
|
|
822
|
+
context_params.n_ubatch = context_params.n_batch; // the batch queue is managed in the JS side, so there's no need for managing it on the C++ side
|
|
510
823
|
}
|
|
511
824
|
|
|
512
|
-
if (options.Has("
|
|
513
|
-
context_params.
|
|
825
|
+
if (options.Has("embeddings")) {
|
|
826
|
+
context_params.embeddings = options.Get("embeddings").As<Napi::Boolean>().Value();
|
|
514
827
|
}
|
|
515
828
|
|
|
516
829
|
if (options.Has("threads")) {
|
|
@@ -521,9 +834,6 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
|
|
|
521
834
|
context_params.n_threads_batch = resolved_n_threads;
|
|
522
835
|
}
|
|
523
836
|
}
|
|
524
|
-
|
|
525
|
-
ctx = llama_new_context_with_model(model->model, context_params);
|
|
526
|
-
Napi::MemoryManagement::AdjustExternalMemory(Env(), llama_get_state_size(ctx));
|
|
527
837
|
}
|
|
528
838
|
~AddonContext() {
|
|
529
839
|
dispose();
|
|
@@ -534,13 +844,18 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
|
|
|
534
844
|
return;
|
|
535
845
|
}
|
|
536
846
|
|
|
537
|
-
|
|
538
|
-
|
|
847
|
+
disposed = true;
|
|
848
|
+
if (contextLoaded) {
|
|
849
|
+
contextLoaded = false;
|
|
850
|
+
llama_free(ctx);
|
|
851
|
+
|
|
852
|
+
adjustNapiExternalMemorySubtract(Env(), loadedContextMemorySize);
|
|
853
|
+
loadedContextMemorySize = 0;
|
|
854
|
+
}
|
|
855
|
+
|
|
539
856
|
model->Unref();
|
|
540
857
|
|
|
541
858
|
disposeBatch();
|
|
542
|
-
|
|
543
|
-
disposed = true;
|
|
544
859
|
}
|
|
545
860
|
void disposeBatch() {
|
|
546
861
|
if (!has_batch) {
|
|
@@ -550,16 +865,14 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
|
|
|
550
865
|
llama_batch_free(batch);
|
|
551
866
|
has_batch = false;
|
|
552
867
|
batch_n_tokens = 0;
|
|
868
|
+
|
|
869
|
+
adjustNapiExternalMemorySubtract(Env(), batchMemorySize);
|
|
870
|
+
batchMemorySize = 0;
|
|
553
871
|
}
|
|
554
|
-
Napi::Value Dispose(const Napi::CallbackInfo& info) {
|
|
555
|
-
if (disposed) {
|
|
556
|
-
return info.Env().Undefined();
|
|
557
|
-
}
|
|
558
872
|
|
|
559
|
-
|
|
873
|
+
Napi::Value Init(const Napi::CallbackInfo& info);
|
|
874
|
+
Napi::Value Dispose(const Napi::CallbackInfo& info);
|
|
560
875
|
|
|
561
|
-
return info.Env().Undefined();
|
|
562
|
-
}
|
|
563
876
|
Napi::Value GetContextSize(const Napi::CallbackInfo& info) {
|
|
564
877
|
if (disposed) {
|
|
565
878
|
Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
|
|
@@ -584,6 +897,15 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
|
|
|
584
897
|
has_batch = true;
|
|
585
898
|
batch_n_tokens = n_tokens;
|
|
586
899
|
|
|
900
|
+
uint64_t newBatchMemorySize = calculateBatchMemorySize(n_tokens, llama_n_embd(model->model), context_params.n_batch);
|
|
901
|
+
if (newBatchMemorySize > batchMemorySize) {
|
|
902
|
+
adjustNapiExternalMemoryAdd(Env(), newBatchMemorySize - batchMemorySize);
|
|
903
|
+
batchMemorySize = newBatchMemorySize;
|
|
904
|
+
} else if (newBatchMemorySize < batchMemorySize) {
|
|
905
|
+
adjustNapiExternalMemorySubtract(Env(), batchMemorySize - newBatchMemorySize);
|
|
906
|
+
batchMemorySize = newBatchMemorySize;
|
|
907
|
+
}
|
|
908
|
+
|
|
587
909
|
return info.Env().Undefined();
|
|
588
910
|
}
|
|
589
911
|
Napi::Value DisposeBatch(const Napi::CallbackInfo& info) {
|
|
@@ -632,7 +954,12 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
|
|
|
632
954
|
|
|
633
955
|
int32_t sequenceId = info[0].As<Napi::Number>().Int32Value();
|
|
634
956
|
|
|
635
|
-
llama_kv_cache_seq_rm(ctx, sequenceId, -1, -1);
|
|
957
|
+
bool result = llama_kv_cache_seq_rm(ctx, sequenceId, -1, -1);
|
|
958
|
+
|
|
959
|
+
if (!result) {
|
|
960
|
+
Napi::Error::New(info.Env(), "Failed to dispose sequence").ThrowAsJavaScriptException();
|
|
961
|
+
return info.Env().Undefined();
|
|
962
|
+
}
|
|
636
963
|
|
|
637
964
|
return info.Env().Undefined();
|
|
638
965
|
}
|
|
@@ -646,9 +973,9 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
|
|
|
646
973
|
int32_t startPos = info[1].As<Napi::Number>().Int32Value();
|
|
647
974
|
int32_t endPos = info[2].As<Napi::Number>().Int32Value();
|
|
648
975
|
|
|
649
|
-
llama_kv_cache_seq_rm(ctx, sequenceId, startPos, endPos);
|
|
976
|
+
bool result = llama_kv_cache_seq_rm(ctx, sequenceId, startPos, endPos);
|
|
650
977
|
|
|
651
|
-
return info.Env()
|
|
978
|
+
return Napi::Boolean::New(info.Env(), result);
|
|
652
979
|
}
|
|
653
980
|
Napi::Value ShiftSequenceTokenCells(const Napi::CallbackInfo& info) {
|
|
654
981
|
if (disposed) {
|
|
@@ -661,7 +988,7 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
|
|
|
661
988
|
int32_t endPos = info[2].As<Napi::Number>().Int32Value();
|
|
662
989
|
int32_t shiftDelta = info[3].As<Napi::Number>().Int32Value();
|
|
663
990
|
|
|
664
|
-
|
|
991
|
+
llama_kv_cache_seq_add(ctx, sequenceId, startPos, endPos, shiftDelta);
|
|
665
992
|
|
|
666
993
|
return info.Env().Undefined();
|
|
667
994
|
}
|
|
@@ -686,8 +1013,23 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
|
|
|
686
1013
|
return info.Env().Undefined();
|
|
687
1014
|
}
|
|
688
1015
|
|
|
1016
|
+
int32_t inputTokensLength = info[0].As<Napi::Number>().Int32Value();
|
|
1017
|
+
|
|
1018
|
+
if (inputTokensLength <= 0) {
|
|
1019
|
+
Napi::Error::New(info.Env(), "Invalid input tokens length").ThrowAsJavaScriptException();
|
|
1020
|
+
return info.Env().Undefined();
|
|
1021
|
+
}
|
|
1022
|
+
|
|
689
1023
|
const int n_embd = llama_n_embd(model->model);
|
|
690
|
-
const auto* embeddings =
|
|
1024
|
+
const auto* embeddings = llama_get_embeddings_seq(ctx, 0);
|
|
1025
|
+
if (embeddings == NULL) {
|
|
1026
|
+
embeddings = llama_get_embeddings_ith(ctx, inputTokensLength - 1);
|
|
1027
|
+
|
|
1028
|
+
if (embeddings == NULL) {
|
|
1029
|
+
Napi::Error::New(info.Env(), std::string("Failed to get embeddings for token ") + std::to_string(inputTokensLength - 1)).ThrowAsJavaScriptException();
|
|
1030
|
+
return info.Env().Undefined();
|
|
1031
|
+
}
|
|
1032
|
+
}
|
|
691
1033
|
|
|
692
1034
|
Napi::Float64Array result = Napi::Float64Array::New(info.Env(), n_embd);
|
|
693
1035
|
for (size_t i = 0; i < n_embd; ++i) {
|
|
@@ -710,6 +1052,7 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
|
|
|
710
1052
|
exports.Env(),
|
|
711
1053
|
"AddonContext",
|
|
712
1054
|
{
|
|
1055
|
+
InstanceMethod("init", &AddonContext::Init),
|
|
713
1056
|
InstanceMethod("getContextSize", &AddonContext::GetContextSize),
|
|
714
1057
|
InstanceMethod("initBatch", &AddonContext::InitBatch),
|
|
715
1058
|
InstanceMethod("addToBatch", &AddonContext::AddToBatch),
|
|
@@ -729,53 +1072,198 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
|
|
|
729
1072
|
};
|
|
730
1073
|
|
|
731
1074
|
|
|
732
|
-
class AddonContextDecodeBatchWorker : Napi::AsyncWorker
|
|
1075
|
+
class AddonContextDecodeBatchWorker : public Napi::AsyncWorker {
|
|
733
1076
|
public:
|
|
734
1077
|
AddonContext* ctx;
|
|
735
1078
|
|
|
736
|
-
AddonContextDecodeBatchWorker(const Napi::
|
|
737
|
-
: Napi::AsyncWorker(
|
|
1079
|
+
AddonContextDecodeBatchWorker(const Napi::Env& env, AddonContext* ctx)
|
|
1080
|
+
: Napi::AsyncWorker(env, "AddonContextDecodeBatchWorker"),
|
|
738
1081
|
ctx(ctx),
|
|
739
|
-
Napi::Promise::Deferred(
|
|
1082
|
+
deferred(Napi::Promise::Deferred::New(env)) {
|
|
740
1083
|
ctx->Ref();
|
|
741
1084
|
}
|
|
742
1085
|
~AddonContextDecodeBatchWorker() {
|
|
743
1086
|
ctx->Unref();
|
|
744
1087
|
}
|
|
745
|
-
|
|
746
|
-
|
|
1088
|
+
|
|
1089
|
+
Napi::Promise GetPromise() {
|
|
1090
|
+
return deferred.Promise();
|
|
1091
|
+
}
|
|
747
1092
|
|
|
748
1093
|
protected:
|
|
1094
|
+
Napi::Promise::Deferred deferred;
|
|
1095
|
+
|
|
749
1096
|
void Execute() {
|
|
750
|
-
|
|
751
|
-
|
|
752
|
-
|
|
753
|
-
|
|
754
|
-
if (r
|
|
755
|
-
|
|
756
|
-
|
|
757
|
-
|
|
1097
|
+
try {
|
|
1098
|
+
// Perform the evaluation using llama_decode.
|
|
1099
|
+
int r = llama_decode(ctx->ctx, ctx->batch);
|
|
1100
|
+
|
|
1101
|
+
if (r != 0) {
|
|
1102
|
+
if (r == 1) {
|
|
1103
|
+
SetError("could not find a KV slot for the batch (try reducing the size of the batch or increase the context)");
|
|
1104
|
+
} else {
|
|
1105
|
+
SetError("Eval has failed");
|
|
1106
|
+
}
|
|
1107
|
+
|
|
1108
|
+
return;
|
|
758
1109
|
}
|
|
759
1110
|
|
|
760
|
-
|
|
1111
|
+
llama_synchronize(ctx->ctx);
|
|
1112
|
+
} catch (const std::exception& e) {
|
|
1113
|
+
SetError(e.what());
|
|
1114
|
+
} catch(...) {
|
|
1115
|
+
SetError("Unknown error when calling \"llama_decode\"");
|
|
761
1116
|
}
|
|
762
1117
|
}
|
|
763
1118
|
void OnOK() {
|
|
764
|
-
|
|
765
|
-
Napi::Promise::Deferred::Resolve(env.Undefined());
|
|
1119
|
+
deferred.Resolve(Env().Undefined());
|
|
766
1120
|
}
|
|
767
1121
|
void OnError(const Napi::Error& err) {
|
|
768
|
-
|
|
1122
|
+
deferred.Reject(err.Value());
|
|
769
1123
|
}
|
|
770
1124
|
};
|
|
771
1125
|
|
|
772
1126
|
Napi::Value AddonContext::DecodeBatch(const Napi::CallbackInfo& info) {
|
|
773
|
-
AddonContextDecodeBatchWorker* worker = new AddonContextDecodeBatchWorker(info, this);
|
|
1127
|
+
AddonContextDecodeBatchWorker* worker = new AddonContextDecodeBatchWorker(info.Env(), this);
|
|
1128
|
+
worker->Queue();
|
|
1129
|
+
return worker->GetPromise();
|
|
1130
|
+
}
|
|
1131
|
+
|
|
1132
|
+
class AddonContextLoadContextWorker : public Napi::AsyncWorker {
|
|
1133
|
+
public:
|
|
1134
|
+
AddonContext* context;
|
|
1135
|
+
|
|
1136
|
+
AddonContextLoadContextWorker(const Napi::Env& env, AddonContext* context)
|
|
1137
|
+
: Napi::AsyncWorker(env, "AddonContextLoadContextWorker"),
|
|
1138
|
+
context(context),
|
|
1139
|
+
deferred(Napi::Promise::Deferred::New(env)) {
|
|
1140
|
+
context->Ref();
|
|
1141
|
+
}
|
|
1142
|
+
~AddonContextLoadContextWorker() {
|
|
1143
|
+
context->Unref();
|
|
1144
|
+
}
|
|
1145
|
+
|
|
1146
|
+
Napi::Promise GetPromise() {
|
|
1147
|
+
return deferred.Promise();
|
|
1148
|
+
}
|
|
1149
|
+
|
|
1150
|
+
protected:
|
|
1151
|
+
Napi::Promise::Deferred deferred;
|
|
1152
|
+
|
|
1153
|
+
void Execute() {
|
|
1154
|
+
try {
|
|
1155
|
+
context->ctx = llama_new_context_with_model(context->model->model, context->context_params);
|
|
1156
|
+
|
|
1157
|
+
context->contextLoaded = context->ctx != nullptr && context->ctx != NULL;
|
|
1158
|
+
} catch (const std::exception& e) {
|
|
1159
|
+
SetError(e.what());
|
|
1160
|
+
} catch(...) {
|
|
1161
|
+
SetError("Unknown error when calling \"llama_new_context_with_model\"");
|
|
1162
|
+
}
|
|
1163
|
+
}
|
|
1164
|
+
void OnOK() {
|
|
1165
|
+
if (context->contextLoaded) {
|
|
1166
|
+
uint64_t contextMemorySize = llama_get_state_size(context->ctx);
|
|
1167
|
+
adjustNapiExternalMemoryAdd(Env(), contextMemorySize);
|
|
1168
|
+
context->loadedContextMemorySize = contextMemorySize;
|
|
1169
|
+
}
|
|
1170
|
+
|
|
1171
|
+
deferred.Resolve(Napi::Boolean::New(Env(), context->contextLoaded));
|
|
1172
|
+
}
|
|
1173
|
+
void OnError(const Napi::Error& err) {
|
|
1174
|
+
deferred.Reject(err.Value());
|
|
1175
|
+
}
|
|
1176
|
+
};
|
|
1177
|
+
class AddonContextUnloadContextWorker : public Napi::AsyncWorker {
|
|
1178
|
+
public:
|
|
1179
|
+
AddonContext* context;
|
|
1180
|
+
|
|
1181
|
+
AddonContextUnloadContextWorker(const Napi::Env& env, AddonContext* context)
|
|
1182
|
+
: Napi::AsyncWorker(env, "AddonContextUnloadContextWorker"),
|
|
1183
|
+
context(context),
|
|
1184
|
+
deferred(Napi::Promise::Deferred::New(env)) {
|
|
1185
|
+
context->Ref();
|
|
1186
|
+
}
|
|
1187
|
+
~AddonContextUnloadContextWorker() {
|
|
1188
|
+
context->Unref();
|
|
1189
|
+
}
|
|
1190
|
+
|
|
1191
|
+
Napi::Promise GetPromise() {
|
|
1192
|
+
return deferred.Promise();
|
|
1193
|
+
}
|
|
1194
|
+
|
|
1195
|
+
protected:
|
|
1196
|
+
Napi::Promise::Deferred deferred;
|
|
1197
|
+
|
|
1198
|
+
void Execute() {
|
|
1199
|
+
try {
|
|
1200
|
+
llama_free(context->ctx);
|
|
1201
|
+
context->contextLoaded = false;
|
|
1202
|
+
|
|
1203
|
+
try {
|
|
1204
|
+
if (context->has_batch) {
|
|
1205
|
+
llama_batch_free(context->batch);
|
|
1206
|
+
context->has_batch = false;
|
|
1207
|
+
context->batch_n_tokens = 0;
|
|
1208
|
+
}
|
|
1209
|
+
|
|
1210
|
+
context->dispose();
|
|
1211
|
+
} catch (const std::exception& e) {
|
|
1212
|
+
SetError(e.what());
|
|
1213
|
+
} catch(...) {
|
|
1214
|
+
SetError("Unknown error when calling \"llama_batch_free\"");
|
|
1215
|
+
}
|
|
1216
|
+
} catch (const std::exception& e) {
|
|
1217
|
+
SetError(e.what());
|
|
1218
|
+
} catch(...) {
|
|
1219
|
+
SetError("Unknown error when calling \"llama_free\"");
|
|
1220
|
+
}
|
|
1221
|
+
}
|
|
1222
|
+
void OnOK() {
|
|
1223
|
+
adjustNapiExternalMemorySubtract(Env(), context->loadedContextMemorySize);
|
|
1224
|
+
context->loadedContextMemorySize = 0;
|
|
1225
|
+
|
|
1226
|
+
adjustNapiExternalMemorySubtract(Env(), context->batchMemorySize);
|
|
1227
|
+
context->batchMemorySize = 0;
|
|
1228
|
+
|
|
1229
|
+
deferred.Resolve(Env().Undefined());
|
|
1230
|
+
}
|
|
1231
|
+
void OnError(const Napi::Error& err) {
|
|
1232
|
+
deferred.Reject(err.Value());
|
|
1233
|
+
}
|
|
1234
|
+
};
|
|
1235
|
+
|
|
1236
|
+
Napi::Value AddonContext::Init(const Napi::CallbackInfo& info) {
|
|
1237
|
+
if (disposed) {
|
|
1238
|
+
Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
|
|
1239
|
+
return info.Env().Undefined();
|
|
1240
|
+
}
|
|
1241
|
+
|
|
1242
|
+
AddonContextLoadContextWorker* worker = new AddonContextLoadContextWorker(this->Env(), this);
|
|
774
1243
|
worker->Queue();
|
|
775
|
-
return worker->
|
|
1244
|
+
return worker->GetPromise();
|
|
1245
|
+
}
|
|
1246
|
+
Napi::Value AddonContext::Dispose(const Napi::CallbackInfo& info) {
|
|
1247
|
+
if (disposed) {
|
|
1248
|
+
return info.Env().Undefined();
|
|
1249
|
+
}
|
|
1250
|
+
|
|
1251
|
+
if (contextLoaded) {
|
|
1252
|
+
contextLoaded = false;
|
|
1253
|
+
|
|
1254
|
+
AddonContextUnloadContextWorker* worker = new AddonContextUnloadContextWorker(this->Env(), this);
|
|
1255
|
+
worker->Queue();
|
|
1256
|
+
return worker->GetPromise();
|
|
1257
|
+
} else {
|
|
1258
|
+
dispose();
|
|
1259
|
+
|
|
1260
|
+
Napi::Promise::Deferred deferred = Napi::Promise::Deferred::New(info.Env());
|
|
1261
|
+
deferred.Resolve(info.Env().Undefined());
|
|
1262
|
+
return deferred.Promise();
|
|
1263
|
+
}
|
|
776
1264
|
}
|
|
777
1265
|
|
|
778
|
-
class AddonContextSampleTokenWorker : Napi::AsyncWorker
|
|
1266
|
+
class AddonContextSampleTokenWorker : public Napi::AsyncWorker {
|
|
779
1267
|
public:
|
|
780
1268
|
AddonContext* ctx;
|
|
781
1269
|
AddonGrammarEvaluationState* grammar_evaluation_state;
|
|
@@ -795,7 +1283,7 @@ class AddonContextSampleTokenWorker : Napi::AsyncWorker, Napi::Promise::Deferred
|
|
|
795
1283
|
AddonContextSampleTokenWorker(const Napi::CallbackInfo& info, AddonContext* ctx)
|
|
796
1284
|
: Napi::AsyncWorker(info.Env(), "AddonContextSampleTokenWorker"),
|
|
797
1285
|
ctx(ctx),
|
|
798
|
-
Napi::Promise::Deferred(info.Env()) {
|
|
1286
|
+
deferred(Napi::Promise::Deferred::New(info.Env())) {
|
|
799
1287
|
ctx->Ref();
|
|
800
1288
|
|
|
801
1289
|
batchLogitIndex = info[0].As<Napi::Number>().Int32Value();
|
|
@@ -858,11 +1346,25 @@ class AddonContextSampleTokenWorker : Napi::AsyncWorker, Napi::Promise::Deferred
|
|
|
858
1346
|
use_grammar = false;
|
|
859
1347
|
}
|
|
860
1348
|
}
|
|
861
|
-
|
|
862
|
-
|
|
1349
|
+
|
|
1350
|
+
Napi::Promise GetPromise() {
|
|
1351
|
+
return deferred.Promise();
|
|
1352
|
+
}
|
|
863
1353
|
|
|
864
1354
|
protected:
|
|
1355
|
+
Napi::Promise::Deferred deferred;
|
|
1356
|
+
|
|
865
1357
|
void Execute() {
|
|
1358
|
+
try {
|
|
1359
|
+
SampleToken();
|
|
1360
|
+
} catch (const std::exception& e) {
|
|
1361
|
+
SetError(e.what());
|
|
1362
|
+
} catch(...) {
|
|
1363
|
+
SetError("Unknown error when calling \"SampleToken\"");
|
|
1364
|
+
}
|
|
1365
|
+
}
|
|
1366
|
+
|
|
1367
|
+
void SampleToken() {
|
|
866
1368
|
llama_token new_token_id = 0;
|
|
867
1369
|
|
|
868
1370
|
// Select the best prediction.
|
|
@@ -924,19 +1426,18 @@ class AddonContextSampleTokenWorker : Napi::AsyncWorker, Napi::Promise::Deferred
|
|
|
924
1426
|
result = new_token_id;
|
|
925
1427
|
}
|
|
926
1428
|
void OnOK() {
|
|
927
|
-
Napi::
|
|
928
|
-
|
|
929
|
-
Napi::Promise::Deferred::Resolve(resultValue);
|
|
1429
|
+
Napi::Number resultValue = Napi::Number::New(Env(), static_cast<uint32_t>(result));
|
|
1430
|
+
deferred.Resolve(resultValue);
|
|
930
1431
|
}
|
|
931
1432
|
void OnError(const Napi::Error& err) {
|
|
932
|
-
|
|
1433
|
+
deferred.Reject(err.Value());
|
|
933
1434
|
}
|
|
934
1435
|
};
|
|
935
1436
|
|
|
936
1437
|
Napi::Value AddonContext::SampleToken(const Napi::CallbackInfo& info) {
|
|
937
1438
|
AddonContextSampleTokenWorker* worker = new AddonContextSampleTokenWorker(info, this);
|
|
938
1439
|
worker->Queue();
|
|
939
|
-
return worker->
|
|
1440
|
+
return worker->GetPromise();
|
|
940
1441
|
}
|
|
941
1442
|
|
|
942
1443
|
Napi::Value systemInfo(const Napi::CallbackInfo& info) {
|
|
@@ -1009,6 +1510,9 @@ static void addonLlamaCppLogCallback(ggml_log_level level, const char* text, voi
|
|
|
1009
1510
|
|
|
1010
1511
|
if (status == napi_ok) {
|
|
1011
1512
|
return;
|
|
1513
|
+
} else {
|
|
1514
|
+
delete stringStream;
|
|
1515
|
+
delete data;
|
|
1012
1516
|
}
|
|
1013
1517
|
}
|
|
1014
1518
|
|
|
@@ -1066,22 +1570,135 @@ Napi::Value setLoggerLogLevel(const Napi::CallbackInfo& info) {
|
|
|
1066
1570
|
return info.Env().Undefined();
|
|
1067
1571
|
}
|
|
1068
1572
|
|
|
1573
|
+
class AddonBackendLoadWorker : public Napi::AsyncWorker {
|
|
1574
|
+
public:
|
|
1575
|
+
AddonBackendLoadWorker(const Napi::Env& env)
|
|
1576
|
+
: Napi::AsyncWorker(env, "AddonBackendLoadWorker"),
|
|
1577
|
+
deferred(Napi::Promise::Deferred::New(env)) {
|
|
1578
|
+
}
|
|
1579
|
+
~AddonBackendLoadWorker() {
|
|
1580
|
+
}
|
|
1581
|
+
|
|
1582
|
+
Napi::Promise GetPromise() {
|
|
1583
|
+
return deferred.Promise();
|
|
1584
|
+
}
|
|
1585
|
+
|
|
1586
|
+
protected:
|
|
1587
|
+
Napi::Promise::Deferred deferred;
|
|
1588
|
+
|
|
1589
|
+
void Execute() {
|
|
1590
|
+
try {
|
|
1591
|
+
llama_backend_init();
|
|
1592
|
+
|
|
1593
|
+
try {
|
|
1594
|
+
if (backendDisposed) {
|
|
1595
|
+
llama_backend_free();
|
|
1596
|
+
} else {
|
|
1597
|
+
backendInitialized = true;
|
|
1598
|
+
}
|
|
1599
|
+
} catch (const std::exception& e) {
|
|
1600
|
+
SetError(e.what());
|
|
1601
|
+
} catch(...) {
|
|
1602
|
+
SetError("Unknown error when calling \"llama_backend_free\"");
|
|
1603
|
+
}
|
|
1604
|
+
} catch (const std::exception& e) {
|
|
1605
|
+
SetError(e.what());
|
|
1606
|
+
} catch(...) {
|
|
1607
|
+
SetError("Unknown error when calling \"llama_backend_init\"");
|
|
1608
|
+
}
|
|
1609
|
+
}
|
|
1610
|
+
void OnOK() {
|
|
1611
|
+
deferred.Resolve(Env().Undefined());
|
|
1612
|
+
}
|
|
1613
|
+
void OnError(const Napi::Error& err) {
|
|
1614
|
+
deferred.Reject(err.Value());
|
|
1615
|
+
}
|
|
1616
|
+
};
|
|
1617
|
+
|
|
1618
|
+
|
|
1619
|
+
class AddonBackendUnloadWorker : public Napi::AsyncWorker {
|
|
1620
|
+
public:
|
|
1621
|
+
AddonBackendUnloadWorker(const Napi::Env& env)
|
|
1622
|
+
: Napi::AsyncWorker(env, "AddonBackendUnloadWorker"),
|
|
1623
|
+
deferred(Napi::Promise::Deferred::New(env)) {
|
|
1624
|
+
}
|
|
1625
|
+
~AddonBackendUnloadWorker() {
|
|
1626
|
+
}
|
|
1627
|
+
|
|
1628
|
+
Napi::Promise GetPromise() {
|
|
1629
|
+
return deferred.Promise();
|
|
1630
|
+
}
|
|
1631
|
+
|
|
1632
|
+
protected:
|
|
1633
|
+
Napi::Promise::Deferred deferred;
|
|
1634
|
+
|
|
1635
|
+
void Execute() {
|
|
1636
|
+
try {
|
|
1637
|
+
if (backendInitialized) {
|
|
1638
|
+
backendInitialized = false;
|
|
1639
|
+
llama_backend_free();
|
|
1640
|
+
}
|
|
1641
|
+
} catch (const std::exception& e) {
|
|
1642
|
+
SetError(e.what());
|
|
1643
|
+
} catch(...) {
|
|
1644
|
+
SetError("Unknown error when calling \"llama_backend_free\"");
|
|
1645
|
+
}
|
|
1646
|
+
}
|
|
1647
|
+
void OnOK() {
|
|
1648
|
+
deferred.Resolve(Env().Undefined());
|
|
1649
|
+
}
|
|
1650
|
+
void OnError(const Napi::Error& err) {
|
|
1651
|
+
deferred.Reject(err.Value());
|
|
1652
|
+
}
|
|
1653
|
+
};
|
|
1654
|
+
|
|
1655
|
+
Napi::Value addonInit(const Napi::CallbackInfo& info) {
|
|
1656
|
+
if (backendInitialized) {
|
|
1657
|
+
Napi::Promise::Deferred deferred = Napi::Promise::Deferred::New(info.Env());
|
|
1658
|
+
deferred.Resolve(info.Env().Undefined());
|
|
1659
|
+
return deferred.Promise();
|
|
1660
|
+
}
|
|
1661
|
+
|
|
1662
|
+
AddonBackendLoadWorker* worker = new AddonBackendLoadWorker(info.Env());
|
|
1663
|
+
worker->Queue();
|
|
1664
|
+
return worker->GetPromise();
|
|
1665
|
+
}
|
|
1666
|
+
|
|
1667
|
+
Napi::Value addonDispose(const Napi::CallbackInfo& info) {
|
|
1668
|
+
if (backendDisposed) {
|
|
1669
|
+
Napi::Promise::Deferred deferred = Napi::Promise::Deferred::New(info.Env());
|
|
1670
|
+
deferred.Resolve(info.Env().Undefined());
|
|
1671
|
+
return deferred.Promise();
|
|
1672
|
+
}
|
|
1673
|
+
|
|
1674
|
+
backendDisposed = true;
|
|
1675
|
+
|
|
1676
|
+
AddonBackendUnloadWorker* worker = new AddonBackendUnloadWorker(info.Env());
|
|
1677
|
+
worker->Queue();
|
|
1678
|
+
return worker->GetPromise();
|
|
1679
|
+
}
|
|
1680
|
+
|
|
1069
1681
|
static void addonFreeLlamaBackend(Napi::Env env, int* data) {
|
|
1682
|
+
if (backendDisposed) {
|
|
1683
|
+
return;
|
|
1684
|
+
}
|
|
1685
|
+
|
|
1686
|
+
backendDisposed = true;
|
|
1070
1687
|
if (backendInitialized) {
|
|
1071
|
-
llama_backend_free();
|
|
1072
1688
|
backendInitialized = false;
|
|
1689
|
+
llama_backend_free();
|
|
1073
1690
|
}
|
|
1074
1691
|
}
|
|
1075
1692
|
|
|
1076
1693
|
Napi::Object registerCallback(Napi::Env env, Napi::Object exports) {
|
|
1077
|
-
llama_backend_init();
|
|
1078
|
-
backendInitialized = true;
|
|
1079
|
-
|
|
1080
1694
|
exports.DefineProperties({
|
|
1081
1695
|
Napi::PropertyDescriptor::Function("systemInfo", systemInfo),
|
|
1082
1696
|
Napi::PropertyDescriptor::Function("setLogger", setLogger),
|
|
1083
1697
|
Napi::PropertyDescriptor::Function("setLoggerLogLevel", setLoggerLogLevel),
|
|
1084
1698
|
Napi::PropertyDescriptor::Function("getGpuVramInfo", getGpuVramInfo),
|
|
1699
|
+
Napi::PropertyDescriptor::Function("getGpuType", getGpuType),
|
|
1700
|
+
Napi::PropertyDescriptor::Function("init", addonInit),
|
|
1701
|
+
Napi::PropertyDescriptor::Function("dispose", addonDispose),
|
|
1085
1702
|
});
|
|
1086
1703
|
AddonModel::init(exports);
|
|
1087
1704
|
AddonGrammar::init(exports);
|