node-llama-cpp 3.0.0-beta.13 → 3.0.0-beta.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/dist/ChatWrapper.js +4 -0
- package/dist/ChatWrapper.js.map +1 -1
- package/dist/bindings/AddonTypes.d.ts +35 -6
- package/dist/bindings/Llama.d.ts +12 -0
- package/dist/bindings/Llama.js +100 -7
- package/dist/bindings/Llama.js.map +1 -1
- package/dist/bindings/getLlama.d.ts +19 -1
- package/dist/bindings/getLlama.js +16 -6
- package/dist/bindings/getLlama.js.map +1 -1
- package/dist/bindings/types.d.ts +18 -0
- package/dist/bindings/types.js +31 -2
- package/dist/bindings/types.js.map +1 -1
- package/dist/bindings/utils/MemoryOrchestrator.d.ts +21 -0
- package/dist/bindings/utils/MemoryOrchestrator.js +49 -0
- package/dist/bindings/utils/MemoryOrchestrator.js.map +1 -0
- package/dist/bindings/utils/cloneLlamaCppRepo.d.ts +1 -1
- package/dist/bindings/utils/cloneLlamaCppRepo.js +4 -3
- package/dist/bindings/utils/cloneLlamaCppRepo.js.map +1 -1
- package/dist/bindings/utils/compileLLamaCpp.d.ts +4 -1
- package/dist/bindings/utils/compileLLamaCpp.js +133 -97
- package/dist/bindings/utils/compileLLamaCpp.js.map +1 -1
- package/dist/bindings/utils/detectAvailableComputeLayers.d.ts +3 -0
- package/dist/bindings/utils/detectAvailableComputeLayers.js +155 -13
- package/dist/bindings/utils/detectAvailableComputeLayers.js.map +1 -1
- package/dist/bindings/utils/getLlamaWithoutBackend.d.ts +5 -0
- package/dist/bindings/utils/getLlamaWithoutBackend.js +27 -0
- package/dist/bindings/utils/getLlamaWithoutBackend.js.map +1 -0
- package/dist/bindings/utils/logDistroInstallInstruction.d.ts +1 -0
- package/dist/bindings/utils/logDistroInstallInstruction.js +16 -6
- package/dist/bindings/utils/logDistroInstallInstruction.js.map +1 -1
- package/dist/bindings/utils/resolveCustomCmakeOptions.js +2 -2
- package/dist/bindings/utils/resolveCustomCmakeOptions.js.map +1 -1
- package/dist/bindings/utils/testBindingBinary.js +2 -2
- package/dist/bindings/utils/testBindingBinary.js.map +1 -1
- package/dist/bindings/utils/testCmakeBinary.d.ts +5 -0
- package/dist/bindings/utils/testCmakeBinary.js +32 -0
- package/dist/bindings/utils/testCmakeBinary.js.map +1 -0
- package/dist/chatWrappers/AlpacaChatWrapper.d.ts +2 -1
- package/dist/chatWrappers/AlpacaChatWrapper.js +9 -2
- package/dist/chatWrappers/AlpacaChatWrapper.js.map +1 -1
- package/dist/chatWrappers/ChatMLChatWrapper.js +12 -10
- package/dist/chatWrappers/ChatMLChatWrapper.js.map +1 -1
- package/dist/chatWrappers/FalconChatWrapper.d.ts +2 -1
- package/dist/chatWrappers/FalconChatWrapper.js +28 -11
- package/dist/chatWrappers/FalconChatWrapper.js.map +1 -1
- package/dist/chatWrappers/FunctionaryChatWrapper.js +59 -45
- package/dist/chatWrappers/FunctionaryChatWrapper.js.map +1 -1
- package/dist/chatWrappers/GemmaChatWrapper.js +9 -7
- package/dist/chatWrappers/GemmaChatWrapper.js.map +1 -1
- package/dist/chatWrappers/GeneralChatWrapper.d.ts +2 -1
- package/dist/chatWrappers/GeneralChatWrapper.js +35 -12
- package/dist/chatWrappers/GeneralChatWrapper.js.map +1 -1
- package/dist/chatWrappers/LlamaChatWrapper.d.ts +7 -0
- package/dist/chatWrappers/LlamaChatWrapper.js +26 -8
- package/dist/chatWrappers/LlamaChatWrapper.js.map +1 -1
- package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.d.ts +73 -0
- package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.js +355 -0
- package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.js.map +1 -0
- package/dist/{TemplateChatWrapper.d.ts → chatWrappers/generic/TemplateChatWrapper.d.ts} +16 -18
- package/dist/{TemplateChatWrapper.js → chatWrappers/generic/TemplateChatWrapper.js} +31 -69
- package/dist/chatWrappers/generic/TemplateChatWrapper.js.map +1 -0
- package/dist/chatWrappers/generic/utils/chatHistoryFunctionCallMessageTemplate.d.ts +33 -0
- package/dist/chatWrappers/generic/utils/chatHistoryFunctionCallMessageTemplate.js +45 -0
- package/dist/chatWrappers/generic/utils/chatHistoryFunctionCallMessageTemplate.js.map +1 -0
- package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.d.ts +4 -0
- package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.js +206 -0
- package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.js.map +1 -0
- package/dist/chatWrappers/utils/resolveChatWrapper.d.ts +67 -0
- package/dist/chatWrappers/utils/resolveChatWrapper.js +206 -0
- package/dist/chatWrappers/utils/resolveChatWrapper.js.map +1 -0
- package/dist/cli/cli.js +1 -1
- package/dist/cli/cli.js.map +1 -1
- package/dist/cli/commands/ChatCommand.d.ts +7 -4
- package/dist/cli/commands/ChatCommand.js +177 -70
- package/dist/cli/commands/ChatCommand.js.map +1 -1
- package/dist/cli/commands/ClearCommand.d.ts +1 -1
- package/dist/cli/commands/ClearCommand.js +5 -5
- package/dist/cli/commands/ClearCommand.js.map +1 -1
- package/dist/cli/commands/CompleteCommand.d.ts +3 -2
- package/dist/cli/commands/CompleteCommand.js +115 -51
- package/dist/cli/commands/CompleteCommand.js.map +1 -1
- package/dist/cli/commands/InfillCommand.d.ts +3 -2
- package/dist/cli/commands/InfillCommand.js +115 -51
- package/dist/cli/commands/InfillCommand.js.map +1 -1
- package/dist/cli/commands/OnPostInstallCommand.js +2 -0
- package/dist/cli/commands/OnPostInstallCommand.js.map +1 -1
- package/dist/cli/commands/{InspectCommand.d.ts → inspect/InspectCommand.d.ts} +1 -4
- package/dist/cli/commands/inspect/InspectCommand.js +17 -0
- package/dist/cli/commands/inspect/InspectCommand.js.map +1 -0
- package/dist/cli/commands/inspect/commands/InspectGgufCommand.d.ts +10 -0
- package/dist/cli/commands/inspect/commands/InspectGgufCommand.js +108 -0
- package/dist/cli/commands/inspect/commands/InspectGgufCommand.js.map +1 -0
- package/dist/cli/commands/inspect/commands/InspectGpuCommand.d.ts +4 -0
- package/dist/cli/commands/inspect/commands/InspectGpuCommand.js +98 -0
- package/dist/cli/commands/inspect/commands/InspectGpuCommand.js.map +1 -0
- package/dist/cli/commands/inspect/commands/InspectMeasureCommand.d.ts +14 -0
- package/dist/cli/commands/inspect/commands/InspectMeasureCommand.js +577 -0
- package/dist/cli/commands/inspect/commands/InspectMeasureCommand.js.map +1 -0
- package/dist/cli/utils/ConsoleTable.d.ts +23 -0
- package/dist/cli/utils/ConsoleTable.js +86 -0
- package/dist/cli/utils/ConsoleTable.js.map +1 -0
- package/dist/cli/utils/printCommonInfoLines.d.ts +9 -0
- package/dist/cli/utils/printCommonInfoLines.js +70 -0
- package/dist/cli/utils/printCommonInfoLines.js.map +1 -0
- package/dist/cli/utils/printInfoLine.d.ts +10 -0
- package/dist/cli/utils/printInfoLine.js +45 -0
- package/dist/cli/utils/printInfoLine.js.map +1 -0
- package/dist/cli/utils/resolveCommandGgufPath.d.ts +1 -0
- package/dist/cli/utils/resolveCommandGgufPath.js +6 -0
- package/dist/cli/utils/resolveCommandGgufPath.js.map +1 -0
- package/dist/config.d.ts +3 -1
- package/dist/config.js +7 -1
- package/dist/config.js.map +1 -1
- package/dist/evaluator/LlamaChat/LlamaChat.js +13 -5
- package/dist/evaluator/LlamaChat/LlamaChat.js.map +1 -1
- package/dist/evaluator/LlamaCompletion.js +5 -3
- package/dist/evaluator/LlamaCompletion.js.map +1 -1
- package/dist/evaluator/LlamaContext/LlamaContext.d.ts +43 -9
- package/dist/evaluator/LlamaContext/LlamaContext.js +251 -60
- package/dist/evaluator/LlamaContext/LlamaContext.js.map +1 -1
- package/dist/evaluator/LlamaContext/types.d.ts +68 -10
- package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies/firstInFirstOutStrategy.js.map +1 -0
- package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies/maximumParallelismStrategy.js.map +1 -0
- package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizationStrategy.d.ts +2 -0
- package/dist/evaluator/LlamaContext/utils/{resolveBatchItemsPrioritizingStrategy.js → resolveBatchItemsPrioritizationStrategy.js} +4 -4
- package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizationStrategy.js.map +1 -0
- package/dist/evaluator/LlamaEmbeddingContext.d.ts +29 -7
- package/dist/evaluator/LlamaEmbeddingContext.js +31 -22
- package/dist/evaluator/LlamaEmbeddingContext.js.map +1 -1
- package/dist/evaluator/LlamaGrammar.js +1 -0
- package/dist/evaluator/LlamaGrammar.js.map +1 -1
- package/dist/evaluator/LlamaModel.d.ts +78 -20
- package/dist/evaluator/LlamaModel.js +385 -21
- package/dist/evaluator/LlamaModel.js.map +1 -1
- package/dist/evaluator/TokenMeter.d.ts +54 -0
- package/dist/evaluator/TokenMeter.js +86 -0
- package/dist/evaluator/TokenMeter.js.map +1 -0
- package/dist/gguf/GgufInsights.d.ts +40 -0
- package/dist/gguf/GgufInsights.js +350 -0
- package/dist/gguf/GgufInsights.js.map +1 -0
- package/dist/gguf/consts.d.ts +3 -0
- package/dist/gguf/consts.js +8 -0
- package/dist/gguf/consts.js.map +1 -0
- package/dist/gguf/errors/InvalidGgufMagicError.d.ts +3 -0
- package/dist/gguf/errors/InvalidGgufMagicError.js +6 -0
- package/dist/gguf/errors/InvalidGgufMagicError.js.map +1 -0
- package/dist/gguf/errors/UnsupportedGgufValueTypeError.d.ts +4 -0
- package/dist/gguf/errors/UnsupportedGgufValueTypeError.js +9 -0
- package/dist/gguf/errors/UnsupportedGgufValueTypeError.js.map +1 -0
- package/dist/gguf/fileReaders/GgufFileReader.d.ts +33 -0
- package/dist/gguf/fileReaders/GgufFileReader.js +76 -0
- package/dist/gguf/fileReaders/GgufFileReader.js.map +1 -0
- package/dist/gguf/fileReaders/GgufFsFileReader.d.ts +17 -0
- package/dist/gguf/fileReaders/GgufFsFileReader.js +45 -0
- package/dist/gguf/fileReaders/GgufFsFileReader.js.map +1 -0
- package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.d.ts +22 -0
- package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.js +63 -0
- package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.js.map +1 -0
- package/dist/gguf/parser/GgufV2Parser.d.ts +19 -0
- package/dist/gguf/parser/GgufV2Parser.js +115 -0
- package/dist/gguf/parser/GgufV2Parser.js.map +1 -0
- package/dist/gguf/parser/GgufV3Parser.d.ts +3 -0
- package/dist/gguf/parser/GgufV3Parser.js +4 -0
- package/dist/gguf/parser/GgufV3Parser.js.map +1 -0
- package/dist/gguf/parser/parseGguf.d.ts +8 -0
- package/dist/gguf/parser/parseGguf.js +58 -0
- package/dist/gguf/parser/parseGguf.js.map +1 -0
- package/dist/gguf/readGgufFileInfo.d.ts +30 -0
- package/dist/gguf/readGgufFileInfo.js +37 -0
- package/dist/gguf/readGgufFileInfo.js.map +1 -0
- package/dist/gguf/types/GgufFileInfoTypes.d.ts +52 -0
- package/dist/gguf/types/GgufFileInfoTypes.js +18 -0
- package/dist/gguf/types/GgufFileInfoTypes.js.map +1 -0
- package/dist/gguf/types/GgufMetadataTypes.d.ts +330 -0
- package/dist/gguf/types/GgufMetadataTypes.js +86 -0
- package/dist/gguf/types/GgufMetadataTypes.js.map +1 -0
- package/dist/gguf/types/GgufTensorInfoTypes.d.ts +37 -0
- package/dist/gguf/types/GgufTensorInfoTypes.js +33 -0
- package/dist/gguf/types/GgufTensorInfoTypes.js.map +1 -0
- package/dist/gguf/utils/GgufReadOffset.d.ts +6 -0
- package/dist/gguf/utils/GgufReadOffset.js +18 -0
- package/dist/gguf/utils/GgufReadOffset.js.map +1 -0
- package/dist/gguf/utils/convertMetadataKeyValueRecordToNestedObject.d.ts +5 -0
- package/dist/gguf/utils/convertMetadataKeyValueRecordToNestedObject.js +38 -0
- package/dist/gguf/utils/convertMetadataKeyValueRecordToNestedObject.js.map +1 -0
- package/dist/gguf/utils/getGgufFileTypeName.d.ts +4 -0
- package/dist/gguf/utils/getGgufFileTypeName.js +13 -0
- package/dist/gguf/utils/getGgufFileTypeName.js.map +1 -0
- package/dist/gguf/utils/getGgufMetadataArchitectureData.d.ts +3 -0
- package/dist/gguf/utils/getGgufMetadataArchitectureData.js +4 -0
- package/dist/gguf/utils/getGgufMetadataArchitectureData.js.map +1 -0
- package/dist/gguf/utils/normalizeGgufDownloadUrl.d.ts +1 -0
- package/dist/gguf/utils/normalizeGgufDownloadUrl.js +16 -0
- package/dist/gguf/utils/normalizeGgufDownloadUrl.js.map +1 -0
- package/dist/index.d.ts +13 -7
- package/dist/index.js +11 -6
- package/dist/index.js.map +1 -1
- package/dist/state.d.ts +2 -0
- package/dist/state.js +7 -0
- package/dist/state.js.map +1 -1
- package/dist/types.d.ts +1 -1
- package/dist/utils/DisposeGuard.d.ts +13 -0
- package/dist/utils/DisposeGuard.js +120 -0
- package/dist/utils/DisposeGuard.js.map +1 -0
- package/dist/utils/InsufficientMemoryError.d.ts +3 -0
- package/dist/utils/InsufficientMemoryError.js +6 -0
- package/dist/utils/InsufficientMemoryError.js.map +1 -0
- package/dist/utils/LlamaText.d.ts +25 -10
- package/dist/utils/LlamaText.js +205 -23
- package/dist/utils/LlamaText.js.map +1 -1
- package/dist/utils/StopGenerationDetector.js +3 -1
- package/dist/utils/StopGenerationDetector.js.map +1 -1
- package/dist/utils/cmake.js +1 -1
- package/dist/utils/cmake.js.map +1 -1
- package/dist/utils/findBestOption.d.ts +4 -0
- package/dist/utils/findBestOption.js +15 -0
- package/dist/utils/findBestOption.js.map +1 -0
- package/dist/utils/getConsoleLogPrefix.js +3 -2
- package/dist/utils/getConsoleLogPrefix.js.map +1 -1
- package/dist/utils/getQueuedTokensBeforeStopTrigger.js +3 -3
- package/dist/utils/getQueuedTokensBeforeStopTrigger.js.map +1 -1
- package/dist/utils/gitReleaseBundles.js +68 -1
- package/dist/utils/gitReleaseBundles.js.map +1 -1
- package/dist/utils/mergeUnionTypes.d.ts +4 -0
- package/dist/utils/parseModelFileName.d.ts +1 -0
- package/dist/utils/parseModelFileName.js +6 -1
- package/dist/utils/parseModelFileName.js.map +1 -1
- package/dist/utils/prettyPrintObject.d.ts +10 -1
- package/dist/utils/prettyPrintObject.js +57 -13
- package/dist/utils/prettyPrintObject.js.map +1 -1
- package/dist/utils/removeNullFields.d.ts +2 -2
- package/dist/utils/removeNullFields.js.map +1 -1
- package/dist/utils/spawnCommand.d.ts +11 -1
- package/dist/utils/spawnCommand.js +55 -7
- package/dist/utils/spawnCommand.js.map +1 -1
- package/dist/utils/tokenizeInput.d.ts +1 -1
- package/dist/utils/tokenizeInput.js +3 -3
- package/dist/utils/tokenizeInput.js.map +1 -1
- package/dist/utils/withOra.d.ts +1 -0
- package/dist/utils/withOra.js +2 -2
- package/dist/utils/withOra.js.map +1 -1
- package/llama/CMakeLists.txt +5 -5
- package/llama/addon.cpp +793 -88
- package/llama/binariesGithubRelease.json +1 -1
- package/llama/gitRelease.bundle +0 -0
- package/llama/gpuInfo/cuda-gpu-info.cu +21 -0
- package/llama/gpuInfo/cuda-gpu-info.h +3 -0
- package/llama/gpuInfo/metal-gpu-info.h +4 -1
- package/llama/gpuInfo/metal-gpu-info.mm +14 -1
- package/llama/gpuInfo/vulkan-gpu-info.cpp +20 -2
- package/llama/gpuInfo/vulkan-gpu-info.h +2 -0
- package/llama/grammars/json.gbnf +1 -1
- package/llama/grammars/json_arr.gbnf +1 -1
- package/llama/llama.cpp.info.json +1 -1
- package/llama/toolchains/win32.host-x64.target-arm64.cmake +41 -0
- package/llamaBins/linux-arm64/{.buildMetadata.json → _nlcBuildMetadata.json} +1 -1
- package/llamaBins/linux-arm64/llama-addon.node +0 -0
- package/llamaBins/linux-armv7l/{.buildMetadata.json → _nlcBuildMetadata.json} +1 -1
- package/llamaBins/linux-armv7l/llama-addon.node +0 -0
- package/llamaBins/linux-x64/{.buildMetadata.json → _nlcBuildMetadata.json} +1 -1
- package/llamaBins/linux-x64/llama-addon.node +0 -0
- package/llamaBins/linux-x64-cuda/{.buildMetadata.json → _nlcBuildMetadata.json} +1 -1
- package/llamaBins/linux-x64-cuda/llama-addon.node +0 -0
- package/llamaBins/linux-x64-vulkan/{.buildMetadata.json → _nlcBuildMetadata.json} +1 -1
- package/llamaBins/linux-x64-vulkan/llama-addon.node +0 -0
- package/llamaBins/mac-arm64-metal/{.buildMetadata.json → _nlcBuildMetadata.json} +1 -1
- package/llamaBins/mac-arm64-metal/default.metallib +0 -0
- package/llamaBins/mac-arm64-metal/llama-addon.node +0 -0
- package/llamaBins/mac-x64/{.buildMetadata.json → _nlcBuildMetadata.json} +1 -1
- package/llamaBins/mac-x64/llama-addon.node +0 -0
- package/llamaBins/win-arm64/_nlcBuildMetadata.json +1 -0
- package/llamaBins/win-arm64/llama-addon.exp +0 -0
- package/llamaBins/win-arm64/llama-addon.lib +0 -0
- package/llamaBins/win-arm64/llama-addon.node +0 -0
- package/llamaBins/win-x64/{.buildMetadata.json → _nlcBuildMetadata.json} +1 -1
- package/llamaBins/win-x64/llama-addon.exp +0 -0
- package/llamaBins/win-x64/llama-addon.lib +0 -0
- package/llamaBins/win-x64/llama-addon.node +0 -0
- package/llamaBins/win-x64-cuda/{.buildMetadata.json → _nlcBuildMetadata.json} +1 -1
- package/llamaBins/win-x64-cuda/llama-addon.exp +0 -0
- package/llamaBins/win-x64-cuda/llama-addon.lib +0 -0
- package/llamaBins/win-x64-cuda/llama-addon.node +0 -0
- package/llamaBins/win-x64-vulkan/{.buildMetadata.json → _nlcBuildMetadata.json} +1 -1
- package/llamaBins/win-x64-vulkan/llama-addon.exp +0 -0
- package/llamaBins/win-x64-vulkan/llama-addon.lib +0 -0
- package/llamaBins/win-x64-vulkan/llama-addon.node +0 -0
- package/package.json +16 -11
- package/dist/TemplateChatWrapper.js.map +0 -1
- package/dist/bindings/utils/resolveChatWrapperBasedOnWrapperTypeName.d.ts +0 -33
- package/dist/bindings/utils/resolveChatWrapperBasedOnWrapperTypeName.js +0 -49
- package/dist/bindings/utils/resolveChatWrapperBasedOnWrapperTypeName.js.map +0 -1
- package/dist/chatWrappers/resolveChatWrapperBasedOnModel.d.ts +0 -13
- package/dist/chatWrappers/resolveChatWrapperBasedOnModel.js +0 -63
- package/dist/chatWrappers/resolveChatWrapperBasedOnModel.js.map +0 -1
- package/dist/cli/commands/InspectCommand.js +0 -113
- package/dist/cli/commands/InspectCommand.js.map +0 -1
- package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/firstInFirstOutStrategy.js.map +0 -1
- package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/maximumParallelismStrategy.js.map +0 -1
- package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizingStrategy.d.ts +0 -2
- package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizingStrategy.js.map +0 -1
- package/dist/gguf/GGUFInsights.d.ts +0 -28
- package/dist/gguf/GGUFInsights.js +0 -58
- package/dist/gguf/GGUFInsights.js.map +0 -1
- package/dist/gguf/GGUFMetadata.d.ts +0 -19
- package/dist/gguf/GGUFMetadata.js +0 -38
- package/dist/gguf/GGUFMetadata.js.map +0 -1
- package/dist/gguf/errors/InvalidGGUFMagicError.d.ts +0 -3
- package/dist/gguf/errors/InvalidGGUFMagicError.js +0 -6
- package/dist/gguf/errors/InvalidGGUFMagicError.js.map +0 -1
- package/dist/gguf/errors/MetadataNotParsedYetError.d.ts +0 -3
- package/dist/gguf/errors/MetadataNotParsedYetError.js +0 -6
- package/dist/gguf/errors/MetadataNotParsedYetError.js.map +0 -1
- package/dist/gguf/errors/MissingNodeLlamaError.d.ts +0 -3
- package/dist/gguf/errors/MissingNodeLlamaError.js +0 -6
- package/dist/gguf/errors/MissingNodeLlamaError.js.map +0 -1
- package/dist/gguf/errors/ModelScore/NotEnoughVRamError.d.ts +0 -5
- package/dist/gguf/errors/ModelScore/NotEnoughVRamError.js +0 -12
- package/dist/gguf/errors/ModelScore/NotEnoughVRamError.js.map +0 -1
- package/dist/gguf/errors/UnsupportedMetadataTypeError.d.ts +0 -4
- package/dist/gguf/errors/UnsupportedMetadataTypeError.js +0 -8
- package/dist/gguf/errors/UnsupportedMetadataTypeError.js.map +0 -1
- package/dist/gguf/ggufParser/GGUFParser.d.ts +0 -18
- package/dist/gguf/ggufParser/GGUFParser.js +0 -123
- package/dist/gguf/ggufParser/GGUFParser.js.map +0 -1
- package/dist/gguf/ggufParser/GGUFTypes.d.ts +0 -257
- package/dist/gguf/ggufParser/GGUFTypes.js +0 -2
- package/dist/gguf/ggufParser/GGUFTypes.js.map +0 -1
- package/dist/gguf/ggufParser/checkArchitecture.d.ts +0 -14
- package/dist/gguf/ggufParser/checkArchitecture.js +0 -74
- package/dist/gguf/ggufParser/checkArchitecture.js.map +0 -1
- package/dist/gguf/ggufParser/stream/GGUFBaseStream.d.ts +0 -38
- package/dist/gguf/ggufParser/stream/GGUFBaseStream.js +0 -83
- package/dist/gguf/ggufParser/stream/GGUFBaseStream.js.map +0 -1
- package/dist/gguf/ggufParser/stream/GGUFFetchStream.d.ts +0 -14
- package/dist/gguf/ggufParser/stream/GGUFFetchStream.js +0 -35
- package/dist/gguf/ggufParser/stream/GGUFFetchStream.js.map +0 -1
- package/dist/gguf/ggufParser/stream/GGUFReadStream.d.ts +0 -15
- package/dist/gguf/ggufParser/stream/GGUFReadStream.js +0 -40
- package/dist/gguf/ggufParser/stream/GGUFReadStream.js.map +0 -1
- package/dist/utils/parseModelTypeDescription.d.ts +0 -6
- package/dist/utils/parseModelTypeDescription.js +0 -9
- package/dist/utils/parseModelTypeDescription.js.map +0 -1
- package/dist/utils/resolveChatWrapper.d.ts +0 -4
- package/dist/utils/resolveChatWrapper.js +0 -16
- package/dist/utils/resolveChatWrapper.js.map +0 -1
- package/llamaBins/mac-arm64-metal/ggml-metal.metal +0 -7731
- /package/dist/evaluator/LlamaContext/utils/{batchItemsPrioritizingStrategies → batchItemsPrioritizationStrategies}/firstInFirstOutStrategy.d.ts +0 -0
- /package/dist/evaluator/LlamaContext/utils/{batchItemsPrioritizingStrategies → batchItemsPrioritizationStrategies}/firstInFirstOutStrategy.js +0 -0
- /package/dist/evaluator/LlamaContext/utils/{batchItemsPrioritizingStrategies → batchItemsPrioritizationStrategies}/maximumParallelismStrategy.d.ts +0 -0
- /package/dist/evaluator/LlamaContext/utils/{batchItemsPrioritizingStrategies → batchItemsPrioritizationStrategies}/maximumParallelismStrategy.js +0 -0
package/llama/addon.cpp
CHANGED
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
#include "llama.h"
|
|
10
10
|
#include "napi.h"
|
|
11
11
|
|
|
12
|
-
#ifdef
|
|
12
|
+
#ifdef GPU_INFO_USE_CUDA
|
|
13
13
|
# include "gpuInfo/cuda-gpu-info.h"
|
|
14
14
|
#endif
|
|
15
15
|
#ifdef GPU_INFO_USE_VULKAN
|
|
@@ -35,10 +35,77 @@ void addonCallJsLogCallback(
|
|
|
35
35
|
using AddonThreadSafeLogCallbackFunction =
|
|
36
36
|
Napi::TypedThreadSafeFunction<AddonThreadSafeLogCallbackFunctionContext, addon_logger_log, addonCallJsLogCallback>;
|
|
37
37
|
|
|
38
|
+
|
|
39
|
+
struct addon_progress_event {
|
|
40
|
+
public:
|
|
41
|
+
const float progress;
|
|
42
|
+
};
|
|
43
|
+
|
|
44
|
+
using AddonThreadSafeProgressCallbackFunctionContext = Napi::Reference<Napi::Value>;
|
|
45
|
+
void addonCallJsProgressCallback(
|
|
46
|
+
Napi::Env env, Napi::Function callback, AddonThreadSafeProgressCallbackFunctionContext* context, addon_progress_event* data
|
|
47
|
+
);
|
|
48
|
+
using AddonThreadSafeProgressEventCallbackFunction =
|
|
49
|
+
Napi::TypedThreadSafeFunction<AddonThreadSafeProgressCallbackFunctionContext, addon_progress_event, addonCallJsProgressCallback>;
|
|
50
|
+
|
|
51
|
+
|
|
38
52
|
AddonThreadSafeLogCallbackFunction addonThreadSafeLoggerCallback;
|
|
39
53
|
bool addonJsLoggerCallbackSet = false;
|
|
40
54
|
int addonLoggerLogLevel = 5;
|
|
41
55
|
bool backendInitialized = false;
|
|
56
|
+
bool backendDisposed = false;
|
|
57
|
+
|
|
58
|
+
void addonCallJsProgressCallback(
|
|
59
|
+
Napi::Env env, Napi::Function callback, AddonThreadSafeProgressCallbackFunctionContext* context, addon_progress_event* data
|
|
60
|
+
) {
|
|
61
|
+
if (env != nullptr && callback != nullptr && addonJsLoggerCallbackSet) {
|
|
62
|
+
try {
|
|
63
|
+
callback.Call({Napi::Number::New(env, data->progress)});
|
|
64
|
+
} catch (const Napi::Error& e) {}
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
if (data != nullptr) {
|
|
68
|
+
delete data;
|
|
69
|
+
}
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
static uint64_t calculateBatchMemorySize(int32_t n_tokens_alloc, int32_t embd, int32_t n_seq_max) {
|
|
73
|
+
uint64_t totalSize = 0;
|
|
74
|
+
|
|
75
|
+
if (embd) {
|
|
76
|
+
totalSize += sizeof(float) * n_tokens_alloc * embd;
|
|
77
|
+
} else {
|
|
78
|
+
totalSize += sizeof(llama_token) * n_tokens_alloc;
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
totalSize += sizeof(llama_pos) * n_tokens_alloc;
|
|
82
|
+
totalSize += sizeof(int32_t) * n_tokens_alloc;
|
|
83
|
+
totalSize += sizeof(llama_seq_id *) * (n_tokens_alloc + 1);
|
|
84
|
+
|
|
85
|
+
totalSize += sizeof(llama_seq_id) * n_seq_max * n_tokens_alloc;
|
|
86
|
+
|
|
87
|
+
totalSize += sizeof(int8_t) * n_tokens_alloc;
|
|
88
|
+
|
|
89
|
+
return totalSize;
|
|
90
|
+
}
|
|
91
|
+
|
|
92
|
+
static void adjustNapiExternalMemoryAdd(Napi::Env env, uint64_t size) {
|
|
93
|
+
const uint64_t chunkSize = std::numeric_limits<int64_t>::max();
|
|
94
|
+
while (size > 0) {
|
|
95
|
+
int64_t adjustSize = std::min(size, chunkSize);
|
|
96
|
+
Napi::MemoryManagement::AdjustExternalMemory(env, adjustSize);
|
|
97
|
+
size -= adjustSize;
|
|
98
|
+
}
|
|
99
|
+
}
|
|
100
|
+
|
|
101
|
+
static void adjustNapiExternalMemorySubtract(Napi::Env env, uint64_t size) {
|
|
102
|
+
const uint64_t chunkSize = std::numeric_limits<int64_t>::max();
|
|
103
|
+
while (size > 0) {
|
|
104
|
+
int64_t adjustSize = std::min(size, chunkSize);
|
|
105
|
+
Napi::MemoryManagement::AdjustExternalMemory(env, -adjustSize);
|
|
106
|
+
size -= adjustSize;
|
|
107
|
+
}
|
|
108
|
+
}
|
|
42
109
|
|
|
43
110
|
std::string addon_model_token_to_piece(const struct llama_model* model, llama_token token) {
|
|
44
111
|
std::vector<char> result(8, 0);
|
|
@@ -54,7 +121,7 @@ std::string addon_model_token_to_piece(const struct llama_model* model, llama_to
|
|
|
54
121
|
return std::string(result.data(), result.size());
|
|
55
122
|
}
|
|
56
123
|
|
|
57
|
-
#ifdef
|
|
124
|
+
#ifdef GPU_INFO_USE_CUDA
|
|
58
125
|
void logCudaError(const char* message) {
|
|
59
126
|
addonLlamaCppLogCallback(GGML_LOG_LEVEL_ERROR, (std::string("CUDA error: ") + std::string(message)).c_str(), nullptr);
|
|
60
127
|
}
|
|
@@ -69,7 +136,7 @@ Napi::Value getGpuVramInfo(const Napi::CallbackInfo& info) {
|
|
|
69
136
|
uint64_t total = 0;
|
|
70
137
|
uint64_t used = 0;
|
|
71
138
|
|
|
72
|
-
#ifdef
|
|
139
|
+
#ifdef GPU_INFO_USE_CUDA
|
|
73
140
|
size_t cudaDeviceTotal = 0;
|
|
74
141
|
size_t cudaDeviceUsed = 0;
|
|
75
142
|
bool cudeGetInfoSuccess = gpuInfoGetTotalCudaDevicesInfo(&cudaDeviceTotal, &cudaDeviceUsed, logCudaError);
|
|
@@ -94,7 +161,7 @@ Napi::Value getGpuVramInfo(const Napi::CallbackInfo& info) {
|
|
|
94
161
|
#ifdef GPU_INFO_USE_METAL
|
|
95
162
|
uint64_t metalDeviceTotal = 0;
|
|
96
163
|
uint64_t metalDeviceUsed = 0;
|
|
97
|
-
|
|
164
|
+
getMetalGpuInfo(&metalDeviceTotal, &metalDeviceUsed);
|
|
98
165
|
|
|
99
166
|
total += metalDeviceTotal;
|
|
100
167
|
used += metalDeviceUsed;
|
|
@@ -107,8 +174,34 @@ Napi::Value getGpuVramInfo(const Napi::CallbackInfo& info) {
|
|
|
107
174
|
return result;
|
|
108
175
|
}
|
|
109
176
|
|
|
177
|
+
Napi::Value getGpuDeviceInfo(const Napi::CallbackInfo& info) {
|
|
178
|
+
std::vector<std::string> deviceNames;
|
|
179
|
+
|
|
180
|
+
#ifdef GPU_INFO_USE_CUDA
|
|
181
|
+
gpuInfoGetCudaDeviceNames(&deviceNames, logCudaError);
|
|
182
|
+
#endif
|
|
183
|
+
|
|
184
|
+
#ifdef GPU_INFO_USE_VULKAN
|
|
185
|
+
gpuInfoGetVulkanDeviceNames(&deviceNames, logVulkanWarning);
|
|
186
|
+
#endif
|
|
187
|
+
|
|
188
|
+
#ifdef GPU_INFO_USE_METAL
|
|
189
|
+
getMetalGpuDeviceNames(&deviceNames);
|
|
190
|
+
#endif
|
|
191
|
+
|
|
192
|
+
Napi::Object result = Napi::Object::New(info.Env());
|
|
193
|
+
|
|
194
|
+
Napi::Array deviceNamesNapiArray = Napi::Array::New(info.Env(), deviceNames.size());
|
|
195
|
+
for (size_t i = 0; i < deviceNames.size(); ++i) {
|
|
196
|
+
deviceNamesNapiArray[i] = Napi::String::New(info.Env(), deviceNames[i]);
|
|
197
|
+
}
|
|
198
|
+
result.Set("deviceNames", deviceNamesNapiArray);
|
|
199
|
+
|
|
200
|
+
return result;
|
|
201
|
+
}
|
|
202
|
+
|
|
110
203
|
Napi::Value getGpuType(const Napi::CallbackInfo& info) {
|
|
111
|
-
#ifdef
|
|
204
|
+
#ifdef GPU_INFO_USE_CUDA
|
|
112
205
|
return Napi::String::New(info.Env(), "cuda");
|
|
113
206
|
#endif
|
|
114
207
|
|
|
@@ -143,21 +236,42 @@ static Napi::Value getNapiControlToken(const Napi::CallbackInfo& info, llama_mod
|
|
|
143
236
|
return Napi::Number::From(info.Env(), token);
|
|
144
237
|
}
|
|
145
238
|
|
|
239
|
+
static bool llamaModelParamsProgressCallback(float progress, void * user_data);
|
|
240
|
+
|
|
146
241
|
class AddonModel : public Napi::ObjectWrap<AddonModel> {
|
|
147
242
|
public:
|
|
148
243
|
llama_model_params model_params;
|
|
149
244
|
llama_model* model;
|
|
245
|
+
uint64_t loadedModelSize = 0;
|
|
246
|
+
Napi::Reference<Napi::Object> addonExportsRef;
|
|
247
|
+
bool hasAddonExportsRef = false;
|
|
248
|
+
|
|
249
|
+
std::string modelPath;
|
|
250
|
+
bool modelLoaded = false;
|
|
251
|
+
bool abortModelLoad = false;
|
|
252
|
+
bool model_load_stopped = false;
|
|
253
|
+
float rawModelLoadPercentage = 0;
|
|
254
|
+
unsigned modelLoadPercentage = 0;
|
|
255
|
+
AddonThreadSafeProgressEventCallbackFunction addonThreadSafeOnLoadProgressEventCallback;
|
|
256
|
+
bool onLoadProgressEventCallbackSet = false;
|
|
257
|
+
bool hasLoadAbortSignal = false;
|
|
258
|
+
|
|
150
259
|
bool disposed = false;
|
|
151
260
|
|
|
152
261
|
AddonModel(const Napi::CallbackInfo& info) : Napi::ObjectWrap<AddonModel>(info) {
|
|
153
262
|
model_params = llama_model_default_params();
|
|
154
263
|
|
|
155
264
|
// Get the model path
|
|
156
|
-
|
|
265
|
+
modelPath = info[0].As<Napi::String>().Utf8Value();
|
|
157
266
|
|
|
158
267
|
if (info.Length() > 1 && info[1].IsObject()) {
|
|
159
268
|
Napi::Object options = info[1].As<Napi::Object>();
|
|
160
269
|
|
|
270
|
+
if (options.Has("addonExports")) {
|
|
271
|
+
addonExportsRef = Napi::Persistent(options.Get("addonExports").As<Napi::Object>());
|
|
272
|
+
hasAddonExportsRef = true;
|
|
273
|
+
}
|
|
274
|
+
|
|
161
275
|
if (options.Has("gpuLayers")) {
|
|
162
276
|
model_params.n_gpu_layers = options.Get("gpuLayers").As<Napi::Number>().Int32Value();
|
|
163
277
|
}
|
|
@@ -173,13 +287,37 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
|
|
|
173
287
|
if (options.Has("useMlock")) {
|
|
174
288
|
model_params.use_mlock = options.Get("useMlock").As<Napi::Boolean>().Value();
|
|
175
289
|
}
|
|
176
|
-
}
|
|
177
290
|
|
|
178
|
-
|
|
291
|
+
if (options.Has("onLoadProgress")) {
|
|
292
|
+
auto onLoadProgressJSCallback = options.Get("onLoadProgress").As<Napi::Function>();
|
|
293
|
+
if (onLoadProgressJSCallback.IsFunction()) {
|
|
294
|
+
AddonThreadSafeProgressCallbackFunctionContext* context = new Napi::Reference<Napi::Value>(Napi::Persistent(info.This()));
|
|
295
|
+
addonThreadSafeOnLoadProgressEventCallback = AddonThreadSafeProgressEventCallbackFunction::New(
|
|
296
|
+
info.Env(),
|
|
297
|
+
onLoadProgressJSCallback,
|
|
298
|
+
"onLoadProgressCallback",
|
|
299
|
+
0,
|
|
300
|
+
1,
|
|
301
|
+
context,
|
|
302
|
+
[](Napi::Env, AddonModel* addonModel, AddonThreadSafeProgressCallbackFunctionContext* ctx) {
|
|
303
|
+
addonModel->onLoadProgressEventCallbackSet = false;
|
|
304
|
+
|
|
305
|
+
delete ctx;
|
|
306
|
+
},
|
|
307
|
+
this
|
|
308
|
+
);
|
|
309
|
+
onLoadProgressEventCallbackSet = true;
|
|
310
|
+
}
|
|
311
|
+
}
|
|
312
|
+
|
|
313
|
+
if (options.Has("hasLoadAbortSignal")) {
|
|
314
|
+
hasLoadAbortSignal = options.Get("hasLoadAbortSignal").As<Napi::Boolean>().Value();
|
|
315
|
+
}
|
|
179
316
|
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
317
|
+
if (onLoadProgressEventCallbackSet || hasLoadAbortSignal) {
|
|
318
|
+
model_params.progress_callback_user_data = &(*this);
|
|
319
|
+
model_params.progress_callback = llamaModelParamsProgressCallback;
|
|
320
|
+
}
|
|
183
321
|
}
|
|
184
322
|
}
|
|
185
323
|
|
|
@@ -192,23 +330,31 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
|
|
|
192
330
|
return;
|
|
193
331
|
}
|
|
194
332
|
|
|
195
|
-
llama_free_model(model);
|
|
196
333
|
disposed = true;
|
|
197
|
-
|
|
334
|
+
if (modelLoaded) {
|
|
335
|
+
modelLoaded = false;
|
|
336
|
+
llama_free_model(model);
|
|
198
337
|
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
return info.Env().Undefined();
|
|
338
|
+
adjustNapiExternalMemorySubtract(Env(), loadedModelSize);
|
|
339
|
+
loadedModelSize = 0;
|
|
202
340
|
}
|
|
203
341
|
|
|
204
|
-
|
|
342
|
+
if (hasAddonExportsRef) {
|
|
343
|
+
addonExportsRef.Unref();
|
|
344
|
+
hasAddonExportsRef = false;
|
|
345
|
+
}
|
|
346
|
+
}
|
|
205
347
|
|
|
348
|
+
Napi::Value Init(const Napi::CallbackInfo& info);
|
|
349
|
+
Napi::Value AbortActiveModelLoad(const Napi::CallbackInfo& info) {
|
|
350
|
+
abortModelLoad = true;
|
|
206
351
|
return info.Env().Undefined();
|
|
207
352
|
}
|
|
353
|
+
Napi::Value Dispose(const Napi::CallbackInfo& info);
|
|
208
354
|
|
|
209
355
|
Napi::Value Tokenize(const Napi::CallbackInfo& info) {
|
|
210
356
|
if (disposed) {
|
|
211
|
-
Napi::Error::New(info.Env(), "
|
|
357
|
+
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
|
|
212
358
|
return info.Env().Undefined();
|
|
213
359
|
}
|
|
214
360
|
|
|
@@ -226,7 +372,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
|
|
|
226
372
|
}
|
|
227
373
|
Napi::Value Detokenize(const Napi::CallbackInfo& info) {
|
|
228
374
|
if (disposed) {
|
|
229
|
-
Napi::Error::New(info.Env(), "
|
|
375
|
+
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
|
|
230
376
|
return info.Env().Undefined();
|
|
231
377
|
}
|
|
232
378
|
|
|
@@ -251,7 +397,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
|
|
|
251
397
|
|
|
252
398
|
Napi::Value GetTrainContextSize(const Napi::CallbackInfo& info) {
|
|
253
399
|
if (disposed) {
|
|
254
|
-
Napi::Error::New(info.Env(), "
|
|
400
|
+
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
|
|
255
401
|
return info.Env().Undefined();
|
|
256
402
|
}
|
|
257
403
|
|
|
@@ -260,7 +406,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
|
|
|
260
406
|
|
|
261
407
|
Napi::Value GetEmbeddingVectorSize(const Napi::CallbackInfo& info) {
|
|
262
408
|
if (disposed) {
|
|
263
|
-
Napi::Error::New(info.Env(), "
|
|
409
|
+
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
|
|
264
410
|
return info.Env().Undefined();
|
|
265
411
|
}
|
|
266
412
|
|
|
@@ -269,7 +415,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
|
|
|
269
415
|
|
|
270
416
|
Napi::Value GetTotalSize(const Napi::CallbackInfo& info) {
|
|
271
417
|
if (disposed) {
|
|
272
|
-
Napi::Error::New(info.Env(), "
|
|
418
|
+
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
|
|
273
419
|
return info.Env().Undefined();
|
|
274
420
|
}
|
|
275
421
|
|
|
@@ -278,7 +424,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
|
|
|
278
424
|
|
|
279
425
|
Napi::Value GetTotalParameters(const Napi::CallbackInfo& info) {
|
|
280
426
|
if (disposed) {
|
|
281
|
-
Napi::Error::New(info.Env(), "
|
|
427
|
+
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
|
|
282
428
|
return info.Env().Undefined();
|
|
283
429
|
}
|
|
284
430
|
|
|
@@ -287,7 +433,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
|
|
|
287
433
|
|
|
288
434
|
Napi::Value GetModelDescription(const Napi::CallbackInfo& info) {
|
|
289
435
|
if (disposed) {
|
|
290
|
-
Napi::Error::New(info.Env(), "
|
|
436
|
+
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
|
|
291
437
|
return info.Env().Undefined();
|
|
292
438
|
}
|
|
293
439
|
|
|
@@ -299,7 +445,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
|
|
|
299
445
|
|
|
300
446
|
Napi::Value TokenBos(const Napi::CallbackInfo& info) {
|
|
301
447
|
if (disposed) {
|
|
302
|
-
Napi::Error::New(info.Env(), "
|
|
448
|
+
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
|
|
303
449
|
return info.Env().Undefined();
|
|
304
450
|
}
|
|
305
451
|
|
|
@@ -307,7 +453,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
|
|
|
307
453
|
}
|
|
308
454
|
Napi::Value TokenEos(const Napi::CallbackInfo& info) {
|
|
309
455
|
if (disposed) {
|
|
310
|
-
Napi::Error::New(info.Env(), "
|
|
456
|
+
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
|
|
311
457
|
return info.Env().Undefined();
|
|
312
458
|
}
|
|
313
459
|
|
|
@@ -315,7 +461,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
|
|
|
315
461
|
}
|
|
316
462
|
Napi::Value TokenNl(const Napi::CallbackInfo& info) {
|
|
317
463
|
if (disposed) {
|
|
318
|
-
Napi::Error::New(info.Env(), "
|
|
464
|
+
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
|
|
319
465
|
return info.Env().Undefined();
|
|
320
466
|
}
|
|
321
467
|
|
|
@@ -323,7 +469,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
|
|
|
323
469
|
}
|
|
324
470
|
Napi::Value PrefixToken(const Napi::CallbackInfo& info) {
|
|
325
471
|
if (disposed) {
|
|
326
|
-
Napi::Error::New(info.Env(), "
|
|
472
|
+
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
|
|
327
473
|
return info.Env().Undefined();
|
|
328
474
|
}
|
|
329
475
|
|
|
@@ -331,7 +477,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
|
|
|
331
477
|
}
|
|
332
478
|
Napi::Value MiddleToken(const Napi::CallbackInfo& info) {
|
|
333
479
|
if (disposed) {
|
|
334
|
-
Napi::Error::New(info.Env(), "
|
|
480
|
+
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
|
|
335
481
|
return info.Env().Undefined();
|
|
336
482
|
}
|
|
337
483
|
|
|
@@ -339,7 +485,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
|
|
|
339
485
|
}
|
|
340
486
|
Napi::Value SuffixToken(const Napi::CallbackInfo& info) {
|
|
341
487
|
if (disposed) {
|
|
342
|
-
Napi::Error::New(info.Env(), "
|
|
488
|
+
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
|
|
343
489
|
return info.Env().Undefined();
|
|
344
490
|
}
|
|
345
491
|
|
|
@@ -347,7 +493,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
|
|
|
347
493
|
}
|
|
348
494
|
Napi::Value EotToken(const Napi::CallbackInfo& info) {
|
|
349
495
|
if (disposed) {
|
|
350
|
-
Napi::Error::New(info.Env(), "
|
|
496
|
+
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
|
|
351
497
|
return info.Env().Undefined();
|
|
352
498
|
}
|
|
353
499
|
|
|
@@ -355,7 +501,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
|
|
|
355
501
|
}
|
|
356
502
|
Napi::Value GetTokenString(const Napi::CallbackInfo& info) {
|
|
357
503
|
if (disposed) {
|
|
358
|
-
Napi::Error::New(info.Env(), "
|
|
504
|
+
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
|
|
359
505
|
return info.Env().Undefined();
|
|
360
506
|
}
|
|
361
507
|
|
|
@@ -374,7 +520,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
|
|
|
374
520
|
|
|
375
521
|
Napi::Value GetTokenType(const Napi::CallbackInfo& info) {
|
|
376
522
|
if (disposed) {
|
|
377
|
-
Napi::Error::New(info.Env(), "
|
|
523
|
+
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
|
|
378
524
|
return info.Env().Undefined();
|
|
379
525
|
}
|
|
380
526
|
|
|
@@ -387,6 +533,16 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
|
|
|
387
533
|
|
|
388
534
|
return Napi::Number::From(info.Env(), int32_t(tokenType));
|
|
389
535
|
}
|
|
536
|
+
Napi::Value GetVocabularyType(const Napi::CallbackInfo& info) {
|
|
537
|
+
if (disposed) {
|
|
538
|
+
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
|
|
539
|
+
return info.Env().Undefined();
|
|
540
|
+
}
|
|
541
|
+
|
|
542
|
+
auto vocabularyType = llama_vocab_type(model);
|
|
543
|
+
|
|
544
|
+
return Napi::Number::From(info.Env(), int32_t(vocabularyType));
|
|
545
|
+
}
|
|
390
546
|
Napi::Value ShouldPrependBosToken(const Napi::CallbackInfo& info) {
|
|
391
547
|
const int addBos = llama_add_bos_token(model);
|
|
392
548
|
|
|
@@ -395,6 +551,10 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
|
|
|
395
551
|
return Napi::Boolean::New(info.Env(), shouldPrependBos);
|
|
396
552
|
}
|
|
397
553
|
|
|
554
|
+
Napi::Value GetModelSize(const Napi::CallbackInfo& info) {
|
|
555
|
+
return Napi::Number::From(info.Env(), llama_model_size(model));
|
|
556
|
+
}
|
|
557
|
+
|
|
398
558
|
static void init(Napi::Object exports) {
|
|
399
559
|
exports.Set(
|
|
400
560
|
"AddonModel",
|
|
@@ -402,6 +562,8 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
|
|
|
402
562
|
exports.Env(),
|
|
403
563
|
"AddonModel",
|
|
404
564
|
{
|
|
565
|
+
InstanceMethod("init", &AddonModel::Init),
|
|
566
|
+
InstanceMethod("abortActiveModelLoad", &AddonModel::AbortActiveModelLoad),
|
|
405
567
|
InstanceMethod("tokenize", &AddonModel::Tokenize),
|
|
406
568
|
InstanceMethod("detokenize", &AddonModel::Detokenize),
|
|
407
569
|
InstanceMethod("getTrainContextSize", &AddonModel::GetTrainContextSize),
|
|
@@ -418,7 +580,9 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
|
|
|
418
580
|
InstanceMethod("eotToken", &AddonModel::EotToken),
|
|
419
581
|
InstanceMethod("getTokenString", &AddonModel::GetTokenString),
|
|
420
582
|
InstanceMethod("getTokenType", &AddonModel::GetTokenType),
|
|
583
|
+
InstanceMethod("getVocabularyType", &AddonModel::GetVocabularyType),
|
|
421
584
|
InstanceMethod("shouldPrependBosToken", &AddonModel::ShouldPrependBosToken),
|
|
585
|
+
InstanceMethod("getModelSize", &AddonModel::GetModelSize),
|
|
422
586
|
InstanceMethod("dispose", &AddonModel::Dispose),
|
|
423
587
|
}
|
|
424
588
|
)
|
|
@@ -426,9 +590,166 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
|
|
|
426
590
|
}
|
|
427
591
|
};
|
|
428
592
|
|
|
593
|
+
static bool llamaModelParamsProgressCallback(float progress, void * user_data) {
|
|
594
|
+
AddonModel* addonModel = (AddonModel *) user_data;
|
|
595
|
+
unsigned percentage = (unsigned) (100 * progress);
|
|
596
|
+
|
|
597
|
+
if (percentage > addonModel->modelLoadPercentage) {
|
|
598
|
+
addonModel->modelLoadPercentage = percentage;
|
|
599
|
+
|
|
600
|
+
// original llama.cpp logs
|
|
601
|
+
addonLlamaCppLogCallback(GGML_LOG_LEVEL_INFO, ".", nullptr);
|
|
602
|
+
if (percentage >= 100) {
|
|
603
|
+
addonLlamaCppLogCallback(GGML_LOG_LEVEL_INFO, "\n", nullptr);
|
|
604
|
+
}
|
|
605
|
+
}
|
|
606
|
+
|
|
607
|
+
if (progress > addonModel->rawModelLoadPercentage) {
|
|
608
|
+
addonModel->rawModelLoadPercentage = progress;
|
|
609
|
+
|
|
610
|
+
if (addonModel->onLoadProgressEventCallbackSet) {
|
|
611
|
+
addon_progress_event* data = new addon_progress_event {
|
|
612
|
+
progress
|
|
613
|
+
};
|
|
614
|
+
|
|
615
|
+
auto status = addonModel->addonThreadSafeOnLoadProgressEventCallback.NonBlockingCall(data);
|
|
616
|
+
|
|
617
|
+
if (status != napi_ok) {
|
|
618
|
+
delete data;
|
|
619
|
+
}
|
|
620
|
+
}
|
|
621
|
+
}
|
|
622
|
+
|
|
623
|
+
return !(addonModel->abortModelLoad);
|
|
624
|
+
}
|
|
625
|
+
|
|
626
|
+
class AddonModelLoadModelWorker : public Napi::AsyncWorker {
|
|
627
|
+
public:
|
|
628
|
+
AddonModel* model;
|
|
629
|
+
|
|
630
|
+
AddonModelLoadModelWorker(const Napi::Env& env, AddonModel* model)
|
|
631
|
+
: Napi::AsyncWorker(env, "AddonModelLoadModelWorker"),
|
|
632
|
+
model(model),
|
|
633
|
+
deferred(Napi::Promise::Deferred::New(env)) {
|
|
634
|
+
model->Ref();
|
|
635
|
+
}
|
|
636
|
+
~AddonModelLoadModelWorker() {
|
|
637
|
+
model->Unref();
|
|
638
|
+
}
|
|
639
|
+
|
|
640
|
+
Napi::Promise GetPromise() {
|
|
641
|
+
return deferred.Promise();
|
|
642
|
+
}
|
|
643
|
+
|
|
644
|
+
protected:
|
|
645
|
+
Napi::Promise::Deferred deferred;
|
|
646
|
+
|
|
647
|
+
void Execute() {
|
|
648
|
+
try {
|
|
649
|
+
model->model = llama_load_model_from_file(model->modelPath.c_str(), model->model_params);
|
|
650
|
+
|
|
651
|
+
model->modelLoaded = model->model != nullptr && model->model != NULL;
|
|
652
|
+
} catch (const std::exception& e) {
|
|
653
|
+
SetError(e.what());
|
|
654
|
+
} catch(...) {
|
|
655
|
+
SetError("Unknown error when calling \"llama_load_model_from_file\"");
|
|
656
|
+
}
|
|
657
|
+
}
|
|
658
|
+
void OnOK() {
|
|
659
|
+
if (model->modelLoaded) {
|
|
660
|
+
uint64_t modelSize = llama_model_size(model->model);
|
|
661
|
+
adjustNapiExternalMemoryAdd(Env(), modelSize);
|
|
662
|
+
model->loadedModelSize = modelSize;
|
|
663
|
+
}
|
|
664
|
+
|
|
665
|
+
deferred.Resolve(Napi::Boolean::New(Env(), model->modelLoaded));
|
|
666
|
+
if (model->onLoadProgressEventCallbackSet) {
|
|
667
|
+
model->addonThreadSafeOnLoadProgressEventCallback.Release();
|
|
668
|
+
}
|
|
669
|
+
}
|
|
670
|
+
void OnError(const Napi::Error& err) {
|
|
671
|
+
deferred.Reject(err.Value());
|
|
672
|
+
}
|
|
673
|
+
};
|
|
674
|
+
class AddonModelUnloadModelWorker : public Napi::AsyncWorker {
|
|
675
|
+
public:
|
|
676
|
+
AddonModel* model;
|
|
677
|
+
|
|
678
|
+
AddonModelUnloadModelWorker(const Napi::Env& env, AddonModel* model)
|
|
679
|
+
: Napi::AsyncWorker(env, "AddonModelUnloadModelWorker"),
|
|
680
|
+
model(model),
|
|
681
|
+
deferred(Napi::Promise::Deferred::New(env)) {
|
|
682
|
+
model->Ref();
|
|
683
|
+
}
|
|
684
|
+
~AddonModelUnloadModelWorker() {
|
|
685
|
+
model->Unref();
|
|
686
|
+
}
|
|
687
|
+
|
|
688
|
+
Napi::Promise GetPromise() {
|
|
689
|
+
return deferred.Promise();
|
|
690
|
+
}
|
|
691
|
+
|
|
692
|
+
protected:
|
|
693
|
+
Napi::Promise::Deferred deferred;
|
|
694
|
+
|
|
695
|
+
void Execute() {
|
|
696
|
+
try {
|
|
697
|
+
llama_free_model(model->model);
|
|
698
|
+
model->modelLoaded = false;
|
|
699
|
+
|
|
700
|
+
model->dispose();
|
|
701
|
+
} catch (const std::exception& e) {
|
|
702
|
+
SetError(e.what());
|
|
703
|
+
} catch(...) {
|
|
704
|
+
SetError("Unknown error when calling \"llama_free_model\"");
|
|
705
|
+
}
|
|
706
|
+
}
|
|
707
|
+
void OnOK() {
|
|
708
|
+
adjustNapiExternalMemorySubtract(Env(), model->loadedModelSize);
|
|
709
|
+
model->loadedModelSize = 0;
|
|
710
|
+
|
|
711
|
+
deferred.Resolve(Env().Undefined());
|
|
712
|
+
}
|
|
713
|
+
void OnError(const Napi::Error& err) {
|
|
714
|
+
deferred.Reject(err.Value());
|
|
715
|
+
}
|
|
716
|
+
};
|
|
717
|
+
|
|
718
|
+
Napi::Value AddonModel::Init(const Napi::CallbackInfo& info) {
|
|
719
|
+
if (disposed) {
|
|
720
|
+
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
|
|
721
|
+
return info.Env().Undefined();
|
|
722
|
+
}
|
|
723
|
+
|
|
724
|
+
AddonModelLoadModelWorker* worker = new AddonModelLoadModelWorker(this->Env(), this);
|
|
725
|
+
worker->Queue();
|
|
726
|
+
return worker->GetPromise();
|
|
727
|
+
}
|
|
728
|
+
Napi::Value AddonModel::Dispose(const Napi::CallbackInfo& info) {
|
|
729
|
+
if (disposed) {
|
|
730
|
+
return info.Env().Undefined();
|
|
731
|
+
}
|
|
732
|
+
|
|
733
|
+
if (modelLoaded) {
|
|
734
|
+
modelLoaded = false;
|
|
735
|
+
|
|
736
|
+
AddonModelUnloadModelWorker* worker = new AddonModelUnloadModelWorker(this->Env(), this);
|
|
737
|
+
worker->Queue();
|
|
738
|
+
return worker->GetPromise();
|
|
739
|
+
} else {
|
|
740
|
+
dispose();
|
|
741
|
+
|
|
742
|
+
Napi::Promise::Deferred deferred = Napi::Promise::Deferred::New(info.Env());
|
|
743
|
+
deferred.Resolve(info.Env().Undefined());
|
|
744
|
+
return deferred.Promise();
|
|
745
|
+
}
|
|
746
|
+
}
|
|
747
|
+
|
|
429
748
|
class AddonGrammar : public Napi::ObjectWrap<AddonGrammar> {
|
|
430
749
|
public:
|
|
431
750
|
grammar_parser::parse_state parsed_grammar;
|
|
751
|
+
Napi::Reference<Napi::Object> addonExportsRef;
|
|
752
|
+
bool hasAddonExportsRef = false;
|
|
432
753
|
|
|
433
754
|
AddonGrammar(const Napi::CallbackInfo& info) : Napi::ObjectWrap<AddonGrammar>(info) {
|
|
434
755
|
// Get the model path
|
|
@@ -438,6 +759,11 @@ class AddonGrammar : public Napi::ObjectWrap<AddonGrammar> {
|
|
|
438
759
|
if (info.Length() > 1 && info[1].IsObject()) {
|
|
439
760
|
Napi::Object options = info[1].As<Napi::Object>();
|
|
440
761
|
|
|
762
|
+
if (options.Has("addonExports")) {
|
|
763
|
+
addonExportsRef = Napi::Persistent(options.Get("addonExports").As<Napi::Object>());
|
|
764
|
+
hasAddonExportsRef = true;
|
|
765
|
+
}
|
|
766
|
+
|
|
441
767
|
if (options.Has("printGrammar")) {
|
|
442
768
|
should_print_grammar = options.Get("printGrammar").As<Napi::Boolean>().Value();
|
|
443
769
|
}
|
|
@@ -455,6 +781,13 @@ class AddonGrammar : public Napi::ObjectWrap<AddonGrammar> {
|
|
|
455
781
|
}
|
|
456
782
|
}
|
|
457
783
|
|
|
784
|
+
~AddonGrammar() {
|
|
785
|
+
if (hasAddonExportsRef) {
|
|
786
|
+
addonExportsRef.Unref();
|
|
787
|
+
hasAddonExportsRef = false;
|
|
788
|
+
}
|
|
789
|
+
}
|
|
790
|
+
|
|
458
791
|
static void init(Napi::Object exports) {
|
|
459
792
|
exports.Set("AddonGrammar", DefineClass(exports.Env(), "AddonGrammar", {}));
|
|
460
793
|
}
|
|
@@ -493,9 +826,14 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
|
|
|
493
826
|
llama_context_params context_params;
|
|
494
827
|
llama_context* ctx;
|
|
495
828
|
llama_batch batch;
|
|
829
|
+
uint64_t batchMemorySize = 0;
|
|
496
830
|
bool has_batch = false;
|
|
497
831
|
int32_t batch_n_tokens = 0;
|
|
498
832
|
int n_cur = 0;
|
|
833
|
+
|
|
834
|
+
uint64_t loadedContextMemorySize = 0;
|
|
835
|
+
bool contextLoaded = false;
|
|
836
|
+
|
|
499
837
|
bool disposed = false;
|
|
500
838
|
|
|
501
839
|
AddonContext(const Napi::CallbackInfo& info) : Napi::ObjectWrap<AddonContext>(info) {
|
|
@@ -523,10 +861,15 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
|
|
|
523
861
|
|
|
524
862
|
if (options.Has("batchSize")) {
|
|
525
863
|
context_params.n_batch = options.Get("batchSize").As<Napi::Number>().Uint32Value();
|
|
864
|
+
context_params.n_ubatch = context_params.n_batch; // the batch queue is managed in the JS side, so there's no need for managing it on the C++ side
|
|
526
865
|
}
|
|
527
866
|
|
|
528
|
-
if (options.Has("
|
|
529
|
-
context_params.
|
|
867
|
+
if (options.Has("sequences")) {
|
|
868
|
+
context_params.n_seq_max = options.Get("sequences").As<Napi::Number>().Uint32Value();
|
|
869
|
+
}
|
|
870
|
+
|
|
871
|
+
if (options.Has("embeddings")) {
|
|
872
|
+
context_params.embeddings = options.Get("embeddings").As<Napi::Boolean>().Value();
|
|
530
873
|
}
|
|
531
874
|
|
|
532
875
|
if (options.Has("threads")) {
|
|
@@ -537,9 +880,6 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
|
|
|
537
880
|
context_params.n_threads_batch = resolved_n_threads;
|
|
538
881
|
}
|
|
539
882
|
}
|
|
540
|
-
|
|
541
|
-
ctx = llama_new_context_with_model(model->model, context_params);
|
|
542
|
-
Napi::MemoryManagement::AdjustExternalMemory(Env(), llama_get_state_size(ctx));
|
|
543
883
|
}
|
|
544
884
|
~AddonContext() {
|
|
545
885
|
dispose();
|
|
@@ -550,13 +890,18 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
|
|
|
550
890
|
return;
|
|
551
891
|
}
|
|
552
892
|
|
|
553
|
-
|
|
554
|
-
|
|
893
|
+
disposed = true;
|
|
894
|
+
if (contextLoaded) {
|
|
895
|
+
contextLoaded = false;
|
|
896
|
+
llama_free(ctx);
|
|
897
|
+
|
|
898
|
+
adjustNapiExternalMemorySubtract(Env(), loadedContextMemorySize);
|
|
899
|
+
loadedContextMemorySize = 0;
|
|
900
|
+
}
|
|
901
|
+
|
|
555
902
|
model->Unref();
|
|
556
903
|
|
|
557
904
|
disposeBatch();
|
|
558
|
-
|
|
559
|
-
disposed = true;
|
|
560
905
|
}
|
|
561
906
|
void disposeBatch() {
|
|
562
907
|
if (!has_batch) {
|
|
@@ -566,16 +911,14 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
|
|
|
566
911
|
llama_batch_free(batch);
|
|
567
912
|
has_batch = false;
|
|
568
913
|
batch_n_tokens = 0;
|
|
914
|
+
|
|
915
|
+
adjustNapiExternalMemorySubtract(Env(), batchMemorySize);
|
|
916
|
+
batchMemorySize = 0;
|
|
569
917
|
}
|
|
570
|
-
Napi::Value Dispose(const Napi::CallbackInfo& info) {
|
|
571
|
-
if (disposed) {
|
|
572
|
-
return info.Env().Undefined();
|
|
573
|
-
}
|
|
574
918
|
|
|
575
|
-
|
|
919
|
+
Napi::Value Init(const Napi::CallbackInfo& info);
|
|
920
|
+
Napi::Value Dispose(const Napi::CallbackInfo& info);
|
|
576
921
|
|
|
577
|
-
return info.Env().Undefined();
|
|
578
|
-
}
|
|
579
922
|
Napi::Value GetContextSize(const Napi::CallbackInfo& info) {
|
|
580
923
|
if (disposed) {
|
|
581
924
|
Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
|
|
@@ -600,6 +943,15 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
|
|
|
600
943
|
has_batch = true;
|
|
601
944
|
batch_n_tokens = n_tokens;
|
|
602
945
|
|
|
946
|
+
uint64_t newBatchMemorySize = calculateBatchMemorySize(n_tokens, llama_n_embd(model->model), context_params.n_batch);
|
|
947
|
+
if (newBatchMemorySize > batchMemorySize) {
|
|
948
|
+
adjustNapiExternalMemoryAdd(Env(), newBatchMemorySize - batchMemorySize);
|
|
949
|
+
batchMemorySize = newBatchMemorySize;
|
|
950
|
+
} else if (newBatchMemorySize < batchMemorySize) {
|
|
951
|
+
adjustNapiExternalMemorySubtract(Env(), batchMemorySize - newBatchMemorySize);
|
|
952
|
+
batchMemorySize = newBatchMemorySize;
|
|
953
|
+
}
|
|
954
|
+
|
|
603
955
|
return info.Env().Undefined();
|
|
604
956
|
}
|
|
605
957
|
Napi::Value DisposeBatch(const Napi::CallbackInfo& info) {
|
|
@@ -648,7 +1000,12 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
|
|
|
648
1000
|
|
|
649
1001
|
int32_t sequenceId = info[0].As<Napi::Number>().Int32Value();
|
|
650
1002
|
|
|
651
|
-
llama_kv_cache_seq_rm(ctx, sequenceId, -1, -1);
|
|
1003
|
+
bool result = llama_kv_cache_seq_rm(ctx, sequenceId, -1, -1);
|
|
1004
|
+
|
|
1005
|
+
if (!result) {
|
|
1006
|
+
Napi::Error::New(info.Env(), "Failed to dispose sequence").ThrowAsJavaScriptException();
|
|
1007
|
+
return info.Env().Undefined();
|
|
1008
|
+
}
|
|
652
1009
|
|
|
653
1010
|
return info.Env().Undefined();
|
|
654
1011
|
}
|
|
@@ -662,9 +1019,9 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
|
|
|
662
1019
|
int32_t startPos = info[1].As<Napi::Number>().Int32Value();
|
|
663
1020
|
int32_t endPos = info[2].As<Napi::Number>().Int32Value();
|
|
664
1021
|
|
|
665
|
-
llama_kv_cache_seq_rm(ctx, sequenceId, startPos, endPos);
|
|
1022
|
+
bool result = llama_kv_cache_seq_rm(ctx, sequenceId, startPos, endPos);
|
|
666
1023
|
|
|
667
|
-
return info.Env()
|
|
1024
|
+
return Napi::Boolean::New(info.Env(), result);
|
|
668
1025
|
}
|
|
669
1026
|
Napi::Value ShiftSequenceTokenCells(const Napi::CallbackInfo& info) {
|
|
670
1027
|
if (disposed) {
|
|
@@ -702,8 +1059,23 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
|
|
|
702
1059
|
return info.Env().Undefined();
|
|
703
1060
|
}
|
|
704
1061
|
|
|
1062
|
+
int32_t inputTokensLength = info[0].As<Napi::Number>().Int32Value();
|
|
1063
|
+
|
|
1064
|
+
if (inputTokensLength <= 0) {
|
|
1065
|
+
Napi::Error::New(info.Env(), "Invalid input tokens length").ThrowAsJavaScriptException();
|
|
1066
|
+
return info.Env().Undefined();
|
|
1067
|
+
}
|
|
1068
|
+
|
|
705
1069
|
const int n_embd = llama_n_embd(model->model);
|
|
706
|
-
const auto* embeddings =
|
|
1070
|
+
const auto* embeddings = llama_get_embeddings_seq(ctx, 0);
|
|
1071
|
+
if (embeddings == NULL) {
|
|
1072
|
+
embeddings = llama_get_embeddings_ith(ctx, inputTokensLength - 1);
|
|
1073
|
+
|
|
1074
|
+
if (embeddings == NULL) {
|
|
1075
|
+
Napi::Error::New(info.Env(), std::string("Failed to get embeddings for token ") + std::to_string(inputTokensLength - 1)).ThrowAsJavaScriptException();
|
|
1076
|
+
return info.Env().Undefined();
|
|
1077
|
+
}
|
|
1078
|
+
}
|
|
707
1079
|
|
|
708
1080
|
Napi::Float64Array result = Napi::Float64Array::New(info.Env(), n_embd);
|
|
709
1081
|
for (size_t i = 0; i < n_embd; ++i) {
|
|
@@ -713,6 +1085,15 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
|
|
|
713
1085
|
return result;
|
|
714
1086
|
}
|
|
715
1087
|
|
|
1088
|
+
Napi::Value GetStateSize(const Napi::CallbackInfo& info) {
|
|
1089
|
+
if (disposed) {
|
|
1090
|
+
Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
|
|
1091
|
+
return info.Env().Undefined();
|
|
1092
|
+
}
|
|
1093
|
+
|
|
1094
|
+
return Napi::Number::From(info.Env(), llama_get_state_size(ctx));
|
|
1095
|
+
}
|
|
1096
|
+
|
|
716
1097
|
Napi::Value PrintTimings(const Napi::CallbackInfo& info) {
|
|
717
1098
|
llama_print_timings(ctx);
|
|
718
1099
|
llama_reset_timings(ctx);
|
|
@@ -726,6 +1107,7 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
|
|
|
726
1107
|
exports.Env(),
|
|
727
1108
|
"AddonContext",
|
|
728
1109
|
{
|
|
1110
|
+
InstanceMethod("init", &AddonContext::Init),
|
|
729
1111
|
InstanceMethod("getContextSize", &AddonContext::GetContextSize),
|
|
730
1112
|
InstanceMethod("initBatch", &AddonContext::InitBatch),
|
|
731
1113
|
InstanceMethod("addToBatch", &AddonContext::AddToBatch),
|
|
@@ -736,6 +1118,7 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
|
|
|
736
1118
|
InstanceMethod("sampleToken", &AddonContext::SampleToken),
|
|
737
1119
|
InstanceMethod("acceptGrammarEvaluationStateToken", &AddonContext::AcceptGrammarEvaluationStateToken),
|
|
738
1120
|
InstanceMethod("getEmbedding", &AddonContext::GetEmbedding),
|
|
1121
|
+
InstanceMethod("getStateSize", &AddonContext::GetStateSize),
|
|
739
1122
|
InstanceMethod("printTimings", &AddonContext::PrintTimings),
|
|
740
1123
|
InstanceMethod("dispose", &AddonContext::Dispose),
|
|
741
1124
|
}
|
|
@@ -745,53 +1128,198 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
|
|
|
745
1128
|
};
|
|
746
1129
|
|
|
747
1130
|
|
|
748
|
-
class AddonContextDecodeBatchWorker : Napi::AsyncWorker
|
|
1131
|
+
class AddonContextDecodeBatchWorker : public Napi::AsyncWorker {
|
|
749
1132
|
public:
|
|
750
1133
|
AddonContext* ctx;
|
|
751
1134
|
|
|
752
|
-
AddonContextDecodeBatchWorker(const Napi::
|
|
753
|
-
: Napi::AsyncWorker(
|
|
1135
|
+
AddonContextDecodeBatchWorker(const Napi::Env& env, AddonContext* ctx)
|
|
1136
|
+
: Napi::AsyncWorker(env, "AddonContextDecodeBatchWorker"),
|
|
754
1137
|
ctx(ctx),
|
|
755
|
-
Napi::Promise::Deferred(
|
|
1138
|
+
deferred(Napi::Promise::Deferred::New(env)) {
|
|
756
1139
|
ctx->Ref();
|
|
757
1140
|
}
|
|
758
1141
|
~AddonContextDecodeBatchWorker() {
|
|
759
1142
|
ctx->Unref();
|
|
760
1143
|
}
|
|
761
|
-
|
|
762
|
-
|
|
1144
|
+
|
|
1145
|
+
Napi::Promise GetPromise() {
|
|
1146
|
+
return deferred.Promise();
|
|
1147
|
+
}
|
|
763
1148
|
|
|
764
1149
|
protected:
|
|
1150
|
+
Napi::Promise::Deferred deferred;
|
|
1151
|
+
|
|
765
1152
|
void Execute() {
|
|
766
|
-
|
|
767
|
-
|
|
768
|
-
|
|
769
|
-
|
|
770
|
-
if (r
|
|
771
|
-
|
|
772
|
-
|
|
773
|
-
|
|
1153
|
+
try {
|
|
1154
|
+
// Perform the evaluation using llama_decode.
|
|
1155
|
+
int r = llama_decode(ctx->ctx, ctx->batch);
|
|
1156
|
+
|
|
1157
|
+
if (r != 0) {
|
|
1158
|
+
if (r == 1) {
|
|
1159
|
+
SetError("could not find a KV slot for the batch (try reducing the size of the batch or increase the context)");
|
|
1160
|
+
} else {
|
|
1161
|
+
SetError("Eval has failed");
|
|
1162
|
+
}
|
|
1163
|
+
|
|
1164
|
+
return;
|
|
774
1165
|
}
|
|
775
1166
|
|
|
776
|
-
|
|
1167
|
+
llama_synchronize(ctx->ctx);
|
|
1168
|
+
} catch (const std::exception& e) {
|
|
1169
|
+
SetError(e.what());
|
|
1170
|
+
} catch(...) {
|
|
1171
|
+
SetError("Unknown error when calling \"llama_decode\"");
|
|
777
1172
|
}
|
|
778
1173
|
}
|
|
779
1174
|
void OnOK() {
|
|
780
|
-
|
|
781
|
-
Napi::Promise::Deferred::Resolve(env.Undefined());
|
|
1175
|
+
deferred.Resolve(Env().Undefined());
|
|
782
1176
|
}
|
|
783
1177
|
void OnError(const Napi::Error& err) {
|
|
784
|
-
|
|
1178
|
+
deferred.Reject(err.Value());
|
|
785
1179
|
}
|
|
786
1180
|
};
|
|
787
1181
|
|
|
788
1182
|
Napi::Value AddonContext::DecodeBatch(const Napi::CallbackInfo& info) {
|
|
789
|
-
AddonContextDecodeBatchWorker* worker = new AddonContextDecodeBatchWorker(info, this);
|
|
1183
|
+
AddonContextDecodeBatchWorker* worker = new AddonContextDecodeBatchWorker(info.Env(), this);
|
|
790
1184
|
worker->Queue();
|
|
791
|
-
return worker->
|
|
1185
|
+
return worker->GetPromise();
|
|
792
1186
|
}
|
|
793
1187
|
|
|
794
|
-
class
|
|
1188
|
+
class AddonContextLoadContextWorker : public Napi::AsyncWorker {
|
|
1189
|
+
public:
|
|
1190
|
+
AddonContext* context;
|
|
1191
|
+
|
|
1192
|
+
AddonContextLoadContextWorker(const Napi::Env& env, AddonContext* context)
|
|
1193
|
+
: Napi::AsyncWorker(env, "AddonContextLoadContextWorker"),
|
|
1194
|
+
context(context),
|
|
1195
|
+
deferred(Napi::Promise::Deferred::New(env)) {
|
|
1196
|
+
context->Ref();
|
|
1197
|
+
}
|
|
1198
|
+
~AddonContextLoadContextWorker() {
|
|
1199
|
+
context->Unref();
|
|
1200
|
+
}
|
|
1201
|
+
|
|
1202
|
+
Napi::Promise GetPromise() {
|
|
1203
|
+
return deferred.Promise();
|
|
1204
|
+
}
|
|
1205
|
+
|
|
1206
|
+
protected:
|
|
1207
|
+
Napi::Promise::Deferred deferred;
|
|
1208
|
+
|
|
1209
|
+
void Execute() {
|
|
1210
|
+
try {
|
|
1211
|
+
context->ctx = llama_new_context_with_model(context->model->model, context->context_params);
|
|
1212
|
+
|
|
1213
|
+
context->contextLoaded = context->ctx != nullptr && context->ctx != NULL;
|
|
1214
|
+
} catch (const std::exception& e) {
|
|
1215
|
+
SetError(e.what());
|
|
1216
|
+
} catch(...) {
|
|
1217
|
+
SetError("Unknown error when calling \"llama_new_context_with_model\"");
|
|
1218
|
+
}
|
|
1219
|
+
}
|
|
1220
|
+
void OnOK() {
|
|
1221
|
+
if (context->contextLoaded) {
|
|
1222
|
+
uint64_t contextMemorySize = llama_get_state_size(context->ctx);
|
|
1223
|
+
adjustNapiExternalMemoryAdd(Env(), contextMemorySize);
|
|
1224
|
+
context->loadedContextMemorySize = contextMemorySize;
|
|
1225
|
+
}
|
|
1226
|
+
|
|
1227
|
+
deferred.Resolve(Napi::Boolean::New(Env(), context->contextLoaded));
|
|
1228
|
+
}
|
|
1229
|
+
void OnError(const Napi::Error& err) {
|
|
1230
|
+
deferred.Reject(err.Value());
|
|
1231
|
+
}
|
|
1232
|
+
};
|
|
1233
|
+
class AddonContextUnloadContextWorker : public Napi::AsyncWorker {
|
|
1234
|
+
public:
|
|
1235
|
+
AddonContext* context;
|
|
1236
|
+
|
|
1237
|
+
AddonContextUnloadContextWorker(const Napi::Env& env, AddonContext* context)
|
|
1238
|
+
: Napi::AsyncWorker(env, "AddonContextUnloadContextWorker"),
|
|
1239
|
+
context(context),
|
|
1240
|
+
deferred(Napi::Promise::Deferred::New(env)) {
|
|
1241
|
+
context->Ref();
|
|
1242
|
+
}
|
|
1243
|
+
~AddonContextUnloadContextWorker() {
|
|
1244
|
+
context->Unref();
|
|
1245
|
+
}
|
|
1246
|
+
|
|
1247
|
+
Napi::Promise GetPromise() {
|
|
1248
|
+
return deferred.Promise();
|
|
1249
|
+
}
|
|
1250
|
+
|
|
1251
|
+
protected:
|
|
1252
|
+
Napi::Promise::Deferred deferred;
|
|
1253
|
+
|
|
1254
|
+
void Execute() {
|
|
1255
|
+
try {
|
|
1256
|
+
llama_free(context->ctx);
|
|
1257
|
+
context->contextLoaded = false;
|
|
1258
|
+
|
|
1259
|
+
try {
|
|
1260
|
+
if (context->has_batch) {
|
|
1261
|
+
llama_batch_free(context->batch);
|
|
1262
|
+
context->has_batch = false;
|
|
1263
|
+
context->batch_n_tokens = 0;
|
|
1264
|
+
}
|
|
1265
|
+
|
|
1266
|
+
context->dispose();
|
|
1267
|
+
} catch (const std::exception& e) {
|
|
1268
|
+
SetError(e.what());
|
|
1269
|
+
} catch(...) {
|
|
1270
|
+
SetError("Unknown error when calling \"llama_batch_free\"");
|
|
1271
|
+
}
|
|
1272
|
+
} catch (const std::exception& e) {
|
|
1273
|
+
SetError(e.what());
|
|
1274
|
+
} catch(...) {
|
|
1275
|
+
SetError("Unknown error when calling \"llama_free\"");
|
|
1276
|
+
}
|
|
1277
|
+
}
|
|
1278
|
+
void OnOK() {
|
|
1279
|
+
adjustNapiExternalMemorySubtract(Env(), context->loadedContextMemorySize);
|
|
1280
|
+
context->loadedContextMemorySize = 0;
|
|
1281
|
+
|
|
1282
|
+
adjustNapiExternalMemorySubtract(Env(), context->batchMemorySize);
|
|
1283
|
+
context->batchMemorySize = 0;
|
|
1284
|
+
|
|
1285
|
+
deferred.Resolve(Env().Undefined());
|
|
1286
|
+
}
|
|
1287
|
+
void OnError(const Napi::Error& err) {
|
|
1288
|
+
deferred.Reject(err.Value());
|
|
1289
|
+
}
|
|
1290
|
+
};
|
|
1291
|
+
|
|
1292
|
+
Napi::Value AddonContext::Init(const Napi::CallbackInfo& info) {
|
|
1293
|
+
if (disposed) {
|
|
1294
|
+
Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
|
|
1295
|
+
return info.Env().Undefined();
|
|
1296
|
+
}
|
|
1297
|
+
|
|
1298
|
+
AddonContextLoadContextWorker* worker = new AddonContextLoadContextWorker(this->Env(), this);
|
|
1299
|
+
worker->Queue();
|
|
1300
|
+
return worker->GetPromise();
|
|
1301
|
+
}
|
|
1302
|
+
Napi::Value AddonContext::Dispose(const Napi::CallbackInfo& info) {
|
|
1303
|
+
if (disposed) {
|
|
1304
|
+
return info.Env().Undefined();
|
|
1305
|
+
}
|
|
1306
|
+
|
|
1307
|
+
if (contextLoaded) {
|
|
1308
|
+
contextLoaded = false;
|
|
1309
|
+
|
|
1310
|
+
AddonContextUnloadContextWorker* worker = new AddonContextUnloadContextWorker(this->Env(), this);
|
|
1311
|
+
worker->Queue();
|
|
1312
|
+
return worker->GetPromise();
|
|
1313
|
+
} else {
|
|
1314
|
+
dispose();
|
|
1315
|
+
|
|
1316
|
+
Napi::Promise::Deferred deferred = Napi::Promise::Deferred::New(info.Env());
|
|
1317
|
+
deferred.Resolve(info.Env().Undefined());
|
|
1318
|
+
return deferred.Promise();
|
|
1319
|
+
}
|
|
1320
|
+
}
|
|
1321
|
+
|
|
1322
|
+
class AddonContextSampleTokenWorker : public Napi::AsyncWorker {
|
|
795
1323
|
public:
|
|
796
1324
|
AddonContext* ctx;
|
|
797
1325
|
AddonGrammarEvaluationState* grammar_evaluation_state;
|
|
@@ -811,7 +1339,7 @@ class AddonContextSampleTokenWorker : Napi::AsyncWorker, Napi::Promise::Deferred
|
|
|
811
1339
|
AddonContextSampleTokenWorker(const Napi::CallbackInfo& info, AddonContext* ctx)
|
|
812
1340
|
: Napi::AsyncWorker(info.Env(), "AddonContextSampleTokenWorker"),
|
|
813
1341
|
ctx(ctx),
|
|
814
|
-
Napi::Promise::Deferred(info.Env()) {
|
|
1342
|
+
deferred(Napi::Promise::Deferred::New(info.Env())) {
|
|
815
1343
|
ctx->Ref();
|
|
816
1344
|
|
|
817
1345
|
batchLogitIndex = info[0].As<Napi::Number>().Int32Value();
|
|
@@ -874,11 +1402,25 @@ class AddonContextSampleTokenWorker : Napi::AsyncWorker, Napi::Promise::Deferred
|
|
|
874
1402
|
use_grammar = false;
|
|
875
1403
|
}
|
|
876
1404
|
}
|
|
877
|
-
|
|
878
|
-
|
|
1405
|
+
|
|
1406
|
+
        // Promise handed back to JS; resolved with the sampled token id in
        // OnOK, rejected in OnError.
        Napi::Promise GetPromise() {
            return deferred.Promise();
        }
|
|
879
1409
|
|
|
880
1410
|
protected:
|
|
1411
|
+
Napi::Promise::Deferred deferred;
|
|
1412
|
+
|
|
881
1413
|
        // Runs on the worker thread (Napi::AsyncWorker contract): convert any
        // C++ exception thrown by SampleToken() into an async error so it
        // surfaces through OnError instead of crashing the process.
        void Execute() {
            try {
                SampleToken();
            } catch (const std::exception& e) {
                SetError(e.what());
            } catch(...) {
                // Non-std exception — no message available.
                SetError("Unknown error when calling \"SampleToken\"");
            }
        }
|
|
1422
|
+
|
|
1423
|
+
void SampleToken() {
|
|
882
1424
|
llama_token new_token_id = 0;
|
|
883
1425
|
|
|
884
1426
|
// Select the best prediction.
|
|
@@ -940,25 +1482,73 @@ class AddonContextSampleTokenWorker : Napi::AsyncWorker, Napi::Promise::Deferred
|
|
|
940
1482
|
result = new_token_id;
|
|
941
1483
|
}
|
|
942
1484
|
void OnOK() {
|
|
943
|
-
Napi::
|
|
944
|
-
|
|
945
|
-
Napi::Promise::Deferred::Resolve(resultValue);
|
|
1485
|
+
Napi::Number resultValue = Napi::Number::New(Env(), static_cast<uint32_t>(result));
|
|
1486
|
+
deferred.Resolve(resultValue);
|
|
946
1487
|
}
|
|
947
1488
|
        // Back on the JS thread: forward the async error to the awaiting promise.
        void OnError(const Napi::Error& err) {
            deferred.Reject(err.Value());
        }
|
|
950
1491
|
};
|
|
951
1492
|
|
|
952
1493
|
// Queues asynchronous token sampling for this context.
// Returns a promise that resolves with the sampled token id.
Napi::Value AddonContext::SampleToken(const Napi::CallbackInfo& info) {
    auto* sampleWorker = new AddonContextSampleTokenWorker(info, this);
    sampleWorker->Queue();
    return sampleWorker->GetPromise();
}
|
|
957
1498
|
|
|
958
1499
|
// Returns llama.cpp's system/feature report string (from
// llama_print_system_info) to JS.
Napi::Value systemInfo(const Napi::CallbackInfo& info) {
    const char* sysInfoText = llama_print_system_info();
    return Napi::String::From(info.Env(), sysInfoText);
}
|
|
961
1502
|
|
|
1503
|
+
// Reports whether this llama.cpp build supports offloading layers to a GPU.
Napi::Value addonGetSupportsGpuOffloading(const Napi::CallbackInfo& info) {
    const bool supported = llama_supports_gpu_offload();
    return Napi::Boolean::New(info.Env(), supported);
}
|
|
1506
|
+
|
|
1507
|
+
// Reports whether this llama.cpp build supports memory-mapping model files.
Napi::Value addonGetSupportsMmap(const Napi::CallbackInfo& info) {
    const bool supported = llama_supports_mmap();
    return Napi::Boolean::New(info.Env(), supported);
}
|
|
1510
|
+
|
|
1511
|
+
// Reports whether this llama.cpp build supports locking model memory (mlock).
Napi::Value addonGetSupportsMlock(const Napi::CallbackInfo& info) {
    const bool supported = llama_supports_mlock();
    return Napi::Boolean::New(info.Env(), supported);
}
|
|
1514
|
+
|
|
1515
|
+
// Returns the block size (elements per quantization block) of the given
// ggml type id, or `undefined` when the id is out of range.
Napi::Value addonGetBlockSizeForGgmlType(const Napi::CallbackInfo& info) {
    const int ggmlType = info[0].As<Napi::Number>().Int32Value();

    // Valid ggml_type values are 0..GGML_TYPE_COUNT-1; GGML_TYPE_COUNT itself
    // is one past the last valid type, so it must be rejected as well
    // (the previous `>` comparison was off by one and let it through,
    // which would read past ggml's internal type-traits table).
    if (ggmlType < 0 || ggmlType >= GGML_TYPE_COUNT) {
        return info.Env().Undefined();
    }

    const auto blockSize = ggml_blck_size(static_cast<ggml_type>(ggmlType));

    return Napi::Number::New(info.Env(), blockSize);
}
|
|
1526
|
+
|
|
1527
|
+
// Returns the byte size of one quantization block of the given ggml type id,
// or `undefined` when the id is out of range.
Napi::Value addonGetTypeSizeForGgmlType(const Napi::CallbackInfo& info) {
    const int ggmlType = info[0].As<Napi::Number>().Int32Value();

    // Valid ggml_type values are 0..GGML_TYPE_COUNT-1; GGML_TYPE_COUNT itself
    // is one past the last valid type, so it must be rejected as well
    // (the previous `>` comparison was off by one and let it through,
    // which would read past ggml's internal type-traits table).
    if (ggmlType < 0 || ggmlType >= GGML_TYPE_COUNT) {
        return info.Env().Undefined();
    }

    const auto typeSize = ggml_type_size(static_cast<ggml_type>(ggmlType));

    return Napi::Number::New(info.Env(), typeSize);
}
|
|
1538
|
+
|
|
1539
|
+
// Exposes ggml/llama compile-time constants and struct sizes to JS
// as a plain object of numbers.
Napi::Value addonGetConsts(const Napi::CallbackInfo& info) {
    const auto env = info.Env();
    Napi::Object consts = Napi::Object::New(env);

    // Small helper so each constant is a single, uniform line.
    const auto setNumber = [&](const char* key, double value) {
        consts.Set(key, Napi::Number::New(env, value));
    };

    setNumber("ggmlMaxDims", GGML_MAX_DIMS);
    setNumber("ggmlTypeF16Size", ggml_type_size(GGML_TYPE_F16));
    setNumber("ggmlTypeF32Size", ggml_type_size(GGML_TYPE_F32));
    setNumber("ggmlTensorOverhead", ggml_tensor_overhead());
    setNumber("llamaMaxRngState", LLAMA_MAX_RNG_STATE);
    setNumber("llamaPosSize", sizeof(llama_pos));
    setNumber("llamaSeqIdSize", sizeof(llama_seq_id));

    return consts;
}
|
|
1551
|
+
|
|
962
1552
|
int addonGetGgmlLogLevelNumber(ggml_log_level level) {
|
|
963
1553
|
switch (level) {
|
|
964
1554
|
case GGML_LOG_LEVEL_ERROR: return 2;
|
|
@@ -1025,6 +1615,9 @@ static void addonLlamaCppLogCallback(ggml_log_level level, const char* text, voi
|
|
|
1025
1615
|
|
|
1026
1616
|
if (status == napi_ok) {
|
|
1027
1617
|
return;
|
|
1618
|
+
} else {
|
|
1619
|
+
delete stringStream;
|
|
1620
|
+
delete data;
|
|
1028
1621
|
}
|
|
1029
1622
|
}
|
|
1030
1623
|
|
|
@@ -1082,38 +1675,150 @@ Napi::Value setLoggerLogLevel(const Napi::CallbackInfo& info) {
|
|
|
1082
1675
|
return info.Env().Undefined();
|
|
1083
1676
|
}
|
|
1084
1677
|
|
|
1678
|
+
// Async worker that initializes the llama.cpp backend off the JS thread and
// settles a promise when done.
class AddonBackendLoadWorker : public Napi::AsyncWorker {
    public:
        AddonBackendLoadWorker(const Napi::Env& env)
            : Napi::AsyncWorker(env, "AddonBackendLoadWorker"),
              deferred(Napi::Promise::Deferred::New(env)) {
        }
        ~AddonBackendLoadWorker() {
        }

        // Promise handed back to JS; resolved in OnOK, rejected in OnError.
        Napi::Promise GetPromise() {
            return deferred.Promise();
        }

    protected:
        Napi::Promise::Deferred deferred;

        // Runs on the worker thread (Napi::AsyncWorker contract).
        void Execute() {
            try {
                llama_backend_init();

                try {
                    if (backendDisposed) {
                        // Dispose was requested while init was running —
                        // undo the initialization immediately instead of
                        // marking the backend as live.
                        llama_backend_free();
                    } else {
                        backendInitialized = true;
                    }
                } catch (const std::exception& e) {
                    SetError(e.what());
                } catch(...) {
                    // Non-std exception — no message available.
                    SetError("Unknown error when calling \"llama_backend_free\"");
                }
            } catch (const std::exception& e) {
                SetError(e.what());
            } catch(...) {
                // Non-std exception — no message available.
                SetError("Unknown error when calling \"llama_backend_init\"");
            }
        }
        void OnOK() {
            deferred.Resolve(Env().Undefined());
        }
        void OnError(const Napi::Error& err) {
            deferred.Reject(err.Value());
        }
};
|
|
1722
|
+
|
|
1723
|
+
|
|
1724
|
+
// Async worker that frees the llama.cpp backend off the JS thread and
// settles a promise when done.
class AddonBackendUnloadWorker : public Napi::AsyncWorker {
    public:
        AddonBackendUnloadWorker(const Napi::Env& env)
            : Napi::AsyncWorker(env, "AddonBackendUnloadWorker"),
              deferred(Napi::Promise::Deferred::New(env)) {
        }
        ~AddonBackendUnloadWorker() {
        }

        // Promise handed back to JS; resolved in OnOK, rejected in OnError.
        Napi::Promise GetPromise() {
            return deferred.Promise();
        }

    protected:
        Napi::Promise::Deferred deferred;

        // Runs on the worker thread (Napi::AsyncWorker contract).
        void Execute() {
            try {
                // Free at most once: clear the flag before freeing so other
                // paths (e.g. the module finalizer) see the backend as down.
                if (backendInitialized) {
                    backendInitialized = false;
                    llama_backend_free();
                }
            } catch (const std::exception& e) {
                SetError(e.what());
            } catch(...) {
                // Non-std exception — no message available.
                SetError("Unknown error when calling \"llama_backend_free\"");
            }
        }
        void OnOK() {
            deferred.Resolve(Env().Undefined());
        }
        void OnError(const Napi::Error& err) {
            deferred.Reject(err.Value());
        }
};
|
|
1759
|
+
|
|
1085
1760
|
// Initializes the llama.cpp backend asynchronously.
// Returns an already-resolved promise when the backend is up, otherwise
// queues a load worker and returns its promise.
Napi::Value addonInit(const Napi::CallbackInfo& info) {
    if (backendInitialized) {
        auto alreadyDone = Napi::Promise::Deferred::New(info.Env());
        alreadyDone.Resolve(info.Env().Undefined());
        return alreadyDone.Promise();
    }

    auto* loadWorker = new AddonBackendLoadWorker(info.Env());
    loadWorker->Queue();
    return loadWorker->GetPromise();
}
|
|
1092
1771
|
|
|
1093
|
-
|
|
1772
|
+
// Tears down the llama.cpp backend asynchronously.
// Returns an already-resolved promise when disposal happened before,
// otherwise marks the backend disposed and queues an unload worker.
Napi::Value addonDispose(const Napi::CallbackInfo& info) {
    if (backendDisposed) {
        auto alreadyDone = Napi::Promise::Deferred::New(info.Env());
        alreadyDone.Resolve(info.Env().Undefined());
        return alreadyDone.Promise();
    }

    backendDisposed = true;

    auto* unloadWorker = new AddonBackendUnloadWorker(info.Env());
    unloadWorker->Queue();
    return unloadWorker->GetPromise();
}
|
|
1095
1785
|
|
|
1096
1786
|
// Module finalizer: frees the llama.cpp backend on addon teardown unless it
// was already disposed explicitly. The `data` argument is unused.
static void addonFreeLlamaBackend(Napi::Env env, int* data) {
    const bool alreadyDisposed = backendDisposed;
    backendDisposed = true;
    if (alreadyDisposed) {
        return;
    }

    if (backendInitialized) {
        backendInitialized = false;
        llama_backend_free();
    }
}
|
|
1102
1797
|
|
|
1103
1798
|
Napi::Object registerCallback(Napi::Env env, Napi::Object exports) {
|
|
1104
1799
|
exports.DefineProperties({
|
|
1105
1800
|
Napi::PropertyDescriptor::Function("systemInfo", systemInfo),
|
|
1801
|
+
Napi::PropertyDescriptor::Function("getSupportsGpuOffloading", addonGetSupportsGpuOffloading),
|
|
1802
|
+
Napi::PropertyDescriptor::Function("getSupportsMmap", addonGetSupportsMmap),
|
|
1803
|
+
Napi::PropertyDescriptor::Function("getSupportsMlock", addonGetSupportsMlock),
|
|
1804
|
+
Napi::PropertyDescriptor::Function("getBlockSizeForGgmlType", addonGetBlockSizeForGgmlType),
|
|
1805
|
+
Napi::PropertyDescriptor::Function("getTypeSizeForGgmlType", addonGetTypeSizeForGgmlType),
|
|
1806
|
+
Napi::PropertyDescriptor::Function("getConsts", addonGetConsts),
|
|
1106
1807
|
Napi::PropertyDescriptor::Function("setLogger", setLogger),
|
|
1107
1808
|
Napi::PropertyDescriptor::Function("setLoggerLogLevel", setLoggerLogLevel),
|
|
1108
1809
|
Napi::PropertyDescriptor::Function("getGpuVramInfo", getGpuVramInfo),
|
|
1810
|
+
Napi::PropertyDescriptor::Function("getGpuDeviceInfo", getGpuDeviceInfo),
|
|
1109
1811
|
Napi::PropertyDescriptor::Function("getGpuType", getGpuType),
|
|
1110
1812
|
Napi::PropertyDescriptor::Function("init", addonInit),
|
|
1813
|
+
Napi::PropertyDescriptor::Function("dispose", addonDispose),
|
|
1111
1814
|
});
|
|
1112
1815
|
AddonModel::init(exports);
|
|
1113
1816
|
AddonGrammar::init(exports);
|
|
1114
1817
|
AddonGrammarEvaluationState::init(exports);
|
|
1115
1818
|
AddonContext::init(exports);
|
|
1116
1819
|
|
|
1820
|
+
llama_log_set(addonLlamaCppLogCallback, nullptr);
|
|
1821
|
+
|
|
1117
1822
|
exports.AddFinalizer(addonFreeLlamaBackend, static_cast<int*>(nullptr));
|
|
1118
1823
|
|
|
1119
1824
|
return exports;
|