node-llama-cpp 3.0.0-beta.2 → 3.0.0-beta.21
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +14 -11
- package/dist/ChatWrapper.d.ts +2 -15
- package/dist/ChatWrapper.js +28 -33
- package/dist/ChatWrapper.js.map +1 -1
- package/dist/apiDocsOverrides.d.ts +1 -0
- package/dist/apiDocsOverrides.js +5 -0
- package/dist/apiDocsOverrides.js.map +1 -0
- package/dist/{utils/getBin.d.ts → bindings/AddonTypes.d.ts} +54 -7
- package/dist/bindings/AddonTypes.js +2 -0
- package/dist/bindings/AddonTypes.js.map +1 -0
- package/dist/bindings/Llama.d.ts +47 -0
- package/dist/bindings/Llama.js +343 -0
- package/dist/bindings/Llama.js.map +1 -0
- package/dist/bindings/consts.d.ts +2 -0
- package/dist/bindings/consts.js +11 -0
- package/dist/bindings/consts.js.map +1 -0
- package/dist/bindings/getLlama.d.ts +145 -0
- package/dist/bindings/getLlama.js +389 -0
- package/dist/bindings/getLlama.js.map +1 -0
- package/dist/bindings/types.d.ts +55 -0
- package/dist/bindings/types.js +77 -0
- package/dist/bindings/types.js.map +1 -0
- package/dist/bindings/utils/MemoryOrchestrator.d.ts +21 -0
- package/dist/bindings/utils/MemoryOrchestrator.js +49 -0
- package/dist/bindings/utils/MemoryOrchestrator.js.map +1 -0
- package/dist/bindings/utils/NoBinaryFoundError.d.ts +2 -0
- package/dist/bindings/utils/NoBinaryFoundError.js +7 -0
- package/dist/bindings/utils/NoBinaryFoundError.js.map +1 -0
- package/dist/bindings/utils/asyncEvery.d.ts +5 -0
- package/dist/bindings/utils/asyncEvery.js +15 -0
- package/dist/bindings/utils/asyncEvery.js.map +1 -0
- package/dist/bindings/utils/asyncSome.d.ts +5 -0
- package/dist/bindings/utils/asyncSome.js +27 -0
- package/dist/bindings/utils/asyncSome.js.map +1 -0
- package/dist/{utils → bindings/utils}/binariesGithubRelease.js +1 -1
- package/dist/bindings/utils/binariesGithubRelease.js.map +1 -0
- package/dist/bindings/utils/clearAllLocalBuilds.d.ts +1 -0
- package/dist/bindings/utils/clearAllLocalBuilds.js +47 -0
- package/dist/bindings/utils/clearAllLocalBuilds.js.map +1 -0
- package/dist/bindings/utils/cloneLlamaCppRepo.d.ts +11 -0
- package/dist/bindings/utils/cloneLlamaCppRepo.js +166 -0
- package/dist/bindings/utils/cloneLlamaCppRepo.js.map +1 -0
- package/dist/bindings/utils/compileLLamaCpp.d.ts +15 -0
- package/dist/bindings/utils/compileLLamaCpp.js +221 -0
- package/dist/bindings/utils/compileLLamaCpp.js.map +1 -0
- package/dist/bindings/utils/detectAvailableComputeLayers.d.ts +14 -0
- package/dist/bindings/utils/detectAvailableComputeLayers.js +304 -0
- package/dist/bindings/utils/detectAvailableComputeLayers.js.map +1 -0
- package/dist/bindings/utils/detectGlibc.d.ts +4 -0
- package/dist/bindings/utils/detectGlibc.js +46 -0
- package/dist/bindings/utils/detectGlibc.js.map +1 -0
- package/dist/bindings/utils/getBestComputeLayersAvailable.d.ts +9 -0
- package/dist/bindings/utils/getBestComputeLayersAvailable.js +29 -0
- package/dist/bindings/utils/getBestComputeLayersAvailable.js.map +1 -0
- package/dist/bindings/utils/getBuildFolderNameForBuildOptions.d.ts +5 -0
- package/dist/bindings/utils/getBuildFolderNameForBuildOptions.js +93 -0
- package/dist/bindings/utils/getBuildFolderNameForBuildOptions.js.map +1 -0
- package/dist/bindings/utils/getCanUsePrebuiltBinaries.d.ts +1 -0
- package/dist/bindings/utils/getCanUsePrebuiltBinaries.js +8 -0
- package/dist/bindings/utils/getCanUsePrebuiltBinaries.js.map +1 -0
- package/dist/bindings/utils/getExampleUsageCodeOfGetLlama.d.ts +2 -0
- package/dist/bindings/utils/getExampleUsageCodeOfGetLlama.js +21 -0
- package/dist/bindings/utils/getExampleUsageCodeOfGetLlama.js.map +1 -0
- package/dist/bindings/utils/getGpuTypesToUseForOption.d.ts +11 -0
- package/dist/bindings/utils/getGpuTypesToUseForOption.js +30 -0
- package/dist/bindings/utils/getGpuTypesToUseForOption.js.map +1 -0
- package/dist/bindings/utils/getLinuxDistroInfo.d.ts +9 -0
- package/dist/bindings/utils/getLinuxDistroInfo.js +46 -0
- package/dist/bindings/utils/getLinuxDistroInfo.js.map +1 -0
- package/dist/bindings/utils/getLlamaWithoutBackend.d.ts +5 -0
- package/dist/bindings/utils/getLlamaWithoutBackend.js +27 -0
- package/dist/bindings/utils/getLlamaWithoutBackend.js.map +1 -0
- package/dist/bindings/utils/getPlatform.d.ts +2 -0
- package/dist/bindings/utils/getPlatform.js +15 -0
- package/dist/bindings/utils/getPlatform.js.map +1 -0
- package/dist/bindings/utils/getPlatformInfo.d.ts +5 -0
- package/dist/bindings/utils/getPlatformInfo.js +28 -0
- package/dist/bindings/utils/getPlatformInfo.js.map +1 -0
- package/dist/bindings/utils/hasBuildingFromSourceDependenciesInstalled.d.ts +3 -0
- package/dist/bindings/utils/hasBuildingFromSourceDependenciesInstalled.js +27 -0
- package/dist/bindings/utils/hasBuildingFromSourceDependenciesInstalled.js.map +1 -0
- package/dist/bindings/utils/hasFileInPath.d.ts +2 -0
- package/dist/bindings/utils/hasFileInPath.js +34 -0
- package/dist/bindings/utils/hasFileInPath.js.map +1 -0
- package/dist/bindings/utils/lastBuildInfo.d.ts +6 -0
- package/dist/bindings/utils/lastBuildInfo.js +17 -0
- package/dist/bindings/utils/lastBuildInfo.js.map +1 -0
- package/dist/bindings/utils/logBinaryUsageExampleToConsole.d.ts +2 -0
- package/dist/bindings/utils/logBinaryUsageExampleToConsole.js +22 -0
- package/dist/bindings/utils/logBinaryUsageExampleToConsole.js.map +1 -0
- package/dist/bindings/utils/logDistroInstallInstruction.d.ts +14 -0
- package/dist/bindings/utils/logDistroInstallInstruction.js +48 -0
- package/dist/bindings/utils/logDistroInstallInstruction.js.map +1 -0
- package/dist/bindings/utils/resolveCustomCmakeOptions.d.ts +1 -0
- package/dist/bindings/utils/resolveCustomCmakeOptions.js +45 -0
- package/dist/bindings/utils/resolveCustomCmakeOptions.js.map +1 -0
- package/dist/bindings/utils/testBindingBinary.d.ts +1 -0
- package/dist/bindings/utils/testBindingBinary.js +98 -0
- package/dist/bindings/utils/testBindingBinary.js.map +1 -0
- package/dist/bindings/utils/testCmakeBinary.d.ts +5 -0
- package/dist/bindings/utils/testCmakeBinary.js +32 -0
- package/dist/bindings/utils/testCmakeBinary.js.map +1 -0
- package/dist/chatWrappers/AlpacaChatWrapper.d.ts +2 -1
- package/dist/chatWrappers/AlpacaChatWrapper.js +9 -2
- package/dist/chatWrappers/AlpacaChatWrapper.js.map +1 -1
- package/dist/chatWrappers/ChatMLChatWrapper.d.ts +5 -0
- package/dist/chatWrappers/ChatMLChatWrapper.js +13 -11
- package/dist/chatWrappers/ChatMLChatWrapper.js.map +1 -1
- package/dist/chatWrappers/FalconChatWrapper.d.ts +2 -1
- package/dist/chatWrappers/FalconChatWrapper.js +28 -11
- package/dist/chatWrappers/FalconChatWrapper.js.map +1 -1
- package/dist/chatWrappers/FunctionaryChatWrapper.js +86 -73
- package/dist/chatWrappers/FunctionaryChatWrapper.js.map +1 -1
- package/dist/chatWrappers/{LlamaChatWrapper.d.ts → GemmaChatWrapper.d.ts} +6 -1
- package/dist/chatWrappers/GemmaChatWrapper.js +88 -0
- package/dist/chatWrappers/GemmaChatWrapper.js.map +1 -0
- package/dist/chatWrappers/GeneralChatWrapper.d.ts +2 -1
- package/dist/chatWrappers/GeneralChatWrapper.js +35 -12
- package/dist/chatWrappers/GeneralChatWrapper.js.map +1 -1
- package/dist/chatWrappers/Llama2ChatWrapper.d.ts +20 -0
- package/dist/chatWrappers/{LlamaChatWrapper.js → Llama2ChatWrapper.js} +29 -11
- package/dist/chatWrappers/Llama2ChatWrapper.js.map +1 -0
- package/dist/chatWrappers/Llama3ChatWrapper.d.ts +31 -0
- package/dist/chatWrappers/Llama3ChatWrapper.js +129 -0
- package/dist/chatWrappers/Llama3ChatWrapper.js.map +1 -0
- package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.d.ts +73 -0
- package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.js +359 -0
- package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.js.map +1 -0
- package/dist/chatWrappers/generic/TemplateChatWrapper.d.ts +64 -0
- package/dist/chatWrappers/generic/TemplateChatWrapper.js +200 -0
- package/dist/chatWrappers/generic/TemplateChatWrapper.js.map +1 -0
- package/dist/chatWrappers/generic/utils/chatHistoryFunctionCallMessageTemplate.d.ts +33 -0
- package/dist/chatWrappers/generic/utils/chatHistoryFunctionCallMessageTemplate.js +45 -0
- package/dist/chatWrappers/generic/utils/chatHistoryFunctionCallMessageTemplate.js.map +1 -0
- package/dist/chatWrappers/utils/ChatModelFunctionsDocumentationGenerator.d.ts +42 -0
- package/dist/chatWrappers/utils/ChatModelFunctionsDocumentationGenerator.js +82 -0
- package/dist/chatWrappers/utils/ChatModelFunctionsDocumentationGenerator.js.map +1 -0
- package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.d.ts +4 -0
- package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.js +206 -0
- package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.js.map +1 -0
- package/dist/chatWrappers/utils/resolveChatWrapper.d.ts +69 -0
- package/dist/chatWrappers/utils/resolveChatWrapper.js +214 -0
- package/dist/chatWrappers/utils/resolveChatWrapper.js.map +1 -0
- package/dist/cli/cli.js +21 -7
- package/dist/cli/cli.js.map +1 -1
- package/dist/cli/commands/BuildCommand.d.ts +6 -4
- package/dist/cli/commands/BuildCommand.js +103 -41
- package/dist/cli/commands/BuildCommand.js.map +1 -1
- package/dist/cli/commands/ChatCommand.d.ts +18 -6
- package/dist/cli/commands/ChatCommand.js +298 -142
- package/dist/cli/commands/ChatCommand.js.map +1 -1
- package/dist/cli/commands/ClearCommand.d.ts +1 -1
- package/dist/cli/commands/ClearCommand.js +11 -12
- package/dist/cli/commands/ClearCommand.js.map +1 -1
- package/dist/cli/commands/CompleteCommand.d.ts +29 -0
- package/dist/cli/commands/CompleteCommand.js +365 -0
- package/dist/cli/commands/CompleteCommand.js.map +1 -0
- package/dist/cli/commands/DebugCommand.d.ts +7 -0
- package/dist/cli/commands/DebugCommand.js +54 -0
- package/dist/cli/commands/DebugCommand.js.map +1 -0
- package/dist/cli/commands/DownloadCommand.d.ts +6 -4
- package/dist/cli/commands/DownloadCommand.js +120 -69
- package/dist/cli/commands/DownloadCommand.js.map +1 -1
- package/dist/cli/commands/InfillCommand.d.ts +31 -0
- package/dist/cli/commands/InfillCommand.js +401 -0
- package/dist/cli/commands/InfillCommand.js.map +1 -0
- package/dist/cli/commands/InitCommand.d.ts +11 -0
- package/dist/cli/commands/InitCommand.js +195 -0
- package/dist/cli/commands/InitCommand.js.map +1 -0
- package/dist/cli/commands/OnPostInstallCommand.js +9 -10
- package/dist/cli/commands/OnPostInstallCommand.js.map +1 -1
- package/dist/cli/commands/PullCommand.d.ts +12 -0
- package/dist/cli/commands/PullCommand.js +117 -0
- package/dist/cli/commands/PullCommand.js.map +1 -0
- package/dist/cli/commands/inspect/InspectCommand.d.ts +4 -0
- package/dist/cli/commands/inspect/InspectCommand.js +19 -0
- package/dist/cli/commands/inspect/InspectCommand.js.map +1 -0
- package/dist/cli/commands/inspect/commands/InspectGgufCommand.d.ts +12 -0
- package/dist/cli/commands/inspect/commands/InspectGgufCommand.js +136 -0
- package/dist/cli/commands/inspect/commands/InspectGgufCommand.js.map +1 -0
- package/dist/cli/commands/inspect/commands/InspectGpuCommand.d.ts +4 -0
- package/dist/cli/commands/inspect/commands/InspectGpuCommand.js +138 -0
- package/dist/cli/commands/inspect/commands/InspectGpuCommand.js.map +1 -0
- package/dist/cli/commands/inspect/commands/InspectMeasureCommand.d.ts +17 -0
- package/dist/cli/commands/inspect/commands/InspectMeasureCommand.js +613 -0
- package/dist/cli/commands/inspect/commands/InspectMeasureCommand.js.map +1 -0
- package/dist/cli/projectTemplates.d.ts +7 -0
- package/dist/cli/projectTemplates.js +10 -0
- package/dist/cli/projectTemplates.js.map +1 -0
- package/dist/cli/recommendedModels.d.ts +2 -0
- package/dist/cli/recommendedModels.js +342 -0
- package/dist/cli/recommendedModels.js.map +1 -0
- package/dist/cli/startCreateCli.d.ts +2 -0
- package/dist/cli/startCreateCli.js +26 -0
- package/dist/cli/startCreateCli.js.map +1 -0
- package/dist/cli/utils/ConsoleInteraction.d.ts +23 -0
- package/dist/cli/utils/ConsoleInteraction.js +122 -0
- package/dist/cli/utils/ConsoleInteraction.js.map +1 -0
- package/dist/cli/utils/ConsoleTable.d.ts +23 -0
- package/dist/cli/utils/ConsoleTable.js +86 -0
- package/dist/cli/utils/ConsoleTable.js.map +1 -0
- package/dist/cli/utils/basicChooseFromListConsoleInteraction.d.ts +13 -0
- package/dist/cli/utils/basicChooseFromListConsoleInteraction.js +111 -0
- package/dist/cli/utils/basicChooseFromListConsoleInteraction.js.map +1 -0
- package/dist/cli/utils/consolePromptQuestion.d.ts +6 -0
- package/dist/cli/utils/consolePromptQuestion.js +82 -0
- package/dist/cli/utils/consolePromptQuestion.js.map +1 -0
- package/dist/cli/utils/getReadablePath.d.ts +1 -0
- package/dist/cli/utils/getReadablePath.js +14 -0
- package/dist/cli/utils/getReadablePath.js.map +1 -0
- package/dist/cli/utils/interactivelyAskForModel.d.ts +7 -0
- package/dist/cli/utils/interactivelyAskForModel.js +451 -0
- package/dist/cli/utils/interactivelyAskForModel.js.map +1 -0
- package/dist/cli/utils/logUsedGpuTypeOption.d.ts +2 -0
- package/dist/cli/utils/logUsedGpuTypeOption.js +9 -0
- package/dist/cli/utils/logUsedGpuTypeOption.js.map +1 -0
- package/dist/cli/utils/printCommonInfoLines.d.ts +9 -0
- package/dist/cli/utils/printCommonInfoLines.js +71 -0
- package/dist/cli/utils/printCommonInfoLines.js.map +1 -0
- package/dist/cli/utils/printInfoLine.d.ts +12 -0
- package/dist/cli/utils/printInfoLine.js +54 -0
- package/dist/cli/utils/printInfoLine.js.map +1 -0
- package/dist/cli/utils/projectTemplates.d.ts +19 -0
- package/dist/cli/utils/projectTemplates.js +47 -0
- package/dist/cli/utils/projectTemplates.js.map +1 -0
- package/dist/cli/utils/resolveCommandGgufPath.d.ts +4 -0
- package/dist/cli/utils/resolveCommandGgufPath.js +71 -0
- package/dist/cli/utils/resolveCommandGgufPath.js.map +1 -0
- package/dist/cli/utils/resolveHeaderFlag.d.ts +1 -0
- package/dist/cli/utils/resolveHeaderFlag.js +21 -0
- package/dist/cli/utils/resolveHeaderFlag.js.map +1 -0
- package/dist/cli/utils/resolveModelRecommendationFileOptions.d.ts +19 -0
- package/dist/cli/utils/resolveModelRecommendationFileOptions.js +7 -0
- package/dist/cli/utils/resolveModelRecommendationFileOptions.js.map +1 -0
- package/dist/cli/utils/splitAnsiToLines.d.ts +1 -0
- package/dist/cli/utils/splitAnsiToLines.js +32 -0
- package/dist/cli/utils/splitAnsiToLines.js.map +1 -0
- package/dist/cli/utils/withCliCommandDescriptionDocsUrl.d.ts +2 -0
- package/dist/cli/utils/withCliCommandDescriptionDocsUrl.js +23 -0
- package/dist/cli/utils/withCliCommandDescriptionDocsUrl.js.map +1 -0
- package/dist/commands.d.ts +1 -0
- package/dist/commands.js +3 -0
- package/dist/commands.js.map +1 -1
- package/dist/config.d.ts +38 -5
- package/dist/config.js +61 -16
- package/dist/config.js.map +1 -1
- package/dist/consts.d.ts +3 -0
- package/dist/consts.js +10 -0
- package/dist/consts.js.map +1 -0
- package/dist/{llamaEvaluator → evaluator}/LlamaChat/LlamaChat.d.ts +37 -35
- package/dist/{llamaEvaluator → evaluator}/LlamaChat/LlamaChat.js +298 -221
- package/dist/evaluator/LlamaChat/LlamaChat.js.map +1 -0
- package/dist/{llamaEvaluator → evaluator}/LlamaChat/utils/FunctionCallGrammar.d.ts +2 -1
- package/dist/{llamaEvaluator → evaluator}/LlamaChat/utils/FunctionCallGrammar.js +5 -3
- package/dist/evaluator/LlamaChat/utils/FunctionCallGrammar.js.map +1 -0
- package/dist/{llamaEvaluator → evaluator}/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.js +18 -0
- package/dist/evaluator/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.js.map +1 -0
- package/dist/{llamaEvaluator → evaluator}/LlamaChatSession/LlamaChatSession.d.ts +40 -3
- package/dist/{llamaEvaluator → evaluator}/LlamaChatSession/LlamaChatSession.js +28 -7
- package/dist/evaluator/LlamaChatSession/LlamaChatSession.js.map +1 -0
- package/dist/{llamaEvaluator → evaluator}/LlamaChatSession/utils/defineChatSessionFunction.d.ts +3 -0
- package/dist/{llamaEvaluator → evaluator}/LlamaChatSession/utils/defineChatSessionFunction.js +3 -0
- package/dist/evaluator/LlamaChatSession/utils/defineChatSessionFunction.js.map +1 -0
- package/dist/evaluator/LlamaCompletion.d.ts +155 -0
- package/dist/evaluator/LlamaCompletion.js +405 -0
- package/dist/evaluator/LlamaCompletion.js.map +1 -0
- package/dist/{llamaEvaluator → evaluator}/LlamaContext/LlamaContext.d.ts +41 -20
- package/dist/{llamaEvaluator → evaluator}/LlamaContext/LlamaContext.js +271 -81
- package/dist/evaluator/LlamaContext/LlamaContext.js.map +1 -0
- package/dist/evaluator/LlamaContext/types.d.ts +140 -0
- package/dist/evaluator/LlamaContext/types.js.map +1 -0
- package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies/firstInFirstOutStrategy.js.map +1 -0
- package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies/maximumParallelismStrategy.js.map +1 -0
- package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizationStrategy.d.ts +2 -0
- package/dist/{llamaEvaluator/LlamaContext/utils/resolveBatchItemsPrioritizingStrategy.js → evaluator/LlamaContext/utils/resolveBatchItemsPrioritizationStrategy.js} +4 -4
- package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizationStrategy.js.map +1 -0
- package/dist/evaluator/LlamaEmbeddingContext.d.ts +51 -0
- package/dist/evaluator/LlamaEmbeddingContext.js +73 -0
- package/dist/evaluator/LlamaEmbeddingContext.js.map +1 -0
- package/dist/{llamaEvaluator → evaluator}/LlamaGrammar.d.ts +8 -5
- package/dist/{llamaEvaluator → evaluator}/LlamaGrammar.js +13 -10
- package/dist/evaluator/LlamaGrammar.js.map +1 -0
- package/dist/{llamaEvaluator → evaluator}/LlamaGrammarEvaluationState.js +4 -4
- package/dist/evaluator/LlamaGrammarEvaluationState.js.map +1 -0
- package/dist/{llamaEvaluator → evaluator}/LlamaJsonSchemaGrammar.d.ts +2 -1
- package/dist/{llamaEvaluator → evaluator}/LlamaJsonSchemaGrammar.js +3 -3
- package/dist/evaluator/LlamaJsonSchemaGrammar.js.map +1 -0
- package/dist/evaluator/LlamaModel.d.ts +230 -0
- package/dist/evaluator/LlamaModel.js +597 -0
- package/dist/evaluator/LlamaModel.js.map +1 -0
- package/dist/evaluator/TokenBias.d.ts +22 -0
- package/dist/evaluator/TokenBias.js +33 -0
- package/dist/evaluator/TokenBias.js.map +1 -0
- package/dist/evaluator/TokenMeter.d.ts +54 -0
- package/dist/evaluator/TokenMeter.js +86 -0
- package/dist/evaluator/TokenMeter.js.map +1 -0
- package/dist/gguf/consts.d.ts +3 -0
- package/dist/gguf/consts.js +8 -0
- package/dist/gguf/consts.js.map +1 -0
- package/dist/gguf/errors/InvalidGgufMagicError.d.ts +3 -0
- package/dist/gguf/errors/InvalidGgufMagicError.js +6 -0
- package/dist/gguf/errors/InvalidGgufMagicError.js.map +1 -0
- package/dist/gguf/errors/UnsupportedGgufValueTypeError.d.ts +4 -0
- package/dist/gguf/errors/UnsupportedGgufValueTypeError.js +9 -0
- package/dist/gguf/errors/UnsupportedGgufValueTypeError.js.map +1 -0
- package/dist/gguf/fileReaders/GgufFileReader.d.ts +33 -0
- package/dist/gguf/fileReaders/GgufFileReader.js +76 -0
- package/dist/gguf/fileReaders/GgufFileReader.js.map +1 -0
- package/dist/gguf/fileReaders/GgufFsFileReader.d.ts +17 -0
- package/dist/gguf/fileReaders/GgufFsFileReader.js +45 -0
- package/dist/gguf/fileReaders/GgufFsFileReader.js.map +1 -0
- package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.d.ts +22 -0
- package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.js +63 -0
- package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.js.map +1 -0
- package/dist/gguf/insights/GgufInsights.d.ts +42 -0
- package/dist/gguf/insights/GgufInsights.js +361 -0
- package/dist/gguf/insights/GgufInsights.js.map +1 -0
- package/dist/gguf/insights/GgufInsightsConfigurationResolver.d.ts +87 -0
- package/dist/gguf/insights/GgufInsightsConfigurationResolver.js +136 -0
- package/dist/gguf/insights/GgufInsightsConfigurationResolver.js.map +1 -0
- package/dist/gguf/insights/utils/resolveContextContextSizeOption.d.ts +18 -0
- package/dist/gguf/insights/utils/resolveContextContextSizeOption.js +76 -0
- package/dist/gguf/insights/utils/resolveContextContextSizeOption.js.map +1 -0
- package/dist/gguf/insights/utils/resolveModelGpuLayersOption.d.ts +14 -0
- package/dist/gguf/insights/utils/resolveModelGpuLayersOption.js +177 -0
- package/dist/gguf/insights/utils/resolveModelGpuLayersOption.js.map +1 -0
- package/dist/gguf/insights/utils/scoreLevels.d.ts +5 -0
- package/dist/gguf/insights/utils/scoreLevels.js +16 -0
- package/dist/gguf/insights/utils/scoreLevels.js.map +1 -0
- package/dist/gguf/parser/GgufV2Parser.d.ts +19 -0
- package/dist/gguf/parser/GgufV2Parser.js +115 -0
- package/dist/gguf/parser/GgufV2Parser.js.map +1 -0
- package/dist/gguf/parser/GgufV3Parser.d.ts +3 -0
- package/dist/gguf/parser/GgufV3Parser.js +4 -0
- package/dist/gguf/parser/GgufV3Parser.js.map +1 -0
- package/dist/gguf/parser/parseGguf.d.ts +8 -0
- package/dist/gguf/parser/parseGguf.js +63 -0
- package/dist/gguf/parser/parseGguf.js.map +1 -0
- package/dist/gguf/readGgufFileInfo.d.ts +33 -0
- package/dist/gguf/readGgufFileInfo.js +66 -0
- package/dist/gguf/readGgufFileInfo.js.map +1 -0
- package/dist/gguf/types/GgufFileInfoTypes.d.ts +84 -0
- package/dist/gguf/types/GgufFileInfoTypes.js +18 -0
- package/dist/gguf/types/GgufFileInfoTypes.js.map +1 -0
- package/dist/gguf/types/GgufMetadataTypes.d.ts +334 -0
- package/dist/gguf/types/GgufMetadataTypes.js +86 -0
- package/dist/gguf/types/GgufMetadataTypes.js.map +1 -0
- package/dist/gguf/types/GgufTensorInfoTypes.d.ts +37 -0
- package/dist/gguf/types/GgufTensorInfoTypes.js +33 -0
- package/dist/gguf/types/GgufTensorInfoTypes.js.map +1 -0
- package/dist/gguf/utils/GgufReadOffset.d.ts +6 -0
- package/dist/gguf/utils/GgufReadOffset.js +18 -0
- package/dist/gguf/utils/GgufReadOffset.js.map +1 -0
- package/dist/gguf/utils/convertMetadataKeyValueRecordToNestedObject.d.ts +5 -0
- package/dist/gguf/utils/convertMetadataKeyValueRecordToNestedObject.js +38 -0
- package/dist/gguf/utils/convertMetadataKeyValueRecordToNestedObject.js.map +1 -0
- package/dist/gguf/utils/getGgufFileTypeName.d.ts +4 -0
- package/dist/gguf/utils/getGgufFileTypeName.js +13 -0
- package/dist/gguf/utils/getGgufFileTypeName.js.map +1 -0
- package/dist/gguf/utils/getGgufMetadataArchitectureData.d.ts +3 -0
- package/dist/gguf/utils/getGgufMetadataArchitectureData.js +4 -0
- package/dist/gguf/utils/getGgufMetadataArchitectureData.js.map +1 -0
- package/dist/gguf/utils/normalizeGgufDownloadUrl.d.ts +1 -0
- package/dist/gguf/utils/normalizeGgufDownloadUrl.js +16 -0
- package/dist/gguf/utils/normalizeGgufDownloadUrl.js.map +1 -0
- package/dist/gguf/utils/resolveBinarySplitGgufPartUrls.d.ts +2 -0
- package/dist/gguf/utils/resolveBinarySplitGgufPartUrls.js +39 -0
- package/dist/gguf/utils/resolveBinarySplitGgufPartUrls.js.map +1 -0
- package/dist/gguf/utils/resolveSplitGgufParts.d.ts +7 -0
- package/dist/gguf/utils/resolveSplitGgufParts.js +55 -0
- package/dist/gguf/utils/resolveSplitGgufParts.js.map +1 -0
- package/dist/index.d.ts +37 -17
- package/dist/index.js +33 -14
- package/dist/index.js.map +1 -1
- package/dist/state.d.ts +4 -0
- package/dist/state.js +14 -0
- package/dist/state.js.map +1 -1
- package/dist/types.d.ts +53 -2
- package/dist/types.js.map +1 -1
- package/dist/utils/DisposeGuard.d.ts +13 -0
- package/dist/utils/DisposeGuard.js +120 -0
- package/dist/utils/DisposeGuard.js.map +1 -0
- package/dist/utils/InsufficientMemoryError.d.ts +3 -0
- package/dist/utils/InsufficientMemoryError.js +6 -0
- package/dist/utils/InsufficientMemoryError.js.map +1 -0
- package/dist/utils/LlamaText.d.ts +50 -25
- package/dist/utils/LlamaText.js +367 -155
- package/dist/utils/LlamaText.js.map +1 -1
- package/dist/utils/StopGenerationDetector.d.ts +1 -1
- package/dist/utils/StopGenerationDetector.js +23 -18
- package/dist/utils/StopGenerationDetector.js.map +1 -1
- package/dist/utils/TokenStreamRegulator.d.ts +8 -4
- package/dist/utils/TokenStreamRegulator.js +78 -8
- package/dist/utils/TokenStreamRegulator.js.map +1 -1
- package/dist/utils/UnsupportedError.d.ts +2 -0
- package/dist/utils/UnsupportedError.js +7 -0
- package/dist/utils/UnsupportedError.js.map +1 -0
- package/dist/utils/cmake.js +38 -20
- package/dist/utils/cmake.js.map +1 -1
- package/dist/utils/createModelDownloader.d.ts +102 -0
- package/dist/utils/createModelDownloader.js +226 -0
- package/dist/utils/createModelDownloader.js.map +1 -0
- package/dist/utils/findBestOption.d.ts +4 -0
- package/dist/utils/findBestOption.js +15 -0
- package/dist/utils/findBestOption.js.map +1 -0
- package/dist/utils/findCharacterRemovalCountToFitChatHistoryInContext.js +18 -8
- package/dist/utils/findCharacterRemovalCountToFitChatHistoryInContext.js.map +1 -1
- package/dist/utils/gbnfJson/getGbnfGrammarForGbnfJsonSchema.d.ts +5 -0
- package/dist/utils/gbnfJson/getGbnfGrammarForGbnfJsonSchema.js +11 -0
- package/dist/utils/gbnfJson/getGbnfGrammarForGbnfJsonSchema.js.map +1 -0
- package/dist/utils/gbnfJson/terminals/GbnfArray.d.ts +3 -1
- package/dist/utils/gbnfJson/terminals/GbnfArray.js +10 -5
- package/dist/utils/gbnfJson/terminals/GbnfArray.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfBoolean.d.ts +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfBoolean.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfBooleanValue.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfGrammar.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfNull.d.ts +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfNull.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfNumber.d.ts +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfNumber.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfNumberValue.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfObjectMap.d.ts +3 -1
- package/dist/utils/gbnfJson/terminals/GbnfObjectMap.js +9 -4
- package/dist/utils/gbnfJson/terminals/GbnfObjectMap.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfOr.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfRepetition.d.ts +9 -0
- package/dist/utils/gbnfJson/terminals/GbnfRepetition.js +37 -0
- package/dist/utils/gbnfJson/terminals/GbnfRepetition.js.map +1 -0
- package/dist/utils/gbnfJson/terminals/GbnfString.d.ts +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfString.js +23 -5
- package/dist/utils/gbnfJson/terminals/GbnfString.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfStringValue.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfVerbatimText.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfWhitespace.d.ts +7 -4
- package/dist/utils/gbnfJson/terminals/GbnfWhitespace.js +37 -9
- package/dist/utils/gbnfJson/terminals/GbnfWhitespace.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/gbnfConsts.d.ts +5 -4
- package/dist/utils/gbnfJson/terminals/gbnfConsts.js +14 -3
- package/dist/utils/gbnfJson/terminals/gbnfConsts.js.map +1 -1
- package/dist/utils/gbnfJson/utils/GbnfJsonScopeState.d.ts +10 -0
- package/dist/utils/gbnfJson/utils/GbnfJsonScopeState.js +15 -0
- package/dist/utils/gbnfJson/utils/GbnfJsonScopeState.js.map +1 -0
- package/dist/utils/gbnfJson/utils/getGbnfJsonTerminalForGbnfJsonSchema.d.ts +2 -1
- package/dist/utils/gbnfJson/utils/getGbnfJsonTerminalForGbnfJsonSchema.js +6 -5
- package/dist/utils/gbnfJson/utils/getGbnfJsonTerminalForGbnfJsonSchema.js.map +1 -1
- package/dist/utils/getBuildDefaults.d.ts +1 -2
- package/dist/utils/getBuildDefaults.js +2 -3
- package/dist/utils/getBuildDefaults.js.map +1 -1
- package/dist/utils/getConsoleLogPrefix.d.ts +1 -0
- package/dist/utils/getConsoleLogPrefix.js +10 -0
- package/dist/utils/getConsoleLogPrefix.js.map +1 -0
- package/dist/utils/getGrammarsFolder.d.ts +2 -1
- package/dist/utils/getGrammarsFolder.js +8 -7
- package/dist/utils/getGrammarsFolder.js.map +1 -1
- package/dist/utils/getModuleVersion.d.ts +1 -0
- package/dist/utils/getModuleVersion.js +13 -0
- package/dist/utils/getModuleVersion.js.map +1 -0
- package/dist/utils/getQueuedTokensBeforeStopTrigger.d.ts +6 -0
- package/dist/utils/getQueuedTokensBeforeStopTrigger.js +22 -0
- package/dist/utils/getQueuedTokensBeforeStopTrigger.js.map +1 -0
- package/dist/utils/getReadableContextSize.d.ts +1 -0
- package/dist/utils/getReadableContextSize.js +7 -0
- package/dist/utils/getReadableContextSize.js.map +1 -0
- package/dist/utils/getTypeScriptTypeStringForGbnfJsonSchema.js +15 -11
- package/dist/utils/getTypeScriptTypeStringForGbnfJsonSchema.js.map +1 -1
- package/dist/utils/gitReleaseBundles.js +73 -5
- package/dist/utils/gitReleaseBundles.js.map +1 -1
- package/dist/utils/hashString.d.ts +1 -0
- package/dist/utils/hashString.js +8 -0
- package/dist/utils/hashString.js.map +1 -0
- package/dist/utils/isLockfileActive.d.ts +4 -0
- package/dist/utils/isLockfileActive.js +12 -0
- package/dist/utils/isLockfileActive.js.map +1 -0
- package/dist/utils/isToken.d.ts +2 -0
- package/dist/utils/isToken.js +4 -0
- package/dist/utils/isToken.js.map +1 -0
- package/dist/utils/isUrl.d.ts +1 -0
- package/dist/utils/isUrl.js +15 -0
- package/dist/utils/isUrl.js.map +1 -0
- package/dist/utils/mergeUnionTypes.d.ts +10 -0
- package/dist/utils/mergeUnionTypes.js +2 -0
- package/dist/utils/mergeUnionTypes.js.map +1 -0
- package/dist/utils/parseModelFileName.d.ts +1 -0
- package/dist/utils/parseModelFileName.js +6 -1
- package/dist/utils/parseModelFileName.js.map +1 -1
- package/dist/utils/parseTextTemplate.d.ts +66 -0
- package/dist/utils/parseTextTemplate.js +116 -0
- package/dist/utils/parseTextTemplate.js.map +1 -0
- package/dist/utils/prettyPrintObject.d.ts +10 -0
- package/dist/utils/prettyPrintObject.js +84 -0
- package/dist/utils/prettyPrintObject.js.map +1 -0
- package/dist/utils/removeNullFields.d.ts +2 -1
- package/dist/utils/removeNullFields.js +8 -0
- package/dist/utils/removeNullFields.js.map +1 -1
- package/dist/utils/resolveGithubRelease.d.ts +2 -0
- package/dist/utils/resolveGithubRelease.js +36 -0
- package/dist/utils/resolveGithubRelease.js.map +1 -0
- package/dist/utils/runtime.d.ts +4 -0
- package/dist/utils/runtime.js +8 -0
- package/dist/utils/runtime.js.map +1 -0
- package/dist/utils/spawnCommand.d.ts +11 -1
- package/dist/utils/spawnCommand.js +56 -6
- package/dist/utils/spawnCommand.js.map +1 -1
- package/dist/utils/tokenizeInput.d.ts +3 -0
- package/dist/utils/tokenizeInput.js +12 -0
- package/dist/utils/tokenizeInput.js.map +1 -0
- package/dist/utils/utilTypes.d.ts +3 -0
- package/dist/utils/utilTypes.js +2 -0
- package/dist/utils/utilTypes.js.map +1 -0
- package/dist/utils/waitForLockfileRelease.d.ts +5 -0
- package/dist/utils/waitForLockfileRelease.js +20 -0
- package/dist/utils/waitForLockfileRelease.js.map +1 -0
- package/dist/utils/withLockfile.d.ts +7 -0
- package/dist/utils/withLockfile.js +44 -0
- package/dist/utils/withLockfile.js.map +1 -0
- package/dist/utils/withOra.d.ts +2 -0
- package/dist/utils/withOra.js +22 -6
- package/dist/utils/withOra.js.map +1 -1
- package/dist/utils/withProgressLog.d.ts +23 -0
- package/dist/utils/withProgressLog.js +211 -0
- package/dist/utils/withProgressLog.js.map +1 -0
- package/dist/utils/withStatusLogs.d.ts +2 -1
- package/dist/utils/withStatusLogs.js +12 -9
- package/dist/utils/withStatusLogs.js.map +1 -1
- package/llama/.clang-format +1 -2
- package/llama/CMakeLists.txt +115 -4
- package/llama/addon.cpp +1318 -99
- package/llama/binariesGithubRelease.json +1 -1
- package/llama/gitRelease.bundle +0 -0
- package/llama/gpuInfo/cuda-gpu-info.cu +120 -0
- package/llama/gpuInfo/cuda-gpu-info.h +10 -0
- package/llama/gpuInfo/metal-gpu-info.h +8 -0
- package/llama/gpuInfo/metal-gpu-info.mm +30 -0
- package/llama/gpuInfo/vulkan-gpu-info.cpp +83 -0
- package/llama/gpuInfo/vulkan-gpu-info.h +9 -0
- package/llama/grammars/README.md +11 -1
- package/llama/grammars/json.gbnf +1 -1
- package/llama/grammars/json_arr.gbnf +1 -1
- package/llama/llama.cpp.info.json +4 -0
- package/llama/toolchains/win32.host-x64.target-arm64.cmake +41 -0
- package/llamaBins/linux-arm64/_nlcBuildMetadata.json +1 -0
- package/llamaBins/linux-arm64/llama-addon.node +0 -0
- package/llamaBins/linux-armv7l/_nlcBuildMetadata.json +1 -0
- package/llamaBins/linux-armv7l/llama-addon.node +0 -0
- package/llamaBins/linux-x64/_nlcBuildMetadata.json +1 -0
- package/llamaBins/linux-x64/llama-addon.node +0 -0
- package/llamaBins/linux-x64-cuda/_nlcBuildMetadata.json +1 -0
- package/llamaBins/linux-x64-cuda/llama-addon.node +0 -0
- package/llamaBins/linux-x64-vulkan/_nlcBuildMetadata.json +1 -0
- package/llamaBins/linux-x64-vulkan/llama-addon.node +0 -0
- package/llamaBins/mac-arm64-metal/_nlcBuildMetadata.json +1 -0
- package/llamaBins/mac-arm64-metal/default.metallib +0 -0
- package/llamaBins/mac-arm64-metal/llama-addon.node +0 -0
- package/llamaBins/mac-x64/_nlcBuildMetadata.json +1 -0
- package/llamaBins/mac-x64/llama-addon.node +0 -0
- package/llamaBins/win-arm64/_nlcBuildMetadata.json +1 -0
- package/llamaBins/win-arm64/llama-addon.exp +0 -0
- package/llamaBins/win-arm64/llama-addon.lib +0 -0
- package/llamaBins/win-arm64/llama-addon.node +0 -0
- package/llamaBins/win-x64/_nlcBuildMetadata.json +1 -0
- package/llamaBins/win-x64/llama-addon.exp +0 -0
- package/llamaBins/win-x64/llama-addon.lib +0 -0
- package/llamaBins/win-x64/llama-addon.node +0 -0
- package/llamaBins/win-x64-cuda/_nlcBuildMetadata.json +1 -0
- package/llamaBins/win-x64-cuda/llama-addon.exp +0 -0
- package/llamaBins/win-x64-cuda/llama-addon.lib +0 -0
- package/llamaBins/win-x64-cuda/llama-addon.node +0 -0
- package/llamaBins/win-x64-vulkan/_nlcBuildMetadata.json +1 -0
- package/llamaBins/win-x64-vulkan/llama-addon.exp +0 -0
- package/llamaBins/win-x64-vulkan/llama-addon.lib +0 -0
- package/llamaBins/win-x64-vulkan/llama-addon.node +0 -0
- package/package.json +61 -34
- package/templates/packed/electron-typescript-react.json +1 -0
- package/templates/packed/node-typescript.json +1 -0
- package/templates/packed/node_modules.json +1 -0
- package/dist/AbortError.d.ts +0 -2
- package/dist/AbortError.js +0 -7
- package/dist/AbortError.js.map +0 -1
- package/dist/chatWrappers/LlamaChatWrapper.js.map +0 -1
- package/dist/chatWrappers/resolveChatWrapperBasedOnModel.d.ts +0 -13
- package/dist/chatWrappers/resolveChatWrapperBasedOnModel.js +0 -55
- package/dist/chatWrappers/resolveChatWrapperBasedOnModel.js.map +0 -1
- package/dist/llamaEvaluator/LlamaBins.d.ts +0 -18
- package/dist/llamaEvaluator/LlamaBins.js +0 -5
- package/dist/llamaEvaluator/LlamaBins.js.map +0 -1
- package/dist/llamaEvaluator/LlamaChat/LlamaChat.js.map +0 -1
- package/dist/llamaEvaluator/LlamaChat/utils/FunctionCallGrammar.js.map +0 -1
- package/dist/llamaEvaluator/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.js.map +0 -1
- package/dist/llamaEvaluator/LlamaChatSession/LlamaChatSession.js.map +0 -1
- package/dist/llamaEvaluator/LlamaChatSession/utils/defineChatSessionFunction.js.map +0 -1
- package/dist/llamaEvaluator/LlamaContext/LlamaContext.js.map +0 -1
- package/dist/llamaEvaluator/LlamaContext/types.d.ts +0 -86
- package/dist/llamaEvaluator/LlamaContext/types.js.map +0 -1
- package/dist/llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/firstInFirstOutStrategy.js.map +0 -1
- package/dist/llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/maximumParallelismStrategy.js.map +0 -1
- package/dist/llamaEvaluator/LlamaContext/utils/resolveBatchItemsPrioritizingStrategy.d.ts +0 -2
- package/dist/llamaEvaluator/LlamaContext/utils/resolveBatchItemsPrioritizingStrategy.js.map +0 -1
- package/dist/llamaEvaluator/LlamaGrammar.js.map +0 -1
- package/dist/llamaEvaluator/LlamaGrammarEvaluationState.js.map +0 -1
- package/dist/llamaEvaluator/LlamaJsonSchemaGrammar.js.map +0 -1
- package/dist/llamaEvaluator/LlamaModel.d.ts +0 -119
- package/dist/llamaEvaluator/LlamaModel.js +0 -322
- package/dist/llamaEvaluator/LlamaModel.js.map +0 -1
- package/dist/utils/binariesGithubRelease.js.map +0 -1
- package/dist/utils/clearLlamaBuild.d.ts +0 -1
- package/dist/utils/clearLlamaBuild.js +0 -12
- package/dist/utils/clearLlamaBuild.js.map +0 -1
- package/dist/utils/cloneLlamaCppRepo.d.ts +0 -2
- package/dist/utils/cloneLlamaCppRepo.js +0 -102
- package/dist/utils/cloneLlamaCppRepo.js.map +0 -1
- package/dist/utils/compileLLamaCpp.d.ts +0 -8
- package/dist/utils/compileLLamaCpp.js +0 -132
- package/dist/utils/compileLLamaCpp.js.map +0 -1
- package/dist/utils/getBin.js +0 -78
- package/dist/utils/getBin.js.map +0 -1
- package/dist/utils/getGbnfGrammarForGbnfJsonSchema.d.ts +0 -2
- package/dist/utils/getGbnfGrammarForGbnfJsonSchema.js +0 -9
- package/dist/utils/getGbnfGrammarForGbnfJsonSchema.js.map +0 -1
- package/dist/utils/getReleaseInfo.d.ts +0 -7
- package/dist/utils/getReleaseInfo.js +0 -30
- package/dist/utils/getReleaseInfo.js.map +0 -1
- package/dist/utils/parseModelTypeDescription.d.ts +0 -6
- package/dist/utils/parseModelTypeDescription.js +0 -9
- package/dist/utils/parseModelTypeDescription.js.map +0 -1
- package/dist/utils/resolveChatWrapper.d.ts +0 -4
- package/dist/utils/resolveChatWrapper.js +0 -16
- package/dist/utils/resolveChatWrapper.js.map +0 -1
- package/dist/utils/usedBinFlag.d.ts +0 -6
- package/dist/utils/usedBinFlag.js +0 -15
- package/dist/utils/usedBinFlag.js.map +0 -1
- package/llama/usedBin.json +0 -3
- package/llamaBins/mac-arm64/llama-addon.node +0 -0
- /package/dist/{utils → bindings/utils}/binariesGithubRelease.d.ts +0 -0
- /package/dist/{llamaEvaluator → evaluator}/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.d.ts +0 -0
- /package/dist/{llamaEvaluator → evaluator}/LlamaContext/types.js +0 -0
- /package/dist/{llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies → evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies}/firstInFirstOutStrategy.d.ts +0 -0
- /package/dist/{llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies → evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies}/firstInFirstOutStrategy.js +0 -0
- /package/dist/{llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies → evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies}/maximumParallelismStrategy.d.ts +0 -0
- /package/dist/{llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies → evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies}/maximumParallelismStrategy.js +0 -0
- /package/dist/{llamaEvaluator → evaluator}/LlamaGrammarEvaluationState.d.ts +0 -0
package/llama/addon.cpp
CHANGED
|
@@ -3,42 +3,284 @@
|
|
|
3
3
|
#include <algorithm>
|
|
4
4
|
#include <sstream>
|
|
5
5
|
#include <vector>
|
|
6
|
+
#include <unordered_map>
|
|
6
7
|
|
|
7
8
|
#include "common.h"
|
|
8
9
|
#include "common/grammar-parser.h"
|
|
9
10
|
#include "llama.h"
|
|
10
11
|
#include "napi.h"
|
|
11
12
|
|
|
12
|
-
|
|
13
|
+
#ifdef GPU_INFO_USE_CUDA
|
|
14
|
+
# include "gpuInfo/cuda-gpu-info.h"
|
|
15
|
+
#endif
|
|
16
|
+
#ifdef GPU_INFO_USE_VULKAN
|
|
17
|
+
# include "gpuInfo/vulkan-gpu-info.h"
|
|
18
|
+
#endif
|
|
19
|
+
#ifdef GPU_INFO_USE_METAL
|
|
20
|
+
# include "gpuInfo/metal-gpu-info.h"
|
|
21
|
+
#endif
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
struct addon_logger_log {
|
|
25
|
+
public:
|
|
26
|
+
const int logLevelNumber;
|
|
27
|
+
const std::stringstream* stringStream;
|
|
28
|
+
};
|
|
29
|
+
|
|
30
|
+
static void addonLlamaCppLogCallback(ggml_log_level level, const char* text, void* user_data);
|
|
31
|
+
|
|
32
|
+
using AddonThreadSafeLogCallbackFunctionContext = Napi::Reference<Napi::Value>;
|
|
33
|
+
void addonCallJsLogCallback(
|
|
34
|
+
Napi::Env env, Napi::Function callback, AddonThreadSafeLogCallbackFunctionContext* context, addon_logger_log* data
|
|
35
|
+
);
|
|
36
|
+
using AddonThreadSafeLogCallbackFunction =
|
|
37
|
+
Napi::TypedThreadSafeFunction<AddonThreadSafeLogCallbackFunctionContext, addon_logger_log, addonCallJsLogCallback>;
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
struct addon_progress_event {
|
|
41
|
+
public:
|
|
42
|
+
const float progress;
|
|
43
|
+
};
|
|
44
|
+
|
|
45
|
+
using AddonThreadSafeProgressCallbackFunctionContext = Napi::Reference<Napi::Value>;
|
|
46
|
+
void addonCallJsProgressCallback(
|
|
47
|
+
Napi::Env env, Napi::Function callback, AddonThreadSafeProgressCallbackFunctionContext* context, addon_progress_event* data
|
|
48
|
+
);
|
|
49
|
+
using AddonThreadSafeProgressEventCallbackFunction =
|
|
50
|
+
Napi::TypedThreadSafeFunction<AddonThreadSafeProgressCallbackFunctionContext, addon_progress_event, addonCallJsProgressCallback>;
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
AddonThreadSafeLogCallbackFunction addonThreadSafeLoggerCallback;
|
|
54
|
+
bool addonJsLoggerCallbackSet = false;
|
|
55
|
+
int addonLoggerLogLevel = 5;
|
|
56
|
+
bool backendInitialized = false;
|
|
57
|
+
bool backendDisposed = false;
|
|
58
|
+
|
|
59
|
+
void addonCallJsProgressCallback(
|
|
60
|
+
Napi::Env env, Napi::Function callback, AddonThreadSafeProgressCallbackFunctionContext* context, addon_progress_event* data
|
|
61
|
+
) {
|
|
62
|
+
if (env != nullptr && callback != nullptr && addonJsLoggerCallbackSet) {
|
|
63
|
+
try {
|
|
64
|
+
callback.Call({Napi::Number::New(env, data->progress)});
|
|
65
|
+
} catch (const Napi::Error& e) {}
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
if (data != nullptr) {
|
|
69
|
+
delete data;
|
|
70
|
+
}
|
|
71
|
+
}
|
|
72
|
+
|
|
73
|
+
static uint64_t calculateBatchMemorySize(int32_t n_tokens_alloc, int32_t embd, int32_t n_seq_max) {
|
|
74
|
+
uint64_t totalSize = 0;
|
|
75
|
+
|
|
76
|
+
if (embd) {
|
|
77
|
+
totalSize += sizeof(float) * n_tokens_alloc * embd;
|
|
78
|
+
} else {
|
|
79
|
+
totalSize += sizeof(llama_token) * n_tokens_alloc;
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
totalSize += sizeof(llama_pos) * n_tokens_alloc;
|
|
83
|
+
totalSize += sizeof(int32_t) * n_tokens_alloc;
|
|
84
|
+
totalSize += sizeof(llama_seq_id *) * (n_tokens_alloc + 1);
|
|
85
|
+
|
|
86
|
+
totalSize += sizeof(llama_seq_id) * n_seq_max * n_tokens_alloc;
|
|
87
|
+
|
|
88
|
+
totalSize += sizeof(int8_t) * n_tokens_alloc;
|
|
89
|
+
|
|
90
|
+
return totalSize;
|
|
91
|
+
}
|
|
92
|
+
|
|
93
|
+
static void adjustNapiExternalMemoryAdd(Napi::Env env, uint64_t size) {
|
|
94
|
+
const uint64_t chunkSize = std::numeric_limits<int64_t>::max();
|
|
95
|
+
while (size > 0) {
|
|
96
|
+
int64_t adjustSize = std::min(size, chunkSize);
|
|
97
|
+
Napi::MemoryManagement::AdjustExternalMemory(env, adjustSize);
|
|
98
|
+
size -= adjustSize;
|
|
99
|
+
}
|
|
100
|
+
}
|
|
101
|
+
|
|
102
|
+
static void adjustNapiExternalMemorySubtract(Napi::Env env, uint64_t size) {
|
|
103
|
+
const uint64_t chunkSize = std::numeric_limits<int64_t>::max();
|
|
104
|
+
while (size > 0) {
|
|
105
|
+
int64_t adjustSize = std::min(size, chunkSize);
|
|
106
|
+
Napi::MemoryManagement::AdjustExternalMemory(env, -adjustSize);
|
|
107
|
+
size -= adjustSize;
|
|
108
|
+
}
|
|
109
|
+
}
|
|
110
|
+
|
|
111
|
+
std::string addon_model_token_to_piece(const struct llama_model* model, llama_token token, bool specialTokens) {
|
|
13
112
|
std::vector<char> result(8, 0);
|
|
14
|
-
const int n_tokens = llama_token_to_piece(model, token, result.data(), result.size());
|
|
113
|
+
const int n_tokens = llama_token_to_piece(model, token, result.data(), result.size(), specialTokens);
|
|
15
114
|
if (n_tokens < 0) {
|
|
16
115
|
result.resize(-n_tokens);
|
|
17
|
-
int check = llama_token_to_piece(model, token, result.data(), result.size());
|
|
116
|
+
int check = llama_token_to_piece(model, token, result.data(), result.size(), specialTokens);
|
|
18
117
|
GGML_ASSERT(check == -n_tokens);
|
|
19
|
-
}
|
|
20
|
-
else {
|
|
118
|
+
} else {
|
|
21
119
|
result.resize(n_tokens);
|
|
22
120
|
}
|
|
23
121
|
|
|
24
122
|
return std::string(result.data(), result.size());
|
|
25
123
|
}
|
|
26
124
|
|
|
125
|
+
#ifdef GPU_INFO_USE_CUDA
|
|
126
|
+
void logCudaError(const char* message) {
|
|
127
|
+
addonLlamaCppLogCallback(GGML_LOG_LEVEL_ERROR, (std::string("CUDA error: ") + std::string(message)).c_str(), nullptr);
|
|
128
|
+
}
|
|
129
|
+
#endif
|
|
130
|
+
#ifdef GPU_INFO_USE_VULKAN
|
|
131
|
+
void logVulkanWarning(const char* message) {
|
|
132
|
+
addonLlamaCppLogCallback(GGML_LOG_LEVEL_WARN, (std::string("Vulkan warning: ") + std::string(message)).c_str(), nullptr);
|
|
133
|
+
}
|
|
134
|
+
#endif
|
|
135
|
+
|
|
136
|
+
Napi::Value getGpuVramInfo(const Napi::CallbackInfo& info) {
|
|
137
|
+
uint64_t total = 0;
|
|
138
|
+
uint64_t used = 0;
|
|
139
|
+
|
|
140
|
+
#ifdef GPU_INFO_USE_CUDA
|
|
141
|
+
size_t cudaDeviceTotal = 0;
|
|
142
|
+
size_t cudaDeviceUsed = 0;
|
|
143
|
+
bool cudeGetInfoSuccess = gpuInfoGetTotalCudaDevicesInfo(&cudaDeviceTotal, &cudaDeviceUsed, logCudaError);
|
|
144
|
+
|
|
145
|
+
if (cudeGetInfoSuccess) {
|
|
146
|
+
total += cudaDeviceTotal;
|
|
147
|
+
used += cudaDeviceUsed;
|
|
148
|
+
}
|
|
149
|
+
#endif
|
|
150
|
+
|
|
151
|
+
#ifdef GPU_INFO_USE_VULKAN
|
|
152
|
+
uint64_t vulkanDeviceTotal = 0;
|
|
153
|
+
uint64_t vulkanDeviceUsed = 0;
|
|
154
|
+
const bool vulkanDeviceSupportsMemoryBudgetExtension = gpuInfoGetTotalVulkanDevicesInfo(&vulkanDeviceTotal, &vulkanDeviceUsed, logVulkanWarning);
|
|
155
|
+
|
|
156
|
+
if (vulkanDeviceSupportsMemoryBudgetExtension) {
|
|
157
|
+
total += vulkanDeviceTotal;
|
|
158
|
+
used += vulkanDeviceUsed;
|
|
159
|
+
}
|
|
160
|
+
#endif
|
|
161
|
+
|
|
162
|
+
#ifdef GPU_INFO_USE_METAL
|
|
163
|
+
uint64_t metalDeviceTotal = 0;
|
|
164
|
+
uint64_t metalDeviceUsed = 0;
|
|
165
|
+
getMetalGpuInfo(&metalDeviceTotal, &metalDeviceUsed);
|
|
166
|
+
|
|
167
|
+
total += metalDeviceTotal;
|
|
168
|
+
used += metalDeviceUsed;
|
|
169
|
+
#endif
|
|
170
|
+
|
|
171
|
+
Napi::Object result = Napi::Object::New(info.Env());
|
|
172
|
+
result.Set("total", Napi::Number::From(info.Env(), total));
|
|
173
|
+
result.Set("used", Napi::Number::From(info.Env(), used));
|
|
174
|
+
|
|
175
|
+
return result;
|
|
176
|
+
}
|
|
177
|
+
|
|
178
|
+
Napi::Value getGpuDeviceInfo(const Napi::CallbackInfo& info) {
|
|
179
|
+
std::vector<std::string> deviceNames;
|
|
180
|
+
|
|
181
|
+
#ifdef GPU_INFO_USE_CUDA
|
|
182
|
+
gpuInfoGetCudaDeviceNames(&deviceNames, logCudaError);
|
|
183
|
+
#endif
|
|
184
|
+
|
|
185
|
+
#ifdef GPU_INFO_USE_VULKAN
|
|
186
|
+
gpuInfoGetVulkanDeviceNames(&deviceNames, logVulkanWarning);
|
|
187
|
+
#endif
|
|
188
|
+
|
|
189
|
+
#ifdef GPU_INFO_USE_METAL
|
|
190
|
+
getMetalGpuDeviceNames(&deviceNames);
|
|
191
|
+
#endif
|
|
192
|
+
|
|
193
|
+
Napi::Object result = Napi::Object::New(info.Env());
|
|
194
|
+
|
|
195
|
+
Napi::Array deviceNamesNapiArray = Napi::Array::New(info.Env(), deviceNames.size());
|
|
196
|
+
for (size_t i = 0; i < deviceNames.size(); ++i) {
|
|
197
|
+
deviceNamesNapiArray[i] = Napi::String::New(info.Env(), deviceNames[i]);
|
|
198
|
+
}
|
|
199
|
+
result.Set("deviceNames", deviceNamesNapiArray);
|
|
200
|
+
|
|
201
|
+
return result;
|
|
202
|
+
}
|
|
203
|
+
|
|
204
|
+
Napi::Value getGpuType(const Napi::CallbackInfo& info) {
|
|
205
|
+
#ifdef GPU_INFO_USE_CUDA
|
|
206
|
+
return Napi::String::New(info.Env(), "cuda");
|
|
207
|
+
#endif
|
|
208
|
+
|
|
209
|
+
#ifdef GPU_INFO_USE_VULKAN
|
|
210
|
+
return Napi::String::New(info.Env(), "vulkan");
|
|
211
|
+
#endif
|
|
212
|
+
|
|
213
|
+
#ifdef GPU_INFO_USE_METAL
|
|
214
|
+
return Napi::String::New(info.Env(), "metal");
|
|
215
|
+
#endif
|
|
216
|
+
|
|
217
|
+
return info.Env().Undefined();
|
|
218
|
+
}
|
|
219
|
+
|
|
220
|
+
static Napi::Value getNapiToken(const Napi::CallbackInfo& info, llama_model* model, llama_token token) {
|
|
221
|
+
if (token < 0) {
|
|
222
|
+
return Napi::Number::From(info.Env(), -1);
|
|
223
|
+
}
|
|
224
|
+
|
|
225
|
+
auto tokenType = llama_token_get_type(model, token);
|
|
226
|
+
|
|
227
|
+
if (tokenType == LLAMA_TOKEN_TYPE_UNDEFINED || tokenType == LLAMA_TOKEN_TYPE_UNKNOWN) {
|
|
228
|
+
return Napi::Number::From(info.Env(), -1);
|
|
229
|
+
}
|
|
230
|
+
|
|
231
|
+
return Napi::Number::From(info.Env(), token);
|
|
232
|
+
}
|
|
233
|
+
|
|
234
|
+
static Napi::Value getNapiControlToken(const Napi::CallbackInfo& info, llama_model* model, llama_token token) {
|
|
235
|
+
if (token < 0) {
|
|
236
|
+
return Napi::Number::From(info.Env(), -1);
|
|
237
|
+
}
|
|
238
|
+
|
|
239
|
+
auto tokenType = llama_token_get_type(model, token);
|
|
240
|
+
|
|
241
|
+
if (tokenType != LLAMA_TOKEN_TYPE_CONTROL && tokenType != LLAMA_TOKEN_TYPE_USER_DEFINED) {
|
|
242
|
+
return Napi::Number::From(info.Env(), -1);
|
|
243
|
+
}
|
|
244
|
+
|
|
245
|
+
return Napi::Number::From(info.Env(), token);
|
|
246
|
+
}
|
|
247
|
+
|
|
248
|
+
static bool llamaModelParamsProgressCallback(float progress, void * user_data);
|
|
249
|
+
|
|
27
250
|
class AddonModel : public Napi::ObjectWrap<AddonModel> {
|
|
28
251
|
public:
|
|
29
252
|
llama_model_params model_params;
|
|
30
253
|
llama_model* model;
|
|
254
|
+
uint64_t loadedModelSize = 0;
|
|
255
|
+
Napi::Reference<Napi::Object> addonExportsRef;
|
|
256
|
+
bool hasAddonExportsRef = false;
|
|
257
|
+
|
|
258
|
+
std::string modelPath;
|
|
259
|
+
bool modelLoaded = false;
|
|
260
|
+
bool abortModelLoad = false;
|
|
261
|
+
bool model_load_stopped = false;
|
|
262
|
+
float rawModelLoadPercentage = 0;
|
|
263
|
+
unsigned modelLoadPercentage = 0;
|
|
264
|
+
AddonThreadSafeProgressEventCallbackFunction addonThreadSafeOnLoadProgressEventCallback;
|
|
265
|
+
bool onLoadProgressEventCallbackSet = false;
|
|
266
|
+
bool hasLoadAbortSignal = false;
|
|
267
|
+
|
|
31
268
|
bool disposed = false;
|
|
32
269
|
|
|
33
270
|
AddonModel(const Napi::CallbackInfo& info) : Napi::ObjectWrap<AddonModel>(info) {
|
|
34
271
|
model_params = llama_model_default_params();
|
|
35
272
|
|
|
36
273
|
// Get the model path
|
|
37
|
-
|
|
274
|
+
modelPath = info[0].As<Napi::String>().Utf8Value();
|
|
38
275
|
|
|
39
276
|
if (info.Length() > 1 && info[1].IsObject()) {
|
|
40
277
|
Napi::Object options = info[1].As<Napi::Object>();
|
|
41
278
|
|
|
279
|
+
if (options.Has("addonExports")) {
|
|
280
|
+
addonExportsRef = Napi::Persistent(options.Get("addonExports").As<Napi::Object>());
|
|
281
|
+
hasAddonExportsRef = true;
|
|
282
|
+
}
|
|
283
|
+
|
|
42
284
|
if (options.Has("gpuLayers")) {
|
|
43
285
|
model_params.n_gpu_layers = options.Get("gpuLayers").As<Napi::Number>().Int32Value();
|
|
44
286
|
}
|
|
@@ -54,14 +296,41 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
|
|
|
54
296
|
if (options.Has("useMlock")) {
|
|
55
297
|
model_params.use_mlock = options.Get("useMlock").As<Napi::Boolean>().Value();
|
|
56
298
|
}
|
|
57
|
-
}
|
|
58
299
|
|
|
59
|
-
|
|
60
|
-
|
|
300
|
+
if (options.Has("checkTensors")) {
|
|
301
|
+
model_params.check_tensors = options.Get("checkTensors").As<Napi::Boolean>().Value();
|
|
302
|
+
}
|
|
303
|
+
|
|
304
|
+
if (options.Has("onLoadProgress")) {
|
|
305
|
+
auto onLoadProgressJSCallback = options.Get("onLoadProgress").As<Napi::Function>();
|
|
306
|
+
if (onLoadProgressJSCallback.IsFunction()) {
|
|
307
|
+
AddonThreadSafeProgressCallbackFunctionContext* context = new Napi::Reference<Napi::Value>(Napi::Persistent(info.This()));
|
|
308
|
+
addonThreadSafeOnLoadProgressEventCallback = AddonThreadSafeProgressEventCallbackFunction::New(
|
|
309
|
+
info.Env(),
|
|
310
|
+
onLoadProgressJSCallback,
|
|
311
|
+
"onLoadProgressCallback",
|
|
312
|
+
0,
|
|
313
|
+
1,
|
|
314
|
+
context,
|
|
315
|
+
[](Napi::Env, AddonModel* addonModel, AddonThreadSafeProgressCallbackFunctionContext* ctx) {
|
|
316
|
+
addonModel->onLoadProgressEventCallbackSet = false;
|
|
317
|
+
|
|
318
|
+
delete ctx;
|
|
319
|
+
},
|
|
320
|
+
this
|
|
321
|
+
);
|
|
322
|
+
onLoadProgressEventCallbackSet = true;
|
|
323
|
+
}
|
|
324
|
+
}
|
|
61
325
|
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
326
|
+
if (options.Has("hasLoadAbortSignal")) {
|
|
327
|
+
hasLoadAbortSignal = options.Get("hasLoadAbortSignal").As<Napi::Boolean>().Value();
|
|
328
|
+
}
|
|
329
|
+
|
|
330
|
+
if (onLoadProgressEventCallbackSet || hasLoadAbortSignal) {
|
|
331
|
+
model_params.progress_callback_user_data = &(*this);
|
|
332
|
+
model_params.progress_callback = llamaModelParamsProgressCallback;
|
|
333
|
+
}
|
|
65
334
|
}
|
|
66
335
|
}
|
|
67
336
|
|
|
@@ -74,23 +343,32 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
|
|
|
74
343
|
return;
|
|
75
344
|
}
|
|
76
345
|
|
|
77
|
-
llama_free_model(model);
|
|
78
346
|
disposed = true;
|
|
79
|
-
|
|
347
|
+
if (modelLoaded) {
|
|
348
|
+
modelLoaded = false;
|
|
349
|
+
llama_free_model(model);
|
|
80
350
|
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
return info.Env().Undefined();
|
|
351
|
+
adjustNapiExternalMemorySubtract(Env(), loadedModelSize);
|
|
352
|
+
loadedModelSize = 0;
|
|
84
353
|
}
|
|
85
354
|
|
|
86
|
-
|
|
355
|
+
if (hasAddonExportsRef) {
|
|
356
|
+
addonExportsRef.Unref();
|
|
357
|
+
hasAddonExportsRef = false;
|
|
358
|
+
}
|
|
359
|
+
}
|
|
87
360
|
|
|
361
|
+
Napi::Value Init(const Napi::CallbackInfo& info);
|
|
362
|
+
Napi::Value LoadLora(const Napi::CallbackInfo& info);
|
|
363
|
+
Napi::Value AbortActiveModelLoad(const Napi::CallbackInfo& info) {
|
|
364
|
+
abortModelLoad = true;
|
|
88
365
|
return info.Env().Undefined();
|
|
89
366
|
}
|
|
367
|
+
Napi::Value Dispose(const Napi::CallbackInfo& info);
|
|
90
368
|
|
|
91
369
|
Napi::Value Tokenize(const Napi::CallbackInfo& info) {
|
|
92
370
|
if (disposed) {
|
|
93
|
-
Napi::Error::New(info.Env(), "
|
|
371
|
+
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
|
|
94
372
|
return info.Env().Undefined();
|
|
95
373
|
}
|
|
96
374
|
|
|
@@ -108,18 +386,21 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
|
|
|
108
386
|
}
|
|
109
387
|
Napi::Value Detokenize(const Napi::CallbackInfo& info) {
|
|
110
388
|
if (disposed) {
|
|
111
|
-
Napi::Error::New(info.Env(), "
|
|
389
|
+
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
|
|
112
390
|
return info.Env().Undefined();
|
|
113
391
|
}
|
|
114
392
|
|
|
115
393
|
Napi::Uint32Array tokens = info[0].As<Napi::Uint32Array>();
|
|
394
|
+
bool decodeSpecialTokens = info.Length() > 0
|
|
395
|
+
? info[1].As<Napi::Boolean>().Value()
|
|
396
|
+
: false;
|
|
116
397
|
|
|
117
398
|
// Create a stringstream for accumulating the decoded string.
|
|
118
399
|
std::stringstream ss;
|
|
119
400
|
|
|
120
401
|
// Decode each token and accumulate the result.
|
|
121
402
|
for (size_t i = 0; i < tokens.ElementLength(); i++) {
|
|
122
|
-
const std::string piece = addon_model_token_to_piece(model, (llama_token)tokens[i]);
|
|
403
|
+
const std::string piece = addon_model_token_to_piece(model, (llama_token)tokens[i], decodeSpecialTokens);
|
|
123
404
|
|
|
124
405
|
if (piece.empty()) {
|
|
125
406
|
continue;
|
|
@@ -133,16 +414,25 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
|
|
|
133
414
|
|
|
134
415
|
Napi::Value GetTrainContextSize(const Napi::CallbackInfo& info) {
|
|
135
416
|
if (disposed) {
|
|
136
|
-
Napi::Error::New(info.Env(), "
|
|
417
|
+
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
|
|
137
418
|
return info.Env().Undefined();
|
|
138
419
|
}
|
|
139
420
|
|
|
140
421
|
return Napi::Number::From(info.Env(), llama_n_ctx_train(model));
|
|
141
422
|
}
|
|
142
423
|
|
|
424
|
+
Napi::Value GetEmbeddingVectorSize(const Napi::CallbackInfo& info) {
|
|
425
|
+
if (disposed) {
|
|
426
|
+
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
|
|
427
|
+
return info.Env().Undefined();
|
|
428
|
+
}
|
|
429
|
+
|
|
430
|
+
return Napi::Number::From(info.Env(), llama_n_embd(model));
|
|
431
|
+
}
|
|
432
|
+
|
|
143
433
|
Napi::Value GetTotalSize(const Napi::CallbackInfo& info) {
|
|
144
434
|
if (disposed) {
|
|
145
|
-
Napi::Error::New(info.Env(), "
|
|
435
|
+
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
|
|
146
436
|
return info.Env().Undefined();
|
|
147
437
|
}
|
|
148
438
|
|
|
@@ -151,7 +441,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
|
|
|
151
441
|
|
|
152
442
|
Napi::Value GetTotalParameters(const Napi::CallbackInfo& info) {
|
|
153
443
|
if (disposed) {
|
|
154
|
-
Napi::Error::New(info.Env(), "
|
|
444
|
+
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
|
|
155
445
|
return info.Env().Undefined();
|
|
156
446
|
}
|
|
157
447
|
|
|
@@ -160,7 +450,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
|
|
|
160
450
|
|
|
161
451
|
Napi::Value GetModelDescription(const Napi::CallbackInfo& info) {
|
|
162
452
|
if (disposed) {
|
|
163
|
-
Napi::Error::New(info.Env(), "
|
|
453
|
+
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
|
|
164
454
|
return info.Env().Undefined();
|
|
165
455
|
}
|
|
166
456
|
|
|
@@ -172,63 +462,63 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
|
|
|
172
462
|
|
|
173
463
|
Napi::Value TokenBos(const Napi::CallbackInfo& info) {
|
|
174
464
|
if (disposed) {
|
|
175
|
-
Napi::Error::New(info.Env(), "
|
|
465
|
+
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
|
|
176
466
|
return info.Env().Undefined();
|
|
177
467
|
}
|
|
178
468
|
|
|
179
|
-
return
|
|
469
|
+
return getNapiControlToken(info, model, llama_token_bos(model));
|
|
180
470
|
}
|
|
181
471
|
Napi::Value TokenEos(const Napi::CallbackInfo& info) {
|
|
182
472
|
if (disposed) {
|
|
183
|
-
Napi::Error::New(info.Env(), "
|
|
473
|
+
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
|
|
184
474
|
return info.Env().Undefined();
|
|
185
475
|
}
|
|
186
476
|
|
|
187
|
-
return
|
|
477
|
+
return getNapiControlToken(info, model, llama_token_eos(model));
|
|
188
478
|
}
|
|
189
479
|
Napi::Value TokenNl(const Napi::CallbackInfo& info) {
|
|
190
480
|
if (disposed) {
|
|
191
|
-
Napi::Error::New(info.Env(), "
|
|
481
|
+
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
|
|
192
482
|
return info.Env().Undefined();
|
|
193
483
|
}
|
|
194
484
|
|
|
195
|
-
return
|
|
485
|
+
return getNapiToken(info, model, llama_token_nl(model));
|
|
196
486
|
}
|
|
197
487
|
Napi::Value PrefixToken(const Napi::CallbackInfo& info) {
|
|
198
488
|
if (disposed) {
|
|
199
|
-
Napi::Error::New(info.Env(), "
|
|
489
|
+
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
|
|
200
490
|
return info.Env().Undefined();
|
|
201
491
|
}
|
|
202
492
|
|
|
203
|
-
return
|
|
493
|
+
return getNapiControlToken(info, model, llama_token_prefix(model));
|
|
204
494
|
}
|
|
205
495
|
Napi::Value MiddleToken(const Napi::CallbackInfo& info) {
|
|
206
496
|
if (disposed) {
|
|
207
|
-
Napi::Error::New(info.Env(), "
|
|
497
|
+
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
|
|
208
498
|
return info.Env().Undefined();
|
|
209
499
|
}
|
|
210
500
|
|
|
211
|
-
return
|
|
501
|
+
return getNapiControlToken(info, model, llama_token_middle(model));
|
|
212
502
|
}
|
|
213
503
|
Napi::Value SuffixToken(const Napi::CallbackInfo& info) {
|
|
214
504
|
if (disposed) {
|
|
215
|
-
Napi::Error::New(info.Env(), "
|
|
505
|
+
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
|
|
216
506
|
return info.Env().Undefined();
|
|
217
507
|
}
|
|
218
508
|
|
|
219
|
-
return
|
|
509
|
+
return getNapiControlToken(info, model, llama_token_suffix(model));
|
|
220
510
|
}
|
|
221
511
|
Napi::Value EotToken(const Napi::CallbackInfo& info) {
|
|
222
512
|
if (disposed) {
|
|
223
|
-
Napi::Error::New(info.Env(), "
|
|
513
|
+
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
|
|
224
514
|
return info.Env().Undefined();
|
|
225
515
|
}
|
|
226
516
|
|
|
227
|
-
return
|
|
517
|
+
return getNapiControlToken(info, model, llama_token_eot(model));
|
|
228
518
|
}
|
|
229
519
|
Napi::Value GetTokenString(const Napi::CallbackInfo& info) {
|
|
230
520
|
if (disposed) {
|
|
231
|
-
Napi::Error::New(info.Env(), "
|
|
521
|
+
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
|
|
232
522
|
return info.Env().Undefined();
|
|
233
523
|
}
|
|
234
524
|
|
|
@@ -245,6 +535,57 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
|
|
|
245
535
|
return Napi::String::New(info.Env(), ss.str());
|
|
246
536
|
}
|
|
247
537
|
|
|
538
|
+
Napi::Value GetTokenType(const Napi::CallbackInfo& info) {
|
|
539
|
+
if (disposed) {
|
|
540
|
+
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
|
|
541
|
+
return info.Env().Undefined();
|
|
542
|
+
}
|
|
543
|
+
|
|
544
|
+
if (info[0].IsNumber() == false) {
|
|
545
|
+
return Napi::Number::From(info.Env(), int32_t(LLAMA_TOKEN_TYPE_UNDEFINED));
|
|
546
|
+
}
|
|
547
|
+
|
|
548
|
+
int token = info[0].As<Napi::Number>().Int32Value();
|
|
549
|
+
auto tokenType = llama_token_get_type(model, token);
|
|
550
|
+
|
|
551
|
+
return Napi::Number::From(info.Env(), int32_t(tokenType));
|
|
552
|
+
}
|
|
553
|
+
Napi::Value IsEogToken(const Napi::CallbackInfo& info) {
|
|
554
|
+
if (disposed) {
|
|
555
|
+
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
|
|
556
|
+
return info.Env().Undefined();
|
|
557
|
+
}
|
|
558
|
+
|
|
559
|
+
if (info[0].IsNumber() == false) {
|
|
560
|
+
return Napi::Boolean::New(info.Env(), false);
|
|
561
|
+
}
|
|
562
|
+
|
|
563
|
+
int token = info[0].As<Napi::Number>().Int32Value();
|
|
564
|
+
|
|
565
|
+
return Napi::Boolean::New(info.Env(), llama_token_is_eog(model, token));
|
|
566
|
+
}
|
|
567
|
+
Napi::Value GetVocabularyType(const Napi::CallbackInfo& info) {
|
|
568
|
+
if (disposed) {
|
|
569
|
+
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
|
|
570
|
+
return info.Env().Undefined();
|
|
571
|
+
}
|
|
572
|
+
|
|
573
|
+
auto vocabularyType = llama_vocab_type(model);
|
|
574
|
+
|
|
575
|
+
return Napi::Number::From(info.Env(), int32_t(vocabularyType));
|
|
576
|
+
}
|
|
577
|
+
Napi::Value ShouldPrependBosToken(const Napi::CallbackInfo& info) {
|
|
578
|
+
const int addBos = llama_add_bos_token(model);
|
|
579
|
+
|
|
580
|
+
bool shouldPrependBos = addBos != -1 ? bool(addBos) : (llama_vocab_type(model) == LLAMA_VOCAB_TYPE_SPM);
|
|
581
|
+
|
|
582
|
+
return Napi::Boolean::New(info.Env(), shouldPrependBos);
|
|
583
|
+
}
|
|
584
|
+
|
|
585
|
+
Napi::Value GetModelSize(const Napi::CallbackInfo& info) {
|
|
586
|
+
return Napi::Number::From(info.Env(), llama_model_size(model));
|
|
587
|
+
}
|
|
588
|
+
|
|
248
589
|
static void init(Napi::Object exports) {
|
|
249
590
|
exports.Set(
|
|
250
591
|
"AddonModel",
|
|
@@ -252,9 +593,13 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
|
|
|
252
593
|
exports.Env(),
|
|
253
594
|
"AddonModel",
|
|
254
595
|
{
|
|
596
|
+
InstanceMethod("init", &AddonModel::Init),
|
|
597
|
+
InstanceMethod("loadLora", &AddonModel::LoadLora),
|
|
598
|
+
InstanceMethod("abortActiveModelLoad", &AddonModel::AbortActiveModelLoad),
|
|
255
599
|
InstanceMethod("tokenize", &AddonModel::Tokenize),
|
|
256
600
|
InstanceMethod("detokenize", &AddonModel::Detokenize),
|
|
257
601
|
InstanceMethod("getTrainContextSize", &AddonModel::GetTrainContextSize),
|
|
602
|
+
InstanceMethod("getEmbeddingVectorSize", &AddonModel::GetEmbeddingVectorSize),
|
|
258
603
|
InstanceMethod("getTotalSize", &AddonModel::GetTotalSize),
|
|
259
604
|
InstanceMethod("getTotalParameters", &AddonModel::GetTotalParameters),
|
|
260
605
|
InstanceMethod("getModelDescription", &AddonModel::GetModelDescription),
|
|
@@ -266,16 +611,260 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
|
|
|
266
611
|
InstanceMethod("suffixToken", &AddonModel::SuffixToken),
|
|
267
612
|
InstanceMethod("eotToken", &AddonModel::EotToken),
|
|
268
613
|
InstanceMethod("getTokenString", &AddonModel::GetTokenString),
|
|
269
|
-
InstanceMethod("
|
|
614
|
+
InstanceMethod("getTokenType", &AddonModel::GetTokenType),
|
|
615
|
+
InstanceMethod("isEogToken", &AddonModel::IsEogToken),
|
|
616
|
+
InstanceMethod("getVocabularyType", &AddonModel::GetVocabularyType),
|
|
617
|
+
InstanceMethod("shouldPrependBosToken", &AddonModel::ShouldPrependBosToken),
|
|
618
|
+
InstanceMethod("getModelSize", &AddonModel::GetModelSize),
|
|
619
|
+
InstanceMethod("dispose", &AddonModel::Dispose),
|
|
270
620
|
}
|
|
271
621
|
)
|
|
272
622
|
);
|
|
273
623
|
}
|
|
274
624
|
};
|
|
275
625
|
|
|
// Progress hook handed to llama.cpp during model loading.
// `user_data` is the owning AddonModel. Returning false aborts the load,
// which happens when the JS side set `abortModelLoad`.
static bool llamaModelParamsProgressCallback(float progress, void * user_data) {
    AddonModel* addonModel = (AddonModel *) user_data;
    unsigned percentage = (unsigned) (100 * progress);

    // Mirror llama.cpp's original "dots" progress log, one dot per whole percent.
    if (percentage > addonModel->modelLoadPercentage) {
        addonModel->modelLoadPercentage = percentage;

        // original llama.cpp logs
        addonLlamaCppLogCallback(GGML_LOG_LEVEL_INFO, ".", nullptr);
        if (percentage >= 100) {
            addonLlamaCppLogCallback(GGML_LOG_LEVEL_INFO, "\n", nullptr);
        }
    }

    // Forward the finer-grained float progress to the JS callback, if one is registered.
    if (progress > addonModel->rawModelLoadPercentage) {
        addonModel->rawModelLoadPercentage = progress;

        if (addonModel->onLoadProgressEventCallbackSet) {
            // Heap-allocate the payload: the thread-safe call is asynchronous and
            // outlives this stack frame.
            addon_progress_event* data = new addon_progress_event {
                progress
            };

            auto status = addonModel->addonThreadSafeOnLoadProgressEventCallback.NonBlockingCall(data);

            // NonBlockingCall did not queue the payload (e.g. queue full/closing),
            // so it will never reach the finalizer — free it here to avoid a leak.
            if (status != napi_ok) {
                delete data;
            }
        }
    }

    return !(addonModel->abortModelLoad);
}
659
|
+
class AddonModelLoadModelWorker : public Napi::AsyncWorker {
|
|
660
|
+
public:
|
|
661
|
+
AddonModel* model;
|
|
662
|
+
|
|
663
|
+
AddonModelLoadModelWorker(const Napi::Env& env, AddonModel* model)
|
|
664
|
+
: Napi::AsyncWorker(env, "AddonModelLoadModelWorker"),
|
|
665
|
+
model(model),
|
|
666
|
+
deferred(Napi::Promise::Deferred::New(env)) {
|
|
667
|
+
model->Ref();
|
|
668
|
+
}
|
|
669
|
+
~AddonModelLoadModelWorker() {
|
|
670
|
+
model->Unref();
|
|
671
|
+
}
|
|
672
|
+
|
|
673
|
+
Napi::Promise GetPromise() {
|
|
674
|
+
return deferred.Promise();
|
|
675
|
+
}
|
|
676
|
+
|
|
677
|
+
protected:
|
|
678
|
+
Napi::Promise::Deferred deferred;
|
|
679
|
+
|
|
680
|
+
void Execute() {
|
|
681
|
+
try {
|
|
682
|
+
model->model = llama_load_model_from_file(model->modelPath.c_str(), model->model_params);
|
|
683
|
+
|
|
684
|
+
model->modelLoaded = model->model != nullptr && model->model != NULL;
|
|
685
|
+
} catch (const std::exception& e) {
|
|
686
|
+
SetError(e.what());
|
|
687
|
+
} catch(...) {
|
|
688
|
+
SetError("Unknown error when calling \"llama_load_model_from_file\"");
|
|
689
|
+
}
|
|
690
|
+
}
|
|
691
|
+
void OnOK() {
|
|
692
|
+
if (model->modelLoaded) {
|
|
693
|
+
uint64_t modelSize = llama_model_size(model->model);
|
|
694
|
+
adjustNapiExternalMemoryAdd(Env(), modelSize);
|
|
695
|
+
model->loadedModelSize = modelSize;
|
|
696
|
+
}
|
|
697
|
+
|
|
698
|
+
deferred.Resolve(Napi::Boolean::New(Env(), model->modelLoaded));
|
|
699
|
+
if (model->onLoadProgressEventCallbackSet) {
|
|
700
|
+
model->addonThreadSafeOnLoadProgressEventCallback.Release();
|
|
701
|
+
}
|
|
702
|
+
}
|
|
703
|
+
void OnError(const Napi::Error& err) {
|
|
704
|
+
deferred.Reject(err.Value());
|
|
705
|
+
}
|
|
706
|
+
};
|
|
707
|
+
class AddonModelUnloadModelWorker : public Napi::AsyncWorker {
|
|
708
|
+
public:
|
|
709
|
+
AddonModel* model;
|
|
710
|
+
|
|
711
|
+
AddonModelUnloadModelWorker(const Napi::Env& env, AddonModel* model)
|
|
712
|
+
: Napi::AsyncWorker(env, "AddonModelUnloadModelWorker"),
|
|
713
|
+
model(model),
|
|
714
|
+
deferred(Napi::Promise::Deferred::New(env)) {
|
|
715
|
+
model->Ref();
|
|
716
|
+
}
|
|
717
|
+
~AddonModelUnloadModelWorker() {
|
|
718
|
+
model->Unref();
|
|
719
|
+
}
|
|
720
|
+
|
|
721
|
+
Napi::Promise GetPromise() {
|
|
722
|
+
return deferred.Promise();
|
|
723
|
+
}
|
|
724
|
+
|
|
725
|
+
protected:
|
|
726
|
+
Napi::Promise::Deferred deferred;
|
|
727
|
+
|
|
728
|
+
void Execute() {
|
|
729
|
+
try {
|
|
730
|
+
llama_free_model(model->model);
|
|
731
|
+
model->modelLoaded = false;
|
|
732
|
+
|
|
733
|
+
model->dispose();
|
|
734
|
+
} catch (const std::exception& e) {
|
|
735
|
+
SetError(e.what());
|
|
736
|
+
} catch(...) {
|
|
737
|
+
SetError("Unknown error when calling \"llama_free_model\"");
|
|
738
|
+
}
|
|
739
|
+
}
|
|
740
|
+
void OnOK() {
|
|
741
|
+
adjustNapiExternalMemorySubtract(Env(), model->loadedModelSize);
|
|
742
|
+
model->loadedModelSize = 0;
|
|
743
|
+
|
|
744
|
+
deferred.Resolve(Env().Undefined());
|
|
745
|
+
}
|
|
746
|
+
void OnError(const Napi::Error& err) {
|
|
747
|
+
deferred.Reject(err.Value());
|
|
748
|
+
}
|
|
749
|
+
};
|
|
750
|
+
class AddonModelLoadLoraWorker : public Napi::AsyncWorker {
|
|
751
|
+
public:
|
|
752
|
+
AddonModel* model;
|
|
753
|
+
std::string loraFilePath;
|
|
754
|
+
float loraScale;
|
|
755
|
+
int32_t loraThreads;
|
|
756
|
+
std::string baseModelPath;
|
|
757
|
+
|
|
758
|
+
AddonModelLoadLoraWorker(
|
|
759
|
+
const Napi::Env& env,
|
|
760
|
+
AddonModel* model,
|
|
761
|
+
std::string loraFilePath,
|
|
762
|
+
float loraScale,
|
|
763
|
+
int32_t loraThreads,
|
|
764
|
+
std::string baseModelPath
|
|
765
|
+
)
|
|
766
|
+
: Napi::AsyncWorker(env, "AddonModelLoadLoraWorker"),
|
|
767
|
+
model(model),
|
|
768
|
+
loraFilePath(loraFilePath),
|
|
769
|
+
loraScale(loraScale),
|
|
770
|
+
loraThreads(loraThreads),
|
|
771
|
+
baseModelPath(baseModelPath),
|
|
772
|
+
deferred(Napi::Promise::Deferred::New(env)) {
|
|
773
|
+
model->Ref();
|
|
774
|
+
}
|
|
775
|
+
~AddonModelLoadLoraWorker() {
|
|
776
|
+
model->Unref();
|
|
777
|
+
}
|
|
778
|
+
|
|
779
|
+
Napi::Promise GetPromise() {
|
|
780
|
+
return deferred.Promise();
|
|
781
|
+
}
|
|
782
|
+
|
|
783
|
+
protected:
|
|
784
|
+
Napi::Promise::Deferred deferred;
|
|
785
|
+
|
|
786
|
+
void Execute() {
|
|
787
|
+
try {
|
|
788
|
+
const auto res = llama_model_apply_lora_from_file(
|
|
789
|
+
model->model,
|
|
790
|
+
loraFilePath.c_str(),
|
|
791
|
+
loraScale,
|
|
792
|
+
baseModelPath.empty() ? NULL : baseModelPath.c_str(),
|
|
793
|
+
loraThreads
|
|
794
|
+
);
|
|
795
|
+
|
|
796
|
+
if (res != 0) {
|
|
797
|
+
SetError(
|
|
798
|
+
std::string(
|
|
799
|
+
std::string("Failed to apply LoRA \"") + loraFilePath + std::string("\"") + (
|
|
800
|
+
baseModelPath.empty()
|
|
801
|
+
? std::string("")
|
|
802
|
+
: (std::string(" with base model \"") + baseModelPath + std::string("\""))
|
|
803
|
+
)
|
|
804
|
+
)
|
|
805
|
+
);
|
|
806
|
+
}
|
|
807
|
+
} catch (const std::exception& e) {
|
|
808
|
+
SetError(e.what());
|
|
809
|
+
} catch(...) {
|
|
810
|
+
SetError("Unknown error when calling \"llama_model_apply_lora_from_file\"");
|
|
811
|
+
}
|
|
812
|
+
}
|
|
813
|
+
void OnOK() {
|
|
814
|
+
deferred.Resolve(Env().Undefined());
|
|
815
|
+
}
|
|
816
|
+
void OnError(const Napi::Error& err) {
|
|
817
|
+
deferred.Reject(err.Value());
|
|
818
|
+
}
|
|
819
|
+
};
|
|
820
|
+
|
|
821
|
+
Napi::Value AddonModel::Init(const Napi::CallbackInfo& info) {
|
|
822
|
+
if (disposed) {
|
|
823
|
+
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
|
|
824
|
+
return info.Env().Undefined();
|
|
825
|
+
}
|
|
826
|
+
|
|
827
|
+
AddonModelLoadModelWorker* worker = new AddonModelLoadModelWorker(this->Env(), this);
|
|
828
|
+
worker->Queue();
|
|
829
|
+
return worker->GetPromise();
|
|
830
|
+
}
|
|
831
|
+
Napi::Value AddonModel::LoadLora(const Napi::CallbackInfo& info) {
|
|
832
|
+
std::string loraFilePath = info[0].As<Napi::String>().Utf8Value();
|
|
833
|
+
float scale = info[1].As<Napi::Number>().FloatValue();
|
|
834
|
+
int32_t threads = info[2].As<Napi::Number>().Int32Value();
|
|
835
|
+
std::string baseModelPath = (info.Length() > 3 && info[3].IsString()) ? info[3].As<Napi::String>().Utf8Value() : std::string("");
|
|
836
|
+
|
|
837
|
+
int32_t resolvedThreads = threads == 0 ? std::thread::hardware_concurrency() : threads;
|
|
838
|
+
|
|
839
|
+
AddonModelLoadLoraWorker* worker = new AddonModelLoadLoraWorker(this->Env(), this, loraFilePath, scale, threads, baseModelPath);
|
|
840
|
+
worker->Queue();
|
|
841
|
+
return worker->GetPromise();
|
|
842
|
+
}
|
|
843
|
+
Napi::Value AddonModel::Dispose(const Napi::CallbackInfo& info) {
|
|
844
|
+
if (disposed) {
|
|
845
|
+
return info.Env().Undefined();
|
|
846
|
+
}
|
|
847
|
+
|
|
848
|
+
if (modelLoaded) {
|
|
849
|
+
modelLoaded = false;
|
|
850
|
+
|
|
851
|
+
AddonModelUnloadModelWorker* worker = new AddonModelUnloadModelWorker(this->Env(), this);
|
|
852
|
+
worker->Queue();
|
|
853
|
+
return worker->GetPromise();
|
|
854
|
+
} else {
|
|
855
|
+
dispose();
|
|
856
|
+
|
|
857
|
+
Napi::Promise::Deferred deferred = Napi::Promise::Deferred::New(info.Env());
|
|
858
|
+
deferred.Resolve(info.Env().Undefined());
|
|
859
|
+
return deferred.Promise();
|
|
860
|
+
}
|
|
861
|
+
}
|
|
862
|
+
|
|
276
863
|
class AddonGrammar : public Napi::ObjectWrap<AddonGrammar> {
|
|
277
864
|
public:
|
|
278
865
|
grammar_parser::parse_state parsed_grammar;
|
|
866
|
+
Napi::Reference<Napi::Object> addonExportsRef;
|
|
867
|
+
bool hasAddonExportsRef = false;
|
|
279
868
|
|
|
280
869
|
AddonGrammar(const Napi::CallbackInfo& info) : Napi::ObjectWrap<AddonGrammar>(info) {
|
|
281
870
|
// Get the model path
|
|
@@ -285,6 +874,11 @@ class AddonGrammar : public Napi::ObjectWrap<AddonGrammar> {
|
|
|
285
874
|
if (info.Length() > 1 && info[1].IsObject()) {
|
|
286
875
|
Napi::Object options = info[1].As<Napi::Object>();
|
|
287
876
|
|
|
877
|
+
if (options.Has("addonExports")) {
|
|
878
|
+
addonExportsRef = Napi::Persistent(options.Get("addonExports").As<Napi::Object>());
|
|
879
|
+
hasAddonExportsRef = true;
|
|
880
|
+
}
|
|
881
|
+
|
|
288
882
|
if (options.Has("printGrammar")) {
|
|
289
883
|
should_print_grammar = options.Get("printGrammar").As<Napi::Boolean>().Value();
|
|
290
884
|
}
|
|
@@ -302,6 +896,13 @@ class AddonGrammar : public Napi::ObjectWrap<AddonGrammar> {
|
|
|
302
896
|
}
|
|
303
897
|
}
|
|
304
898
|
|
|
899
|
+
~AddonGrammar() {
|
|
900
|
+
if (hasAddonExportsRef) {
|
|
901
|
+
addonExportsRef.Unref();
|
|
902
|
+
hasAddonExportsRef = false;
|
|
903
|
+
}
|
|
904
|
+
}
|
|
905
|
+
|
|
305
906
|
static void init(Napi::Object exports) {
|
|
306
907
|
exports.Set("AddonGrammar", DefineClass(exports.Env(), "AddonGrammar", {}));
|
|
307
908
|
}
|
|
@@ -340,9 +941,14 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
|
|
|
340
941
|
llama_context_params context_params;
|
|
341
942
|
llama_context* ctx;
|
|
342
943
|
llama_batch batch;
|
|
944
|
+
uint64_t batchMemorySize = 0;
|
|
343
945
|
bool has_batch = false;
|
|
344
946
|
int32_t batch_n_tokens = 0;
|
|
345
947
|
int n_cur = 0;
|
|
948
|
+
|
|
949
|
+
uint64_t loadedContextMemorySize = 0;
|
|
950
|
+
bool contextLoaded = false;
|
|
951
|
+
|
|
346
952
|
bool disposed = false;
|
|
347
953
|
|
|
348
954
|
AddonContext(const Napi::CallbackInfo& info) : Napi::ObjectWrap<AddonContext>(info) {
|
|
@@ -358,7 +964,9 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
|
|
|
358
964
|
if (info.Length() > 1 && info[1].IsObject()) {
|
|
359
965
|
Napi::Object options = info[1].As<Napi::Object>();
|
|
360
966
|
|
|
361
|
-
if (options.Has("
|
|
967
|
+
if (options.Has("noSeed")) {
|
|
968
|
+
context_params.seed = time(NULL);
|
|
969
|
+
} else if (options.Has("seed")) {
|
|
362
970
|
context_params.seed = options.Get("seed").As<Napi::Number>().Uint32Value();
|
|
363
971
|
}
|
|
364
972
|
|
|
@@ -368,14 +976,15 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
|
|
|
368
976
|
|
|
369
977
|
if (options.Has("batchSize")) {
|
|
370
978
|
context_params.n_batch = options.Get("batchSize").As<Napi::Number>().Uint32Value();
|
|
979
|
+
context_params.n_ubatch = context_params.n_batch; // the batch queue is managed in the JS side, so there's no need for managing it on the C++ side
|
|
371
980
|
}
|
|
372
981
|
|
|
373
|
-
if (options.Has("
|
|
374
|
-
context_params.
|
|
982
|
+
if (options.Has("sequences")) {
|
|
983
|
+
context_params.n_seq_max = options.Get("sequences").As<Napi::Number>().Uint32Value();
|
|
375
984
|
}
|
|
376
985
|
|
|
377
|
-
if (options.Has("
|
|
378
|
-
context_params.
|
|
986
|
+
if (options.Has("embeddings")) {
|
|
987
|
+
context_params.embeddings = options.Get("embeddings").As<Napi::Boolean>().Value();
|
|
379
988
|
}
|
|
380
989
|
|
|
381
990
|
if (options.Has("threads")) {
|
|
@@ -386,9 +995,6 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
|
|
|
386
995
|
context_params.n_threads_batch = resolved_n_threads;
|
|
387
996
|
}
|
|
388
997
|
}
|
|
389
|
-
|
|
390
|
-
ctx = llama_new_context_with_model(model->model, context_params);
|
|
391
|
-
Napi::MemoryManagement::AdjustExternalMemory(Env(), llama_get_state_size(ctx));
|
|
392
998
|
}
|
|
393
999
|
~AddonContext() {
|
|
394
1000
|
dispose();
|
|
@@ -399,13 +1005,18 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
|
|
|
399
1005
|
return;
|
|
400
1006
|
}
|
|
401
1007
|
|
|
402
|
-
|
|
403
|
-
|
|
1008
|
+
disposed = true;
|
|
1009
|
+
if (contextLoaded) {
|
|
1010
|
+
contextLoaded = false;
|
|
1011
|
+
llama_free(ctx);
|
|
1012
|
+
|
|
1013
|
+
adjustNapiExternalMemorySubtract(Env(), loadedContextMemorySize);
|
|
1014
|
+
loadedContextMemorySize = 0;
|
|
1015
|
+
}
|
|
1016
|
+
|
|
404
1017
|
model->Unref();
|
|
405
1018
|
|
|
406
1019
|
disposeBatch();
|
|
407
|
-
|
|
408
|
-
disposed = true;
|
|
409
1020
|
}
|
|
410
1021
|
void disposeBatch() {
|
|
411
1022
|
if (!has_batch) {
|
|
@@ -415,16 +1026,14 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
|
|
|
415
1026
|
llama_batch_free(batch);
|
|
416
1027
|
has_batch = false;
|
|
417
1028
|
batch_n_tokens = 0;
|
|
1029
|
+
|
|
1030
|
+
adjustNapiExternalMemorySubtract(Env(), batchMemorySize);
|
|
1031
|
+
batchMemorySize = 0;
|
|
418
1032
|
}
|
|
419
|
-
Napi::Value Dispose(const Napi::CallbackInfo& info) {
|
|
420
|
-
if (disposed) {
|
|
421
|
-
return info.Env().Undefined();
|
|
422
|
-
}
|
|
423
1033
|
|
|
424
|
-
|
|
1034
|
+
Napi::Value Init(const Napi::CallbackInfo& info);
|
|
1035
|
+
Napi::Value Dispose(const Napi::CallbackInfo& info);
|
|
425
1036
|
|
|
426
|
-
return info.Env().Undefined();
|
|
427
|
-
}
|
|
428
1037
|
Napi::Value GetContextSize(const Napi::CallbackInfo& info) {
|
|
429
1038
|
if (disposed) {
|
|
430
1039
|
Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
|
|
@@ -449,6 +1058,15 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
|
|
|
449
1058
|
has_batch = true;
|
|
450
1059
|
batch_n_tokens = n_tokens;
|
|
451
1060
|
|
|
1061
|
+
uint64_t newBatchMemorySize = calculateBatchMemorySize(n_tokens, llama_n_embd(model->model), context_params.n_batch);
|
|
1062
|
+
if (newBatchMemorySize > batchMemorySize) {
|
|
1063
|
+
adjustNapiExternalMemoryAdd(Env(), newBatchMemorySize - batchMemorySize);
|
|
1064
|
+
batchMemorySize = newBatchMemorySize;
|
|
1065
|
+
} else if (newBatchMemorySize < batchMemorySize) {
|
|
1066
|
+
adjustNapiExternalMemorySubtract(Env(), batchMemorySize - newBatchMemorySize);
|
|
1067
|
+
batchMemorySize = newBatchMemorySize;
|
|
1068
|
+
}
|
|
1069
|
+
|
|
452
1070
|
return info.Env().Undefined();
|
|
453
1071
|
}
|
|
454
1072
|
Napi::Value DisposeBatch(const Napi::CallbackInfo& info) {
|
|
@@ -497,7 +1115,12 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
|
|
|
497
1115
|
|
|
498
1116
|
int32_t sequenceId = info[0].As<Napi::Number>().Int32Value();
|
|
499
1117
|
|
|
500
|
-
llama_kv_cache_seq_rm(ctx, sequenceId, -1, -1);
|
|
1118
|
+
bool result = llama_kv_cache_seq_rm(ctx, sequenceId, -1, -1);
|
|
1119
|
+
|
|
1120
|
+
if (!result) {
|
|
1121
|
+
Napi::Error::New(info.Env(), "Failed to dispose sequence").ThrowAsJavaScriptException();
|
|
1122
|
+
return info.Env().Undefined();
|
|
1123
|
+
}
|
|
501
1124
|
|
|
502
1125
|
return info.Env().Undefined();
|
|
503
1126
|
}
|
|
@@ -511,9 +1134,9 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
|
|
|
511
1134
|
int32_t startPos = info[1].As<Napi::Number>().Int32Value();
|
|
512
1135
|
int32_t endPos = info[2].As<Napi::Number>().Int32Value();
|
|
513
1136
|
|
|
514
|
-
llama_kv_cache_seq_rm(ctx, sequenceId, startPos, endPos);
|
|
1137
|
+
bool result = llama_kv_cache_seq_rm(ctx, sequenceId, startPos, endPos);
|
|
515
1138
|
|
|
516
|
-
return info.Env()
|
|
1139
|
+
return Napi::Boolean::New(info.Env(), result);
|
|
517
1140
|
}
|
|
518
1141
|
Napi::Value ShiftSequenceTokenCells(const Napi::CallbackInfo& info) {
|
|
519
1142
|
if (disposed) {
|
|
@@ -526,7 +1149,7 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
|
|
|
526
1149
|
int32_t endPos = info[2].As<Napi::Number>().Int32Value();
|
|
527
1150
|
int32_t shiftDelta = info[3].As<Napi::Number>().Int32Value();
|
|
528
1151
|
|
|
529
|
-
|
|
1152
|
+
llama_kv_cache_seq_add(ctx, sequenceId, startPos, endPos, shiftDelta);
|
|
530
1153
|
|
|
531
1154
|
return info.Env().Undefined();
|
|
532
1155
|
}
|
|
@@ -534,7 +1157,8 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
|
|
|
534
1157
|
Napi::Value SampleToken(const Napi::CallbackInfo& info);
|
|
535
1158
|
|
|
536
1159
|
Napi::Value AcceptGrammarEvaluationStateToken(const Napi::CallbackInfo& info) {
|
|
537
|
-
AddonGrammarEvaluationState* grammar_evaluation_state =
|
|
1160
|
+
AddonGrammarEvaluationState* grammar_evaluation_state =
|
|
1161
|
+
Napi::ObjectWrap<AddonGrammarEvaluationState>::Unwrap(info[0].As<Napi::Object>());
|
|
538
1162
|
llama_token tokenId = info[1].As<Napi::Number>().Int32Value();
|
|
539
1163
|
|
|
540
1164
|
if ((grammar_evaluation_state)->grammar != nullptr) {
|
|
@@ -544,6 +1168,77 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
|
|
|
544
1168
|
return info.Env().Undefined();
|
|
545
1169
|
}
|
|
546
1170
|
|
|
1171
|
+
Napi::Value CanBeNextTokenForGrammarEvaluationState(const Napi::CallbackInfo& info) {
|
|
1172
|
+
AddonGrammarEvaluationState* grammar_evaluation_state =
|
|
1173
|
+
Napi::ObjectWrap<AddonGrammarEvaluationState>::Unwrap(info[0].As<Napi::Object>());
|
|
1174
|
+
llama_token tokenId = info[1].As<Napi::Number>().Int32Value();
|
|
1175
|
+
|
|
1176
|
+
if ((grammar_evaluation_state)->grammar != nullptr) {
|
|
1177
|
+
std::vector<llama_token_data> candidates;
|
|
1178
|
+
candidates.reserve(1);
|
|
1179
|
+
candidates.emplace_back(llama_token_data { tokenId, 1, 0.0f });
|
|
1180
|
+
|
|
1181
|
+
llama_token_data_array candidates_p = { candidates.data(), candidates.size(), false };
|
|
1182
|
+
|
|
1183
|
+
llama_sample_grammar(ctx, &candidates_p, (grammar_evaluation_state)->grammar);
|
|
1184
|
+
|
|
1185
|
+
if (candidates_p.size == 0 || candidates_p.data[0].logit == -INFINITY) {
|
|
1186
|
+
return Napi::Boolean::New(info.Env(), false);
|
|
1187
|
+
}
|
|
1188
|
+
|
|
1189
|
+
return Napi::Boolean::New(info.Env(), true);
|
|
1190
|
+
}
|
|
1191
|
+
|
|
1192
|
+
return Napi::Boolean::New(info.Env(), false);
|
|
1193
|
+
}
|
|
1194
|
+
|
|
1195
|
+
Napi::Value GetEmbedding(const Napi::CallbackInfo& info) {
|
|
1196
|
+
if (disposed) {
|
|
1197
|
+
Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
|
|
1198
|
+
return info.Env().Undefined();
|
|
1199
|
+
}
|
|
1200
|
+
|
|
1201
|
+
int32_t inputTokensLength = info[0].As<Napi::Number>().Int32Value();
|
|
1202
|
+
|
|
1203
|
+
if (inputTokensLength <= 0) {
|
|
1204
|
+
Napi::Error::New(info.Env(), "Invalid input tokens length").ThrowAsJavaScriptException();
|
|
1205
|
+
return info.Env().Undefined();
|
|
1206
|
+
}
|
|
1207
|
+
|
|
1208
|
+
const int n_embd = llama_n_embd(model->model);
|
|
1209
|
+
const auto* embeddings = llama_get_embeddings_seq(ctx, 0);
|
|
1210
|
+
if (embeddings == NULL) {
|
|
1211
|
+
embeddings = llama_get_embeddings_ith(ctx, inputTokensLength - 1);
|
|
1212
|
+
|
|
1213
|
+
if (embeddings == NULL) {
|
|
1214
|
+
Napi::Error::New(info.Env(), std::string("Failed to get embeddings for token ") + std::to_string(inputTokensLength - 1)).ThrowAsJavaScriptException();
|
|
1215
|
+
return info.Env().Undefined();
|
|
1216
|
+
}
|
|
1217
|
+
}
|
|
1218
|
+
|
|
1219
|
+
Napi::Float64Array result = Napi::Float64Array::New(info.Env(), n_embd);
|
|
1220
|
+
for (size_t i = 0; i < n_embd; ++i) {
|
|
1221
|
+
result[i] = embeddings[i];
|
|
1222
|
+
}
|
|
1223
|
+
|
|
1224
|
+
return result;
|
|
1225
|
+
}
|
|
1226
|
+
|
|
1227
|
+
Napi::Value GetStateSize(const Napi::CallbackInfo& info) {
|
|
1228
|
+
if (disposed) {
|
|
1229
|
+
Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
|
|
1230
|
+
return info.Env().Undefined();
|
|
1231
|
+
}
|
|
1232
|
+
|
|
1233
|
+
return Napi::Number::From(info.Env(), llama_state_get_size(ctx));
|
|
1234
|
+
}
|
|
1235
|
+
|
|
1236
|
+
Napi::Value PrintTimings(const Napi::CallbackInfo& info) {
|
|
1237
|
+
llama_print_timings(ctx);
|
|
1238
|
+
llama_reset_timings(ctx);
|
|
1239
|
+
return info.Env().Undefined();
|
|
1240
|
+
}
|
|
1241
|
+
|
|
547
1242
|
static void init(Napi::Object exports) {
|
|
548
1243
|
exports.Set(
|
|
549
1244
|
"AddonContext",
|
|
@@ -551,6 +1246,7 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
|
|
|
551
1246
|
exports.Env(),
|
|
552
1247
|
"AddonContext",
|
|
553
1248
|
{
|
|
1249
|
+
InstanceMethod("init", &AddonContext::Init),
|
|
554
1250
|
InstanceMethod("getContextSize", &AddonContext::GetContextSize),
|
|
555
1251
|
InstanceMethod("initBatch", &AddonContext::InitBatch),
|
|
556
1252
|
InstanceMethod("addToBatch", &AddonContext::AddToBatch),
|
|
@@ -560,7 +1256,11 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
|
|
|
560
1256
|
InstanceMethod("decodeBatch", &AddonContext::DecodeBatch),
|
|
561
1257
|
InstanceMethod("sampleToken", &AddonContext::SampleToken),
|
|
562
1258
|
InstanceMethod("acceptGrammarEvaluationStateToken", &AddonContext::AcceptGrammarEvaluationStateToken),
|
|
563
|
-
InstanceMethod("
|
|
1259
|
+
InstanceMethod("canBeNextTokenForGrammarEvaluationState", &AddonContext::CanBeNextTokenForGrammarEvaluationState),
|
|
1260
|
+
InstanceMethod("getEmbedding", &AddonContext::GetEmbedding),
|
|
1261
|
+
InstanceMethod("getStateSize", &AddonContext::GetStateSize),
|
|
1262
|
+
InstanceMethod("printTimings", &AddonContext::PrintTimings),
|
|
1263
|
+
InstanceMethod("dispose", &AddonContext::Dispose),
|
|
564
1264
|
}
|
|
565
1265
|
)
|
|
566
1266
|
);
|
|
@@ -568,53 +1268,198 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
|
|
|
568
1268
|
};
|
|
569
1269
|
|
|
570
1270
|
|
|
571
|
-
class AddonContextDecodeBatchWorker : Napi::AsyncWorker
|
|
1271
|
+
class AddonContextDecodeBatchWorker : public Napi::AsyncWorker {
|
|
572
1272
|
public:
|
|
573
1273
|
AddonContext* ctx;
|
|
574
1274
|
|
|
575
|
-
AddonContextDecodeBatchWorker(const Napi::
|
|
576
|
-
: Napi::AsyncWorker(
|
|
1275
|
+
AddonContextDecodeBatchWorker(const Napi::Env& env, AddonContext* ctx)
|
|
1276
|
+
: Napi::AsyncWorker(env, "AddonContextDecodeBatchWorker"),
|
|
577
1277
|
ctx(ctx),
|
|
578
|
-
Napi::Promise::Deferred(
|
|
1278
|
+
deferred(Napi::Promise::Deferred::New(env)) {
|
|
579
1279
|
ctx->Ref();
|
|
580
1280
|
}
|
|
581
1281
|
~AddonContextDecodeBatchWorker() {
|
|
582
1282
|
ctx->Unref();
|
|
583
1283
|
}
|
|
584
|
-
|
|
585
|
-
|
|
1284
|
+
|
|
1285
|
+
Napi::Promise GetPromise() {
|
|
1286
|
+
return deferred.Promise();
|
|
1287
|
+
}
|
|
586
1288
|
|
|
587
1289
|
protected:
|
|
1290
|
+
Napi::Promise::Deferred deferred;
|
|
1291
|
+
|
|
588
1292
|
void Execute() {
|
|
589
|
-
|
|
590
|
-
|
|
1293
|
+
try {
|
|
1294
|
+
// Perform the evaluation using llama_decode.
|
|
1295
|
+
int r = llama_decode(ctx->ctx, ctx->batch);
|
|
1296
|
+
|
|
1297
|
+
if (r != 0) {
|
|
1298
|
+
if (r == 1) {
|
|
1299
|
+
SetError("could not find a KV slot for the batch (try reducing the size of the batch or increase the context)");
|
|
1300
|
+
} else {
|
|
1301
|
+
SetError("Eval has failed");
|
|
1302
|
+
}
|
|
591
1303
|
|
|
592
|
-
|
|
593
|
-
if (r == 1) {
|
|
594
|
-
SetError("could not find a KV slot for the batch (try reducing the size of the batch or increase the context)");
|
|
595
|
-
} else {
|
|
596
|
-
SetError("Eval has failed");
|
|
1304
|
+
return;
|
|
597
1305
|
}
|
|
598
1306
|
|
|
599
|
-
|
|
1307
|
+
llama_synchronize(ctx->ctx);
|
|
1308
|
+
} catch (const std::exception& e) {
|
|
1309
|
+
SetError(e.what());
|
|
1310
|
+
} catch(...) {
|
|
1311
|
+
SetError("Unknown error when calling \"llama_decode\"");
|
|
600
1312
|
}
|
|
601
1313
|
}
|
|
602
1314
|
void OnOK() {
|
|
603
|
-
|
|
604
|
-
Napi::Promise::Deferred::Resolve(env.Undefined());
|
|
1315
|
+
deferred.Resolve(Env().Undefined());
|
|
605
1316
|
}
|
|
606
1317
|
void OnError(const Napi::Error& err) {
|
|
607
|
-
|
|
1318
|
+
deferred.Reject(err.Value());
|
|
608
1319
|
}
|
|
609
1320
|
};
|
|
610
1321
|
|
|
611
1322
|
Napi::Value AddonContext::DecodeBatch(const Napi::CallbackInfo& info) {
|
|
612
|
-
AddonContextDecodeBatchWorker* worker = new AddonContextDecodeBatchWorker(info, this);
|
|
1323
|
+
AddonContextDecodeBatchWorker* worker = new AddonContextDecodeBatchWorker(info.Env(), this);
|
|
1324
|
+
worker->Queue();
|
|
1325
|
+
return worker->GetPromise();
|
|
1326
|
+
}
|
|
1327
|
+
|
|
1328
|
+
class AddonContextLoadContextWorker : public Napi::AsyncWorker {
|
|
1329
|
+
public:
|
|
1330
|
+
AddonContext* context;
|
|
1331
|
+
|
|
1332
|
+
AddonContextLoadContextWorker(const Napi::Env& env, AddonContext* context)
|
|
1333
|
+
: Napi::AsyncWorker(env, "AddonContextLoadContextWorker"),
|
|
1334
|
+
context(context),
|
|
1335
|
+
deferred(Napi::Promise::Deferred::New(env)) {
|
|
1336
|
+
context->Ref();
|
|
1337
|
+
}
|
|
1338
|
+
~AddonContextLoadContextWorker() {
|
|
1339
|
+
context->Unref();
|
|
1340
|
+
}
|
|
1341
|
+
|
|
1342
|
+
Napi::Promise GetPromise() {
|
|
1343
|
+
return deferred.Promise();
|
|
1344
|
+
}
|
|
1345
|
+
|
|
1346
|
+
protected:
|
|
1347
|
+
Napi::Promise::Deferred deferred;
|
|
1348
|
+
|
|
1349
|
+
void Execute() {
|
|
1350
|
+
try {
|
|
1351
|
+
context->ctx = llama_new_context_with_model(context->model->model, context->context_params);
|
|
1352
|
+
|
|
1353
|
+
context->contextLoaded = context->ctx != nullptr && context->ctx != NULL;
|
|
1354
|
+
} catch (const std::exception& e) {
|
|
1355
|
+
SetError(e.what());
|
|
1356
|
+
} catch(...) {
|
|
1357
|
+
SetError("Unknown error when calling \"llama_new_context_with_model\"");
|
|
1358
|
+
}
|
|
1359
|
+
}
|
|
1360
|
+
void OnOK() {
|
|
1361
|
+
if (context->contextLoaded) {
|
|
1362
|
+
uint64_t contextMemorySize = llama_state_get_size(context->ctx);
|
|
1363
|
+
adjustNapiExternalMemoryAdd(Env(), contextMemorySize);
|
|
1364
|
+
context->loadedContextMemorySize = contextMemorySize;
|
|
1365
|
+
}
|
|
1366
|
+
|
|
1367
|
+
deferred.Resolve(Napi::Boolean::New(Env(), context->contextLoaded));
|
|
1368
|
+
}
|
|
1369
|
+
void OnError(const Napi::Error& err) {
|
|
1370
|
+
deferred.Reject(err.Value());
|
|
1371
|
+
}
|
|
1372
|
+
};
|
|
1373
|
+
class AddonContextUnloadContextWorker : public Napi::AsyncWorker {
|
|
1374
|
+
public:
|
|
1375
|
+
AddonContext* context;
|
|
1376
|
+
|
|
1377
|
+
AddonContextUnloadContextWorker(const Napi::Env& env, AddonContext* context)
|
|
1378
|
+
: Napi::AsyncWorker(env, "AddonContextUnloadContextWorker"),
|
|
1379
|
+
context(context),
|
|
1380
|
+
deferred(Napi::Promise::Deferred::New(env)) {
|
|
1381
|
+
context->Ref();
|
|
1382
|
+
}
|
|
1383
|
+
~AddonContextUnloadContextWorker() {
|
|
1384
|
+
context->Unref();
|
|
1385
|
+
}
|
|
1386
|
+
|
|
1387
|
+
Napi::Promise GetPromise() {
|
|
1388
|
+
return deferred.Promise();
|
|
1389
|
+
}
|
|
1390
|
+
|
|
1391
|
+
protected:
|
|
1392
|
+
Napi::Promise::Deferred deferred;
|
|
1393
|
+
|
|
1394
|
+
void Execute() {
|
|
1395
|
+
try {
|
|
1396
|
+
llama_free(context->ctx);
|
|
1397
|
+
context->contextLoaded = false;
|
|
1398
|
+
|
|
1399
|
+
try {
|
|
1400
|
+
if (context->has_batch) {
|
|
1401
|
+
llama_batch_free(context->batch);
|
|
1402
|
+
context->has_batch = false;
|
|
1403
|
+
context->batch_n_tokens = 0;
|
|
1404
|
+
}
|
|
1405
|
+
|
|
1406
|
+
context->dispose();
|
|
1407
|
+
} catch (const std::exception& e) {
|
|
1408
|
+
SetError(e.what());
|
|
1409
|
+
} catch(...) {
|
|
1410
|
+
SetError("Unknown error when calling \"llama_batch_free\"");
|
|
1411
|
+
}
|
|
1412
|
+
} catch (const std::exception& e) {
|
|
1413
|
+
SetError(e.what());
|
|
1414
|
+
} catch(...) {
|
|
1415
|
+
SetError("Unknown error when calling \"llama_free\"");
|
|
1416
|
+
}
|
|
1417
|
+
}
|
|
1418
|
+
void OnOK() {
|
|
1419
|
+
adjustNapiExternalMemorySubtract(Env(), context->loadedContextMemorySize);
|
|
1420
|
+
context->loadedContextMemorySize = 0;
|
|
1421
|
+
|
|
1422
|
+
adjustNapiExternalMemorySubtract(Env(), context->batchMemorySize);
|
|
1423
|
+
context->batchMemorySize = 0;
|
|
1424
|
+
|
|
1425
|
+
deferred.Resolve(Env().Undefined());
|
|
1426
|
+
}
|
|
1427
|
+
void OnError(const Napi::Error& err) {
|
|
1428
|
+
deferred.Reject(err.Value());
|
|
1429
|
+
}
|
|
1430
|
+
};
|
|
1431
|
+
|
|
1432
|
+
Napi::Value AddonContext::Init(const Napi::CallbackInfo& info) {
|
|
1433
|
+
if (disposed) {
|
|
1434
|
+
Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
|
|
1435
|
+
return info.Env().Undefined();
|
|
1436
|
+
}
|
|
1437
|
+
|
|
1438
|
+
AddonContextLoadContextWorker* worker = new AddonContextLoadContextWorker(this->Env(), this);
|
|
613
1439
|
worker->Queue();
|
|
614
|
-
return worker->
|
|
1440
|
+
return worker->GetPromise();
|
|
615
1441
|
}
|
|
1442
|
+
Napi::Value AddonContext::Dispose(const Napi::CallbackInfo& info) {
|
|
1443
|
+
if (disposed) {
|
|
1444
|
+
return info.Env().Undefined();
|
|
1445
|
+
}
|
|
616
1446
|
|
|
617
|
-
|
|
1447
|
+
if (contextLoaded) {
|
|
1448
|
+
contextLoaded = false;
|
|
1449
|
+
|
|
1450
|
+
AddonContextUnloadContextWorker* worker = new AddonContextUnloadContextWorker(this->Env(), this);
|
|
1451
|
+
worker->Queue();
|
|
1452
|
+
return worker->GetPromise();
|
|
1453
|
+
} else {
|
|
1454
|
+
dispose();
|
|
1455
|
+
|
|
1456
|
+
Napi::Promise::Deferred deferred = Napi::Promise::Deferred::New(info.Env());
|
|
1457
|
+
deferred.Resolve(info.Env().Undefined());
|
|
1458
|
+
return deferred.Promise();
|
|
1459
|
+
}
|
|
1460
|
+
}
|
|
1461
|
+
|
|
1462
|
+
class AddonContextSampleTokenWorker : public Napi::AsyncWorker {
|
|
618
1463
|
public:
|
|
619
1464
|
AddonContext* ctx;
|
|
620
1465
|
AddonGrammarEvaluationState* grammar_evaluation_state;
|
|
@@ -622,18 +1467,21 @@ class AddonContextSampleTokenWorker : Napi::AsyncWorker, Napi::Promise::Deferred
|
|
|
622
1467
|
bool use_grammar = false;
|
|
623
1468
|
llama_token result;
|
|
624
1469
|
float temperature = 0.0f;
|
|
1470
|
+
float min_p = 0;
|
|
625
1471
|
int32_t top_k = 40;
|
|
626
1472
|
float top_p = 0.95f;
|
|
627
1473
|
float repeat_penalty = 1.10f; // 1.0 = disabled
|
|
628
1474
|
float repeat_penalty_presence_penalty = 0.00f; // 0.0 = disabled
|
|
629
1475
|
float repeat_penalty_frequency_penalty = 0.00f; // 0.0 = disabled
|
|
630
1476
|
std::vector<llama_token> repeat_penalty_tokens;
|
|
1477
|
+
std::unordered_map<llama_token, float> tokenBiases;
|
|
1478
|
+
bool useTokenBiases = false;
|
|
631
1479
|
bool use_repeat_penalty = false;
|
|
632
1480
|
|
|
633
1481
|
AddonContextSampleTokenWorker(const Napi::CallbackInfo& info, AddonContext* ctx)
|
|
634
1482
|
: Napi::AsyncWorker(info.Env(), "AddonContextSampleTokenWorker"),
|
|
635
1483
|
ctx(ctx),
|
|
636
|
-
Napi::Promise::Deferred(info.Env()) {
|
|
1484
|
+
deferred(Napi::Promise::Deferred::New(info.Env())) {
|
|
637
1485
|
ctx->Ref();
|
|
638
1486
|
|
|
639
1487
|
batchLogitIndex = info[0].As<Napi::Number>().Int32Value();
|
|
@@ -645,6 +1493,10 @@ class AddonContextSampleTokenWorker : Napi::AsyncWorker, Napi::Promise::Deferred
|
|
|
645
1493
|
temperature = options.Get("temperature").As<Napi::Number>().FloatValue();
|
|
646
1494
|
}
|
|
647
1495
|
|
|
1496
|
+
if (options.Has("minP")) {
|
|
1497
|
+
min_p = options.Get("minP").As<Napi::Number>().FloatValue();
|
|
1498
|
+
}
|
|
1499
|
+
|
|
648
1500
|
if (options.Has("topK")) {
|
|
649
1501
|
top_k = options.Get("topK").As<Napi::Number>().Int32Value();
|
|
650
1502
|
}
|
|
@@ -668,6 +1520,19 @@ class AddonContextSampleTokenWorker : Napi::AsyncWorker, Napi::Promise::Deferred
|
|
|
668
1520
|
use_repeat_penalty = true;
|
|
669
1521
|
}
|
|
670
1522
|
|
|
1523
|
+
if (options.Has("tokenBiasKeys") && options.Has("tokenBiasValues")) {
|
|
1524
|
+
Napi::Uint32Array tokenBiasKeys = options.Get("tokenBiasKeys").As<Napi::Uint32Array>();
|
|
1525
|
+
Napi::Float32Array tokenBiasValues = options.Get("tokenBiasValues").As<Napi::Float32Array>();
|
|
1526
|
+
|
|
1527
|
+
if (tokenBiasKeys.ElementLength() == tokenBiasValues.ElementLength()) {
|
|
1528
|
+
for (size_t i = 0; i < tokenBiasKeys.ElementLength(); i++) {
|
|
1529
|
+
tokenBiases[static_cast<llama_token>(tokenBiasKeys[i])] = tokenBiasValues[i];
|
|
1530
|
+
}
|
|
1531
|
+
|
|
1532
|
+
useTokenBiases = true;
|
|
1533
|
+
}
|
|
1534
|
+
}
|
|
1535
|
+
|
|
671
1536
|
if (options.Has("repeatPenaltyPresencePenalty")) {
|
|
672
1537
|
repeat_penalty_presence_penalty = options.Get("repeatPenaltyPresencePenalty").As<Napi::Number>().FloatValue();
|
|
673
1538
|
}
|
|
@@ -692,14 +1557,33 @@ class AddonContextSampleTokenWorker : Napi::AsyncWorker, Napi::Promise::Deferred
|
|
|
692
1557
|
use_grammar = false;
|
|
693
1558
|
}
|
|
694
1559
|
}
|
|
695
|
-
|
|
696
|
-
|
|
1560
|
+
|
|
1561
|
+
Napi::Promise GetPromise() {
|
|
1562
|
+
return deferred.Promise();
|
|
1563
|
+
}
|
|
697
1564
|
|
|
698
1565
|
protected:
|
|
1566
|
+
Napi::Promise::Deferred deferred;
|
|
1567
|
+
|
|
699
1568
|
void Execute() {
|
|
1569
|
+
try {
|
|
1570
|
+
SampleToken();
|
|
1571
|
+
} catch (const std::exception& e) {
|
|
1572
|
+
SetError(e.what());
|
|
1573
|
+
} catch(...) {
|
|
1574
|
+
SetError("Unknown error when calling \"SampleToken\"");
|
|
1575
|
+
}
|
|
1576
|
+
}
|
|
1577
|
+
|
|
1578
|
+
void SampleToken() {
|
|
700
1579
|
llama_token new_token_id = 0;
|
|
701
1580
|
|
|
702
1581
|
// Select the best prediction.
|
|
1582
|
+
if (llama_get_logits(ctx->ctx) == nullptr) {
|
|
1583
|
+
SetError("This model does not support token generation");
|
|
1584
|
+
return;
|
|
1585
|
+
}
|
|
1586
|
+
|
|
703
1587
|
auto logits = llama_get_logits_ith(ctx->ctx, batchLogitIndex);
|
|
704
1588
|
auto n_vocab = llama_n_vocab(ctx->model->model);
|
|
705
1589
|
|
|
@@ -707,13 +1591,27 @@ class AddonContextSampleTokenWorker : Napi::AsyncWorker, Napi::Promise::Deferred
|
|
|
707
1591
|
candidates.reserve(n_vocab);
|
|
708
1592
|
|
|
709
1593
|
for (llama_token token_id = 0; token_id < n_vocab; token_id++) {
|
|
710
|
-
|
|
1594
|
+
auto logit = logits[token_id];
|
|
1595
|
+
|
|
1596
|
+
if (useTokenBiases) {
|
|
1597
|
+
bool hasTokenBias = tokenBiases.find(token_id) != tokenBiases.end();
|
|
1598
|
+
if (hasTokenBias) {
|
|
1599
|
+
auto logitBias = tokenBiases.at(token_id);
|
|
1600
|
+
if (logitBias == -INFINITY || logitBias < -INFINITY) {
|
|
1601
|
+
if (!llama_token_is_eog(ctx->model->model, token_id)) {
|
|
1602
|
+
logit = -INFINITY;
|
|
1603
|
+
}
|
|
1604
|
+
} else {
|
|
1605
|
+
logit += logitBias;
|
|
1606
|
+
}
|
|
1607
|
+
}
|
|
1608
|
+
}
|
|
1609
|
+
|
|
1610
|
+
candidates.emplace_back(llama_token_data { token_id, logit, 0.0f });
|
|
711
1611
|
}
|
|
712
1612
|
|
|
713
1613
|
llama_token_data_array candidates_p = { candidates.data(), candidates.size(), false };
|
|
714
1614
|
|
|
715
|
-
auto eos_token = llama_token_eos(ctx->model->model);
|
|
716
|
-
|
|
717
1615
|
if (use_repeat_penalty && !repeat_penalty_tokens.empty()) {
|
|
718
1616
|
llama_sample_repetition_penalties(
|
|
719
1617
|
ctx->ctx,
|
|
@@ -728,6 +1626,13 @@ class AddonContextSampleTokenWorker : Napi::AsyncWorker, Napi::Promise::Deferred
|
|
|
728
1626
|
|
|
729
1627
|
if (use_grammar && (grammar_evaluation_state)->grammar != nullptr) {
|
|
730
1628
|
llama_sample_grammar(ctx->ctx, &candidates_p, (grammar_evaluation_state)->grammar);
|
|
1629
|
+
|
|
1630
|
+
if ((candidates_p.size == 0 || candidates_p.data[0].logit == -INFINITY) && useTokenBiases) {
|
|
1631
|
+
// logit biases caused grammar sampling to fail, so sampling again without logit biases
|
|
1632
|
+
useTokenBiases = false;
|
|
1633
|
+
SampleToken();
|
|
1634
|
+
return;
|
|
1635
|
+
}
|
|
731
1636
|
}
|
|
732
1637
|
|
|
733
1638
|
if (temperature <= 0) {
|
|
@@ -746,45 +1651,359 @@ class AddonContextSampleTokenWorker : Napi::AsyncWorker, Napi::Promise::Deferred
|
|
|
746
1651
|
llama_sample_tail_free(ctx->ctx, &candidates_p, tfs_z, min_keep);
|
|
747
1652
|
llama_sample_typical(ctx->ctx, &candidates_p, typical_p, min_keep);
|
|
748
1653
|
llama_sample_top_p(ctx->ctx, &candidates_p, resolved_top_p, min_keep);
|
|
1654
|
+
llama_sample_min_p(ctx->ctx, &candidates_p, min_p, min_keep);
|
|
749
1655
|
llama_sample_temp(ctx->ctx, &candidates_p, temperature);
|
|
750
1656
|
new_token_id = llama_sample_token(ctx->ctx, &candidates_p);
|
|
751
1657
|
}
|
|
752
1658
|
|
|
753
|
-
if (new_token_id
|
|
1659
|
+
if (!llama_token_is_eog(ctx->model->model, new_token_id) && use_grammar && (grammar_evaluation_state)->grammar != nullptr) {
|
|
754
1660
|
llama_grammar_accept_token(ctx->ctx, (grammar_evaluation_state)->grammar, new_token_id);
|
|
755
1661
|
}
|
|
756
1662
|
|
|
757
1663
|
result = new_token_id;
|
|
758
1664
|
}
|
|
759
1665
|
void OnOK() {
|
|
760
|
-
Napi::
|
|
761
|
-
|
|
762
|
-
Napi::Promise::Deferred::Resolve(resultValue);
|
|
1666
|
+
Napi::Number resultValue = Napi::Number::New(Env(), static_cast<uint32_t>(result));
|
|
1667
|
+
deferred.Resolve(resultValue);
|
|
763
1668
|
}
|
|
764
1669
|
void OnError(const Napi::Error& err) {
|
|
765
|
-
|
|
1670
|
+
deferred.Reject(err.Value());
|
|
766
1671
|
}
|
|
767
1672
|
};
|
|
768
1673
|
|
|
769
1674
|
Napi::Value AddonContext::SampleToken(const Napi::CallbackInfo& info) {
|
|
770
1675
|
AddonContextSampleTokenWorker* worker = new AddonContextSampleTokenWorker(info, this);
|
|
771
1676
|
worker->Queue();
|
|
772
|
-
return worker->
|
|
1677
|
+
return worker->GetPromise();
|
|
773
1678
|
}
|
|
774
1679
|
|
|
775
1680
|
Napi::Value systemInfo(const Napi::CallbackInfo& info) {
|
|
776
1681
|
return Napi::String::From(info.Env(), llama_print_system_info());
|
|
777
1682
|
}
|
|
778
1683
|
|
|
1684
|
+
Napi::Value addonGetSupportsGpuOffloading(const Napi::CallbackInfo& info) {
|
|
1685
|
+
return Napi::Boolean::New(info.Env(), llama_supports_gpu_offload());
|
|
1686
|
+
}
|
|
1687
|
+
|
|
1688
|
+
Napi::Value addonGetSupportsMmap(const Napi::CallbackInfo& info) {
|
|
1689
|
+
return Napi::Boolean::New(info.Env(), llama_supports_mmap());
|
|
1690
|
+
}
|
|
1691
|
+
|
|
1692
|
+
Napi::Value addonGetSupportsMlock(const Napi::CallbackInfo& info) {
|
|
1693
|
+
return Napi::Boolean::New(info.Env(), llama_supports_mlock());
|
|
1694
|
+
}
|
|
1695
|
+
|
|
1696
|
+
Napi::Value addonGetBlockSizeForGgmlType(const Napi::CallbackInfo& info) {
|
|
1697
|
+
const int ggmlType = info[0].As<Napi::Number>().Int32Value();
|
|
1698
|
+
|
|
1699
|
+
if (ggmlType < 0 || ggmlType > GGML_TYPE_COUNT) {
|
|
1700
|
+
return info.Env().Undefined();
|
|
1701
|
+
}
|
|
1702
|
+
|
|
1703
|
+
const auto blockSize = ggml_blck_size(static_cast<ggml_type>(ggmlType));
|
|
1704
|
+
|
|
1705
|
+
return Napi::Number::New(info.Env(), blockSize);
|
|
1706
|
+
}
|
|
1707
|
+
|
|
1708
|
+
Napi::Value addonGetTypeSizeForGgmlType(const Napi::CallbackInfo& info) {
|
|
1709
|
+
const int ggmlType = info[0].As<Napi::Number>().Int32Value();
|
|
1710
|
+
|
|
1711
|
+
if (ggmlType < 0 || ggmlType > GGML_TYPE_COUNT) {
|
|
1712
|
+
return info.Env().Undefined();
|
|
1713
|
+
}
|
|
1714
|
+
|
|
1715
|
+
const auto typeSize = ggml_type_size(static_cast<ggml_type>(ggmlType));
|
|
1716
|
+
|
|
1717
|
+
return Napi::Number::New(info.Env(), typeSize);
|
|
1718
|
+
}
|
|
1719
|
+
|
|
1720
|
+
Napi::Value addonGetConsts(const Napi::CallbackInfo& info) {
|
|
1721
|
+
Napi::Object consts = Napi::Object::New(info.Env());
|
|
1722
|
+
consts.Set("ggmlMaxDims", Napi::Number::New(info.Env(), GGML_MAX_DIMS));
|
|
1723
|
+
consts.Set("ggmlTypeF16Size", Napi::Number::New(info.Env(), ggml_type_size(GGML_TYPE_F16)));
|
|
1724
|
+
consts.Set("ggmlTypeF32Size", Napi::Number::New(info.Env(), ggml_type_size(GGML_TYPE_F32)));
|
|
1725
|
+
consts.Set("ggmlTensorOverhead", Napi::Number::New(info.Env(), ggml_tensor_overhead()));
|
|
1726
|
+
consts.Set("llamaMaxRngState", Napi::Number::New(info.Env(), LLAMA_MAX_RNG_STATE));
|
|
1727
|
+
consts.Set("llamaPosSize", Napi::Number::New(info.Env(), sizeof(llama_pos)));
|
|
1728
|
+
consts.Set("llamaSeqIdSize", Napi::Number::New(info.Env(), sizeof(llama_seq_id)));
|
|
1729
|
+
|
|
1730
|
+
return consts;
|
|
1731
|
+
}
|
|
1732
|
+
|
|
1733
|
+
int addonGetGgmlLogLevelNumber(ggml_log_level level) {
|
|
1734
|
+
switch (level) {
|
|
1735
|
+
case GGML_LOG_LEVEL_ERROR: return 2;
|
|
1736
|
+
case GGML_LOG_LEVEL_WARN: return 3;
|
|
1737
|
+
case GGML_LOG_LEVEL_INFO: return 4;
|
|
1738
|
+
case GGML_LOG_LEVEL_DEBUG: return 5;
|
|
1739
|
+
}
|
|
1740
|
+
|
|
1741
|
+
return 1;
|
|
1742
|
+
}
|
|
1743
|
+
|
|
1744
|
+
void addonCallJsLogCallback(
|
|
1745
|
+
Napi::Env env, Napi::Function callback, AddonThreadSafeLogCallbackFunctionContext* context, addon_logger_log* data
|
|
1746
|
+
) {
|
|
1747
|
+
bool called = false;
|
|
1748
|
+
|
|
1749
|
+
if (env != nullptr && callback != nullptr && addonJsLoggerCallbackSet) {
|
|
1750
|
+
try {
|
|
1751
|
+
callback.Call({
|
|
1752
|
+
Napi::Number::New(env, data->logLevelNumber),
|
|
1753
|
+
Napi::String::New(env, data->stringStream->str()),
|
|
1754
|
+
});
|
|
1755
|
+
called = true;
|
|
1756
|
+
} catch (const Napi::Error& e) {
|
|
1757
|
+
called = false;
|
|
1758
|
+
}
|
|
1759
|
+
}
|
|
1760
|
+
|
|
1761
|
+
if (!called && data != nullptr) {
|
|
1762
|
+
if (data->logLevelNumber == 2) {
|
|
1763
|
+
fputs(data->stringStream->str().c_str(), stderr);
|
|
1764
|
+
fflush(stderr);
|
|
1765
|
+
} else {
|
|
1766
|
+
fputs(data->stringStream->str().c_str(), stdout);
|
|
1767
|
+
fflush(stdout);
|
|
1768
|
+
}
|
|
1769
|
+
}
|
|
1770
|
+
|
|
1771
|
+
if (data != nullptr) {
|
|
1772
|
+
delete data->stringStream;
|
|
1773
|
+
delete data;
|
|
1774
|
+
}
|
|
1775
|
+
}
|
|
1776
|
+
|
|
1777
|
+
static void addonLlamaCppLogCallback(ggml_log_level level, const char* text, void* user_data) {
|
|
1778
|
+
int logLevelNumber = addonGetGgmlLogLevelNumber(level);
|
|
1779
|
+
|
|
1780
|
+
if (logLevelNumber > addonLoggerLogLevel) {
|
|
1781
|
+
return;
|
|
1782
|
+
}
|
|
1783
|
+
|
|
1784
|
+
if (addonJsLoggerCallbackSet) {
|
|
1785
|
+
std::stringstream* stringStream = new std::stringstream();
|
|
1786
|
+
if (text != nullptr) {
|
|
1787
|
+
*stringStream << text;
|
|
1788
|
+
}
|
|
1789
|
+
|
|
1790
|
+
addon_logger_log* data = new addon_logger_log {
|
|
1791
|
+
logLevelNumber,
|
|
1792
|
+
stringStream,
|
|
1793
|
+
};
|
|
1794
|
+
|
|
1795
|
+
auto status = addonThreadSafeLoggerCallback.NonBlockingCall(data);
|
|
1796
|
+
|
|
1797
|
+
if (status == napi_ok) {
|
|
1798
|
+
return;
|
|
1799
|
+
} else {
|
|
1800
|
+
delete stringStream;
|
|
1801
|
+
delete data;
|
|
1802
|
+
}
|
|
1803
|
+
}
|
|
1804
|
+
|
|
1805
|
+
if (text != nullptr) {
|
|
1806
|
+
if (level == 2) {
|
|
1807
|
+
fputs(text, stderr);
|
|
1808
|
+
fflush(stderr);
|
|
1809
|
+
} else {
|
|
1810
|
+
fputs(text, stdout);
|
|
1811
|
+
fflush(stdout);
|
|
1812
|
+
}
|
|
1813
|
+
}
|
|
1814
|
+
}
|
|
1815
|
+
|
|
1816
|
+
Napi::Value setLogger(const Napi::CallbackInfo& info) {
|
|
1817
|
+
if (info.Length() < 1 || !info[0].IsFunction()) {
|
|
1818
|
+
if (addonJsLoggerCallbackSet) {
|
|
1819
|
+
addonJsLoggerCallbackSet = false;
|
|
1820
|
+
addonThreadSafeLoggerCallback.Release();
|
|
1821
|
+
}
|
|
1822
|
+
|
|
1823
|
+
return info.Env().Undefined();
|
|
1824
|
+
}
|
|
1825
|
+
|
|
1826
|
+
auto addonLoggerJSCallback = info[0].As<Napi::Function>();
|
|
1827
|
+
AddonThreadSafeLogCallbackFunctionContext* context = new Napi::Reference<Napi::Value>(Napi::Persistent(info.This()));
|
|
1828
|
+
addonThreadSafeLoggerCallback = AddonThreadSafeLogCallbackFunction::New(
|
|
1829
|
+
info.Env(),
|
|
1830
|
+
addonLoggerJSCallback,
|
|
1831
|
+
"loggerCallback",
|
|
1832
|
+
0,
|
|
1833
|
+
1,
|
|
1834
|
+
context,
|
|
1835
|
+
[](Napi::Env, void*, AddonThreadSafeLogCallbackFunctionContext* ctx) {
|
|
1836
|
+
addonJsLoggerCallbackSet = false;
|
|
1837
|
+
|
|
1838
|
+
delete ctx;
|
|
1839
|
+
}
|
|
1840
|
+
);
|
|
1841
|
+
addonJsLoggerCallbackSet = true;
|
|
1842
|
+
|
|
1843
|
+
// prevent blocking the main node process from exiting due to active resources
|
|
1844
|
+
addonThreadSafeLoggerCallback.Unref(info.Env());
|
|
1845
|
+
|
|
1846
|
+
return info.Env().Undefined();
|
|
1847
|
+
}
|
|
1848
|
+
|
|
1849
|
+
Napi::Value setLoggerLogLevel(const Napi::CallbackInfo& info) {
|
|
1850
|
+
if (info.Length() < 1 || !info[0].IsNumber()) {
|
|
1851
|
+
addonLoggerLogLevel = 5;
|
|
1852
|
+
|
|
1853
|
+
return info.Env().Undefined();
|
|
1854
|
+
}
|
|
1855
|
+
|
|
1856
|
+
addonLoggerLogLevel = info[0].As<Napi::Number>().Int32Value();
|
|
1857
|
+
|
|
1858
|
+
return info.Env().Undefined();
|
|
1859
|
+
}
|
|
1860
|
+
|
|
1861
|
+
class AddonBackendLoadWorker : public Napi::AsyncWorker {
|
|
1862
|
+
public:
|
|
1863
|
+
AddonBackendLoadWorker(const Napi::Env& env)
|
|
1864
|
+
: Napi::AsyncWorker(env, "AddonBackendLoadWorker"),
|
|
1865
|
+
deferred(Napi::Promise::Deferred::New(env)) {
|
|
1866
|
+
}
|
|
1867
|
+
~AddonBackendLoadWorker() {
|
|
1868
|
+
}
|
|
1869
|
+
|
|
1870
|
+
Napi::Promise GetPromise() {
|
|
1871
|
+
return deferred.Promise();
|
|
1872
|
+
}
|
|
1873
|
+
|
|
1874
|
+
protected:
|
|
1875
|
+
Napi::Promise::Deferred deferred;
|
|
1876
|
+
|
|
1877
|
+
void Execute() {
|
|
1878
|
+
try {
|
|
1879
|
+
llama_backend_init();
|
|
1880
|
+
|
|
1881
|
+
try {
|
|
1882
|
+
if (backendDisposed) {
|
|
1883
|
+
llama_backend_free();
|
|
1884
|
+
} else {
|
|
1885
|
+
backendInitialized = true;
|
|
1886
|
+
}
|
|
1887
|
+
} catch (const std::exception& e) {
|
|
1888
|
+
SetError(e.what());
|
|
1889
|
+
} catch(...) {
|
|
1890
|
+
SetError("Unknown error when calling \"llama_backend_free\"");
|
|
1891
|
+
}
|
|
1892
|
+
} catch (const std::exception& e) {
|
|
1893
|
+
SetError(e.what());
|
|
1894
|
+
} catch(...) {
|
|
1895
|
+
SetError("Unknown error when calling \"llama_backend_init\"");
|
|
1896
|
+
}
|
|
1897
|
+
}
|
|
1898
|
+
void OnOK() {
|
|
1899
|
+
deferred.Resolve(Env().Undefined());
|
|
1900
|
+
}
|
|
1901
|
+
void OnError(const Napi::Error& err) {
|
|
1902
|
+
deferred.Reject(err.Value());
|
|
1903
|
+
}
|
|
1904
|
+
};
|
|
1905
|
+
|
|
1906
|
+
|
|
1907
|
+
class AddonBackendUnloadWorker : public Napi::AsyncWorker {
|
|
1908
|
+
public:
|
|
1909
|
+
AddonBackendUnloadWorker(const Napi::Env& env)
|
|
1910
|
+
: Napi::AsyncWorker(env, "AddonBackendUnloadWorker"),
|
|
1911
|
+
deferred(Napi::Promise::Deferred::New(env)) {
|
|
1912
|
+
}
|
|
1913
|
+
~AddonBackendUnloadWorker() {
|
|
1914
|
+
}
|
|
1915
|
+
|
|
1916
|
+
Napi::Promise GetPromise() {
|
|
1917
|
+
return deferred.Promise();
|
|
1918
|
+
}
|
|
1919
|
+
|
|
1920
|
+
protected:
|
|
1921
|
+
Napi::Promise::Deferred deferred;
|
|
1922
|
+
|
|
1923
|
+
void Execute() {
|
|
1924
|
+
try {
|
|
1925
|
+
if (backendInitialized) {
|
|
1926
|
+
backendInitialized = false;
|
|
1927
|
+
llama_backend_free();
|
|
1928
|
+
}
|
|
1929
|
+
} catch (const std::exception& e) {
|
|
1930
|
+
SetError(e.what());
|
|
1931
|
+
} catch(...) {
|
|
1932
|
+
SetError("Unknown error when calling \"llama_backend_free\"");
|
|
1933
|
+
}
|
|
1934
|
+
}
|
|
1935
|
+
void OnOK() {
|
|
1936
|
+
deferred.Resolve(Env().Undefined());
|
|
1937
|
+
}
|
|
1938
|
+
void OnError(const Napi::Error& err) {
|
|
1939
|
+
deferred.Reject(err.Value());
|
|
1940
|
+
}
|
|
1941
|
+
};
|
|
1942
|
+
|
|
1943
|
+
Napi::Value addonInit(const Napi::CallbackInfo& info) {
|
|
1944
|
+
if (backendInitialized) {
|
|
1945
|
+
Napi::Promise::Deferred deferred = Napi::Promise::Deferred::New(info.Env());
|
|
1946
|
+
deferred.Resolve(info.Env().Undefined());
|
|
1947
|
+
return deferred.Promise();
|
|
1948
|
+
}
|
|
1949
|
+
|
|
1950
|
+
AddonBackendLoadWorker* worker = new AddonBackendLoadWorker(info.Env());
|
|
1951
|
+
worker->Queue();
|
|
1952
|
+
return worker->GetPromise();
|
|
1953
|
+
}
|
|
1954
|
+
|
|
1955
|
+
Napi::Value addonDispose(const Napi::CallbackInfo& info) {
|
|
1956
|
+
if (backendDisposed) {
|
|
1957
|
+
Napi::Promise::Deferred deferred = Napi::Promise::Deferred::New(info.Env());
|
|
1958
|
+
deferred.Resolve(info.Env().Undefined());
|
|
1959
|
+
return deferred.Promise();
|
|
1960
|
+
}
|
|
1961
|
+
|
|
1962
|
+
backendDisposed = true;
|
|
1963
|
+
|
|
1964
|
+
AddonBackendUnloadWorker* worker = new AddonBackendUnloadWorker(info.Env());
|
|
1965
|
+
worker->Queue();
|
|
1966
|
+
return worker->GetPromise();
|
|
1967
|
+
}
|
|
1968
|
+
|
|
1969
|
+
static void addonFreeLlamaBackend(Napi::Env env, int* data) {
|
|
1970
|
+
if (backendDisposed) {
|
|
1971
|
+
return;
|
|
1972
|
+
}
|
|
1973
|
+
|
|
1974
|
+
backendDisposed = true;
|
|
1975
|
+
if (backendInitialized) {
|
|
1976
|
+
backendInitialized = false;
|
|
1977
|
+
llama_backend_free();
|
|
1978
|
+
}
|
|
1979
|
+
}
|
|
1980
|
+
|
|
779
1981
|
Napi::Object registerCallback(Napi::Env env, Napi::Object exports) {
|
|
780
|
-
llama_backend_init(false);
|
|
781
1982
|
exports.DefineProperties({
|
|
782
1983
|
Napi::PropertyDescriptor::Function("systemInfo", systemInfo),
|
|
1984
|
+
Napi::PropertyDescriptor::Function("getSupportsGpuOffloading", addonGetSupportsGpuOffloading),
|
|
1985
|
+
Napi::PropertyDescriptor::Function("getSupportsMmap", addonGetSupportsMmap),
|
|
1986
|
+
Napi::PropertyDescriptor::Function("getSupportsMlock", addonGetSupportsMlock),
|
|
1987
|
+
Napi::PropertyDescriptor::Function("getBlockSizeForGgmlType", addonGetBlockSizeForGgmlType),
|
|
1988
|
+
Napi::PropertyDescriptor::Function("getTypeSizeForGgmlType", addonGetTypeSizeForGgmlType),
|
|
1989
|
+
Napi::PropertyDescriptor::Function("getConsts", addonGetConsts),
|
|
1990
|
+
Napi::PropertyDescriptor::Function("setLogger", setLogger),
|
|
1991
|
+
Napi::PropertyDescriptor::Function("setLoggerLogLevel", setLoggerLogLevel),
|
|
1992
|
+
Napi::PropertyDescriptor::Function("getGpuVramInfo", getGpuVramInfo),
|
|
1993
|
+
Napi::PropertyDescriptor::Function("getGpuDeviceInfo", getGpuDeviceInfo),
|
|
1994
|
+
Napi::PropertyDescriptor::Function("getGpuType", getGpuType),
|
|
1995
|
+
Napi::PropertyDescriptor::Function("init", addonInit),
|
|
1996
|
+
Napi::PropertyDescriptor::Function("dispose", addonDispose),
|
|
783
1997
|
});
|
|
784
1998
|
AddonModel::init(exports);
|
|
785
1999
|
AddonGrammar::init(exports);
|
|
786
2000
|
AddonGrammarEvaluationState::init(exports);
|
|
787
2001
|
AddonContext::init(exports);
|
|
2002
|
+
|
|
2003
|
+
llama_log_set(addonLlamaCppLogCallback, nullptr);
|
|
2004
|
+
|
|
2005
|
+
exports.AddFinalizer(addonFreeLlamaBackend, static_cast<int*>(nullptr));
|
|
2006
|
+
|
|
788
2007
|
return exports;
|
|
789
2008
|
}
|
|
790
2009
|
|