node-llama-cpp 3.0.0-beta.3 → 3.0.0-beta.30
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +14 -11
- package/bins/linux-arm64/_nlcBuildMetadata.json +1 -0
- package/bins/linux-arm64/llama-addon.node +0 -0
- package/bins/linux-armv7l/_nlcBuildMetadata.json +1 -0
- package/bins/linux-armv7l/llama-addon.node +0 -0
- package/bins/linux-x64/_nlcBuildMetadata.json +1 -0
- package/bins/linux-x64/llama-addon.node +0 -0
- package/bins/linux-x64-cuda/_nlcBuildMetadata.json +1 -0
- package/bins/linux-x64-cuda/llama-addon.node +0 -0
- package/bins/linux-x64-vulkan/_nlcBuildMetadata.json +1 -0
- package/bins/linux-x64-vulkan/llama-addon.node +0 -0
- package/bins/mac-arm64-metal/_nlcBuildMetadata.json +1 -0
- package/bins/mac-arm64-metal/default.metallib +0 -0
- package/bins/mac-arm64-metal/llama-addon.node +0 -0
- package/bins/mac-x64/_nlcBuildMetadata.json +1 -0
- package/bins/mac-x64/llama-addon.node +0 -0
- package/bins/win-arm64/_nlcBuildMetadata.json +1 -0
- package/bins/win-arm64/llama-addon.exp +0 -0
- package/bins/win-arm64/llama-addon.lib +0 -0
- package/bins/win-arm64/llama-addon.node +0 -0
- package/bins/win-x64/_nlcBuildMetadata.json +1 -0
- package/bins/win-x64/llama-addon.exp +0 -0
- package/bins/win-x64/llama-addon.lib +0 -0
- package/bins/win-x64/llama-addon.node +0 -0
- package/bins/win-x64-cuda/_nlcBuildMetadata.json +1 -0
- package/bins/win-x64-cuda/llama-addon.exp +0 -0
- package/bins/win-x64-cuda/llama-addon.lib +0 -0
- package/bins/win-x64-cuda/llama-addon.node +0 -0
- package/bins/win-x64-vulkan/_nlcBuildMetadata.json +1 -0
- package/bins/win-x64-vulkan/llama-addon.exp +0 -0
- package/bins/win-x64-vulkan/llama-addon.lib +0 -0
- package/bins/win-x64-vulkan/llama-addon.node +0 -0
- package/dist/ChatWrapper.d.ts +8 -39
- package/dist/ChatWrapper.js +115 -72
- package/dist/ChatWrapper.js.map +1 -1
- package/dist/apiDocsIndex.d.ts +1 -0
- package/dist/apiDocsIndex.js +7 -0
- package/dist/apiDocsIndex.js.map +1 -0
- package/dist/{utils/getBin.d.ts → bindings/AddonTypes.d.ts} +54 -8
- package/dist/bindings/AddonTypes.js +2 -0
- package/dist/bindings/AddonTypes.js.map +1 -0
- package/dist/bindings/Llama.d.ts +47 -0
- package/dist/bindings/Llama.js +353 -0
- package/dist/bindings/Llama.js.map +1 -0
- package/dist/bindings/consts.d.ts +2 -0
- package/dist/bindings/consts.js +11 -0
- package/dist/bindings/consts.js.map +1 -0
- package/dist/bindings/getLlama.d.ts +148 -0
- package/dist/bindings/getLlama.js +401 -0
- package/dist/bindings/getLlama.js.map +1 -0
- package/dist/bindings/types.d.ts +56 -0
- package/dist/bindings/types.js +77 -0
- package/dist/bindings/types.js.map +1 -0
- package/dist/bindings/utils/MemoryOrchestrator.d.ts +21 -0
- package/dist/bindings/utils/MemoryOrchestrator.js +49 -0
- package/dist/bindings/utils/MemoryOrchestrator.js.map +1 -0
- package/dist/bindings/utils/NoBinaryFoundError.d.ts +2 -0
- package/dist/bindings/utils/NoBinaryFoundError.js +7 -0
- package/dist/bindings/utils/NoBinaryFoundError.js.map +1 -0
- package/dist/bindings/utils/asyncEvery.d.ts +5 -0
- package/dist/bindings/utils/asyncEvery.js +15 -0
- package/dist/bindings/utils/asyncEvery.js.map +1 -0
- package/dist/bindings/utils/asyncSome.d.ts +5 -0
- package/dist/bindings/utils/asyncSome.js +27 -0
- package/dist/bindings/utils/asyncSome.js.map +1 -0
- package/dist/{utils → bindings/utils}/binariesGithubRelease.js +1 -1
- package/dist/bindings/utils/binariesGithubRelease.js.map +1 -0
- package/dist/bindings/utils/clearAllLocalBuilds.d.ts +1 -0
- package/dist/bindings/utils/clearAllLocalBuilds.js +47 -0
- package/dist/bindings/utils/clearAllLocalBuilds.js.map +1 -0
- package/dist/bindings/utils/cloneLlamaCppRepo.d.ts +11 -0
- package/dist/bindings/utils/cloneLlamaCppRepo.js +166 -0
- package/dist/bindings/utils/cloneLlamaCppRepo.js.map +1 -0
- package/dist/bindings/utils/compileLLamaCpp.d.ts +17 -0
- package/dist/bindings/utils/compileLLamaCpp.js +226 -0
- package/dist/bindings/utils/compileLLamaCpp.js.map +1 -0
- package/dist/bindings/utils/detectAvailableComputeLayers.d.ts +14 -0
- package/dist/bindings/utils/detectAvailableComputeLayers.js +305 -0
- package/dist/bindings/utils/detectAvailableComputeLayers.js.map +1 -0
- package/dist/bindings/utils/detectGlibc.d.ts +4 -0
- package/dist/bindings/utils/detectGlibc.js +46 -0
- package/dist/bindings/utils/detectGlibc.js.map +1 -0
- package/dist/bindings/utils/getBestComputeLayersAvailable.d.ts +10 -0
- package/dist/bindings/utils/getBestComputeLayersAvailable.js +29 -0
- package/dist/bindings/utils/getBestComputeLayersAvailable.js.map +1 -0
- package/dist/bindings/utils/getBuildFolderNameForBuildOptions.d.ts +5 -0
- package/dist/bindings/utils/getBuildFolderNameForBuildOptions.js +93 -0
- package/dist/bindings/utils/getBuildFolderNameForBuildOptions.js.map +1 -0
- package/dist/bindings/utils/getCanUsePrebuiltBinaries.d.ts +1 -0
- package/dist/bindings/utils/getCanUsePrebuiltBinaries.js +8 -0
- package/dist/bindings/utils/getCanUsePrebuiltBinaries.js.map +1 -0
- package/dist/bindings/utils/getExampleUsageCodeOfGetLlama.d.ts +2 -0
- package/dist/bindings/utils/getExampleUsageCodeOfGetLlama.js +21 -0
- package/dist/bindings/utils/getExampleUsageCodeOfGetLlama.js.map +1 -0
- package/dist/bindings/utils/getGpuTypesToUseForOption.d.ts +12 -0
- package/dist/bindings/utils/getGpuTypesToUseForOption.js +30 -0
- package/dist/bindings/utils/getGpuTypesToUseForOption.js.map +1 -0
- package/dist/bindings/utils/getLinuxDistroInfo.d.ts +9 -0
- package/dist/bindings/utils/getLinuxDistroInfo.js +46 -0
- package/dist/bindings/utils/getLinuxDistroInfo.js.map +1 -0
- package/dist/bindings/utils/getLlamaWithoutBackend.d.ts +5 -0
- package/dist/bindings/utils/getLlamaWithoutBackend.js +27 -0
- package/dist/bindings/utils/getLlamaWithoutBackend.js.map +1 -0
- package/dist/bindings/utils/getPlatform.d.ts +2 -0
- package/dist/bindings/utils/getPlatform.js +15 -0
- package/dist/bindings/utils/getPlatform.js.map +1 -0
- package/dist/bindings/utils/getPlatformInfo.d.ts +5 -0
- package/dist/bindings/utils/getPlatformInfo.js +28 -0
- package/dist/bindings/utils/getPlatformInfo.js.map +1 -0
- package/dist/bindings/utils/hasBuildingFromSourceDependenciesInstalled.d.ts +3 -0
- package/dist/bindings/utils/hasBuildingFromSourceDependenciesInstalled.js +27 -0
- package/dist/bindings/utils/hasBuildingFromSourceDependenciesInstalled.js.map +1 -0
- package/dist/bindings/utils/hasFileInPath.d.ts +2 -0
- package/dist/bindings/utils/hasFileInPath.js +34 -0
- package/dist/bindings/utils/hasFileInPath.js.map +1 -0
- package/dist/bindings/utils/lastBuildInfo.d.ts +6 -0
- package/dist/bindings/utils/lastBuildInfo.js +17 -0
- package/dist/bindings/utils/lastBuildInfo.js.map +1 -0
- package/dist/bindings/utils/logBinaryUsageExampleToConsole.d.ts +2 -0
- package/dist/bindings/utils/logBinaryUsageExampleToConsole.js +22 -0
- package/dist/bindings/utils/logBinaryUsageExampleToConsole.js.map +1 -0
- package/dist/bindings/utils/logDistroInstallInstruction.d.ts +14 -0
- package/dist/bindings/utils/logDistroInstallInstruction.js +48 -0
- package/dist/bindings/utils/logDistroInstallInstruction.js.map +1 -0
- package/dist/bindings/utils/resolveCustomCmakeOptions.d.ts +1 -0
- package/dist/bindings/utils/resolveCustomCmakeOptions.js +47 -0
- package/dist/bindings/utils/resolveCustomCmakeOptions.js.map +1 -0
- package/dist/bindings/utils/testBindingBinary.d.ts +1 -0
- package/dist/bindings/utils/testBindingBinary.js +100 -0
- package/dist/bindings/utils/testBindingBinary.js.map +1 -0
- package/dist/bindings/utils/testCmakeBinary.d.ts +6 -0
- package/dist/bindings/utils/testCmakeBinary.js +32 -0
- package/dist/bindings/utils/testCmakeBinary.js.map +1 -0
- package/dist/chatWrappers/AlpacaChatWrapper.d.ts +2 -1
- package/dist/chatWrappers/AlpacaChatWrapper.js +9 -2
- package/dist/chatWrappers/AlpacaChatWrapper.js.map +1 -1
- package/dist/chatWrappers/ChatMLChatWrapper.d.ts +2 -9
- package/dist/chatWrappers/ChatMLChatWrapper.js +23 -21
- package/dist/chatWrappers/ChatMLChatWrapper.js.map +1 -1
- package/dist/chatWrappers/FalconChatWrapper.d.ts +4 -10
- package/dist/chatWrappers/FalconChatWrapper.js +38 -21
- package/dist/chatWrappers/FalconChatWrapper.js.map +1 -1
- package/dist/chatWrappers/FunctionaryChatWrapper.d.ts +8 -32
- package/dist/chatWrappers/FunctionaryChatWrapper.js +323 -118
- package/dist/chatWrappers/FunctionaryChatWrapper.js.map +1 -1
- package/dist/chatWrappers/GemmaChatWrapper.d.ts +7 -0
- package/dist/chatWrappers/GemmaChatWrapper.js +96 -0
- package/dist/chatWrappers/GemmaChatWrapper.js.map +1 -0
- package/dist/chatWrappers/GeneralChatWrapper.d.ts +4 -10
- package/dist/chatWrappers/GeneralChatWrapper.js +45 -22
- package/dist/chatWrappers/GeneralChatWrapper.js.map +1 -1
- package/dist/chatWrappers/Llama2ChatWrapper.d.ts +12 -0
- package/dist/chatWrappers/{LlamaChatWrapper.js → Llama2ChatWrapper.js} +38 -20
- package/dist/chatWrappers/Llama2ChatWrapper.js.map +1 -0
- package/dist/chatWrappers/Llama3ChatWrapper.d.ts +16 -0
- package/dist/chatWrappers/Llama3ChatWrapper.js +174 -0
- package/dist/chatWrappers/Llama3ChatWrapper.js.map +1 -0
- package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.d.ts +67 -0
- package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.js +371 -0
- package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.js.map +1 -0
- package/dist/chatWrappers/generic/TemplateChatWrapper.d.ts +54 -0
- package/dist/chatWrappers/generic/TemplateChatWrapper.js +200 -0
- package/dist/chatWrappers/generic/TemplateChatWrapper.js.map +1 -0
- package/dist/chatWrappers/generic/utils/chatHistoryFunctionCallMessageTemplate.d.ts +23 -0
- package/dist/chatWrappers/generic/utils/chatHistoryFunctionCallMessageTemplate.js +45 -0
- package/dist/chatWrappers/generic/utils/chatHistoryFunctionCallMessageTemplate.js.map +1 -0
- package/dist/chatWrappers/utils/ChatModelFunctionsDocumentationGenerator.d.ts +42 -0
- package/dist/chatWrappers/utils/ChatModelFunctionsDocumentationGenerator.js +82 -0
- package/dist/chatWrappers/utils/ChatModelFunctionsDocumentationGenerator.js.map +1 -0
- package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.d.ts +4 -0
- package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.js +210 -0
- package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.js.map +1 -0
- package/dist/chatWrappers/utils/resolveChatWrapper.d.ts +69 -0
- package/dist/chatWrappers/utils/resolveChatWrapper.js +243 -0
- package/dist/chatWrappers/utils/resolveChatWrapper.js.map +1 -0
- package/dist/cli/cli.js +21 -7
- package/dist/cli/cli.js.map +1 -1
- package/dist/cli/commands/BuildCommand.d.ts +11 -4
- package/dist/cli/commands/BuildCommand.js +114 -41
- package/dist/cli/commands/BuildCommand.js.map +1 -1
- package/dist/cli/commands/ChatCommand.d.ts +18 -6
- package/dist/cli/commands/ChatCommand.js +299 -143
- package/dist/cli/commands/ChatCommand.js.map +1 -1
- package/dist/cli/commands/ClearCommand.d.ts +1 -1
- package/dist/cli/commands/ClearCommand.js +11 -12
- package/dist/cli/commands/ClearCommand.js.map +1 -1
- package/dist/cli/commands/CompleteCommand.d.ts +29 -0
- package/dist/cli/commands/CompleteCommand.js +365 -0
- package/dist/cli/commands/CompleteCommand.js.map +1 -0
- package/dist/cli/commands/DebugCommand.d.ts +7 -0
- package/dist/cli/commands/DebugCommand.js +54 -0
- package/dist/cli/commands/DebugCommand.js.map +1 -0
- package/dist/cli/commands/DownloadCommand.d.ts +7 -4
- package/dist/cli/commands/DownloadCommand.js +121 -70
- package/dist/cli/commands/DownloadCommand.js.map +1 -1
- package/dist/cli/commands/InfillCommand.d.ts +31 -0
- package/dist/cli/commands/InfillCommand.js +401 -0
- package/dist/cli/commands/InfillCommand.js.map +1 -0
- package/dist/cli/commands/InitCommand.d.ts +11 -0
- package/dist/cli/commands/InitCommand.js +195 -0
- package/dist/cli/commands/InitCommand.js.map +1 -0
- package/dist/cli/commands/OnPostInstallCommand.js +9 -10
- package/dist/cli/commands/OnPostInstallCommand.js.map +1 -1
- package/dist/cli/commands/PullCommand.d.ts +12 -0
- package/dist/cli/commands/PullCommand.js +117 -0
- package/dist/cli/commands/PullCommand.js.map +1 -0
- package/dist/cli/commands/inspect/InspectCommand.d.ts +4 -0
- package/dist/cli/commands/inspect/InspectCommand.js +19 -0
- package/dist/cli/commands/inspect/InspectCommand.js.map +1 -0
- package/dist/cli/commands/inspect/commands/InspectGgufCommand.d.ts +12 -0
- package/dist/cli/commands/inspect/commands/InspectGgufCommand.js +136 -0
- package/dist/cli/commands/inspect/commands/InspectGgufCommand.js.map +1 -0
- package/dist/cli/commands/inspect/commands/InspectGpuCommand.d.ts +4 -0
- package/dist/cli/commands/inspect/commands/InspectGpuCommand.js +164 -0
- package/dist/cli/commands/inspect/commands/InspectGpuCommand.js.map +1 -0
- package/dist/cli/commands/inspect/commands/InspectMeasureCommand.d.ts +17 -0
- package/dist/cli/commands/inspect/commands/InspectMeasureCommand.js +613 -0
- package/dist/cli/commands/inspect/commands/InspectMeasureCommand.js.map +1 -0
- package/dist/cli/projectTemplates.d.ts +7 -0
- package/dist/cli/projectTemplates.js +10 -0
- package/dist/cli/projectTemplates.js.map +1 -0
- package/dist/cli/recommendedModels.d.ts +2 -0
- package/dist/cli/recommendedModels.js +363 -0
- package/dist/cli/recommendedModels.js.map +1 -0
- package/dist/cli/startCreateCli.d.ts +2 -0
- package/dist/cli/startCreateCli.js +26 -0
- package/dist/cli/startCreateCli.js.map +1 -0
- package/dist/cli/utils/ConsoleInteraction.d.ts +23 -0
- package/dist/cli/utils/ConsoleInteraction.js +122 -0
- package/dist/cli/utils/ConsoleInteraction.js.map +1 -0
- package/dist/cli/utils/ConsoleTable.d.ts +23 -0
- package/dist/cli/utils/ConsoleTable.js +86 -0
- package/dist/cli/utils/ConsoleTable.js.map +1 -0
- package/dist/cli/utils/basicChooseFromListConsoleInteraction.d.ts +13 -0
- package/dist/cli/utils/basicChooseFromListConsoleInteraction.js +111 -0
- package/dist/cli/utils/basicChooseFromListConsoleInteraction.js.map +1 -0
- package/dist/cli/utils/consolePromptQuestion.d.ts +6 -0
- package/dist/cli/utils/consolePromptQuestion.js +82 -0
- package/dist/cli/utils/consolePromptQuestion.js.map +1 -0
- package/dist/cli/utils/getReadablePath.d.ts +1 -0
- package/dist/cli/utils/getReadablePath.js +14 -0
- package/dist/cli/utils/getReadablePath.js.map +1 -0
- package/dist/cli/utils/interactivelyAskForModel.d.ts +7 -0
- package/dist/cli/utils/interactivelyAskForModel.js +451 -0
- package/dist/cli/utils/interactivelyAskForModel.js.map +1 -0
- package/dist/cli/utils/logUsedGpuTypeOption.d.ts +2 -0
- package/dist/cli/utils/logUsedGpuTypeOption.js +9 -0
- package/dist/cli/utils/logUsedGpuTypeOption.js.map +1 -0
- package/dist/cli/utils/printCommonInfoLines.d.ts +9 -0
- package/dist/cli/utils/printCommonInfoLines.js +75 -0
- package/dist/cli/utils/printCommonInfoLines.js.map +1 -0
- package/dist/cli/utils/printInfoLine.d.ts +12 -0
- package/dist/cli/utils/printInfoLine.js +54 -0
- package/dist/cli/utils/printInfoLine.js.map +1 -0
- package/dist/cli/utils/projectTemplates.d.ts +19 -0
- package/dist/cli/utils/projectTemplates.js +47 -0
- package/dist/cli/utils/projectTemplates.js.map +1 -0
- package/dist/cli/utils/resolveCommandGgufPath.d.ts +4 -0
- package/dist/cli/utils/resolveCommandGgufPath.js +71 -0
- package/dist/cli/utils/resolveCommandGgufPath.js.map +1 -0
- package/dist/cli/utils/resolveHeaderFlag.d.ts +1 -0
- package/dist/cli/utils/resolveHeaderFlag.js +21 -0
- package/dist/cli/utils/resolveHeaderFlag.js.map +1 -0
- package/dist/cli/utils/resolveModelRecommendationFileOptions.d.ts +19 -0
- package/dist/cli/utils/resolveModelRecommendationFileOptions.js +7 -0
- package/dist/cli/utils/resolveModelRecommendationFileOptions.js.map +1 -0
- package/dist/cli/utils/splitAnsiToLines.d.ts +1 -0
- package/dist/cli/utils/splitAnsiToLines.js +32 -0
- package/dist/cli/utils/splitAnsiToLines.js.map +1 -0
- package/dist/cli/utils/withCliCommandDescriptionDocsUrl.d.ts +2 -0
- package/dist/cli/utils/withCliCommandDescriptionDocsUrl.js +23 -0
- package/dist/cli/utils/withCliCommandDescriptionDocsUrl.js.map +1 -0
- package/dist/commands.d.ts +1 -0
- package/dist/commands.js +3 -0
- package/dist/commands.js.map +1 -1
- package/dist/config.d.ts +38 -5
- package/dist/config.js +61 -16
- package/dist/config.js.map +1 -1
- package/dist/consts.d.ts +3 -0
- package/dist/consts.js +10 -0
- package/dist/consts.js.map +1 -0
- package/dist/{llamaEvaluator → evaluator}/LlamaChat/LlamaChat.d.ts +112 -39
- package/dist/evaluator/LlamaChat/LlamaChat.js +1512 -0
- package/dist/evaluator/LlamaChat/LlamaChat.js.map +1 -0
- package/dist/evaluator/LlamaChat/utils/FunctionCallNameGrammar.d.ts +11 -0
- package/dist/evaluator/LlamaChat/utils/FunctionCallNameGrammar.js +55 -0
- package/dist/evaluator/LlamaChat/utils/FunctionCallNameGrammar.js.map +1 -0
- package/dist/evaluator/LlamaChat/utils/FunctionCallParamsGrammar.d.ts +16 -0
- package/dist/evaluator/LlamaChat/utils/FunctionCallParamsGrammar.js +45 -0
- package/dist/evaluator/LlamaChat/utils/FunctionCallParamsGrammar.js.map +1 -0
- package/dist/evaluator/LlamaChat/utils/LlamaFunctionCallValidationError.d.ts +8 -0
- package/dist/evaluator/LlamaChat/utils/LlamaFunctionCallValidationError.js +12 -0
- package/dist/evaluator/LlamaChat/utils/LlamaFunctionCallValidationError.js.map +1 -0
- package/dist/{llamaEvaluator → evaluator}/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.js +42 -16
- package/dist/evaluator/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.js.map +1 -0
- package/dist/evaluator/LlamaChatSession/LlamaChatSession.d.ts +288 -0
- package/dist/evaluator/LlamaChatSession/LlamaChatSession.js +419 -0
- package/dist/evaluator/LlamaChatSession/LlamaChatSession.js.map +1 -0
- package/dist/evaluator/LlamaChatSession/utils/LlamaChatSessionPromptCompletionEngine.d.ts +39 -0
- package/dist/evaluator/LlamaChatSession/utils/LlamaChatSessionPromptCompletionEngine.js +186 -0
- package/dist/evaluator/LlamaChatSession/utils/LlamaChatSessionPromptCompletionEngine.js.map +1 -0
- package/dist/{llamaEvaluator → evaluator}/LlamaChatSession/utils/defineChatSessionFunction.d.ts +3 -0
- package/dist/{llamaEvaluator → evaluator}/LlamaChatSession/utils/defineChatSessionFunction.js +3 -0
- package/dist/evaluator/LlamaChatSession/utils/defineChatSessionFunction.js.map +1 -0
- package/dist/evaluator/LlamaCompletion.d.ts +143 -0
- package/dist/evaluator/LlamaCompletion.js +418 -0
- package/dist/evaluator/LlamaCompletion.js.map +1 -0
- package/dist/{llamaEvaluator → evaluator}/LlamaContext/LlamaContext.d.ts +41 -21
- package/dist/{llamaEvaluator → evaluator}/LlamaContext/LlamaContext.js +270 -81
- package/dist/evaluator/LlamaContext/LlamaContext.js.map +1 -0
- package/dist/evaluator/LlamaContext/types.d.ts +140 -0
- package/dist/evaluator/LlamaContext/types.js.map +1 -0
- package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies/firstInFirstOutStrategy.js.map +1 -0
- package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies/maximumParallelismStrategy.js.map +1 -0
- package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizationStrategy.d.ts +2 -0
- package/dist/{llamaEvaluator/LlamaContext/utils/resolveBatchItemsPrioritizingStrategy.js → evaluator/LlamaContext/utils/resolveBatchItemsPrioritizationStrategy.js} +4 -4
- package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizationStrategy.js.map +1 -0
- package/dist/evaluator/LlamaEmbeddingContext.d.ts +51 -0
- package/dist/evaluator/LlamaEmbeddingContext.js +73 -0
- package/dist/evaluator/LlamaEmbeddingContext.js.map +1 -0
- package/dist/{llamaEvaluator → evaluator}/LlamaGrammar.d.ts +10 -7
- package/dist/{llamaEvaluator → evaluator}/LlamaGrammar.js +14 -11
- package/dist/evaluator/LlamaGrammar.js.map +1 -0
- package/dist/{llamaEvaluator → evaluator}/LlamaGrammarEvaluationState.js +4 -4
- package/dist/evaluator/LlamaGrammarEvaluationState.js.map +1 -0
- package/dist/{llamaEvaluator → evaluator}/LlamaJsonSchemaGrammar.d.ts +2 -1
- package/dist/{llamaEvaluator → evaluator}/LlamaJsonSchemaGrammar.js +3 -3
- package/dist/evaluator/LlamaJsonSchemaGrammar.js.map +1 -0
- package/dist/evaluator/LlamaModel/LlamaModel.d.ts +236 -0
- package/dist/evaluator/LlamaModel/LlamaModel.js +679 -0
- package/dist/evaluator/LlamaModel/LlamaModel.js.map +1 -0
- package/dist/evaluator/LlamaModel/utils/TokenAttributes.d.ts +29 -0
- package/dist/evaluator/LlamaModel/utils/TokenAttributes.js +65 -0
- package/dist/evaluator/LlamaModel/utils/TokenAttributes.js.map +1 -0
- package/dist/evaluator/TokenBias.d.ts +22 -0
- package/dist/evaluator/TokenBias.js +33 -0
- package/dist/evaluator/TokenBias.js.map +1 -0
- package/dist/evaluator/TokenMeter.d.ts +54 -0
- package/dist/evaluator/TokenMeter.js +86 -0
- package/dist/evaluator/TokenMeter.js.map +1 -0
- package/dist/gguf/consts.d.ts +3 -0
- package/dist/gguf/consts.js +8 -0
- package/dist/gguf/consts.js.map +1 -0
- package/dist/gguf/errors/InvalidGgufMagicError.d.ts +3 -0
- package/dist/gguf/errors/InvalidGgufMagicError.js +6 -0
- package/dist/gguf/errors/InvalidGgufMagicError.js.map +1 -0
- package/dist/gguf/errors/UnsupportedGgufValueTypeError.d.ts +4 -0
- package/dist/gguf/errors/UnsupportedGgufValueTypeError.js +9 -0
- package/dist/gguf/errors/UnsupportedGgufValueTypeError.js.map +1 -0
- package/dist/gguf/fileReaders/GgufFileReader.d.ts +33 -0
- package/dist/gguf/fileReaders/GgufFileReader.js +76 -0
- package/dist/gguf/fileReaders/GgufFileReader.js.map +1 -0
- package/dist/gguf/fileReaders/GgufFsFileReader.d.ts +17 -0
- package/dist/gguf/fileReaders/GgufFsFileReader.js +46 -0
- package/dist/gguf/fileReaders/GgufFsFileReader.js.map +1 -0
- package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.d.ts +22 -0
- package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.js +63 -0
- package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.js.map +1 -0
- package/dist/gguf/insights/GgufInsights.d.ts +48 -0
- package/dist/gguf/insights/GgufInsights.js +381 -0
- package/dist/gguf/insights/GgufInsights.js.map +1 -0
- package/dist/gguf/insights/GgufInsightsConfigurationResolver.d.ts +87 -0
- package/dist/gguf/insights/GgufInsightsConfigurationResolver.js +141 -0
- package/dist/gguf/insights/GgufInsightsConfigurationResolver.js.map +1 -0
- package/dist/gguf/insights/utils/resolveContextContextSizeOption.d.ts +18 -0
- package/dist/gguf/insights/utils/resolveContextContextSizeOption.js +76 -0
- package/dist/gguf/insights/utils/resolveContextContextSizeOption.js.map +1 -0
- package/dist/gguf/insights/utils/resolveModelGpuLayersOption.d.ts +14 -0
- package/dist/gguf/insights/utils/resolveModelGpuLayersOption.js +177 -0
- package/dist/gguf/insights/utils/resolveModelGpuLayersOption.js.map +1 -0
- package/dist/gguf/insights/utils/scoreLevels.d.ts +5 -0
- package/dist/gguf/insights/utils/scoreLevels.js +16 -0
- package/dist/gguf/insights/utils/scoreLevels.js.map +1 -0
- package/dist/gguf/parser/GgufV2Parser.d.ts +19 -0
- package/dist/gguf/parser/GgufV2Parser.js +115 -0
- package/dist/gguf/parser/GgufV2Parser.js.map +1 -0
- package/dist/gguf/parser/GgufV3Parser.d.ts +3 -0
- package/dist/gguf/parser/GgufV3Parser.js +4 -0
- package/dist/gguf/parser/GgufV3Parser.js.map +1 -0
- package/dist/gguf/parser/parseGguf.d.ts +8 -0
- package/dist/gguf/parser/parseGguf.js +63 -0
- package/dist/gguf/parser/parseGguf.js.map +1 -0
- package/dist/gguf/readGgufFileInfo.d.ts +33 -0
- package/dist/gguf/readGgufFileInfo.js +66 -0
- package/dist/gguf/readGgufFileInfo.js.map +1 -0
- package/dist/gguf/types/GgufFileInfoTypes.d.ts +84 -0
- package/dist/gguf/types/GgufFileInfoTypes.js +18 -0
- package/dist/gguf/types/GgufFileInfoTypes.js.map +1 -0
- package/dist/gguf/types/GgufMetadataTypes.d.ts +335 -0
- package/dist/gguf/types/GgufMetadataTypes.js +86 -0
- package/dist/gguf/types/GgufMetadataTypes.js.map +1 -0
- package/dist/gguf/types/GgufTensorInfoTypes.d.ts +37 -0
- package/dist/gguf/types/GgufTensorInfoTypes.js +33 -0
- package/dist/gguf/types/GgufTensorInfoTypes.js.map +1 -0
- package/dist/gguf/utils/GgufReadOffset.d.ts +6 -0
- package/dist/gguf/utils/GgufReadOffset.js +18 -0
- package/dist/gguf/utils/GgufReadOffset.js.map +1 -0
- package/dist/gguf/utils/convertMetadataKeyValueRecordToNestedObject.d.ts +5 -0
- package/dist/gguf/utils/convertMetadataKeyValueRecordToNestedObject.js +38 -0
- package/dist/gguf/utils/convertMetadataKeyValueRecordToNestedObject.js.map +1 -0
- package/dist/gguf/utils/getGgufFileTypeName.d.ts +4 -0
- package/dist/gguf/utils/getGgufFileTypeName.js +13 -0
- package/dist/gguf/utils/getGgufFileTypeName.js.map +1 -0
- package/dist/gguf/utils/getGgufMetadataArchitectureData.d.ts +3 -0
- package/dist/gguf/utils/getGgufMetadataArchitectureData.js +4 -0
- package/dist/gguf/utils/getGgufMetadataArchitectureData.js.map +1 -0
- package/dist/gguf/utils/normalizeGgufDownloadUrl.d.ts +1 -0
- package/dist/gguf/utils/normalizeGgufDownloadUrl.js +16 -0
- package/dist/gguf/utils/normalizeGgufDownloadUrl.js.map +1 -0
- package/dist/gguf/utils/resolveBinarySplitGgufPartUrls.d.ts +2 -0
- package/dist/gguf/utils/resolveBinarySplitGgufPartUrls.js +39 -0
- package/dist/gguf/utils/resolveBinarySplitGgufPartUrls.js.map +1 -0
- package/dist/gguf/utils/resolveSplitGgufParts.d.ts +7 -0
- package/dist/gguf/utils/resolveSplitGgufParts.js +55 -0
- package/dist/gguf/utils/resolveSplitGgufParts.js.map +1 -0
- package/dist/index.d.ts +41 -18
- package/dist/index.js +36 -15
- package/dist/index.js.map +1 -1
- package/dist/state.d.ts +4 -0
- package/dist/state.js +14 -0
- package/dist/state.js.map +1 -1
- package/dist/types.d.ts +116 -5
- package/dist/types.js.map +1 -1
- package/dist/utils/DisposeGuard.d.ts +13 -0
- package/dist/utils/DisposeGuard.js +120 -0
- package/dist/utils/DisposeGuard.js.map +1 -0
- package/dist/utils/InsufficientMemoryError.d.ts +3 -0
- package/dist/utils/InsufficientMemoryError.js +6 -0
- package/dist/utils/InsufficientMemoryError.js.map +1 -0
- package/dist/utils/LlamaText.d.ts +70 -26
- package/dist/utils/LlamaText.js +469 -157
- package/dist/utils/LlamaText.js.map +1 -1
- package/dist/utils/LruCache.d.ts +12 -0
- package/dist/utils/LruCache.js +44 -0
- package/dist/utils/LruCache.js.map +1 -0
- package/dist/utils/ReplHistory.js.map +1 -1
- package/dist/utils/StopGenerationDetector.d.ts +25 -9
- package/dist/utils/StopGenerationDetector.js +93 -22
- package/dist/utils/StopGenerationDetector.js.map +1 -1
- package/dist/utils/TokenStreamRegulator.d.ts +9 -4
- package/dist/utils/TokenStreamRegulator.js +81 -8
- package/dist/utils/TokenStreamRegulator.js.map +1 -1
- package/dist/utils/UnsupportedError.d.ts +2 -0
- package/dist/utils/UnsupportedError.js +7 -0
- package/dist/utils/UnsupportedError.js.map +1 -0
- package/dist/utils/appendUserMessageToChatHistory.js.map +1 -1
- package/dist/utils/clearTempFolder.js.map +1 -1
- package/dist/utils/cmake.js +38 -20
- package/dist/utils/cmake.js.map +1 -1
- package/dist/utils/createModelDownloader.d.ts +108 -0
- package/dist/utils/createModelDownloader.js +231 -0
- package/dist/utils/createModelDownloader.js.map +1 -0
- package/dist/utils/findBestOption.d.ts +4 -0
- package/dist/utils/findBestOption.js +15 -0
- package/dist/utils/findBestOption.js.map +1 -0
- package/dist/utils/findCharacterRemovalCountToFitChatHistoryInContext.d.ts +1 -0
- package/dist/utils/findCharacterRemovalCountToFitChatHistoryInContext.js +23 -12
- package/dist/utils/findCharacterRemovalCountToFitChatHistoryInContext.js.map +1 -1
- package/dist/utils/gbnfJson/GbnfGrammarGenerator.js.map +1 -1
- package/dist/utils/gbnfJson/getGbnfGrammarForGbnfJsonSchema.d.ts +5 -0
- package/dist/utils/gbnfJson/getGbnfGrammarForGbnfJsonSchema.js +11 -0
- package/dist/utils/gbnfJson/getGbnfGrammarForGbnfJsonSchema.js.map +1 -0
- package/dist/utils/gbnfJson/terminals/GbnfArray.d.ts +3 -1
- package/dist/utils/gbnfJson/terminals/GbnfArray.js +10 -5
- package/dist/utils/gbnfJson/terminals/GbnfArray.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfBoolean.d.ts +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfBoolean.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfBooleanValue.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfGrammar.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfNull.d.ts +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfNull.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfNumber.d.ts +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfNumber.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfNumberValue.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfObjectMap.d.ts +3 -1
- package/dist/utils/gbnfJson/terminals/GbnfObjectMap.js +9 -4
- package/dist/utils/gbnfJson/terminals/GbnfObjectMap.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfOr.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfRepetition.d.ts +9 -0
- package/dist/utils/gbnfJson/terminals/GbnfRepetition.js +37 -0
- package/dist/utils/gbnfJson/terminals/GbnfRepetition.js.map +1 -0
- package/dist/utils/gbnfJson/terminals/GbnfString.d.ts +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfString.js +23 -5
- package/dist/utils/gbnfJson/terminals/GbnfString.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfStringValue.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfVerbatimText.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfWhitespace.d.ts +7 -4
- package/dist/utils/gbnfJson/terminals/GbnfWhitespace.js +37 -9
- package/dist/utils/gbnfJson/terminals/GbnfWhitespace.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/gbnfConsts.d.ts +5 -4
- package/dist/utils/gbnfJson/terminals/gbnfConsts.js +14 -3
- package/dist/utils/gbnfJson/terminals/gbnfConsts.js.map +1 -1
- package/dist/utils/gbnfJson/types.d.ts +3 -0
- package/dist/utils/gbnfJson/types.js.map +1 -1
- package/dist/utils/gbnfJson/utils/GbnfJsonScopeState.d.ts +10 -0
- package/dist/utils/gbnfJson/utils/GbnfJsonScopeState.js +15 -0
- package/dist/utils/gbnfJson/utils/GbnfJsonScopeState.js.map +1 -0
- package/dist/utils/gbnfJson/utils/getGbnfJsonTerminalForGbnfJsonSchema.d.ts +2 -1
- package/dist/utils/gbnfJson/utils/getGbnfJsonTerminalForGbnfJsonSchema.js +6 -5
- package/dist/utils/gbnfJson/utils/getGbnfJsonTerminalForGbnfJsonSchema.js.map +1 -1
- package/dist/utils/gbnfJson/utils/validateObjectAgainstGbnfSchema.js +2 -2
- package/dist/utils/gbnfJson/utils/validateObjectAgainstGbnfSchema.js.map +1 -1
- package/dist/utils/getBuildDefaults.d.ts +1 -2
- package/dist/utils/getBuildDefaults.js +2 -3
- package/dist/utils/getBuildDefaults.js.map +1 -1
- package/dist/utils/getConsoleLogPrefix.d.ts +1 -0
- package/dist/utils/getConsoleLogPrefix.js +10 -0
- package/dist/utils/getConsoleLogPrefix.js.map +1 -0
- package/dist/utils/getGrammarsFolder.d.ts +2 -1
- package/dist/utils/getGrammarsFolder.js +8 -7
- package/dist/utils/getGrammarsFolder.js.map +1 -1
- package/dist/utils/getModuleVersion.d.ts +1 -0
- package/dist/utils/getModuleVersion.js +13 -0
- package/dist/utils/getModuleVersion.js.map +1 -0
- package/dist/utils/getQueuedTokensBeforeStopTrigger.d.ts +6 -0
- package/dist/utils/getQueuedTokensBeforeStopTrigger.js +22 -0
- package/dist/utils/getQueuedTokensBeforeStopTrigger.js.map +1 -0
- package/dist/utils/getReadableContextSize.d.ts +1 -0
- package/dist/utils/getReadableContextSize.js +7 -0
- package/dist/utils/getReadableContextSize.js.map +1 -0
- package/dist/utils/getTypeScriptTypeStringForGbnfJsonSchema.js +15 -11
- package/dist/utils/getTypeScriptTypeStringForGbnfJsonSchema.js.map +1 -1
- package/dist/utils/gitReleaseBundles.js +73 -5
- package/dist/utils/gitReleaseBundles.js.map +1 -1
- package/dist/utils/hashString.d.ts +1 -0
- package/dist/utils/hashString.js +8 -0
- package/dist/utils/hashString.js.map +1 -0
- package/dist/utils/isLockfileActive.d.ts +4 -0
- package/dist/utils/isLockfileActive.js +12 -0
- package/dist/utils/isLockfileActive.js.map +1 -0
- package/dist/utils/isToken.d.ts +2 -0
- package/dist/utils/isToken.js +4 -0
- package/dist/utils/isToken.js.map +1 -0
- package/dist/utils/isUrl.d.ts +1 -0
- package/dist/utils/isUrl.js +15 -0
- package/dist/utils/isUrl.js.map +1 -0
- package/dist/utils/mergeUnionTypes.d.ts +10 -0
- package/dist/utils/mergeUnionTypes.js +2 -0
- package/dist/utils/mergeUnionTypes.js.map +1 -0
- package/dist/utils/parseModelFileName.d.ts +1 -0
- package/dist/utils/parseModelFileName.js +6 -1
- package/dist/utils/parseModelFileName.js.map +1 -1
- package/dist/utils/parseTextTemplate.d.ts +66 -0
- package/dist/utils/parseTextTemplate.js +116 -0
- package/dist/utils/parseTextTemplate.js.map +1 -0
- package/dist/utils/prettyPrintObject.d.ts +10 -0
- package/dist/utils/prettyPrintObject.js +84 -0
- package/dist/utils/prettyPrintObject.js.map +1 -0
- package/dist/utils/removeNullFields.d.ts +2 -1
- package/dist/utils/removeNullFields.js +8 -0
- package/dist/utils/removeNullFields.js.map +1 -1
- package/dist/utils/resolveGithubRelease.d.ts +2 -0
- package/dist/utils/resolveGithubRelease.js +36 -0
- package/dist/utils/resolveGithubRelease.js.map +1 -0
- package/dist/utils/runtime.d.ts +4 -0
- package/dist/utils/runtime.js +8 -0
- package/dist/utils/runtime.js.map +1 -0
- package/dist/utils/safeEventCallback.d.ts +6 -0
- package/dist/utils/safeEventCallback.js +29 -0
- package/dist/utils/safeEventCallback.js.map +1 -0
- package/dist/utils/spawnCommand.d.ts +11 -1
- package/dist/utils/spawnCommand.js +56 -6
- package/dist/utils/spawnCommand.js.map +1 -1
- package/dist/utils/tokenizeInput.d.ts +3 -0
- package/dist/utils/tokenizeInput.js +12 -0
- package/dist/utils/tokenizeInput.js.map +1 -0
- package/dist/utils/truncateTextAndRoundToWords.d.ts +2 -0
- package/dist/utils/truncateTextAndRoundToWords.js +30 -0
- package/dist/utils/truncateTextAndRoundToWords.js.map +1 -1
- package/dist/utils/utilTypes.d.ts +3 -0
- package/dist/utils/utilTypes.js +2 -0
- package/dist/utils/utilTypes.js.map +1 -0
- package/dist/utils/waitForLockfileRelease.d.ts +5 -0
- package/dist/utils/waitForLockfileRelease.js +20 -0
- package/dist/utils/waitForLockfileRelease.js.map +1 -0
- package/dist/utils/withLockfile.d.ts +7 -0
- package/dist/utils/withLockfile.js +44 -0
- package/dist/utils/withLockfile.js.map +1 -0
- package/dist/utils/withOra.d.ts +2 -0
- package/dist/utils/withOra.js +22 -6
- package/dist/utils/withOra.js.map +1 -1
- package/dist/utils/withProgressLog.d.ts +23 -0
- package/dist/utils/withProgressLog.js +211 -0
- package/dist/utils/withProgressLog.js.map +1 -0
- package/dist/utils/withStatusLogs.d.ts +2 -1
- package/dist/utils/withStatusLogs.js +12 -9
- package/dist/utils/withStatusLogs.js.map +1 -1
- package/dist/utils/wrapAbortSignal.d.ts +2 -0
- package/dist/utils/wrapAbortSignal.js +9 -0
- package/dist/utils/wrapAbortSignal.js.map +1 -0
- package/llama/.clang-format +1 -2
- package/llama/CMakeLists.txt +115 -4
- package/llama/addon.cpp +1300 -97
- package/llama/binariesGithubRelease.json +1 -1
- package/llama/gitRelease.bundle +0 -0
- package/llama/gpuInfo/cuda-gpu-info.cu +120 -0
- package/llama/gpuInfo/cuda-gpu-info.h +10 -0
- package/llama/gpuInfo/metal-gpu-info.h +8 -0
- package/llama/gpuInfo/metal-gpu-info.mm +30 -0
- package/llama/gpuInfo/vulkan-gpu-info.cpp +83 -0
- package/llama/gpuInfo/vulkan-gpu-info.h +9 -0
- package/llama/grammars/README.md +58 -5
- package/llama/grammars/json.gbnf +4 -4
- package/llama/grammars/json_arr.gbnf +4 -4
- package/llama/llama.cpp.info.json +4 -0
- package/llama/toolchains/win32.host-x64.target-arm64.cmake +41 -0
- package/package.json +78 -53
- package/templates/packed/electron-typescript-react.json +1 -0
- package/templates/packed/node-typescript.json +1 -0
- package/dist/AbortError.d.ts +0 -2
- package/dist/AbortError.js +0 -7
- package/dist/AbortError.js.map +0 -1
- package/dist/chatWrappers/LlamaChatWrapper.d.ts +0 -13
- package/dist/chatWrappers/LlamaChatWrapper.js.map +0 -1
- package/dist/chatWrappers/resolveChatWrapperBasedOnModel.d.ts +0 -13
- package/dist/chatWrappers/resolveChatWrapperBasedOnModel.js +0 -57
- package/dist/chatWrappers/resolveChatWrapperBasedOnModel.js.map +0 -1
- package/dist/llamaEvaluator/LlamaBins.d.ts +0 -18
- package/dist/llamaEvaluator/LlamaBins.js +0 -5
- package/dist/llamaEvaluator/LlamaBins.js.map +0 -1
- package/dist/llamaEvaluator/LlamaChat/LlamaChat.js +0 -704
- package/dist/llamaEvaluator/LlamaChat/LlamaChat.js.map +0 -1
- package/dist/llamaEvaluator/LlamaChat/utils/FunctionCallGrammar.d.ts +0 -21
- package/dist/llamaEvaluator/LlamaChat/utils/FunctionCallGrammar.js +0 -120
- package/dist/llamaEvaluator/LlamaChat/utils/FunctionCallGrammar.js.map +0 -1
- package/dist/llamaEvaluator/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.js.map +0 -1
- package/dist/llamaEvaluator/LlamaChatSession/LlamaChatSession.d.ts +0 -146
- package/dist/llamaEvaluator/LlamaChatSession/LlamaChatSession.js +0 -211
- package/dist/llamaEvaluator/LlamaChatSession/LlamaChatSession.js.map +0 -1
- package/dist/llamaEvaluator/LlamaChatSession/utils/defineChatSessionFunction.js.map +0 -1
- package/dist/llamaEvaluator/LlamaContext/LlamaContext.js.map +0 -1
- package/dist/llamaEvaluator/LlamaContext/types.d.ts +0 -82
- package/dist/llamaEvaluator/LlamaContext/types.js.map +0 -1
- package/dist/llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/firstInFirstOutStrategy.js.map +0 -1
- package/dist/llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/maximumParallelismStrategy.js.map +0 -1
- package/dist/llamaEvaluator/LlamaContext/utils/resolveBatchItemsPrioritizingStrategy.d.ts +0 -2
- package/dist/llamaEvaluator/LlamaContext/utils/resolveBatchItemsPrioritizingStrategy.js.map +0 -1
- package/dist/llamaEvaluator/LlamaEmbeddingContext.d.ts +0 -35
- package/dist/llamaEvaluator/LlamaEmbeddingContext.js +0 -73
- package/dist/llamaEvaluator/LlamaEmbeddingContext.js.map +0 -1
- package/dist/llamaEvaluator/LlamaGrammar.js.map +0 -1
- package/dist/llamaEvaluator/LlamaGrammarEvaluationState.js.map +0 -1
- package/dist/llamaEvaluator/LlamaJsonSchemaGrammar.js.map +0 -1
- package/dist/llamaEvaluator/LlamaModel.d.ts +0 -119
- package/dist/llamaEvaluator/LlamaModel.js +0 -322
- package/dist/llamaEvaluator/LlamaModel.js.map +0 -1
- package/dist/utils/binariesGithubRelease.js.map +0 -1
- package/dist/utils/clearLlamaBuild.d.ts +0 -1
- package/dist/utils/clearLlamaBuild.js +0 -12
- package/dist/utils/clearLlamaBuild.js.map +0 -1
- package/dist/utils/cloneLlamaCppRepo.d.ts +0 -2
- package/dist/utils/cloneLlamaCppRepo.js +0 -102
- package/dist/utils/cloneLlamaCppRepo.js.map +0 -1
- package/dist/utils/compileLLamaCpp.d.ts +0 -8
- package/dist/utils/compileLLamaCpp.js +0 -132
- package/dist/utils/compileLLamaCpp.js.map +0 -1
- package/dist/utils/getBin.js +0 -78
- package/dist/utils/getBin.js.map +0 -1
- package/dist/utils/getGbnfGrammarForGbnfJsonSchema.d.ts +0 -2
- package/dist/utils/getGbnfGrammarForGbnfJsonSchema.js +0 -9
- package/dist/utils/getGbnfGrammarForGbnfJsonSchema.js.map +0 -1
- package/dist/utils/getReleaseInfo.d.ts +0 -7
- package/dist/utils/getReleaseInfo.js +0 -30
- package/dist/utils/getReleaseInfo.js.map +0 -1
- package/dist/utils/parseModelTypeDescription.d.ts +0 -6
- package/dist/utils/parseModelTypeDescription.js +0 -9
- package/dist/utils/parseModelTypeDescription.js.map +0 -1
- package/dist/utils/resolveChatWrapper.d.ts +0 -4
- package/dist/utils/resolveChatWrapper.js +0 -16
- package/dist/utils/resolveChatWrapper.js.map +0 -1
- package/dist/utils/usedBinFlag.d.ts +0 -6
- package/dist/utils/usedBinFlag.js +0 -15
- package/dist/utils/usedBinFlag.js.map +0 -1
- package/llama/usedBin.json +0 -3
- package/llamaBins/linux-arm64/llama-addon.node +0 -0
- package/llamaBins/linux-armv7l/llama-addon.node +0 -0
- package/llamaBins/linux-x64/llama-addon.node +0 -0
- package/llamaBins/mac-arm64/llama-addon.node +0 -0
- package/llamaBins/mac-x64/llama-addon.node +0 -0
- package/llamaBins/win-x64/llama-addon.exp +0 -0
- package/llamaBins/win-x64/llama-addon.lib +0 -0
- package/llamaBins/win-x64/llama-addon.node +0 -0
- /package/dist/{utils → bindings/utils}/binariesGithubRelease.d.ts +0 -0
- /package/dist/{llamaEvaluator → evaluator}/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.d.ts +0 -0
- /package/dist/{llamaEvaluator → evaluator}/LlamaContext/types.js +0 -0
- /package/dist/{llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies → evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies}/firstInFirstOutStrategy.d.ts +0 -0
- /package/dist/{llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies → evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies}/firstInFirstOutStrategy.js +0 -0
- /package/dist/{llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies → evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies}/maximumParallelismStrategy.d.ts +0 -0
- /package/dist/{llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies → evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies}/maximumParallelismStrategy.js +0 -0
- /package/dist/{llamaEvaluator → evaluator}/LlamaGrammarEvaluationState.d.ts +0 -0
package/llama/addon.cpp
CHANGED
|
@@ -3,42 +3,284 @@
|
|
|
3
3
|
#include <algorithm>
|
|
4
4
|
#include <sstream>
|
|
5
5
|
#include <vector>
|
|
6
|
+
#include <unordered_map>
|
|
6
7
|
|
|
7
8
|
#include "common.h"
|
|
8
9
|
#include "common/grammar-parser.h"
|
|
9
10
|
#include "llama.h"
|
|
10
11
|
#include "napi.h"
|
|
11
12
|
|
|
12
|
-
|
|
13
|
+
#ifdef GPU_INFO_USE_CUDA
|
|
14
|
+
# include "gpuInfo/cuda-gpu-info.h"
|
|
15
|
+
#endif
|
|
16
|
+
#ifdef GPU_INFO_USE_VULKAN
|
|
17
|
+
# include "gpuInfo/vulkan-gpu-info.h"
|
|
18
|
+
#endif
|
|
19
|
+
#ifdef GPU_INFO_USE_METAL
|
|
20
|
+
# include "gpuInfo/metal-gpu-info.h"
|
|
21
|
+
#endif
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
struct addon_logger_log {
|
|
25
|
+
public:
|
|
26
|
+
const int logLevelNumber;
|
|
27
|
+
const std::stringstream* stringStream;
|
|
28
|
+
};
|
|
29
|
+
|
|
30
|
+
static void addonLlamaCppLogCallback(ggml_log_level level, const char* text, void* user_data);
|
|
31
|
+
|
|
32
|
+
using AddonThreadSafeLogCallbackFunctionContext = Napi::Reference<Napi::Value>;
|
|
33
|
+
void addonCallJsLogCallback(
|
|
34
|
+
Napi::Env env, Napi::Function callback, AddonThreadSafeLogCallbackFunctionContext* context, addon_logger_log* data
|
|
35
|
+
);
|
|
36
|
+
using AddonThreadSafeLogCallbackFunction =
|
|
37
|
+
Napi::TypedThreadSafeFunction<AddonThreadSafeLogCallbackFunctionContext, addon_logger_log, addonCallJsLogCallback>;
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
struct addon_progress_event {
|
|
41
|
+
public:
|
|
42
|
+
const float progress;
|
|
43
|
+
};
|
|
44
|
+
|
|
45
|
+
using AddonThreadSafeProgressCallbackFunctionContext = Napi::Reference<Napi::Value>;
|
|
46
|
+
void addonCallJsProgressCallback(
|
|
47
|
+
Napi::Env env, Napi::Function callback, AddonThreadSafeProgressCallbackFunctionContext* context, addon_progress_event* data
|
|
48
|
+
);
|
|
49
|
+
using AddonThreadSafeProgressEventCallbackFunction =
|
|
50
|
+
Napi::TypedThreadSafeFunction<AddonThreadSafeProgressCallbackFunctionContext, addon_progress_event, addonCallJsProgressCallback>;
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
AddonThreadSafeLogCallbackFunction addonThreadSafeLoggerCallback;
|
|
54
|
+
bool addonJsLoggerCallbackSet = false;
|
|
55
|
+
int addonLoggerLogLevel = 5;
|
|
56
|
+
bool backendInitialized = false;
|
|
57
|
+
bool backendDisposed = false;
|
|
58
|
+
|
|
59
|
+
void addonCallJsProgressCallback(
|
|
60
|
+
Napi::Env env, Napi::Function callback, AddonThreadSafeProgressCallbackFunctionContext* context, addon_progress_event* data
|
|
61
|
+
) {
|
|
62
|
+
if (env != nullptr && callback != nullptr && addonJsLoggerCallbackSet) {
|
|
63
|
+
try {
|
|
64
|
+
callback.Call({Napi::Number::New(env, data->progress)});
|
|
65
|
+
} catch (const Napi::Error& e) {}
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
if (data != nullptr) {
|
|
69
|
+
delete data;
|
|
70
|
+
}
|
|
71
|
+
}
|
|
72
|
+
|
|
73
|
+
static uint64_t calculateBatchMemorySize(int32_t n_tokens_alloc, int32_t embd, int32_t n_seq_max) {
|
|
74
|
+
uint64_t totalSize = 0;
|
|
75
|
+
|
|
76
|
+
if (embd) {
|
|
77
|
+
totalSize += sizeof(float) * n_tokens_alloc * embd;
|
|
78
|
+
} else {
|
|
79
|
+
totalSize += sizeof(llama_token) * n_tokens_alloc;
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
totalSize += sizeof(llama_pos) * n_tokens_alloc;
|
|
83
|
+
totalSize += sizeof(int32_t) * n_tokens_alloc;
|
|
84
|
+
totalSize += sizeof(llama_seq_id *) * (n_tokens_alloc + 1);
|
|
85
|
+
|
|
86
|
+
totalSize += sizeof(llama_seq_id) * n_seq_max * n_tokens_alloc;
|
|
87
|
+
|
|
88
|
+
totalSize += sizeof(int8_t) * n_tokens_alloc;
|
|
89
|
+
|
|
90
|
+
return totalSize;
|
|
91
|
+
}
|
|
92
|
+
|
|
93
|
+
static void adjustNapiExternalMemoryAdd(Napi::Env env, uint64_t size) {
|
|
94
|
+
const uint64_t chunkSize = std::numeric_limits<int64_t>::max();
|
|
95
|
+
while (size > 0) {
|
|
96
|
+
int64_t adjustSize = std::min(size, chunkSize);
|
|
97
|
+
Napi::MemoryManagement::AdjustExternalMemory(env, adjustSize);
|
|
98
|
+
size -= adjustSize;
|
|
99
|
+
}
|
|
100
|
+
}
|
|
101
|
+
|
|
102
|
+
static void adjustNapiExternalMemorySubtract(Napi::Env env, uint64_t size) {
|
|
103
|
+
const uint64_t chunkSize = std::numeric_limits<int64_t>::max();
|
|
104
|
+
while (size > 0) {
|
|
105
|
+
int64_t adjustSize = std::min(size, chunkSize);
|
|
106
|
+
Napi::MemoryManagement::AdjustExternalMemory(env, -adjustSize);
|
|
107
|
+
size -= adjustSize;
|
|
108
|
+
}
|
|
109
|
+
}
|
|
110
|
+
|
|
111
|
+
std::string addon_model_token_to_piece(const struct llama_model* model, llama_token token, bool specialTokens) {
|
|
13
112
|
std::vector<char> result(8, 0);
|
|
14
|
-
const int n_tokens = llama_token_to_piece(model, token, result.data(), result.size());
|
|
113
|
+
const int n_tokens = llama_token_to_piece(model, token, result.data(), result.size(), specialTokens);
|
|
15
114
|
if (n_tokens < 0) {
|
|
16
115
|
result.resize(-n_tokens);
|
|
17
|
-
int check = llama_token_to_piece(model, token, result.data(), result.size());
|
|
116
|
+
int check = llama_token_to_piece(model, token, result.data(), result.size(), specialTokens);
|
|
18
117
|
GGML_ASSERT(check == -n_tokens);
|
|
19
|
-
}
|
|
20
|
-
else {
|
|
118
|
+
} else {
|
|
21
119
|
result.resize(n_tokens);
|
|
22
120
|
}
|
|
23
121
|
|
|
24
122
|
return std::string(result.data(), result.size());
|
|
25
123
|
}
|
|
26
124
|
|
|
125
|
+
#ifdef GPU_INFO_USE_CUDA
|
|
126
|
+
void logCudaError(const char* message) {
|
|
127
|
+
addonLlamaCppLogCallback(GGML_LOG_LEVEL_ERROR, (std::string("CUDA error: ") + std::string(message)).c_str(), nullptr);
|
|
128
|
+
}
|
|
129
|
+
#endif
|
|
130
|
+
#ifdef GPU_INFO_USE_VULKAN
|
|
131
|
+
void logVulkanWarning(const char* message) {
|
|
132
|
+
addonLlamaCppLogCallback(GGML_LOG_LEVEL_WARN, (std::string("Vulkan warning: ") + std::string(message)).c_str(), nullptr);
|
|
133
|
+
}
|
|
134
|
+
#endif
|
|
135
|
+
|
|
136
|
+
Napi::Value getGpuVramInfo(const Napi::CallbackInfo& info) {
|
|
137
|
+
uint64_t total = 0;
|
|
138
|
+
uint64_t used = 0;
|
|
139
|
+
|
|
140
|
+
#ifdef GPU_INFO_USE_CUDA
|
|
141
|
+
size_t cudaDeviceTotal = 0;
|
|
142
|
+
size_t cudaDeviceUsed = 0;
|
|
143
|
+
bool cudeGetInfoSuccess = gpuInfoGetTotalCudaDevicesInfo(&cudaDeviceTotal, &cudaDeviceUsed, logCudaError);
|
|
144
|
+
|
|
145
|
+
if (cudeGetInfoSuccess) {
|
|
146
|
+
total += cudaDeviceTotal;
|
|
147
|
+
used += cudaDeviceUsed;
|
|
148
|
+
}
|
|
149
|
+
#endif
|
|
150
|
+
|
|
151
|
+
#ifdef GPU_INFO_USE_VULKAN
|
|
152
|
+
uint64_t vulkanDeviceTotal = 0;
|
|
153
|
+
uint64_t vulkanDeviceUsed = 0;
|
|
154
|
+
const bool vulkanDeviceSupportsMemoryBudgetExtension = gpuInfoGetTotalVulkanDevicesInfo(&vulkanDeviceTotal, &vulkanDeviceUsed, logVulkanWarning);
|
|
155
|
+
|
|
156
|
+
if (vulkanDeviceSupportsMemoryBudgetExtension) {
|
|
157
|
+
total += vulkanDeviceTotal;
|
|
158
|
+
used += vulkanDeviceUsed;
|
|
159
|
+
}
|
|
160
|
+
#endif
|
|
161
|
+
|
|
162
|
+
#ifdef GPU_INFO_USE_METAL
|
|
163
|
+
uint64_t metalDeviceTotal = 0;
|
|
164
|
+
uint64_t metalDeviceUsed = 0;
|
|
165
|
+
getMetalGpuInfo(&metalDeviceTotal, &metalDeviceUsed);
|
|
166
|
+
|
|
167
|
+
total += metalDeviceTotal;
|
|
168
|
+
used += metalDeviceUsed;
|
|
169
|
+
#endif
|
|
170
|
+
|
|
171
|
+
Napi::Object result = Napi::Object::New(info.Env());
|
|
172
|
+
result.Set("total", Napi::Number::From(info.Env(), total));
|
|
173
|
+
result.Set("used", Napi::Number::From(info.Env(), used));
|
|
174
|
+
|
|
175
|
+
return result;
|
|
176
|
+
}
|
|
177
|
+
|
|
178
|
+
Napi::Value getGpuDeviceInfo(const Napi::CallbackInfo& info) {
|
|
179
|
+
std::vector<std::string> deviceNames;
|
|
180
|
+
|
|
181
|
+
#ifdef GPU_INFO_USE_CUDA
|
|
182
|
+
gpuInfoGetCudaDeviceNames(&deviceNames, logCudaError);
|
|
183
|
+
#endif
|
|
184
|
+
|
|
185
|
+
#ifdef GPU_INFO_USE_VULKAN
|
|
186
|
+
gpuInfoGetVulkanDeviceNames(&deviceNames, logVulkanWarning);
|
|
187
|
+
#endif
|
|
188
|
+
|
|
189
|
+
#ifdef GPU_INFO_USE_METAL
|
|
190
|
+
getMetalGpuDeviceNames(&deviceNames);
|
|
191
|
+
#endif
|
|
192
|
+
|
|
193
|
+
Napi::Object result = Napi::Object::New(info.Env());
|
|
194
|
+
|
|
195
|
+
Napi::Array deviceNamesNapiArray = Napi::Array::New(info.Env(), deviceNames.size());
|
|
196
|
+
for (size_t i = 0; i < deviceNames.size(); ++i) {
|
|
197
|
+
deviceNamesNapiArray[i] = Napi::String::New(info.Env(), deviceNames[i]);
|
|
198
|
+
}
|
|
199
|
+
result.Set("deviceNames", deviceNamesNapiArray);
|
|
200
|
+
|
|
201
|
+
return result;
|
|
202
|
+
}
|
|
203
|
+
|
|
204
|
+
Napi::Value getGpuType(const Napi::CallbackInfo& info) {
|
|
205
|
+
#ifdef GPU_INFO_USE_CUDA
|
|
206
|
+
return Napi::String::New(info.Env(), "cuda");
|
|
207
|
+
#endif
|
|
208
|
+
|
|
209
|
+
#ifdef GPU_INFO_USE_VULKAN
|
|
210
|
+
return Napi::String::New(info.Env(), "vulkan");
|
|
211
|
+
#endif
|
|
212
|
+
|
|
213
|
+
#ifdef GPU_INFO_USE_METAL
|
|
214
|
+
return Napi::String::New(info.Env(), "metal");
|
|
215
|
+
#endif
|
|
216
|
+
|
|
217
|
+
return info.Env().Undefined();
|
|
218
|
+
}
|
|
219
|
+
|
|
220
|
+
static Napi::Value getNapiToken(const Napi::CallbackInfo& info, llama_model* model, llama_token token) {
|
|
221
|
+
if (token < 0) {
|
|
222
|
+
return Napi::Number::From(info.Env(), -1);
|
|
223
|
+
}
|
|
224
|
+
|
|
225
|
+
auto tokenAttributes = llama_token_get_attr(model, token);
|
|
226
|
+
|
|
227
|
+
if (tokenAttributes & LLAMA_TOKEN_ATTR_UNDEFINED || tokenAttributes & LLAMA_TOKEN_ATTR_UNKNOWN) {
|
|
228
|
+
return Napi::Number::From(info.Env(), -1);
|
|
229
|
+
}
|
|
230
|
+
|
|
231
|
+
return Napi::Number::From(info.Env(), token);
|
|
232
|
+
}
|
|
233
|
+
|
|
234
|
+
static Napi::Value getNapiControlToken(const Napi::CallbackInfo& info, llama_model* model, llama_token token) {
|
|
235
|
+
if (token < 0) {
|
|
236
|
+
return Napi::Number::From(info.Env(), -1);
|
|
237
|
+
}
|
|
238
|
+
|
|
239
|
+
auto tokenAttributes = llama_token_get_attr(model, token);
|
|
240
|
+
|
|
241
|
+
if (!(tokenAttributes & LLAMA_TOKEN_ATTR_CONTROL) && !(tokenAttributes & LLAMA_TOKEN_ATTR_UNDEFINED)) {
|
|
242
|
+
return Napi::Number::From(info.Env(), -1);
|
|
243
|
+
}
|
|
244
|
+
|
|
245
|
+
return Napi::Number::From(info.Env(), token);
|
|
246
|
+
}
|
|
247
|
+
|
|
248
|
+
static bool llamaModelParamsProgressCallback(float progress, void * user_data);
|
|
249
|
+
|
|
27
250
|
class AddonModel : public Napi::ObjectWrap<AddonModel> {
|
|
28
251
|
public:
|
|
29
252
|
llama_model_params model_params;
|
|
30
253
|
llama_model* model;
|
|
254
|
+
uint64_t loadedModelSize = 0;
|
|
255
|
+
Napi::Reference<Napi::Object> addonExportsRef;
|
|
256
|
+
bool hasAddonExportsRef = false;
|
|
257
|
+
|
|
258
|
+
std::string modelPath;
|
|
259
|
+
bool modelLoaded = false;
|
|
260
|
+
bool abortModelLoad = false;
|
|
261
|
+
bool model_load_stopped = false;
|
|
262
|
+
float rawModelLoadPercentage = 0;
|
|
263
|
+
unsigned modelLoadPercentage = 0;
|
|
264
|
+
AddonThreadSafeProgressEventCallbackFunction addonThreadSafeOnLoadProgressEventCallback;
|
|
265
|
+
bool onLoadProgressEventCallbackSet = false;
|
|
266
|
+
bool hasLoadAbortSignal = false;
|
|
267
|
+
|
|
31
268
|
bool disposed = false;
|
|
32
269
|
|
|
33
270
|
AddonModel(const Napi::CallbackInfo& info) : Napi::ObjectWrap<AddonModel>(info) {
|
|
34
271
|
model_params = llama_model_default_params();
|
|
35
272
|
|
|
36
273
|
// Get the model path
|
|
37
|
-
|
|
274
|
+
modelPath = info[0].As<Napi::String>().Utf8Value();
|
|
38
275
|
|
|
39
276
|
if (info.Length() > 1 && info[1].IsObject()) {
|
|
40
277
|
Napi::Object options = info[1].As<Napi::Object>();
|
|
41
278
|
|
|
279
|
+
if (options.Has("addonExports")) {
|
|
280
|
+
addonExportsRef = Napi::Persistent(options.Get("addonExports").As<Napi::Object>());
|
|
281
|
+
hasAddonExportsRef = true;
|
|
282
|
+
}
|
|
283
|
+
|
|
42
284
|
if (options.Has("gpuLayers")) {
|
|
43
285
|
model_params.n_gpu_layers = options.Get("gpuLayers").As<Napi::Number>().Int32Value();
|
|
44
286
|
}
|
|
@@ -54,14 +296,41 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
|
|
|
54
296
|
if (options.Has("useMlock")) {
|
|
55
297
|
model_params.use_mlock = options.Get("useMlock").As<Napi::Boolean>().Value();
|
|
56
298
|
}
|
|
57
|
-
}
|
|
58
299
|
|
|
59
|
-
|
|
60
|
-
|
|
300
|
+
if (options.Has("checkTensors")) {
|
|
301
|
+
model_params.check_tensors = options.Get("checkTensors").As<Napi::Boolean>().Value();
|
|
302
|
+
}
|
|
61
303
|
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
304
|
+
if (options.Has("onLoadProgress")) {
|
|
305
|
+
auto onLoadProgressJSCallback = options.Get("onLoadProgress").As<Napi::Function>();
|
|
306
|
+
if (onLoadProgressJSCallback.IsFunction()) {
|
|
307
|
+
AddonThreadSafeProgressCallbackFunctionContext* context = new Napi::Reference<Napi::Value>(Napi::Persistent(info.This()));
|
|
308
|
+
addonThreadSafeOnLoadProgressEventCallback = AddonThreadSafeProgressEventCallbackFunction::New(
|
|
309
|
+
info.Env(),
|
|
310
|
+
onLoadProgressJSCallback,
|
|
311
|
+
"onLoadProgressCallback",
|
|
312
|
+
0,
|
|
313
|
+
1,
|
|
314
|
+
context,
|
|
315
|
+
[](Napi::Env, AddonModel* addonModel, AddonThreadSafeProgressCallbackFunctionContext* ctx) {
|
|
316
|
+
addonModel->onLoadProgressEventCallbackSet = false;
|
|
317
|
+
|
|
318
|
+
delete ctx;
|
|
319
|
+
},
|
|
320
|
+
this
|
|
321
|
+
);
|
|
322
|
+
onLoadProgressEventCallbackSet = true;
|
|
323
|
+
}
|
|
324
|
+
}
|
|
325
|
+
|
|
326
|
+
if (options.Has("hasLoadAbortSignal")) {
|
|
327
|
+
hasLoadAbortSignal = options.Get("hasLoadAbortSignal").As<Napi::Boolean>().Value();
|
|
328
|
+
}
|
|
329
|
+
|
|
330
|
+
if (onLoadProgressEventCallbackSet || hasLoadAbortSignal) {
|
|
331
|
+
model_params.progress_callback_user_data = &(*this);
|
|
332
|
+
model_params.progress_callback = llamaModelParamsProgressCallback;
|
|
333
|
+
}
|
|
65
334
|
}
|
|
66
335
|
}
|
|
67
336
|
|
|
@@ -74,23 +343,32 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
|
|
|
74
343
|
return;
|
|
75
344
|
}
|
|
76
345
|
|
|
77
|
-
llama_free_model(model);
|
|
78
346
|
disposed = true;
|
|
79
|
-
|
|
347
|
+
if (modelLoaded) {
|
|
348
|
+
modelLoaded = false;
|
|
349
|
+
llama_free_model(model);
|
|
80
350
|
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
return info.Env().Undefined();
|
|
351
|
+
adjustNapiExternalMemorySubtract(Env(), loadedModelSize);
|
|
352
|
+
loadedModelSize = 0;
|
|
84
353
|
}
|
|
85
354
|
|
|
86
|
-
|
|
355
|
+
if (hasAddonExportsRef) {
|
|
356
|
+
addonExportsRef.Unref();
|
|
357
|
+
hasAddonExportsRef = false;
|
|
358
|
+
}
|
|
359
|
+
}
|
|
87
360
|
|
|
361
|
+
Napi::Value Init(const Napi::CallbackInfo& info);
|
|
362
|
+
Napi::Value LoadLora(const Napi::CallbackInfo& info);
|
|
363
|
+
Napi::Value AbortActiveModelLoad(const Napi::CallbackInfo& info) {
|
|
364
|
+
abortModelLoad = true;
|
|
88
365
|
return info.Env().Undefined();
|
|
89
366
|
}
|
|
367
|
+
Napi::Value Dispose(const Napi::CallbackInfo& info);
|
|
90
368
|
|
|
91
369
|
Napi::Value Tokenize(const Napi::CallbackInfo& info) {
|
|
92
370
|
if (disposed) {
|
|
93
|
-
Napi::Error::New(info.Env(), "
|
|
371
|
+
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
|
|
94
372
|
return info.Env().Undefined();
|
|
95
373
|
}
|
|
96
374
|
|
|
@@ -108,18 +386,21 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
|
|
|
108
386
|
}
|
|
109
387
|
Napi::Value Detokenize(const Napi::CallbackInfo& info) {
|
|
110
388
|
if (disposed) {
|
|
111
|
-
Napi::Error::New(info.Env(), "
|
|
389
|
+
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
|
|
112
390
|
return info.Env().Undefined();
|
|
113
391
|
}
|
|
114
392
|
|
|
115
393
|
Napi::Uint32Array tokens = info[0].As<Napi::Uint32Array>();
|
|
394
|
+
bool decodeSpecialTokens = info.Length() > 0
|
|
395
|
+
? info[1].As<Napi::Boolean>().Value()
|
|
396
|
+
: false;
|
|
116
397
|
|
|
117
398
|
// Create a stringstream for accumulating the decoded string.
|
|
118
399
|
std::stringstream ss;
|
|
119
400
|
|
|
120
401
|
// Decode each token and accumulate the result.
|
|
121
402
|
for (size_t i = 0; i < tokens.ElementLength(); i++) {
|
|
122
|
-
const std::string piece = addon_model_token_to_piece(model, (llama_token)tokens[i]);
|
|
403
|
+
const std::string piece = addon_model_token_to_piece(model, (llama_token)tokens[i], decodeSpecialTokens);
|
|
123
404
|
|
|
124
405
|
if (piece.empty()) {
|
|
125
406
|
continue;
|
|
@@ -133,16 +414,25 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
|
|
|
133
414
|
|
|
134
415
|
Napi::Value GetTrainContextSize(const Napi::CallbackInfo& info) {
|
|
135
416
|
if (disposed) {
|
|
136
|
-
Napi::Error::New(info.Env(), "
|
|
417
|
+
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
|
|
137
418
|
return info.Env().Undefined();
|
|
138
419
|
}
|
|
139
420
|
|
|
140
421
|
return Napi::Number::From(info.Env(), llama_n_ctx_train(model));
|
|
141
422
|
}
|
|
142
423
|
|
|
424
|
+
Napi::Value GetEmbeddingVectorSize(const Napi::CallbackInfo& info) {
|
|
425
|
+
if (disposed) {
|
|
426
|
+
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
|
|
427
|
+
return info.Env().Undefined();
|
|
428
|
+
}
|
|
429
|
+
|
|
430
|
+
return Napi::Number::From(info.Env(), llama_n_embd(model));
|
|
431
|
+
}
|
|
432
|
+
|
|
143
433
|
Napi::Value GetTotalSize(const Napi::CallbackInfo& info) {
|
|
144
434
|
if (disposed) {
|
|
145
|
-
Napi::Error::New(info.Env(), "
|
|
435
|
+
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
|
|
146
436
|
return info.Env().Undefined();
|
|
147
437
|
}
|
|
148
438
|
|
|
@@ -151,7 +441,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
|
|
|
151
441
|
|
|
152
442
|
Napi::Value GetTotalParameters(const Napi::CallbackInfo& info) {
|
|
153
443
|
if (disposed) {
|
|
154
|
-
Napi::Error::New(info.Env(), "
|
|
444
|
+
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
|
|
155
445
|
return info.Env().Undefined();
|
|
156
446
|
}
|
|
157
447
|
|
|
@@ -160,7 +450,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
|
|
|
160
450
|
|
|
161
451
|
Napi::Value GetModelDescription(const Napi::CallbackInfo& info) {
|
|
162
452
|
if (disposed) {
|
|
163
|
-
Napi::Error::New(info.Env(), "
|
|
453
|
+
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
|
|
164
454
|
return info.Env().Undefined();
|
|
165
455
|
}
|
|
166
456
|
|
|
@@ -172,63 +462,63 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
|
|
|
172
462
|
|
|
173
463
|
Napi::Value TokenBos(const Napi::CallbackInfo& info) {
|
|
174
464
|
if (disposed) {
|
|
175
|
-
Napi::Error::New(info.Env(), "
|
|
465
|
+
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
|
|
176
466
|
return info.Env().Undefined();
|
|
177
467
|
}
|
|
178
468
|
|
|
179
|
-
return
|
|
469
|
+
return getNapiControlToken(info, model, llama_token_bos(model));
|
|
180
470
|
}
|
|
181
471
|
Napi::Value TokenEos(const Napi::CallbackInfo& info) {
|
|
182
472
|
if (disposed) {
|
|
183
|
-
Napi::Error::New(info.Env(), "
|
|
473
|
+
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
|
|
184
474
|
return info.Env().Undefined();
|
|
185
475
|
}
|
|
186
476
|
|
|
187
|
-
return
|
|
477
|
+
return getNapiControlToken(info, model, llama_token_eos(model));
|
|
188
478
|
}
|
|
189
479
|
Napi::Value TokenNl(const Napi::CallbackInfo& info) {
|
|
190
480
|
if (disposed) {
|
|
191
|
-
Napi::Error::New(info.Env(), "
|
|
481
|
+
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
|
|
192
482
|
return info.Env().Undefined();
|
|
193
483
|
}
|
|
194
484
|
|
|
195
|
-
return
|
|
485
|
+
return getNapiToken(info, model, llama_token_nl(model));
|
|
196
486
|
}
|
|
197
487
|
Napi::Value PrefixToken(const Napi::CallbackInfo& info) {
|
|
198
488
|
if (disposed) {
|
|
199
|
-
Napi::Error::New(info.Env(), "
|
|
489
|
+
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
|
|
200
490
|
return info.Env().Undefined();
|
|
201
491
|
}
|
|
202
492
|
|
|
203
|
-
return
|
|
493
|
+
return getNapiControlToken(info, model, llama_token_prefix(model));
|
|
204
494
|
}
|
|
205
495
|
Napi::Value MiddleToken(const Napi::CallbackInfo& info) {
|
|
206
496
|
if (disposed) {
|
|
207
|
-
Napi::Error::New(info.Env(), "
|
|
497
|
+
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
|
|
208
498
|
return info.Env().Undefined();
|
|
209
499
|
}
|
|
210
500
|
|
|
211
|
-
return
|
|
501
|
+
return getNapiControlToken(info, model, llama_token_middle(model));
|
|
212
502
|
}
|
|
213
503
|
Napi::Value SuffixToken(const Napi::CallbackInfo& info) {
|
|
214
504
|
if (disposed) {
|
|
215
|
-
Napi::Error::New(info.Env(), "
|
|
505
|
+
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
|
|
216
506
|
return info.Env().Undefined();
|
|
217
507
|
}
|
|
218
508
|
|
|
219
|
-
return
|
|
509
|
+
return getNapiControlToken(info, model, llama_token_suffix(model));
|
|
220
510
|
}
|
|
221
511
|
Napi::Value EotToken(const Napi::CallbackInfo& info) {
|
|
222
512
|
if (disposed) {
|
|
223
|
-
Napi::Error::New(info.Env(), "
|
|
513
|
+
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
|
|
224
514
|
return info.Env().Undefined();
|
|
225
515
|
}
|
|
226
516
|
|
|
227
|
-
return
|
|
517
|
+
return getNapiControlToken(info, model, llama_token_eot(model));
|
|
228
518
|
}
|
|
229
519
|
Napi::Value GetTokenString(const Napi::CallbackInfo& info) {
|
|
230
520
|
if (disposed) {
|
|
231
|
-
Napi::Error::New(info.Env(), "
|
|
521
|
+
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
|
|
232
522
|
return info.Env().Undefined();
|
|
233
523
|
}
|
|
234
524
|
|
|
@@ -245,6 +535,57 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
|
|
|
245
535
|
return Napi::String::New(info.Env(), ss.str());
|
|
246
536
|
}
|
|
247
537
|
|
|
538
|
+
Napi::Value GetTokenAttributes(const Napi::CallbackInfo& info) {
|
|
539
|
+
if (disposed) {
|
|
540
|
+
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
|
|
541
|
+
return info.Env().Undefined();
|
|
542
|
+
}
|
|
543
|
+
|
|
544
|
+
if (info[0].IsNumber() == false) {
|
|
545
|
+
return Napi::Number::From(info.Env(), int32_t(LLAMA_TOKEN_ATTR_UNDEFINED));
|
|
546
|
+
}
|
|
547
|
+
|
|
548
|
+
int token = info[0].As<Napi::Number>().Int32Value();
|
|
549
|
+
auto tokenAttributes = llama_token_get_attr(model, token);
|
|
550
|
+
|
|
551
|
+
return Napi::Number::From(info.Env(), int32_t(tokenAttributes));
|
|
552
|
+
}
|
|
553
|
+
Napi::Value IsEogToken(const Napi::CallbackInfo& info) {
|
|
554
|
+
if (disposed) {
|
|
555
|
+
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
|
|
556
|
+
return info.Env().Undefined();
|
|
557
|
+
}
|
|
558
|
+
|
|
559
|
+
if (info[0].IsNumber() == false) {
|
|
560
|
+
return Napi::Boolean::New(info.Env(), false);
|
|
561
|
+
}
|
|
562
|
+
|
|
563
|
+
int token = info[0].As<Napi::Number>().Int32Value();
|
|
564
|
+
|
|
565
|
+
return Napi::Boolean::New(info.Env(), llama_token_is_eog(model, token));
|
|
566
|
+
}
|
|
567
|
+
Napi::Value GetVocabularyType(const Napi::CallbackInfo& info) {
|
|
568
|
+
if (disposed) {
|
|
569
|
+
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
|
|
570
|
+
return info.Env().Undefined();
|
|
571
|
+
}
|
|
572
|
+
|
|
573
|
+
auto vocabularyType = llama_vocab_type(model);
|
|
574
|
+
|
|
575
|
+
return Napi::Number::From(info.Env(), int32_t(vocabularyType));
|
|
576
|
+
}
|
|
577
|
+
Napi::Value ShouldPrependBosToken(const Napi::CallbackInfo& info) {
|
|
578
|
+
const int addBos = llama_add_bos_token(model);
|
|
579
|
+
|
|
580
|
+
bool shouldPrependBos = addBos != -1 ? bool(addBos) : (llama_vocab_type(model) == LLAMA_VOCAB_TYPE_SPM);
|
|
581
|
+
|
|
582
|
+
return Napi::Boolean::New(info.Env(), shouldPrependBos);
|
|
583
|
+
}
|
|
584
|
+
|
|
585
|
+
Napi::Value GetModelSize(const Napi::CallbackInfo& info) {
|
|
586
|
+
return Napi::Number::From(info.Env(), llama_model_size(model));
|
|
587
|
+
}
|
|
588
|
+
|
|
248
589
|
static void init(Napi::Object exports) {
|
|
249
590
|
exports.Set(
|
|
250
591
|
"AddonModel",
|
|
@@ -252,9 +593,13 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
|
|
|
252
593
|
exports.Env(),
|
|
253
594
|
"AddonModel",
|
|
254
595
|
{
|
|
596
|
+
InstanceMethod("init", &AddonModel::Init),
|
|
597
|
+
InstanceMethod("loadLora", &AddonModel::LoadLora),
|
|
598
|
+
InstanceMethod("abortActiveModelLoad", &AddonModel::AbortActiveModelLoad),
|
|
255
599
|
InstanceMethod("tokenize", &AddonModel::Tokenize),
|
|
256
600
|
InstanceMethod("detokenize", &AddonModel::Detokenize),
|
|
257
601
|
InstanceMethod("getTrainContextSize", &AddonModel::GetTrainContextSize),
|
|
602
|
+
InstanceMethod("getEmbeddingVectorSize", &AddonModel::GetEmbeddingVectorSize),
|
|
258
603
|
InstanceMethod("getTotalSize", &AddonModel::GetTotalSize),
|
|
259
604
|
InstanceMethod("getTotalParameters", &AddonModel::GetTotalParameters),
|
|
260
605
|
InstanceMethod("getModelDescription", &AddonModel::GetModelDescription),
|
|
@@ -266,16 +611,260 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
|
|
|
266
611
|
InstanceMethod("suffixToken", &AddonModel::SuffixToken),
|
|
267
612
|
InstanceMethod("eotToken", &AddonModel::EotToken),
|
|
268
613
|
InstanceMethod("getTokenString", &AddonModel::GetTokenString),
|
|
269
|
-
InstanceMethod("
|
|
614
|
+
InstanceMethod("getTokenAttributes", &AddonModel::GetTokenAttributes),
|
|
615
|
+
InstanceMethod("isEogToken", &AddonModel::IsEogToken),
|
|
616
|
+
InstanceMethod("getVocabularyType", &AddonModel::GetVocabularyType),
|
|
617
|
+
InstanceMethod("shouldPrependBosToken", &AddonModel::ShouldPrependBosToken),
|
|
618
|
+
InstanceMethod("getModelSize", &AddonModel::GetModelSize),
|
|
619
|
+
InstanceMethod("dispose", &AddonModel::Dispose),
|
|
270
620
|
}
|
|
271
621
|
)
|
|
272
622
|
);
|
|
273
623
|
}
|
|
274
624
|
};
|
|
275
625
|
|
|
626
|
+
static bool llamaModelParamsProgressCallback(float progress, void * user_data) {
|
|
627
|
+
AddonModel* addonModel = (AddonModel *) user_data;
|
|
628
|
+
unsigned percentage = (unsigned) (100 * progress);
|
|
629
|
+
|
|
630
|
+
if (percentage > addonModel->modelLoadPercentage) {
|
|
631
|
+
addonModel->modelLoadPercentage = percentage;
|
|
632
|
+
|
|
633
|
+
// original llama.cpp logs
|
|
634
|
+
addonLlamaCppLogCallback(GGML_LOG_LEVEL_INFO, ".", nullptr);
|
|
635
|
+
if (percentage >= 100) {
|
|
636
|
+
addonLlamaCppLogCallback(GGML_LOG_LEVEL_INFO, "\n", nullptr);
|
|
637
|
+
}
|
|
638
|
+
}
|
|
639
|
+
|
|
640
|
+
if (progress > addonModel->rawModelLoadPercentage) {
|
|
641
|
+
addonModel->rawModelLoadPercentage = progress;
|
|
642
|
+
|
|
643
|
+
if (addonModel->onLoadProgressEventCallbackSet) {
|
|
644
|
+
addon_progress_event* data = new addon_progress_event {
|
|
645
|
+
progress
|
|
646
|
+
};
|
|
647
|
+
|
|
648
|
+
auto status = addonModel->addonThreadSafeOnLoadProgressEventCallback.NonBlockingCall(data);
|
|
649
|
+
|
|
650
|
+
if (status != napi_ok) {
|
|
651
|
+
delete data;
|
|
652
|
+
}
|
|
653
|
+
}
|
|
654
|
+
}
|
|
655
|
+
|
|
656
|
+
return !(addonModel->abortModelLoad);
|
|
657
|
+
}
|
|
658
|
+
|
|
659
|
+
class AddonModelLoadModelWorker : public Napi::AsyncWorker {
|
|
660
|
+
public:
|
|
661
|
+
AddonModel* model;
|
|
662
|
+
|
|
663
|
+
AddonModelLoadModelWorker(const Napi::Env& env, AddonModel* model)
|
|
664
|
+
: Napi::AsyncWorker(env, "AddonModelLoadModelWorker"),
|
|
665
|
+
model(model),
|
|
666
|
+
deferred(Napi::Promise::Deferred::New(env)) {
|
|
667
|
+
model->Ref();
|
|
668
|
+
}
|
|
669
|
+
~AddonModelLoadModelWorker() {
|
|
670
|
+
model->Unref();
|
|
671
|
+
}
|
|
672
|
+
|
|
673
|
+
Napi::Promise GetPromise() {
|
|
674
|
+
return deferred.Promise();
|
|
675
|
+
}
|
|
676
|
+
|
|
677
|
+
protected:
|
|
678
|
+
Napi::Promise::Deferred deferred;
|
|
679
|
+
|
|
680
|
+
void Execute() {
|
|
681
|
+
try {
|
|
682
|
+
model->model = llama_load_model_from_file(model->modelPath.c_str(), model->model_params);
|
|
683
|
+
|
|
684
|
+
model->modelLoaded = model->model != nullptr && model->model != NULL;
|
|
685
|
+
} catch (const std::exception& e) {
|
|
686
|
+
SetError(e.what());
|
|
687
|
+
} catch(...) {
|
|
688
|
+
SetError("Unknown error when calling \"llama_load_model_from_file\"");
|
|
689
|
+
}
|
|
690
|
+
}
|
|
691
|
+
void OnOK() {
|
|
692
|
+
if (model->modelLoaded) {
|
|
693
|
+
uint64_t modelSize = llama_model_size(model->model);
|
|
694
|
+
adjustNapiExternalMemoryAdd(Env(), modelSize);
|
|
695
|
+
model->loadedModelSize = modelSize;
|
|
696
|
+
}
|
|
697
|
+
|
|
698
|
+
deferred.Resolve(Napi::Boolean::New(Env(), model->modelLoaded));
|
|
699
|
+
if (model->onLoadProgressEventCallbackSet) {
|
|
700
|
+
model->addonThreadSafeOnLoadProgressEventCallback.Release();
|
|
701
|
+
}
|
|
702
|
+
}
|
|
703
|
+
void OnError(const Napi::Error& err) {
|
|
704
|
+
deferred.Reject(err.Value());
|
|
705
|
+
}
|
|
706
|
+
};
|
|
707
|
+
class AddonModelUnloadModelWorker : public Napi::AsyncWorker {
|
|
708
|
+
public:
|
|
709
|
+
AddonModel* model;
|
|
710
|
+
|
|
711
|
+
AddonModelUnloadModelWorker(const Napi::Env& env, AddonModel* model)
|
|
712
|
+
: Napi::AsyncWorker(env, "AddonModelUnloadModelWorker"),
|
|
713
|
+
model(model),
|
|
714
|
+
deferred(Napi::Promise::Deferred::New(env)) {
|
|
715
|
+
model->Ref();
|
|
716
|
+
}
|
|
717
|
+
~AddonModelUnloadModelWorker() {
|
|
718
|
+
model->Unref();
|
|
719
|
+
}
|
|
720
|
+
|
|
721
|
+
Napi::Promise GetPromise() {
|
|
722
|
+
return deferred.Promise();
|
|
723
|
+
}
|
|
724
|
+
|
|
725
|
+
protected:
|
|
726
|
+
Napi::Promise::Deferred deferred;
|
|
727
|
+
|
|
728
|
+
void Execute() {
|
|
729
|
+
try {
|
|
730
|
+
llama_free_model(model->model);
|
|
731
|
+
model->modelLoaded = false;
|
|
732
|
+
|
|
733
|
+
model->dispose();
|
|
734
|
+
} catch (const std::exception& e) {
|
|
735
|
+
SetError(e.what());
|
|
736
|
+
} catch(...) {
|
|
737
|
+
SetError("Unknown error when calling \"llama_free_model\"");
|
|
738
|
+
}
|
|
739
|
+
}
|
|
740
|
+
void OnOK() {
|
|
741
|
+
adjustNapiExternalMemorySubtract(Env(), model->loadedModelSize);
|
|
742
|
+
model->loadedModelSize = 0;
|
|
743
|
+
|
|
744
|
+
deferred.Resolve(Env().Undefined());
|
|
745
|
+
}
|
|
746
|
+
void OnError(const Napi::Error& err) {
|
|
747
|
+
deferred.Reject(err.Value());
|
|
748
|
+
}
|
|
749
|
+
};
|
|
750
|
+
class AddonModelLoadLoraWorker : public Napi::AsyncWorker {
|
|
751
|
+
public:
|
|
752
|
+
AddonModel* model;
|
|
753
|
+
std::string loraFilePath;
|
|
754
|
+
float loraScale;
|
|
755
|
+
int32_t loraThreads;
|
|
756
|
+
std::string baseModelPath;
|
|
757
|
+
|
|
758
|
+
AddonModelLoadLoraWorker(
|
|
759
|
+
const Napi::Env& env,
|
|
760
|
+
AddonModel* model,
|
|
761
|
+
std::string loraFilePath,
|
|
762
|
+
float loraScale,
|
|
763
|
+
int32_t loraThreads,
|
|
764
|
+
std::string baseModelPath
|
|
765
|
+
)
|
|
766
|
+
: Napi::AsyncWorker(env, "AddonModelLoadLoraWorker"),
|
|
767
|
+
model(model),
|
|
768
|
+
loraFilePath(loraFilePath),
|
|
769
|
+
loraScale(loraScale),
|
|
770
|
+
loraThreads(loraThreads),
|
|
771
|
+
baseModelPath(baseModelPath),
|
|
772
|
+
deferred(Napi::Promise::Deferred::New(env)) {
|
|
773
|
+
model->Ref();
|
|
774
|
+
}
|
|
775
|
+
~AddonModelLoadLoraWorker() {
|
|
776
|
+
model->Unref();
|
|
777
|
+
}
|
|
778
|
+
|
|
779
|
+
Napi::Promise GetPromise() {
|
|
780
|
+
return deferred.Promise();
|
|
781
|
+
}
|
|
782
|
+
|
|
783
|
+
protected:
|
|
784
|
+
Napi::Promise::Deferred deferred;
|
|
785
|
+
|
|
786
|
+
void Execute() {
|
|
787
|
+
try {
|
|
788
|
+
const auto res = llama_model_apply_lora_from_file(
|
|
789
|
+
model->model,
|
|
790
|
+
loraFilePath.c_str(),
|
|
791
|
+
loraScale,
|
|
792
|
+
baseModelPath.empty() ? NULL : baseModelPath.c_str(),
|
|
793
|
+
loraThreads
|
|
794
|
+
);
|
|
795
|
+
|
|
796
|
+
if (res != 0) {
|
|
797
|
+
SetError(
|
|
798
|
+
std::string(
|
|
799
|
+
std::string("Failed to apply LoRA \"") + loraFilePath + std::string("\"") + (
|
|
800
|
+
baseModelPath.empty()
|
|
801
|
+
? std::string("")
|
|
802
|
+
: (std::string(" with base model \"") + baseModelPath + std::string("\""))
|
|
803
|
+
)
|
|
804
|
+
)
|
|
805
|
+
);
|
|
806
|
+
}
|
|
807
|
+
} catch (const std::exception& e) {
|
|
808
|
+
SetError(e.what());
|
|
809
|
+
} catch(...) {
|
|
810
|
+
SetError("Unknown error when calling \"llama_model_apply_lora_from_file\"");
|
|
811
|
+
}
|
|
812
|
+
}
|
|
813
|
+
void OnOK() {
|
|
814
|
+
deferred.Resolve(Env().Undefined());
|
|
815
|
+
}
|
|
816
|
+
void OnError(const Napi::Error& err) {
|
|
817
|
+
deferred.Reject(err.Value());
|
|
818
|
+
}
|
|
819
|
+
};
|
|
820
|
+
|
|
821
|
+
Napi::Value AddonModel::Init(const Napi::CallbackInfo& info) {
|
|
822
|
+
if (disposed) {
|
|
823
|
+
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
|
|
824
|
+
return info.Env().Undefined();
|
|
825
|
+
}
|
|
826
|
+
|
|
827
|
+
AddonModelLoadModelWorker* worker = new AddonModelLoadModelWorker(this->Env(), this);
|
|
828
|
+
worker->Queue();
|
|
829
|
+
return worker->GetPromise();
|
|
830
|
+
}
|
|
831
|
+
Napi::Value AddonModel::LoadLora(const Napi::CallbackInfo& info) {
|
|
832
|
+
std::string loraFilePath = info[0].As<Napi::String>().Utf8Value();
|
|
833
|
+
float scale = info[1].As<Napi::Number>().FloatValue();
|
|
834
|
+
int32_t threads = info[2].As<Napi::Number>().Int32Value();
|
|
835
|
+
std::string baseModelPath = (info.Length() > 3 && info[3].IsString()) ? info[3].As<Napi::String>().Utf8Value() : std::string("");
|
|
836
|
+
|
|
837
|
+
int32_t resolvedThreads = threads == 0 ? std::thread::hardware_concurrency() : threads;
|
|
838
|
+
|
|
839
|
+
AddonModelLoadLoraWorker* worker = new AddonModelLoadLoraWorker(this->Env(), this, loraFilePath, scale, threads, baseModelPath);
|
|
840
|
+
worker->Queue();
|
|
841
|
+
return worker->GetPromise();
|
|
842
|
+
}
|
|
843
|
+
Napi::Value AddonModel::Dispose(const Napi::CallbackInfo& info) {
|
|
844
|
+
if (disposed) {
|
|
845
|
+
return info.Env().Undefined();
|
|
846
|
+
}
|
|
847
|
+
|
|
848
|
+
if (modelLoaded) {
|
|
849
|
+
modelLoaded = false;
|
|
850
|
+
|
|
851
|
+
AddonModelUnloadModelWorker* worker = new AddonModelUnloadModelWorker(this->Env(), this);
|
|
852
|
+
worker->Queue();
|
|
853
|
+
return worker->GetPromise();
|
|
854
|
+
} else {
|
|
855
|
+
dispose();
|
|
856
|
+
|
|
857
|
+
Napi::Promise::Deferred deferred = Napi::Promise::Deferred::New(info.Env());
|
|
858
|
+
deferred.Resolve(info.Env().Undefined());
|
|
859
|
+
return deferred.Promise();
|
|
860
|
+
}
|
|
861
|
+
}
|
|
862
|
+
|
|
276
863
|
class AddonGrammar : public Napi::ObjectWrap<AddonGrammar> {
|
|
277
864
|
public:
|
|
278
865
|
grammar_parser::parse_state parsed_grammar;
|
|
866
|
+
Napi::Reference<Napi::Object> addonExportsRef;
|
|
867
|
+
bool hasAddonExportsRef = false;
|
|
279
868
|
|
|
280
869
|
AddonGrammar(const Napi::CallbackInfo& info) : Napi::ObjectWrap<AddonGrammar>(info) {
|
|
281
870
|
// Get the model path
|
|
@@ -285,6 +874,11 @@ class AddonGrammar : public Napi::ObjectWrap<AddonGrammar> {
|
|
|
285
874
|
if (info.Length() > 1 && info[1].IsObject()) {
|
|
286
875
|
Napi::Object options = info[1].As<Napi::Object>();
|
|
287
876
|
|
|
877
|
+
if (options.Has("addonExports")) {
|
|
878
|
+
addonExportsRef = Napi::Persistent(options.Get("addonExports").As<Napi::Object>());
|
|
879
|
+
hasAddonExportsRef = true;
|
|
880
|
+
}
|
|
881
|
+
|
|
288
882
|
if (options.Has("printGrammar")) {
|
|
289
883
|
should_print_grammar = options.Get("printGrammar").As<Napi::Boolean>().Value();
|
|
290
884
|
}
|
|
@@ -302,6 +896,13 @@ class AddonGrammar : public Napi::ObjectWrap<AddonGrammar> {
|
|
|
302
896
|
}
|
|
303
897
|
}
|
|
304
898
|
|
|
899
|
+
~AddonGrammar() {
|
|
900
|
+
if (hasAddonExportsRef) {
|
|
901
|
+
addonExportsRef.Unref();
|
|
902
|
+
hasAddonExportsRef = false;
|
|
903
|
+
}
|
|
904
|
+
}
|
|
905
|
+
|
|
305
906
|
static void init(Napi::Object exports) {
|
|
306
907
|
exports.Set("AddonGrammar", DefineClass(exports.Env(), "AddonGrammar", {}));
|
|
307
908
|
}
|
|
@@ -340,9 +941,14 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
|
|
|
340
941
|
llama_context_params context_params;
|
|
341
942
|
llama_context* ctx;
|
|
342
943
|
llama_batch batch;
|
|
944
|
+
uint64_t batchMemorySize = 0;
|
|
343
945
|
bool has_batch = false;
|
|
344
946
|
int32_t batch_n_tokens = 0;
|
|
345
947
|
int n_cur = 0;
|
|
948
|
+
|
|
949
|
+
uint64_t loadedContextMemorySize = 0;
|
|
950
|
+
bool contextLoaded = false;
|
|
951
|
+
|
|
346
952
|
bool disposed = false;
|
|
347
953
|
|
|
348
954
|
AddonContext(const Napi::CallbackInfo& info) : Napi::ObjectWrap<AddonContext>(info) {
|
|
@@ -370,10 +976,15 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
|
|
|
370
976
|
|
|
371
977
|
if (options.Has("batchSize")) {
|
|
372
978
|
context_params.n_batch = options.Get("batchSize").As<Napi::Number>().Uint32Value();
|
|
979
|
+
context_params.n_ubatch = context_params.n_batch; // the batch queue is managed in the JS side, so there's no need for managing it on the C++ side
|
|
980
|
+
}
|
|
981
|
+
|
|
982
|
+
if (options.Has("sequences")) {
|
|
983
|
+
context_params.n_seq_max = options.Get("sequences").As<Napi::Number>().Uint32Value();
|
|
373
984
|
}
|
|
374
985
|
|
|
375
|
-
if (options.Has("
|
|
376
|
-
context_params.
|
|
986
|
+
if (options.Has("embeddings")) {
|
|
987
|
+
context_params.embeddings = options.Get("embeddings").As<Napi::Boolean>().Value();
|
|
377
988
|
}
|
|
378
989
|
|
|
379
990
|
if (options.Has("threads")) {
|
|
@@ -384,9 +995,6 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
|
|
|
384
995
|
context_params.n_threads_batch = resolved_n_threads;
|
|
385
996
|
}
|
|
386
997
|
}
|
|
387
|
-
|
|
388
|
-
ctx = llama_new_context_with_model(model->model, context_params);
|
|
389
|
-
Napi::MemoryManagement::AdjustExternalMemory(Env(), llama_get_state_size(ctx));
|
|
390
998
|
}
|
|
391
999
|
~AddonContext() {
|
|
392
1000
|
dispose();
|
|
@@ -397,13 +1005,18 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
|
|
|
397
1005
|
return;
|
|
398
1006
|
}
|
|
399
1007
|
|
|
400
|
-
|
|
401
|
-
|
|
1008
|
+
disposed = true;
|
|
1009
|
+
if (contextLoaded) {
|
|
1010
|
+
contextLoaded = false;
|
|
1011
|
+
llama_free(ctx);
|
|
1012
|
+
|
|
1013
|
+
adjustNapiExternalMemorySubtract(Env(), loadedContextMemorySize);
|
|
1014
|
+
loadedContextMemorySize = 0;
|
|
1015
|
+
}
|
|
1016
|
+
|
|
402
1017
|
model->Unref();
|
|
403
1018
|
|
|
404
1019
|
disposeBatch();
|
|
405
|
-
|
|
406
|
-
disposed = true;
|
|
407
1020
|
}
|
|
408
1021
|
void disposeBatch() {
|
|
409
1022
|
if (!has_batch) {
|
|
@@ -413,16 +1026,14 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
|
|
|
413
1026
|
llama_batch_free(batch);
|
|
414
1027
|
has_batch = false;
|
|
415
1028
|
batch_n_tokens = 0;
|
|
1029
|
+
|
|
1030
|
+
adjustNapiExternalMemorySubtract(Env(), batchMemorySize);
|
|
1031
|
+
batchMemorySize = 0;
|
|
416
1032
|
}
|
|
417
|
-
Napi::Value Dispose(const Napi::CallbackInfo& info) {
|
|
418
|
-
if (disposed) {
|
|
419
|
-
return info.Env().Undefined();
|
|
420
|
-
}
|
|
421
1033
|
|
|
422
|
-
|
|
1034
|
+
Napi::Value Init(const Napi::CallbackInfo& info);
|
|
1035
|
+
Napi::Value Dispose(const Napi::CallbackInfo& info);
|
|
423
1036
|
|
|
424
|
-
return info.Env().Undefined();
|
|
425
|
-
}
|
|
426
1037
|
Napi::Value GetContextSize(const Napi::CallbackInfo& info) {
|
|
427
1038
|
if (disposed) {
|
|
428
1039
|
Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
|
|
@@ -447,6 +1058,15 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
|
|
|
447
1058
|
has_batch = true;
|
|
448
1059
|
batch_n_tokens = n_tokens;
|
|
449
1060
|
|
|
1061
|
+
uint64_t newBatchMemorySize = calculateBatchMemorySize(n_tokens, llama_n_embd(model->model), context_params.n_batch);
|
|
1062
|
+
if (newBatchMemorySize > batchMemorySize) {
|
|
1063
|
+
adjustNapiExternalMemoryAdd(Env(), newBatchMemorySize - batchMemorySize);
|
|
1064
|
+
batchMemorySize = newBatchMemorySize;
|
|
1065
|
+
} else if (newBatchMemorySize < batchMemorySize) {
|
|
1066
|
+
adjustNapiExternalMemorySubtract(Env(), batchMemorySize - newBatchMemorySize);
|
|
1067
|
+
batchMemorySize = newBatchMemorySize;
|
|
1068
|
+
}
|
|
1069
|
+
|
|
450
1070
|
return info.Env().Undefined();
|
|
451
1071
|
}
|
|
452
1072
|
Napi::Value DisposeBatch(const Napi::CallbackInfo& info) {
|
|
@@ -495,7 +1115,12 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
|
|
|
495
1115
|
|
|
496
1116
|
int32_t sequenceId = info[0].As<Napi::Number>().Int32Value();
|
|
497
1117
|
|
|
498
|
-
llama_kv_cache_seq_rm(ctx, sequenceId, -1, -1);
|
|
1118
|
+
bool result = llama_kv_cache_seq_rm(ctx, sequenceId, -1, -1);
|
|
1119
|
+
|
|
1120
|
+
if (!result) {
|
|
1121
|
+
Napi::Error::New(info.Env(), "Failed to dispose sequence").ThrowAsJavaScriptException();
|
|
1122
|
+
return info.Env().Undefined();
|
|
1123
|
+
}
|
|
499
1124
|
|
|
500
1125
|
return info.Env().Undefined();
|
|
501
1126
|
}
|
|
@@ -509,9 +1134,9 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
|
|
|
509
1134
|
int32_t startPos = info[1].As<Napi::Number>().Int32Value();
|
|
510
1135
|
int32_t endPos = info[2].As<Napi::Number>().Int32Value();
|
|
511
1136
|
|
|
512
|
-
llama_kv_cache_seq_rm(ctx, sequenceId, startPos, endPos);
|
|
1137
|
+
bool result = llama_kv_cache_seq_rm(ctx, sequenceId, startPos, endPos);
|
|
513
1138
|
|
|
514
|
-
return info.Env()
|
|
1139
|
+
return Napi::Boolean::New(info.Env(), result);
|
|
515
1140
|
}
|
|
516
1141
|
Napi::Value ShiftSequenceTokenCells(const Napi::CallbackInfo& info) {
|
|
517
1142
|
if (disposed) {
|
|
@@ -524,7 +1149,7 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
|
|
|
524
1149
|
int32_t endPos = info[2].As<Napi::Number>().Int32Value();
|
|
525
1150
|
int32_t shiftDelta = info[3].As<Napi::Number>().Int32Value();
|
|
526
1151
|
|
|
527
|
-
|
|
1152
|
+
llama_kv_cache_seq_add(ctx, sequenceId, startPos, endPos, shiftDelta);
|
|
528
1153
|
|
|
529
1154
|
return info.Env().Undefined();
|
|
530
1155
|
}
|
|
@@ -532,7 +1157,8 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
|
|
|
532
1157
|
Napi::Value SampleToken(const Napi::CallbackInfo& info);
|
|
533
1158
|
|
|
534
1159
|
Napi::Value AcceptGrammarEvaluationStateToken(const Napi::CallbackInfo& info) {
|
|
535
|
-
AddonGrammarEvaluationState* grammar_evaluation_state =
|
|
1160
|
+
AddonGrammarEvaluationState* grammar_evaluation_state =
|
|
1161
|
+
Napi::ObjectWrap<AddonGrammarEvaluationState>::Unwrap(info[0].As<Napi::Object>());
|
|
536
1162
|
llama_token tokenId = info[1].As<Napi::Number>().Int32Value();
|
|
537
1163
|
|
|
538
1164
|
if ((grammar_evaluation_state)->grammar != nullptr) {
|
|
@@ -542,14 +1168,53 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
|
|
|
542
1168
|
return info.Env().Undefined();
|
|
543
1169
|
}
|
|
544
1170
|
|
|
1171
|
+
Napi::Value CanBeNextTokenForGrammarEvaluationState(const Napi::CallbackInfo& info) {
|
|
1172
|
+
AddonGrammarEvaluationState* grammar_evaluation_state =
|
|
1173
|
+
Napi::ObjectWrap<AddonGrammarEvaluationState>::Unwrap(info[0].As<Napi::Object>());
|
|
1174
|
+
llama_token tokenId = info[1].As<Napi::Number>().Int32Value();
|
|
1175
|
+
|
|
1176
|
+
if ((grammar_evaluation_state)->grammar != nullptr) {
|
|
1177
|
+
std::vector<llama_token_data> candidates;
|
|
1178
|
+
candidates.reserve(1);
|
|
1179
|
+
candidates.emplace_back(llama_token_data { tokenId, 1, 0.0f });
|
|
1180
|
+
|
|
1181
|
+
llama_token_data_array candidates_p = { candidates.data(), candidates.size(), false };
|
|
1182
|
+
|
|
1183
|
+
llama_sample_grammar(ctx, &candidates_p, (grammar_evaluation_state)->grammar);
|
|
1184
|
+
|
|
1185
|
+
if (candidates_p.size == 0 || candidates_p.data[0].logit == -INFINITY) {
|
|
1186
|
+
return Napi::Boolean::New(info.Env(), false);
|
|
1187
|
+
}
|
|
1188
|
+
|
|
1189
|
+
return Napi::Boolean::New(info.Env(), true);
|
|
1190
|
+
}
|
|
1191
|
+
|
|
1192
|
+
return Napi::Boolean::New(info.Env(), false);
|
|
1193
|
+
}
|
|
1194
|
+
|
|
545
1195
|
Napi::Value GetEmbedding(const Napi::CallbackInfo& info) {
|
|
546
1196
|
if (disposed) {
|
|
547
1197
|
Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
|
|
548
1198
|
return info.Env().Undefined();
|
|
549
1199
|
}
|
|
550
1200
|
|
|
1201
|
+
int32_t inputTokensLength = info[0].As<Napi::Number>().Int32Value();
|
|
1202
|
+
|
|
1203
|
+
if (inputTokensLength <= 0) {
|
|
1204
|
+
Napi::Error::New(info.Env(), "Invalid input tokens length").ThrowAsJavaScriptException();
|
|
1205
|
+
return info.Env().Undefined();
|
|
1206
|
+
}
|
|
1207
|
+
|
|
551
1208
|
const int n_embd = llama_n_embd(model->model);
|
|
552
|
-
const auto
|
|
1209
|
+
const auto* embeddings = llama_get_embeddings_seq(ctx, 0);
|
|
1210
|
+
if (embeddings == NULL) {
|
|
1211
|
+
embeddings = llama_get_embeddings_ith(ctx, inputTokensLength - 1);
|
|
1212
|
+
|
|
1213
|
+
if (embeddings == NULL) {
|
|
1214
|
+
Napi::Error::New(info.Env(), std::string("Failed to get embeddings for token ") + std::to_string(inputTokensLength - 1)).ThrowAsJavaScriptException();
|
|
1215
|
+
return info.Env().Undefined();
|
|
1216
|
+
}
|
|
1217
|
+
}
|
|
553
1218
|
|
|
554
1219
|
Napi::Float64Array result = Napi::Float64Array::New(info.Env(), n_embd);
|
|
555
1220
|
for (size_t i = 0; i < n_embd; ++i) {
|
|
@@ -559,6 +1224,21 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
|
|
|
559
1224
|
return result;
|
|
560
1225
|
}
|
|
561
1226
|
|
|
1227
|
+
Napi::Value GetStateSize(const Napi::CallbackInfo& info) {
|
|
1228
|
+
if (disposed) {
|
|
1229
|
+
Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
|
|
1230
|
+
return info.Env().Undefined();
|
|
1231
|
+
}
|
|
1232
|
+
|
|
1233
|
+
return Napi::Number::From(info.Env(), llama_state_get_size(ctx));
|
|
1234
|
+
}
|
|
1235
|
+
|
|
1236
|
+
Napi::Value PrintTimings(const Napi::CallbackInfo& info) {
|
|
1237
|
+
llama_print_timings(ctx);
|
|
1238
|
+
llama_reset_timings(ctx);
|
|
1239
|
+
return info.Env().Undefined();
|
|
1240
|
+
}
|
|
1241
|
+
|
|
562
1242
|
static void init(Napi::Object exports) {
|
|
563
1243
|
exports.Set(
|
|
564
1244
|
"AddonContext",
|
|
@@ -566,6 +1246,7 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
|
|
|
566
1246
|
exports.Env(),
|
|
567
1247
|
"AddonContext",
|
|
568
1248
|
{
|
|
1249
|
+
InstanceMethod("init", &AddonContext::Init),
|
|
569
1250
|
InstanceMethod("getContextSize", &AddonContext::GetContextSize),
|
|
570
1251
|
InstanceMethod("initBatch", &AddonContext::InitBatch),
|
|
571
1252
|
InstanceMethod("addToBatch", &AddonContext::AddToBatch),
|
|
@@ -575,8 +1256,11 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
|
|
|
575
1256
|
InstanceMethod("decodeBatch", &AddonContext::DecodeBatch),
|
|
576
1257
|
InstanceMethod("sampleToken", &AddonContext::SampleToken),
|
|
577
1258
|
InstanceMethod("acceptGrammarEvaluationStateToken", &AddonContext::AcceptGrammarEvaluationStateToken),
|
|
1259
|
+
InstanceMethod("canBeNextTokenForGrammarEvaluationState", &AddonContext::CanBeNextTokenForGrammarEvaluationState),
|
|
578
1260
|
InstanceMethod("getEmbedding", &AddonContext::GetEmbedding),
|
|
579
|
-
InstanceMethod("
|
|
1261
|
+
InstanceMethod("getStateSize", &AddonContext::GetStateSize),
|
|
1262
|
+
InstanceMethod("printTimings", &AddonContext::PrintTimings),
|
|
1263
|
+
InstanceMethod("dispose", &AddonContext::Dispose),
|
|
580
1264
|
}
|
|
581
1265
|
)
|
|
582
1266
|
);
|
|
@@ -584,53 +1268,198 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
|
|
|
584
1268
|
};
|
|
585
1269
|
|
|
586
1270
|
|
|
587
|
-
class AddonContextDecodeBatchWorker : Napi::AsyncWorker
|
|
1271
|
+
class AddonContextDecodeBatchWorker : public Napi::AsyncWorker {
|
|
588
1272
|
public:
|
|
589
1273
|
AddonContext* ctx;
|
|
590
1274
|
|
|
591
|
-
AddonContextDecodeBatchWorker(const Napi::
|
|
592
|
-
: Napi::AsyncWorker(
|
|
1275
|
+
AddonContextDecodeBatchWorker(const Napi::Env& env, AddonContext* ctx)
|
|
1276
|
+
: Napi::AsyncWorker(env, "AddonContextDecodeBatchWorker"),
|
|
593
1277
|
ctx(ctx),
|
|
594
|
-
Napi::Promise::Deferred(
|
|
1278
|
+
deferred(Napi::Promise::Deferred::New(env)) {
|
|
595
1279
|
ctx->Ref();
|
|
596
1280
|
}
|
|
597
1281
|
~AddonContextDecodeBatchWorker() {
|
|
598
1282
|
ctx->Unref();
|
|
599
1283
|
}
|
|
600
|
-
|
|
601
|
-
|
|
1284
|
+
|
|
1285
|
+
Napi::Promise GetPromise() {
|
|
1286
|
+
return deferred.Promise();
|
|
1287
|
+
}
|
|
602
1288
|
|
|
603
1289
|
protected:
|
|
1290
|
+
Napi::Promise::Deferred deferred;
|
|
1291
|
+
|
|
604
1292
|
void Execute() {
|
|
605
|
-
|
|
606
|
-
|
|
1293
|
+
try {
|
|
1294
|
+
// Perform the evaluation using llama_decode.
|
|
1295
|
+
int r = llama_decode(ctx->ctx, ctx->batch);
|
|
1296
|
+
|
|
1297
|
+
if (r != 0) {
|
|
1298
|
+
if (r == 1) {
|
|
1299
|
+
SetError("could not find a KV slot for the batch (try reducing the size of the batch or increase the context)");
|
|
1300
|
+
} else {
|
|
1301
|
+
SetError("Eval has failed");
|
|
1302
|
+
}
|
|
607
1303
|
|
|
608
|
-
|
|
609
|
-
if (r == 1) {
|
|
610
|
-
SetError("could not find a KV slot for the batch (try reducing the size of the batch or increase the context)");
|
|
611
|
-
} else {
|
|
612
|
-
SetError("Eval has failed");
|
|
1304
|
+
return;
|
|
613
1305
|
}
|
|
614
1306
|
|
|
615
|
-
|
|
1307
|
+
llama_synchronize(ctx->ctx);
|
|
1308
|
+
} catch (const std::exception& e) {
|
|
1309
|
+
SetError(e.what());
|
|
1310
|
+
} catch(...) {
|
|
1311
|
+
SetError("Unknown error when calling \"llama_decode\"");
|
|
616
1312
|
}
|
|
617
1313
|
}
|
|
618
1314
|
void OnOK() {
|
|
619
|
-
|
|
620
|
-
Napi::Promise::Deferred::Resolve(env.Undefined());
|
|
1315
|
+
deferred.Resolve(Env().Undefined());
|
|
621
1316
|
}
|
|
622
1317
|
void OnError(const Napi::Error& err) {
|
|
623
|
-
|
|
1318
|
+
deferred.Reject(err.Value());
|
|
624
1319
|
}
|
|
625
1320
|
};
|
|
626
1321
|
|
|
627
1322
|
Napi::Value AddonContext::DecodeBatch(const Napi::CallbackInfo& info) {
|
|
628
|
-
AddonContextDecodeBatchWorker* worker = new AddonContextDecodeBatchWorker(info, this);
|
|
1323
|
+
AddonContextDecodeBatchWorker* worker = new AddonContextDecodeBatchWorker(info.Env(), this);
|
|
1324
|
+
worker->Queue();
|
|
1325
|
+
return worker->GetPromise();
|
|
1326
|
+
}
|
|
1327
|
+
|
|
1328
|
+
class AddonContextLoadContextWorker : public Napi::AsyncWorker {
|
|
1329
|
+
public:
|
|
1330
|
+
AddonContext* context;
|
|
1331
|
+
|
|
1332
|
+
AddonContextLoadContextWorker(const Napi::Env& env, AddonContext* context)
|
|
1333
|
+
: Napi::AsyncWorker(env, "AddonContextLoadContextWorker"),
|
|
1334
|
+
context(context),
|
|
1335
|
+
deferred(Napi::Promise::Deferred::New(env)) {
|
|
1336
|
+
context->Ref();
|
|
1337
|
+
}
|
|
1338
|
+
~AddonContextLoadContextWorker() {
|
|
1339
|
+
context->Unref();
|
|
1340
|
+
}
|
|
1341
|
+
|
|
1342
|
+
Napi::Promise GetPromise() {
|
|
1343
|
+
return deferred.Promise();
|
|
1344
|
+
}
|
|
1345
|
+
|
|
1346
|
+
protected:
|
|
1347
|
+
Napi::Promise::Deferred deferred;
|
|
1348
|
+
|
|
1349
|
+
void Execute() {
|
|
1350
|
+
try {
|
|
1351
|
+
context->ctx = llama_new_context_with_model(context->model->model, context->context_params);
|
|
1352
|
+
|
|
1353
|
+
context->contextLoaded = context->ctx != nullptr && context->ctx != NULL;
|
|
1354
|
+
} catch (const std::exception& e) {
|
|
1355
|
+
SetError(e.what());
|
|
1356
|
+
} catch(...) {
|
|
1357
|
+
SetError("Unknown error when calling \"llama_new_context_with_model\"");
|
|
1358
|
+
}
|
|
1359
|
+
}
|
|
1360
|
+
void OnOK() {
|
|
1361
|
+
if (context->contextLoaded) {
|
|
1362
|
+
uint64_t contextMemorySize = llama_state_get_size(context->ctx);
|
|
1363
|
+
adjustNapiExternalMemoryAdd(Env(), contextMemorySize);
|
|
1364
|
+
context->loadedContextMemorySize = contextMemorySize;
|
|
1365
|
+
}
|
|
1366
|
+
|
|
1367
|
+
deferred.Resolve(Napi::Boolean::New(Env(), context->contextLoaded));
|
|
1368
|
+
}
|
|
1369
|
+
void OnError(const Napi::Error& err) {
|
|
1370
|
+
deferred.Reject(err.Value());
|
|
1371
|
+
}
|
|
1372
|
+
};
|
|
1373
|
+
class AddonContextUnloadContextWorker : public Napi::AsyncWorker {
|
|
1374
|
+
public:
|
|
1375
|
+
AddonContext* context;
|
|
1376
|
+
|
|
1377
|
+
AddonContextUnloadContextWorker(const Napi::Env& env, AddonContext* context)
|
|
1378
|
+
: Napi::AsyncWorker(env, "AddonContextUnloadContextWorker"),
|
|
1379
|
+
context(context),
|
|
1380
|
+
deferred(Napi::Promise::Deferred::New(env)) {
|
|
1381
|
+
context->Ref();
|
|
1382
|
+
}
|
|
1383
|
+
~AddonContextUnloadContextWorker() {
|
|
1384
|
+
context->Unref();
|
|
1385
|
+
}
|
|
1386
|
+
|
|
1387
|
+
Napi::Promise GetPromise() {
|
|
1388
|
+
return deferred.Promise();
|
|
1389
|
+
}
|
|
1390
|
+
|
|
1391
|
+
protected:
|
|
1392
|
+
Napi::Promise::Deferred deferred;
|
|
1393
|
+
|
|
1394
|
+
void Execute() {
|
|
1395
|
+
try {
|
|
1396
|
+
llama_free(context->ctx);
|
|
1397
|
+
context->contextLoaded = false;
|
|
1398
|
+
|
|
1399
|
+
try {
|
|
1400
|
+
if (context->has_batch) {
|
|
1401
|
+
llama_batch_free(context->batch);
|
|
1402
|
+
context->has_batch = false;
|
|
1403
|
+
context->batch_n_tokens = 0;
|
|
1404
|
+
}
|
|
1405
|
+
|
|
1406
|
+
context->dispose();
|
|
1407
|
+
} catch (const std::exception& e) {
|
|
1408
|
+
SetError(e.what());
|
|
1409
|
+
} catch(...) {
|
|
1410
|
+
SetError("Unknown error when calling \"llama_batch_free\"");
|
|
1411
|
+
}
|
|
1412
|
+
} catch (const std::exception& e) {
|
|
1413
|
+
SetError(e.what());
|
|
1414
|
+
} catch(...) {
|
|
1415
|
+
SetError("Unknown error when calling \"llama_free\"");
|
|
1416
|
+
}
|
|
1417
|
+
}
|
|
1418
|
+
void OnOK() {
|
|
1419
|
+
adjustNapiExternalMemorySubtract(Env(), context->loadedContextMemorySize);
|
|
1420
|
+
context->loadedContextMemorySize = 0;
|
|
1421
|
+
|
|
1422
|
+
adjustNapiExternalMemorySubtract(Env(), context->batchMemorySize);
|
|
1423
|
+
context->batchMemorySize = 0;
|
|
1424
|
+
|
|
1425
|
+
deferred.Resolve(Env().Undefined());
|
|
1426
|
+
}
|
|
1427
|
+
void OnError(const Napi::Error& err) {
|
|
1428
|
+
deferred.Reject(err.Value());
|
|
1429
|
+
}
|
|
1430
|
+
};
|
|
1431
|
+
|
|
1432
|
+
// Queues an AddonContextLoadContextWorker for this context and returns its promise.
// Throws a JavaScript error and returns undefined when the context is already disposed.
Napi::Value AddonContext::Init(const Napi::CallbackInfo& info) {
    if (!disposed) {
        auto* loadWorker = new AddonContextLoadContextWorker(this->Env(), this);
        loadWorker->Queue();
        return loadWorker->GetPromise();
    }

    Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
    return info.Env().Undefined();
}
|
|
1442
|
+
// Disposes this context.
// - Already disposed: returns undefined (not a promise).
// - No context loaded: calls dispose() synchronously and returns a resolved promise.
// - Context loaded: queues an AddonContextUnloadContextWorker and returns its promise.
Napi::Value AddonContext::Dispose(const Napi::CallbackInfo& info) {
    if (disposed) {
        return info.Env().Undefined();
    }

    if (!contextLoaded) {
        dispose();

        Napi::Promise::Deferred settled = Napi::Promise::Deferred::New(info.Env());
        settled.Resolve(info.Env().Undefined());
        return settled.Promise();
    }

    // Clear the flag before queueing the unload worker.
    contextLoaded = false;

    auto* unloadWorker = new AddonContextUnloadContextWorker(this->Env(), this);
    unloadWorker->Queue();
    return unloadWorker->GetPromise();
}
|
|
632
1461
|
|
|
633
|
-
class AddonContextSampleTokenWorker : Napi::AsyncWorker
|
|
1462
|
+
class AddonContextSampleTokenWorker : public Napi::AsyncWorker {
|
|
634
1463
|
public:
|
|
635
1464
|
AddonContext* ctx;
|
|
636
1465
|
AddonGrammarEvaluationState* grammar_evaluation_state;
|
|
@@ -638,18 +1467,21 @@ class AddonContextSampleTokenWorker : Napi::AsyncWorker, Napi::Promise::Deferred
|
|
|
638
1467
|
bool use_grammar = false;
|
|
639
1468
|
llama_token result;
|
|
640
1469
|
float temperature = 0.0f;
|
|
1470
|
+
float min_p = 0;
|
|
641
1471
|
int32_t top_k = 40;
|
|
642
1472
|
float top_p = 0.95f;
|
|
643
1473
|
float repeat_penalty = 1.10f; // 1.0 = disabled
|
|
644
1474
|
float repeat_penalty_presence_penalty = 0.00f; // 0.0 = disabled
|
|
645
1475
|
float repeat_penalty_frequency_penalty = 0.00f; // 0.0 = disabled
|
|
646
1476
|
std::vector<llama_token> repeat_penalty_tokens;
|
|
1477
|
+
std::unordered_map<llama_token, float> tokenBiases;
|
|
1478
|
+
bool useTokenBiases = false;
|
|
647
1479
|
bool use_repeat_penalty = false;
|
|
648
1480
|
|
|
649
1481
|
AddonContextSampleTokenWorker(const Napi::CallbackInfo& info, AddonContext* ctx)
|
|
650
1482
|
: Napi::AsyncWorker(info.Env(), "AddonContextSampleTokenWorker"),
|
|
651
1483
|
ctx(ctx),
|
|
652
|
-
Napi::Promise::Deferred(info.Env()) {
|
|
1484
|
+
deferred(Napi::Promise::Deferred::New(info.Env())) {
|
|
653
1485
|
ctx->Ref();
|
|
654
1486
|
|
|
655
1487
|
batchLogitIndex = info[0].As<Napi::Number>().Int32Value();
|
|
@@ -661,6 +1493,10 @@ class AddonContextSampleTokenWorker : Napi::AsyncWorker, Napi::Promise::Deferred
|
|
|
661
1493
|
temperature = options.Get("temperature").As<Napi::Number>().FloatValue();
|
|
662
1494
|
}
|
|
663
1495
|
|
|
1496
|
+
if (options.Has("minP")) {
|
|
1497
|
+
min_p = options.Get("minP").As<Napi::Number>().FloatValue();
|
|
1498
|
+
}
|
|
1499
|
+
|
|
664
1500
|
if (options.Has("topK")) {
|
|
665
1501
|
top_k = options.Get("topK").As<Napi::Number>().Int32Value();
|
|
666
1502
|
}
|
|
@@ -684,6 +1520,19 @@ class AddonContextSampleTokenWorker : Napi::AsyncWorker, Napi::Promise::Deferred
|
|
|
684
1520
|
use_repeat_penalty = true;
|
|
685
1521
|
}
|
|
686
1522
|
|
|
1523
|
+
if (options.Has("tokenBiasKeys") && options.Has("tokenBiasValues")) {
|
|
1524
|
+
Napi::Uint32Array tokenBiasKeys = options.Get("tokenBiasKeys").As<Napi::Uint32Array>();
|
|
1525
|
+
Napi::Float32Array tokenBiasValues = options.Get("tokenBiasValues").As<Napi::Float32Array>();
|
|
1526
|
+
|
|
1527
|
+
if (tokenBiasKeys.ElementLength() == tokenBiasValues.ElementLength()) {
|
|
1528
|
+
for (size_t i = 0; i < tokenBiasKeys.ElementLength(); i++) {
|
|
1529
|
+
tokenBiases[static_cast<llama_token>(tokenBiasKeys[i])] = tokenBiasValues[i];
|
|
1530
|
+
}
|
|
1531
|
+
|
|
1532
|
+
useTokenBiases = true;
|
|
1533
|
+
}
|
|
1534
|
+
}
|
|
1535
|
+
|
|
687
1536
|
if (options.Has("repeatPenaltyPresencePenalty")) {
|
|
688
1537
|
repeat_penalty_presence_penalty = options.Get("repeatPenaltyPresencePenalty").As<Napi::Number>().FloatValue();
|
|
689
1538
|
}
|
|
@@ -708,14 +1557,33 @@ class AddonContextSampleTokenWorker : Napi::AsyncWorker, Napi::Promise::Deferred
|
|
|
708
1557
|
use_grammar = false;
|
|
709
1558
|
}
|
|
710
1559
|
}
|
|
711
|
-
|
|
712
|
-
|
|
1560
|
+
|
|
1561
|
+
Napi::Promise GetPromise() {
|
|
1562
|
+
return deferred.Promise();
|
|
1563
|
+
}
|
|
713
1564
|
|
|
714
1565
|
protected:
|
|
1566
|
+
Napi::Promise::Deferred deferred;
|
|
1567
|
+
|
|
715
1568
|
void Execute() {
|
|
1569
|
+
try {
|
|
1570
|
+
SampleToken();
|
|
1571
|
+
} catch (const std::exception& e) {
|
|
1572
|
+
SetError(e.what());
|
|
1573
|
+
} catch(...) {
|
|
1574
|
+
SetError("Unknown error when calling \"SampleToken\"");
|
|
1575
|
+
}
|
|
1576
|
+
}
|
|
1577
|
+
|
|
1578
|
+
void SampleToken() {
|
|
716
1579
|
llama_token new_token_id = 0;
|
|
717
1580
|
|
|
718
1581
|
// Select the best prediction.
|
|
1582
|
+
if (llama_get_logits(ctx->ctx) == nullptr) {
|
|
1583
|
+
SetError("This model does not support token generation");
|
|
1584
|
+
return;
|
|
1585
|
+
}
|
|
1586
|
+
|
|
719
1587
|
auto logits = llama_get_logits_ith(ctx->ctx, batchLogitIndex);
|
|
720
1588
|
auto n_vocab = llama_n_vocab(ctx->model->model);
|
|
721
1589
|
|
|
@@ -723,13 +1591,27 @@ class AddonContextSampleTokenWorker : Napi::AsyncWorker, Napi::Promise::Deferred
|
|
|
723
1591
|
candidates.reserve(n_vocab);
|
|
724
1592
|
|
|
725
1593
|
for (llama_token token_id = 0; token_id < n_vocab; token_id++) {
|
|
726
|
-
|
|
1594
|
+
auto logit = logits[token_id];
|
|
1595
|
+
|
|
1596
|
+
if (useTokenBiases) {
|
|
1597
|
+
bool hasTokenBias = tokenBiases.find(token_id) != tokenBiases.end();
|
|
1598
|
+
if (hasTokenBias) {
|
|
1599
|
+
auto logitBias = tokenBiases.at(token_id);
|
|
1600
|
+
if (logitBias == -INFINITY || logitBias < -INFINITY) {
|
|
1601
|
+
if (!llama_token_is_eog(ctx->model->model, token_id)) {
|
|
1602
|
+
logit = -INFINITY;
|
|
1603
|
+
}
|
|
1604
|
+
} else {
|
|
1605
|
+
logit += logitBias;
|
|
1606
|
+
}
|
|
1607
|
+
}
|
|
1608
|
+
}
|
|
1609
|
+
|
|
1610
|
+
candidates.emplace_back(llama_token_data { token_id, logit, 0.0f });
|
|
727
1611
|
}
|
|
728
1612
|
|
|
729
1613
|
llama_token_data_array candidates_p = { candidates.data(), candidates.size(), false };
|
|
730
1614
|
|
|
731
|
-
auto eos_token = llama_token_eos(ctx->model->model);
|
|
732
|
-
|
|
733
1615
|
if (use_repeat_penalty && !repeat_penalty_tokens.empty()) {
|
|
734
1616
|
llama_sample_repetition_penalties(
|
|
735
1617
|
ctx->ctx,
|
|
@@ -744,6 +1626,13 @@ class AddonContextSampleTokenWorker : Napi::AsyncWorker, Napi::Promise::Deferred
|
|
|
744
1626
|
|
|
745
1627
|
if (use_grammar && (grammar_evaluation_state)->grammar != nullptr) {
|
|
746
1628
|
llama_sample_grammar(ctx->ctx, &candidates_p, (grammar_evaluation_state)->grammar);
|
|
1629
|
+
|
|
1630
|
+
if ((candidates_p.size == 0 || candidates_p.data[0].logit == -INFINITY) && useTokenBiases) {
|
|
1631
|
+
// logit biases caused grammar sampling to fail, so sampling again without logit biases
|
|
1632
|
+
useTokenBiases = false;
|
|
1633
|
+
SampleToken();
|
|
1634
|
+
return;
|
|
1635
|
+
}
|
|
747
1636
|
}
|
|
748
1637
|
|
|
749
1638
|
if (temperature <= 0) {
|
|
@@ -762,45 +1651,359 @@ class AddonContextSampleTokenWorker : Napi::AsyncWorker, Napi::Promise::Deferred
|
|
|
762
1651
|
llama_sample_tail_free(ctx->ctx, &candidates_p, tfs_z, min_keep);
|
|
763
1652
|
llama_sample_typical(ctx->ctx, &candidates_p, typical_p, min_keep);
|
|
764
1653
|
llama_sample_top_p(ctx->ctx, &candidates_p, resolved_top_p, min_keep);
|
|
1654
|
+
llama_sample_min_p(ctx->ctx, &candidates_p, min_p, min_keep);
|
|
765
1655
|
llama_sample_temp(ctx->ctx, &candidates_p, temperature);
|
|
766
1656
|
new_token_id = llama_sample_token(ctx->ctx, &candidates_p);
|
|
767
1657
|
}
|
|
768
1658
|
|
|
769
|
-
if (new_token_id
|
|
1659
|
+
if (!llama_token_is_eog(ctx->model->model, new_token_id) && use_grammar && (grammar_evaluation_state)->grammar != nullptr) {
|
|
770
1660
|
llama_grammar_accept_token(ctx->ctx, (grammar_evaluation_state)->grammar, new_token_id);
|
|
771
1661
|
}
|
|
772
1662
|
|
|
773
1663
|
result = new_token_id;
|
|
774
1664
|
}
|
|
775
1665
|
void OnOK() {
|
|
776
|
-
Napi::
|
|
777
|
-
|
|
778
|
-
Napi::Promise::Deferred::Resolve(resultValue);
|
|
1666
|
+
Napi::Number resultValue = Napi::Number::New(Env(), static_cast<uint32_t>(result));
|
|
1667
|
+
deferred.Resolve(resultValue);
|
|
779
1668
|
}
|
|
780
1669
|
void OnError(const Napi::Error& err) {
|
|
781
|
-
|
|
1670
|
+
deferred.Reject(err.Value());
|
|
782
1671
|
}
|
|
783
1672
|
};
|
|
784
1673
|
|
|
785
1674
|
Napi::Value AddonContext::SampleToken(const Napi::CallbackInfo& info) {
|
|
786
1675
|
AddonContextSampleTokenWorker* worker = new AddonContextSampleTokenWorker(info, this);
|
|
787
1676
|
worker->Queue();
|
|
788
|
-
return worker->
|
|
1677
|
+
return worker->GetPromise();
|
|
789
1678
|
}
|
|
790
1679
|
|
|
791
1680
|
// Returns the string produced by llama_print_system_info() as a JS string.
Napi::Value systemInfo(const Napi::CallbackInfo& info) {
    const char* description = llama_print_system_info();
    return Napi::String::From(info.Env(), description);
}
|
|
794
1683
|
|
|
1684
|
+
// Returns the result of llama_supports_gpu_offload() as a JS boolean.
Napi::Value addonGetSupportsGpuOffloading(const Napi::CallbackInfo& info) {
    const bool supported = llama_supports_gpu_offload();
    return Napi::Boolean::New(info.Env(), supported);
}
|
|
1687
|
+
|
|
1688
|
+
// Returns the result of llama_supports_mmap() as a JS boolean.
Napi::Value addonGetSupportsMmap(const Napi::CallbackInfo& info) {
    const bool supported = llama_supports_mmap();
    return Napi::Boolean::New(info.Env(), supported);
}
|
|
1691
|
+
|
|
1692
|
+
// Returns the result of llama_supports_mlock() as a JS boolean.
Napi::Value addonGetSupportsMlock(const Napi::CallbackInfo& info) {
    const bool supported = llama_supports_mlock();
    return Napi::Boolean::New(info.Env(), supported);
}
|
|
1695
|
+
|
|
1696
|
+
// Returns the block size reported by ggml_blck_size() for the ggml type id passed
// as the first argument, or undefined when the id is outside [0, GGML_TYPE_COUNT).
Napi::Value addonGetBlockSizeForGgmlType(const Napi::CallbackInfo& info) {
    const int ggmlType = info[0].As<Napi::Number>().Int32Value();

    // GGML_TYPE_COUNT is the number of types, not a type itself, so it must be
    // rejected as well (hence `>=`).
    if (ggmlType < 0 || ggmlType >= GGML_TYPE_COUNT) {
        return info.Env().Undefined();
    }

    const auto blockSize = ggml_blck_size(static_cast<ggml_type>(ggmlType));

    return Napi::Number::New(info.Env(), blockSize);
}
|
|
1707
|
+
|
|
1708
|
+
// Returns the type size reported by ggml_type_size() for the ggml type id passed
// as the first argument, or undefined when the id is outside [0, GGML_TYPE_COUNT).
Napi::Value addonGetTypeSizeForGgmlType(const Napi::CallbackInfo& info) {
    const int ggmlType = info[0].As<Napi::Number>().Int32Value();

    // GGML_TYPE_COUNT is the number of types, not a type itself, so it must be
    // rejected as well (hence `>=`).
    if (ggmlType < 0 || ggmlType >= GGML_TYPE_COUNT) {
        return info.Env().Undefined();
    }

    const auto typeSize = ggml_type_size(static_cast<ggml_type>(ggmlType));

    return Napi::Number::New(info.Env(), typeSize);
}
|
|
1719
|
+
|
|
1720
|
+
// Builds a JS object exposing a handful of ggml/llama numeric constants.
Napi::Value addonGetConsts(const Napi::CallbackInfo& info) {
    const Napi::Env env = info.Env();
    Napi::Object consts = Napi::Object::New(env);

    // Stores `value` under `key` as a JS number.
    const auto setNumber = [&](const char* key, double value) {
        consts.Set(key, Napi::Number::New(env, value));
    };

    setNumber("ggmlMaxDims", GGML_MAX_DIMS);
    setNumber("ggmlTypeF16Size", ggml_type_size(GGML_TYPE_F16));
    setNumber("ggmlTypeF32Size", ggml_type_size(GGML_TYPE_F32));
    setNumber("ggmlTensorOverhead", ggml_tensor_overhead());
    setNumber("llamaMaxRngState", LLAMA_MAX_RNG_STATE);
    setNumber("llamaPosSize", sizeof(llama_pos));
    setNumber("llamaSeqIdSize", sizeof(llama_seq_id));

    return consts;
}
|
|
1732
|
+
|
|
1733
|
+
// Maps a ggml log level to the addon's numeric log level:
// error -> 2, warn -> 3, info -> 4, debug -> 5, anything else -> 1.
int addonGetGgmlLogLevelNumber(ggml_log_level level) {
    if (level == GGML_LOG_LEVEL_ERROR) {
        return 2;
    }
    if (level == GGML_LOG_LEVEL_WARN) {
        return 3;
    }
    if (level == GGML_LOG_LEVEL_INFO) {
        return 4;
    }
    if (level == GGML_LOG_LEVEL_DEBUG) {
        return 5;
    }

    return 1;
}
|
|
1743
|
+
|
|
1744
|
+
// Delivers one queued log entry to the JS logger callback and frees the entry.
//
// env / callback: the JS environment and logger function; either may be null.
// context:        unused here.
// data:           the log entry (level number + message stream). This function
//                 deletes it, together with its string stream, before returning.
//
// When the JS callback cannot be invoked (no env/callback, logger unset, or the
// call throws a Napi::Error), the message is written to stderr for level 2 and
// to stdout for every other level.
void addonCallJsLogCallback(
    Napi::Env env, Napi::Function callback, AddonThreadSafeLogCallbackFunctionContext* context, addon_logger_log* data
) {
    // Without an entry there is nothing to log or free; bail out before any
    // dereference of `data`.
    if (data == nullptr) {
        return;
    }

    bool called = false;

    if (env != nullptr && callback != nullptr && addonJsLoggerCallbackSet) {
        try {
            callback.Call({
                Napi::Number::New(env, data->logLevelNumber),
                Napi::String::New(env, data->stringStream->str()),
            });
            called = true;
        } catch (const Napi::Error&) {
            // Fall back to the standard streams below.
            called = false;
        }
    }

    if (!called) {
        FILE* const stream = (data->logLevelNumber == 2) ? stderr : stdout;
        fputs(data->stringStream->str().c_str(), stream);
        fflush(stream);
    }

    delete data->stringStream;
    delete data;
}
|
|
1776
|
+
|
|
1777
|
+
// Log callback registered with llama.cpp.
//
// Messages whose mapped level number is above addonLoggerLogLevel are dropped.
// When a JS logger is set, the message is queued to it through the thread-safe
// function; if no JS logger is set or queueing fails, the text is written to
// stderr (level number 2) or stdout (all other levels).
static void addonLlamaCppLogCallback(ggml_log_level level, const char* text, void* user_data) {
    const int logLevelNumber = addonGetGgmlLogLevelNumber(level);

    if (logLevelNumber > addonLoggerLogLevel) {
        return;
    }

    if (addonJsLoggerCallbackSet) {
        std::stringstream* stringStream = new std::stringstream();
        if (text != nullptr) {
            *stringStream << text;
        }

        addon_logger_log* data = new addon_logger_log {
            logLevelNumber,
            stringStream,
        };

        auto status = addonThreadSafeLoggerCallback.NonBlockingCall(data);

        if (status == napi_ok) {
            // The entry is now freed by addonCallJsLogCallback.
            return;
        }

        // Queueing failed: free the entry here and fall through to direct output.
        delete stringStream;
        delete data;
    }

    if (text != nullptr) {
        // Use the mapped level number (2 == error), matching addonCallJsLogCallback,
        // instead of comparing the raw ggml enum against a literal.
        FILE* const stream = (logLevelNumber == 2) ? stderr : stdout;
        fputs(text, stream);
        fflush(stream);
    }
}
|
|
1815
|
+
|
|
1816
|
+
// Installs or removes the JS logger callback.
// - Called with a function as first argument: wraps it in a thread-safe function
//   and marks the JS logger as set.
// - Called with anything else: releases the current thread-safe function (if set).
// Always returns undefined.
Napi::Value setLogger(const Napi::CallbackInfo& info) {
    const Napi::Env env = info.Env();
    const bool hasCallback = info.Length() >= 1 && info[0].IsFunction();

    if (!hasCallback) {
        if (addonJsLoggerCallbackSet) {
            addonJsLoggerCallbackSet = false;
            addonThreadSafeLoggerCallback.Release();
        }

        return env.Undefined();
    }

    // NOTE(review): when a logger is already set, the previous thread-safe function
    // is overwritten here without a Release() - confirm whether that is intended.
    Napi::Function jsLogger = info[0].As<Napi::Function>();
    AddonThreadSafeLogCallbackFunctionContext* callbackContext =
        new Napi::Reference<Napi::Value>(Napi::Persistent(info.This()));

    addonThreadSafeLoggerCallback = AddonThreadSafeLogCallbackFunction::New(
        env,
        jsLogger,
        "loggerCallback",
        0,
        1,
        callbackContext,
        // Finalizer: clears the flag and frees the context allocated above.
        [](Napi::Env, void*, AddonThreadSafeLogCallbackFunctionContext* finalizedContext) {
            addonJsLoggerCallbackSet = false;

            delete finalizedContext;
        }
    );
    addonJsLoggerCallbackSet = true;

    // prevent blocking the main node process from exiting due to active resources
    addonThreadSafeLoggerCallback.Unref(env);

    return env.Undefined();
}
|
|
1848
|
+
|
|
1849
|
+
// Sets addonLoggerLogLevel from the first argument when it is a number;
// otherwise sets it to 5. Always returns undefined.
Napi::Value setLoggerLogLevel(const Napi::CallbackInfo& info) {
    const bool hasLevel = info.Length() >= 1 && info[0].IsNumber();

    if (hasLevel) {
        addonLoggerLogLevel = info[0].As<Napi::Number>().Int32Value();
    } else {
        addonLoggerLogLevel = 5;
    }

    return info.Env().Undefined();
}
|
|
1860
|
+
|
|
1861
|
+
class AddonBackendLoadWorker : public Napi::AsyncWorker {
|
|
1862
|
+
public:
|
|
1863
|
+
AddonBackendLoadWorker(const Napi::Env& env)
|
|
1864
|
+
: Napi::AsyncWorker(env, "AddonBackendLoadWorker"),
|
|
1865
|
+
deferred(Napi::Promise::Deferred::New(env)) {
|
|
1866
|
+
}
|
|
1867
|
+
~AddonBackendLoadWorker() {
|
|
1868
|
+
}
|
|
1869
|
+
|
|
1870
|
+
Napi::Promise GetPromise() {
|
|
1871
|
+
return deferred.Promise();
|
|
1872
|
+
}
|
|
1873
|
+
|
|
1874
|
+
protected:
|
|
1875
|
+
Napi::Promise::Deferred deferred;
|
|
1876
|
+
|
|
1877
|
+
void Execute() {
|
|
1878
|
+
try {
|
|
1879
|
+
llama_backend_init();
|
|
1880
|
+
|
|
1881
|
+
try {
|
|
1882
|
+
if (backendDisposed) {
|
|
1883
|
+
llama_backend_free();
|
|
1884
|
+
} else {
|
|
1885
|
+
backendInitialized = true;
|
|
1886
|
+
}
|
|
1887
|
+
} catch (const std::exception& e) {
|
|
1888
|
+
SetError(e.what());
|
|
1889
|
+
} catch(...) {
|
|
1890
|
+
SetError("Unknown error when calling \"llama_backend_free\"");
|
|
1891
|
+
}
|
|
1892
|
+
} catch (const std::exception& e) {
|
|
1893
|
+
SetError(e.what());
|
|
1894
|
+
} catch(...) {
|
|
1895
|
+
SetError("Unknown error when calling \"llama_backend_init\"");
|
|
1896
|
+
}
|
|
1897
|
+
}
|
|
1898
|
+
void OnOK() {
|
|
1899
|
+
deferred.Resolve(Env().Undefined());
|
|
1900
|
+
}
|
|
1901
|
+
void OnError(const Napi::Error& err) {
|
|
1902
|
+
deferred.Reject(err.Value());
|
|
1903
|
+
}
|
|
1904
|
+
};
|
|
1905
|
+
|
|
1906
|
+
|
|
1907
|
+
class AddonBackendUnloadWorker : public Napi::AsyncWorker {
|
|
1908
|
+
public:
|
|
1909
|
+
AddonBackendUnloadWorker(const Napi::Env& env)
|
|
1910
|
+
: Napi::AsyncWorker(env, "AddonBackendUnloadWorker"),
|
|
1911
|
+
deferred(Napi::Promise::Deferred::New(env)) {
|
|
1912
|
+
}
|
|
1913
|
+
~AddonBackendUnloadWorker() {
|
|
1914
|
+
}
|
|
1915
|
+
|
|
1916
|
+
Napi::Promise GetPromise() {
|
|
1917
|
+
return deferred.Promise();
|
|
1918
|
+
}
|
|
1919
|
+
|
|
1920
|
+
protected:
|
|
1921
|
+
Napi::Promise::Deferred deferred;
|
|
1922
|
+
|
|
1923
|
+
void Execute() {
|
|
1924
|
+
try {
|
|
1925
|
+
if (backendInitialized) {
|
|
1926
|
+
backendInitialized = false;
|
|
1927
|
+
llama_backend_free();
|
|
1928
|
+
}
|
|
1929
|
+
} catch (const std::exception& e) {
|
|
1930
|
+
SetError(e.what());
|
|
1931
|
+
} catch(...) {
|
|
1932
|
+
SetError("Unknown error when calling \"llama_backend_free\"");
|
|
1933
|
+
}
|
|
1934
|
+
}
|
|
1935
|
+
void OnOK() {
|
|
1936
|
+
deferred.Resolve(Env().Undefined());
|
|
1937
|
+
}
|
|
1938
|
+
void OnError(const Napi::Error& err) {
|
|
1939
|
+
deferred.Reject(err.Value());
|
|
1940
|
+
}
|
|
1941
|
+
};
|
|
1942
|
+
|
|
1943
|
+
// Returns a promise for backend initialization: an already-resolved promise when
// the backend is initialized, otherwise the promise of a queued AddonBackendLoadWorker.
Napi::Value addonInit(const Napi::CallbackInfo& info) {
    const Napi::Env env = info.Env();

    if (!backendInitialized) {
        auto* loadWorker = new AddonBackendLoadWorker(env);
        loadWorker->Queue();
        return loadWorker->GetPromise();
    }

    Napi::Promise::Deferred settled = Napi::Promise::Deferred::New(env);
    settled.Resolve(env.Undefined());
    return settled.Promise();
}
|
|
1954
|
+
|
|
1955
|
+
// Returns a promise for backend disposal: an already-resolved promise when the
// backend was disposed before, otherwise marks it disposed and returns the
// promise of a queued AddonBackendUnloadWorker.
Napi::Value addonDispose(const Napi::CallbackInfo& info) {
    const Napi::Env env = info.Env();

    if (!backendDisposed) {
        backendDisposed = true;

        auto* unloadWorker = new AddonBackendUnloadWorker(env);
        unloadWorker->Queue();
        return unloadWorker->GetPromise();
    }

    Napi::Promise::Deferred settled = Napi::Promise::Deferred::New(env);
    settled.Resolve(env.Undefined());
    return settled.Promise();
}
|
|
1968
|
+
|
|
1969
|
+
// Finalizer: marks the backend as disposed and frees it when it is still
// initialized. Does nothing when the backend was already disposed.
static void addonFreeLlamaBackend(Napi::Env env, int* data) {
    if (!backendDisposed) {
        backendDisposed = true;

        if (backendInitialized) {
            backendInitialized = false;
            llama_backend_free();
        }
    }
}
|
|
1980
|
+
|
|
795
1981
|
// Module registration: defines the addon's functions on `exports`, initializes the
// addon classes, installs the llama.cpp log callback and attaches a finalizer that
// frees the llama backend. Returns `exports`.
Napi::Object registerCallback(Napi::Env env, Napi::Object exports) {
    using Descriptor = Napi::PropertyDescriptor;

    exports.DefineProperties({
        // capability and constant queries
        Descriptor::Function("systemInfo", systemInfo),
        Descriptor::Function("getSupportsGpuOffloading", addonGetSupportsGpuOffloading),
        Descriptor::Function("getSupportsMmap", addonGetSupportsMmap),
        Descriptor::Function("getSupportsMlock", addonGetSupportsMlock),
        Descriptor::Function("getBlockSizeForGgmlType", addonGetBlockSizeForGgmlType),
        Descriptor::Function("getTypeSizeForGgmlType", addonGetTypeSizeForGgmlType),
        Descriptor::Function("getConsts", addonGetConsts),
        // logging
        Descriptor::Function("setLogger", setLogger),
        Descriptor::Function("setLoggerLogLevel", setLoggerLogLevel),
        // GPU information
        Descriptor::Function("getGpuVramInfo", getGpuVramInfo),
        Descriptor::Function("getGpuDeviceInfo", getGpuDeviceInfo),
        Descriptor::Function("getGpuType", getGpuType),
        // backend lifecycle
        Descriptor::Function("init", addonInit),
        Descriptor::Function("dispose", addonDispose),
    });

    AddonModel::init(exports);
    AddonGrammar::init(exports);
    AddonGrammarEvaluationState::init(exports);
    AddonContext::init(exports);

    llama_log_set(addonLlamaCppLogCallback, nullptr);

    exports.AddFinalizer(addonFreeLlamaBackend, static_cast<int*>(nullptr));

    return exports;
}
|
|
806
2009
|
|