node-llama-cpp 3.0.0-beta.1 → 3.0.0-beta.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -0
- package/dist/ChatWrapper.d.ts +49 -0
- package/dist/ChatWrapper.js +120 -0
- package/dist/ChatWrapper.js.map +1 -0
- package/dist/{utils/getBin.d.ts → bindings/AddonTypes.d.ts} +14 -4
- package/dist/bindings/AddonTypes.js +2 -0
- package/dist/bindings/AddonTypes.js.map +1 -0
- package/dist/bindings/Llama.d.ts +23 -0
- package/dist/bindings/Llama.js +225 -0
- package/dist/bindings/Llama.js.map +1 -0
- package/dist/bindings/getLlama.d.ts +103 -0
- package/dist/bindings/getLlama.js +228 -0
- package/dist/bindings/getLlama.js.map +1 -0
- package/dist/bindings/types.d.ts +33 -0
- package/dist/bindings/types.js +30 -0
- package/dist/bindings/types.js.map +1 -0
- package/dist/bindings/utils/NoBinaryFoundError.d.ts +2 -0
- package/dist/bindings/utils/NoBinaryFoundError.js +7 -0
- package/dist/bindings/utils/NoBinaryFoundError.js.map +1 -0
- package/dist/{utils → bindings/utils}/binariesGithubRelease.js +1 -1
- package/dist/bindings/utils/binariesGithubRelease.js.map +1 -0
- package/dist/bindings/utils/clearAllLocalBuilds.d.ts +1 -0
- package/dist/bindings/utils/clearAllLocalBuilds.js +47 -0
- package/dist/bindings/utils/clearAllLocalBuilds.js.map +1 -0
- package/dist/bindings/utils/cloneLlamaCppRepo.d.ts +11 -0
- package/dist/bindings/utils/cloneLlamaCppRepo.js +155 -0
- package/dist/bindings/utils/cloneLlamaCppRepo.js.map +1 -0
- package/dist/bindings/utils/compileLLamaCpp.d.ts +12 -0
- package/dist/bindings/utils/compileLLamaCpp.js +157 -0
- package/dist/bindings/utils/compileLLamaCpp.js.map +1 -0
- package/dist/bindings/utils/getBuildFolderNameForBuildOptions.d.ts +5 -0
- package/dist/bindings/utils/getBuildFolderNameForBuildOptions.js +85 -0
- package/dist/bindings/utils/getBuildFolderNameForBuildOptions.js.map +1 -0
- package/dist/bindings/utils/getCanUsePrebuiltBinaries.d.ts +1 -0
- package/dist/bindings/utils/getCanUsePrebuiltBinaries.js +8 -0
- package/dist/bindings/utils/getCanUsePrebuiltBinaries.js.map +1 -0
- package/dist/bindings/utils/getExampleUsageCodeOfGetLlama.d.ts +2 -0
- package/dist/bindings/utils/getExampleUsageCodeOfGetLlama.js +21 -0
- package/dist/bindings/utils/getExampleUsageCodeOfGetLlama.js.map +1 -0
- package/dist/bindings/utils/getPlatform.d.ts +2 -0
- package/dist/bindings/utils/getPlatform.js +15 -0
- package/dist/bindings/utils/getPlatform.js.map +1 -0
- package/dist/bindings/utils/lastBuildInfo.d.ts +6 -0
- package/dist/bindings/utils/lastBuildInfo.js +17 -0
- package/dist/bindings/utils/lastBuildInfo.js.map +1 -0
- package/dist/bindings/utils/logBinaryUsageExampleToConsole.d.ts +2 -0
- package/dist/bindings/utils/logBinaryUsageExampleToConsole.js +28 -0
- package/dist/bindings/utils/logBinaryUsageExampleToConsole.js.map +1 -0
- package/dist/bindings/utils/resolveChatWrapperBasedOnWrapperTypeName.d.ts +26 -0
- package/dist/bindings/utils/resolveChatWrapperBasedOnWrapperTypeName.js +43 -0
- package/dist/bindings/utils/resolveChatWrapperBasedOnWrapperTypeName.js.map +1 -0
- package/dist/bindings/utils/resolveCustomCmakeOptions.d.ts +1 -0
- package/dist/bindings/utils/resolveCustomCmakeOptions.js +43 -0
- package/dist/bindings/utils/resolveCustomCmakeOptions.js.map +1 -0
- package/dist/chatWrappers/AlpacaChatWrapper.d.ts +12 -0
- package/dist/chatWrappers/AlpacaChatWrapper.js +21 -0
- package/dist/chatWrappers/AlpacaChatWrapper.js.map +1 -0
- package/dist/chatWrappers/ChatMLChatWrapper.d.ts +18 -0
- package/dist/chatWrappers/ChatMLChatWrapper.js +83 -0
- package/dist/chatWrappers/ChatMLChatWrapper.js.map +1 -0
- package/dist/chatWrappers/EmptyChatWrapper.d.ts +4 -0
- package/dist/chatWrappers/EmptyChatWrapper.js +5 -0
- package/dist/chatWrappers/EmptyChatWrapper.js.map +1 -0
- package/dist/chatWrappers/FalconChatWrapper.d.ts +21 -0
- package/dist/chatWrappers/FalconChatWrapper.js +104 -0
- package/dist/chatWrappers/FalconChatWrapper.js.map +1 -0
- package/dist/chatWrappers/FunctionaryChatWrapper.d.ts +41 -0
- package/dist/chatWrappers/FunctionaryChatWrapper.js +200 -0
- package/dist/chatWrappers/FunctionaryChatWrapper.js.map +1 -0
- package/dist/chatWrappers/GeneralChatWrapper.d.ts +21 -0
- package/dist/chatWrappers/GeneralChatWrapper.js +112 -0
- package/dist/chatWrappers/GeneralChatWrapper.js.map +1 -0
- package/dist/chatWrappers/LlamaChatWrapper.d.ts +13 -0
- package/dist/chatWrappers/LlamaChatWrapper.js +78 -0
- package/dist/chatWrappers/LlamaChatWrapper.js.map +1 -0
- package/dist/chatWrappers/resolveChatWrapperBasedOnModel.d.ts +5 -5
- package/dist/chatWrappers/resolveChatWrapperBasedOnModel.js +28 -17
- package/dist/chatWrappers/resolveChatWrapperBasedOnModel.js.map +1 -1
- package/dist/cli/cli.js +8 -0
- package/dist/cli/cli.js.map +1 -1
- package/dist/cli/commands/BuildCommand.d.ts +2 -1
- package/dist/cli/commands/BuildCommand.js +50 -10
- package/dist/cli/commands/BuildCommand.js.map +1 -1
- package/dist/cli/commands/ChatCommand.d.ts +11 -4
- package/dist/cli/commands/ChatCommand.js +138 -64
- package/dist/cli/commands/ChatCommand.js.map +1 -1
- package/dist/cli/commands/ClearCommand.js +4 -6
- package/dist/cli/commands/ClearCommand.js.map +1 -1
- package/dist/cli/commands/CompleteCommand.d.ts +25 -0
- package/dist/cli/commands/CompleteCommand.js +278 -0
- package/dist/cli/commands/CompleteCommand.js.map +1 -0
- package/dist/cli/commands/DebugCommand.d.ts +7 -0
- package/dist/cli/commands/DebugCommand.js +59 -0
- package/dist/cli/commands/DebugCommand.js.map +1 -0
- package/dist/cli/commands/DownloadCommand.d.ts +2 -1
- package/dist/cli/commands/DownloadCommand.js +47 -40
- package/dist/cli/commands/DownloadCommand.js.map +1 -1
- package/dist/cli/commands/InfillCommand.d.ts +27 -0
- package/dist/cli/commands/InfillCommand.js +316 -0
- package/dist/cli/commands/InfillCommand.js.map +1 -0
- package/dist/cli/commands/OnPostInstallCommand.js +7 -10
- package/dist/cli/commands/OnPostInstallCommand.js.map +1 -1
- package/dist/config.d.ts +10 -3
- package/dist/config.js +18 -7
- package/dist/config.js.map +1 -1
- package/dist/consts.d.ts +1 -0
- package/dist/consts.js +2 -0
- package/dist/consts.js.map +1 -0
- package/dist/evaluator/LlamaChat/LlamaChat.d.ts +154 -0
- package/dist/evaluator/LlamaChat/LlamaChat.js +684 -0
- package/dist/evaluator/LlamaChat/LlamaChat.js.map +1 -0
- package/dist/evaluator/LlamaChat/utils/FunctionCallGrammar.d.ts +22 -0
- package/dist/evaluator/LlamaChat/utils/FunctionCallGrammar.js +121 -0
- package/dist/evaluator/LlamaChat/utils/FunctionCallGrammar.js.map +1 -0
- package/dist/evaluator/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.d.ts +16 -0
- package/dist/evaluator/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.js +135 -0
- package/dist/evaluator/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.js.map +1 -0
- package/dist/{llamaEvaluator → evaluator/LlamaChatSession}/LlamaChatSession.d.ts +59 -25
- package/dist/evaluator/LlamaChatSession/LlamaChatSession.js +219 -0
- package/dist/evaluator/LlamaChatSession/LlamaChatSession.js.map +1 -0
- package/dist/evaluator/LlamaChatSession/utils/defineChatSessionFunction.d.ts +7 -0
- package/dist/evaluator/LlamaChatSession/utils/defineChatSessionFunction.js +8 -0
- package/dist/evaluator/LlamaChatSession/utils/defineChatSessionFunction.js.map +1 -0
- package/dist/evaluator/LlamaCompletion.d.ts +148 -0
- package/dist/evaluator/LlamaCompletion.js +402 -0
- package/dist/evaluator/LlamaCompletion.js.map +1 -0
- package/dist/{llamaEvaluator → evaluator}/LlamaContext/LlamaContext.d.ts +20 -23
- package/dist/{llamaEvaluator → evaluator}/LlamaContext/LlamaContext.js +77 -107
- package/dist/evaluator/LlamaContext/LlamaContext.js.map +1 -0
- package/dist/{llamaEvaluator → evaluator}/LlamaContext/types.d.ts +6 -14
- package/dist/evaluator/LlamaContext/types.js.map +1 -0
- package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/firstInFirstOutStrategy.js.map +1 -0
- package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/maximumParallelismStrategy.js.map +1 -0
- package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizingStrategy.js.map +1 -0
- package/dist/evaluator/LlamaEmbeddingContext.d.ts +37 -0
- package/dist/evaluator/LlamaEmbeddingContext.js +78 -0
- package/dist/evaluator/LlamaEmbeddingContext.js.map +1 -0
- package/dist/evaluator/LlamaGrammar.d.ts +30 -0
- package/dist/{llamaEvaluator → evaluator}/LlamaGrammar.js +14 -18
- package/dist/evaluator/LlamaGrammar.js.map +1 -0
- package/dist/{llamaEvaluator → evaluator}/LlamaGrammarEvaluationState.js +4 -4
- package/dist/evaluator/LlamaGrammarEvaluationState.js.map +1 -0
- package/dist/{llamaEvaluator → evaluator}/LlamaJsonSchemaGrammar.d.ts +2 -1
- package/dist/{llamaEvaluator → evaluator}/LlamaJsonSchemaGrammar.js +4 -2
- package/dist/evaluator/LlamaJsonSchemaGrammar.js.map +1 -0
- package/dist/{llamaEvaluator → evaluator}/LlamaModel.d.ts +24 -6
- package/dist/{llamaEvaluator → evaluator}/LlamaModel.js +51 -12
- package/dist/evaluator/LlamaModel.js.map +1 -0
- package/dist/index.d.ts +30 -17
- package/dist/index.js +29 -15
- package/dist/index.js.map +1 -1
- package/dist/state.d.ts +2 -0
- package/dist/state.js +7 -0
- package/dist/state.js.map +1 -1
- package/dist/types.d.ts +72 -3
- package/dist/types.js +5 -1
- package/dist/types.js.map +1 -1
- package/dist/utils/LlamaText.d.ts +42 -0
- package/dist/utils/LlamaText.js +207 -0
- package/dist/utils/LlamaText.js.map +1 -0
- package/dist/utils/StopGenerationDetector.d.ts +28 -0
- package/dist/utils/StopGenerationDetector.js +205 -0
- package/dist/utils/StopGenerationDetector.js.map +1 -0
- package/dist/utils/TokenStreamRegulator.d.ts +30 -0
- package/dist/utils/TokenStreamRegulator.js +96 -0
- package/dist/utils/TokenStreamRegulator.js.map +1 -0
- package/dist/utils/UnsupportedError.d.ts +2 -0
- package/dist/utils/UnsupportedError.js +7 -0
- package/dist/utils/UnsupportedError.js.map +1 -0
- package/dist/utils/appendUserMessageToChatHistory.d.ts +2 -0
- package/dist/utils/appendUserMessageToChatHistory.js +18 -0
- package/dist/utils/appendUserMessageToChatHistory.js.map +1 -0
- package/dist/utils/cmake.js +16 -11
- package/dist/utils/cmake.js.map +1 -1
- package/dist/utils/compareTokens.d.ts +2 -0
- package/dist/utils/compareTokens.js +4 -0
- package/dist/utils/compareTokens.js.map +1 -0
- package/dist/utils/findCharacterRemovalCountToFitChatHistoryInContext.d.ts +18 -0
- package/dist/utils/findCharacterRemovalCountToFitChatHistoryInContext.js +61 -0
- package/dist/utils/findCharacterRemovalCountToFitChatHistoryInContext.js.map +1 -0
- package/dist/utils/gbnfJson/GbnfGrammarGenerator.d.ts +1 -0
- package/dist/utils/gbnfJson/GbnfGrammarGenerator.js +17 -0
- package/dist/utils/gbnfJson/GbnfGrammarGenerator.js.map +1 -1
- package/dist/utils/gbnfJson/GbnfTerminal.d.ts +1 -1
- package/dist/utils/gbnfJson/GbnfTerminal.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfVerbatimText.d.ts +6 -0
- package/dist/utils/gbnfJson/terminals/GbnfVerbatimText.js +21 -0
- package/dist/utils/gbnfJson/terminals/GbnfVerbatimText.js.map +1 -0
- package/dist/utils/gbnfJson/types.d.ts +1 -1
- package/dist/utils/gbnfJson/types.js.map +1 -1
- package/dist/utils/gbnfJson/utils/validateObjectAgainstGbnfSchema.d.ts +1 -0
- package/dist/utils/gbnfJson/utils/validateObjectAgainstGbnfSchema.js.map +1 -1
- package/dist/utils/getConsoleLogPrefix.d.ts +1 -0
- package/dist/utils/getConsoleLogPrefix.js +9 -0
- package/dist/utils/getConsoleLogPrefix.js.map +1 -0
- package/dist/utils/getGbnfGrammarForGbnfJsonSchema.js +1 -15
- package/dist/utils/getGbnfGrammarForGbnfJsonSchema.js.map +1 -1
- package/dist/utils/getGrammarsFolder.d.ts +2 -1
- package/dist/utils/getGrammarsFolder.js +8 -7
- package/dist/utils/getGrammarsFolder.js.map +1 -1
- package/dist/utils/getModuleVersion.d.ts +1 -0
- package/dist/utils/getModuleVersion.js +13 -0
- package/dist/utils/getModuleVersion.js.map +1 -0
- package/dist/utils/getQueuedTokensBeforeStopTrigger.d.ts +6 -0
- package/dist/utils/getQueuedTokensBeforeStopTrigger.js +22 -0
- package/dist/utils/getQueuedTokensBeforeStopTrigger.js.map +1 -0
- package/dist/utils/getTypeScriptTypeStringForGbnfJsonSchema.d.ts +2 -0
- package/dist/utils/getTypeScriptTypeStringForGbnfJsonSchema.js +49 -0
- package/dist/utils/getTypeScriptTypeStringForGbnfJsonSchema.js.map +1 -0
- package/dist/utils/gitReleaseBundles.js +6 -5
- package/dist/utils/gitReleaseBundles.js.map +1 -1
- package/dist/utils/hashString.d.ts +1 -0
- package/dist/utils/hashString.js +8 -0
- package/dist/utils/hashString.js.map +1 -0
- package/dist/utils/isLockfileActive.d.ts +4 -0
- package/dist/utils/isLockfileActive.js +12 -0
- package/dist/utils/isLockfileActive.js.map +1 -0
- package/dist/utils/parseModelTypeDescription.d.ts +1 -1
- package/dist/utils/prettyPrintObject.d.ts +1 -0
- package/dist/utils/prettyPrintObject.js +40 -0
- package/dist/utils/prettyPrintObject.js.map +1 -0
- package/dist/utils/removeNullFields.d.ts +1 -0
- package/dist/utils/removeNullFields.js +8 -0
- package/dist/utils/removeNullFields.js.map +1 -1
- package/dist/utils/resolveChatWrapper.d.ts +4 -0
- package/dist/utils/resolveChatWrapper.js +16 -0
- package/dist/utils/resolveChatWrapper.js.map +1 -0
- package/dist/utils/resolveGithubRelease.d.ts +2 -0
- package/dist/utils/resolveGithubRelease.js +36 -0
- package/dist/utils/resolveGithubRelease.js.map +1 -0
- package/dist/utils/spawnCommand.d.ts +1 -1
- package/dist/utils/spawnCommand.js +4 -2
- package/dist/utils/spawnCommand.js.map +1 -1
- package/dist/utils/tokenizeInput.d.ts +3 -0
- package/dist/utils/tokenizeInput.js +9 -0
- package/dist/utils/tokenizeInput.js.map +1 -0
- package/dist/utils/truncateTextAndRoundToWords.d.ts +8 -0
- package/dist/utils/truncateTextAndRoundToWords.js +27 -0
- package/dist/utils/truncateTextAndRoundToWords.js.map +1 -0
- package/dist/utils/waitForLockfileRelease.d.ts +5 -0
- package/dist/utils/waitForLockfileRelease.js +20 -0
- package/dist/utils/waitForLockfileRelease.js.map +1 -0
- package/dist/utils/withLockfile.d.ts +7 -0
- package/dist/utils/withLockfile.js +44 -0
- package/dist/utils/withLockfile.js.map +1 -0
- package/dist/utils/withOra.js +11 -1
- package/dist/utils/withOra.js.map +1 -1
- package/dist/utils/withStatusLogs.d.ts +2 -1
- package/dist/utils/withStatusLogs.js +11 -8
- package/dist/utils/withStatusLogs.js.map +1 -1
- package/llama/.clang-format +1 -2
- package/llama/CMakeLists.txt +87 -2
- package/llama/addon.cpp +319 -31
- package/llama/binariesGithubRelease.json +1 -1
- package/llama/gitRelease.bundle +0 -0
- package/llama/gpuInfo/cuda-gpu-info.cu +99 -0
- package/llama/gpuInfo/cuda-gpu-info.h +7 -0
- package/llama/gpuInfo/metal-gpu-info.h +5 -0
- package/llama/gpuInfo/metal-gpu-info.mm +17 -0
- package/llama/llama.cpp.info.json +4 -0
- package/llamaBins/linux-arm64/.buildMetadata.json +1 -0
- package/llamaBins/linux-arm64/llama-addon.node +0 -0
- package/llamaBins/linux-armv7l/.buildMetadata.json +1 -0
- package/llamaBins/linux-armv7l/llama-addon.node +0 -0
- package/llamaBins/linux-x64/.buildMetadata.json +1 -0
- package/llamaBins/linux-x64/llama-addon.node +0 -0
- package/llamaBins/linux-x64-cuda/.buildMetadata.json +1 -0
- package/llamaBins/linux-x64-cuda/llama-addon.node +0 -0
- package/llamaBins/mac-arm64-metal/.buildMetadata.json +1 -0
- package/llamaBins/mac-arm64-metal/ggml-metal.metal +6491 -0
- package/llamaBins/mac-arm64-metal/llama-addon.node +0 -0
- package/llamaBins/mac-x64/.buildMetadata.json +1 -0
- package/llamaBins/mac-x64/llama-addon.node +0 -0
- package/llamaBins/win-x64/.buildMetadata.json +1 -0
- package/llamaBins/win-x64/llama-addon.exp +0 -0
- package/llamaBins/win-x64/llama-addon.node +0 -0
- package/llamaBins/win-x64-cuda/.buildMetadata.json +1 -0
- package/llamaBins/win-x64-cuda/llama-addon.exp +0 -0
- package/llamaBins/win-x64-cuda/llama-addon.lib +0 -0
- package/llamaBins/win-x64-cuda/llama-addon.node +0 -0
- package/package.json +37 -15
- package/dist/AbortError.d.ts +0 -2
- package/dist/AbortError.js +0 -7
- package/dist/AbortError.js.map +0 -1
- package/dist/ChatPromptWrapper.d.ts +0 -11
- package/dist/ChatPromptWrapper.js +0 -20
- package/dist/ChatPromptWrapper.js.map +0 -1
- package/dist/chatWrappers/ChatMLChatPromptWrapper.d.ts +0 -12
- package/dist/chatWrappers/ChatMLChatPromptWrapper.js +0 -22
- package/dist/chatWrappers/ChatMLChatPromptWrapper.js.map +0 -1
- package/dist/chatWrappers/EmptyChatPromptWrapper.d.ts +0 -4
- package/dist/chatWrappers/EmptyChatPromptWrapper.js +0 -5
- package/dist/chatWrappers/EmptyChatPromptWrapper.js.map +0 -1
- package/dist/chatWrappers/FalconChatPromptWrapper.d.ts +0 -19
- package/dist/chatWrappers/FalconChatPromptWrapper.js +0 -33
- package/dist/chatWrappers/FalconChatPromptWrapper.js.map +0 -1
- package/dist/chatWrappers/GeneralChatPromptWrapper.d.ts +0 -19
- package/dist/chatWrappers/GeneralChatPromptWrapper.js +0 -38
- package/dist/chatWrappers/GeneralChatPromptWrapper.js.map +0 -1
- package/dist/chatWrappers/LlamaChatPromptWrapper.d.ts +0 -12
- package/dist/chatWrappers/LlamaChatPromptWrapper.js +0 -23
- package/dist/chatWrappers/LlamaChatPromptWrapper.js.map +0 -1
- package/dist/chatWrappers/generateContextTextFromConversationHistory.d.ts +0 -15
- package/dist/chatWrappers/generateContextTextFromConversationHistory.js +0 -39
- package/dist/chatWrappers/generateContextTextFromConversationHistory.js.map +0 -1
- package/dist/llamaEvaluator/LlamaBins.d.ts +0 -19
- package/dist/llamaEvaluator/LlamaBins.js +0 -5
- package/dist/llamaEvaluator/LlamaBins.js.map +0 -1
- package/dist/llamaEvaluator/LlamaChatSession.js +0 -290
- package/dist/llamaEvaluator/LlamaChatSession.js.map +0 -1
- package/dist/llamaEvaluator/LlamaContext/LlamaContext.js.map +0 -1
- package/dist/llamaEvaluator/LlamaContext/types.js.map +0 -1
- package/dist/llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/firstInFirstOutStrategy.js.map +0 -1
- package/dist/llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/maximumParallelismStrategy.js.map +0 -1
- package/dist/llamaEvaluator/LlamaContext/utils/resolveBatchItemsPrioritizingStrategy.js.map +0 -1
- package/dist/llamaEvaluator/LlamaGrammar.d.ts +0 -32
- package/dist/llamaEvaluator/LlamaGrammar.js.map +0 -1
- package/dist/llamaEvaluator/LlamaGrammarEvaluationState.js.map +0 -1
- package/dist/llamaEvaluator/LlamaJsonSchemaGrammar.js.map +0 -1
- package/dist/llamaEvaluator/LlamaModel.js.map +0 -1
- package/dist/utils/binariesGithubRelease.js.map +0 -1
- package/dist/utils/clearLlamaBuild.d.ts +0 -1
- package/dist/utils/clearLlamaBuild.js +0 -12
- package/dist/utils/clearLlamaBuild.js.map +0 -1
- package/dist/utils/cloneLlamaCppRepo.d.ts +0 -2
- package/dist/utils/cloneLlamaCppRepo.js +0 -102
- package/dist/utils/cloneLlamaCppRepo.js.map +0 -1
- package/dist/utils/compileLLamaCpp.d.ts +0 -8
- package/dist/utils/compileLLamaCpp.js +0 -127
- package/dist/utils/compileLLamaCpp.js.map +0 -1
- package/dist/utils/getBin.js +0 -78
- package/dist/utils/getBin.js.map +0 -1
- package/dist/utils/getReleaseInfo.d.ts +0 -7
- package/dist/utils/getReleaseInfo.js +0 -30
- package/dist/utils/getReleaseInfo.js.map +0 -1
- package/dist/utils/getTextCompletion.d.ts +0 -3
- package/dist/utils/getTextCompletion.js +0 -12
- package/dist/utils/getTextCompletion.js.map +0 -1
- package/dist/utils/usedBinFlag.d.ts +0 -6
- package/dist/utils/usedBinFlag.js +0 -15
- package/dist/utils/usedBinFlag.js.map +0 -1
- package/llama/usedBin.json +0 -3
- package/llamaBins/mac-arm64/ggml-metal.metal +0 -2929
- package/llamaBins/mac-arm64/llama-addon.node +0 -0
- package/llamaBins/mac-x64/ggml-metal.metal +0 -2929
- /package/dist/{utils → bindings/utils}/binariesGithubRelease.d.ts +0 -0
- /package/dist/{llamaEvaluator → evaluator}/LlamaContext/types.js +0 -0
- /package/dist/{llamaEvaluator → evaluator}/LlamaContext/utils/batchItemsPrioritizingStrategies/firstInFirstOutStrategy.d.ts +0 -0
- /package/dist/{llamaEvaluator → evaluator}/LlamaContext/utils/batchItemsPrioritizingStrategies/firstInFirstOutStrategy.js +0 -0
- /package/dist/{llamaEvaluator → evaluator}/LlamaContext/utils/batchItemsPrioritizingStrategies/maximumParallelismStrategy.d.ts +0 -0
- /package/dist/{llamaEvaluator → evaluator}/LlamaContext/utils/batchItemsPrioritizingStrategies/maximumParallelismStrategy.js +0 -0
- /package/dist/{llamaEvaluator → evaluator}/LlamaContext/utils/resolveBatchItemsPrioritizingStrategy.d.ts +0 -0
- /package/dist/{llamaEvaluator → evaluator}/LlamaContext/utils/resolveBatchItemsPrioritizingStrategy.js +0 -0
- /package/dist/{llamaEvaluator → evaluator}/LlamaGrammarEvaluationState.d.ts +0 -0
package/llama/addon.cpp
CHANGED
|
@@ -9,21 +9,104 @@
|
|
|
9
9
|
#include "llama.h"
|
|
10
10
|
#include "napi.h"
|
|
11
11
|
|
|
12
|
-
|
|
12
|
+
#ifdef GPU_INFO_USE_CUBLAS
|
|
13
|
+
# include "gpuInfo/cuda-gpu-info.h"
|
|
14
|
+
#endif
|
|
15
|
+
#ifdef GPU_INFO_USE_METAL
|
|
16
|
+
# include "gpuInfo/metal-gpu-info.h"
|
|
17
|
+
#endif
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
// One log record handed to the JS logger callback; it is the data type of
// AddonThreadSafeLogCallbackFunction.
struct addon_logger_log {
    // Numeric log level of this record.
    const int logLevelNumber;

    // Message text of this record.
    // NOTE(review): who allocates and frees the stream is not visible here - confirm at the call sites.
    const std::stringstream* stringStream;
};
|
|
25
|
+
|
|
26
|
+
static void addonLlamaCppLogCallback(ggml_log_level level, const char* text, void* user_data);
|
|
27
|
+
|
|
28
|
+
using AddonThreadSafeLogCallbackFunctionContext = Napi::Reference<Napi::Value>;
|
|
29
|
+
void addonCallJsLogCallback(
|
|
30
|
+
Napi::Env env, Napi::Function callback, AddonThreadSafeLogCallbackFunctionContext* context, addon_logger_log* data
|
|
31
|
+
);
|
|
32
|
+
using AddonThreadSafeLogCallbackFunction =
|
|
33
|
+
Napi::TypedThreadSafeFunction<AddonThreadSafeLogCallbackFunctionContext, addon_logger_log, addonCallJsLogCallback>;
|
|
34
|
+
|
|
35
|
+
AddonThreadSafeLogCallbackFunction addonThreadSafeLoggerCallback;
|
|
36
|
+
bool addonJsLoggerCallbackSet = false;
|
|
37
|
+
int addonLoggerLogLevel = 5;
|
|
38
|
+
|
|
39
|
+
std::string addon_model_token_to_piece(const struct llama_model* model, llama_token token) {
|
|
13
40
|
std::vector<char> result(8, 0);
|
|
14
41
|
const int n_tokens = llama_token_to_piece(model, token, result.data(), result.size());
|
|
15
42
|
if (n_tokens < 0) {
|
|
16
43
|
result.resize(-n_tokens);
|
|
17
44
|
int check = llama_token_to_piece(model, token, result.data(), result.size());
|
|
18
45
|
GGML_ASSERT(check == -n_tokens);
|
|
19
|
-
}
|
|
20
|
-
else {
|
|
46
|
+
} else {
|
|
21
47
|
result.resize(n_tokens);
|
|
22
48
|
}
|
|
23
49
|
|
|
24
50
|
return std::string(result.data(), result.size());
|
|
25
51
|
}
|
|
26
52
|
|
|
53
|
+
#ifdef GPU_INFO_USE_CUBLAS
|
|
54
|
+
// Error sink passed to the CUDA GPU-info helper: forwards `message` to the addon
// logger at error level, prefixed with "CUDA error: ".
void lodCudaError(const char* message) {
    const std::string logText = std::string("CUDA error: ") + std::string(message);

    addonLlamaCppLogCallback(GGML_LOG_LEVEL_ERROR, logText.c_str(), nullptr);
}
|
|
57
|
+
#endif
|
|
58
|
+
|
|
59
|
+
// Returns a JS object `{total, used}` holding the summed VRAM figures of the GPU
// backends this addon was compiled with. Both values stay 0 when no backend is
// compiled in.
Napi::Value getGpuVramInfo(const Napi::CallbackInfo& info) {
    Napi::Env env = info.Env();

    uint64_t total = 0;
    uint64_t used = 0;

#ifdef GPU_INFO_USE_CUBLAS
    size_t cudaDeviceTotal = 0;
    size_t cudaDeviceUsed = 0;

    // The CUDA figures are added only when the query reports success;
    // errors are handed to lodCudaError.
    if (gpuInfoGetTotalCudaDevicesInfo(&cudaDeviceTotal, &cudaDeviceUsed, lodCudaError)) {
        total += cudaDeviceTotal;
        used += cudaDeviceUsed;
    }
#endif

#ifdef GPU_INFO_USE_METAL
    uint64_t metalDeviceTotal = 0;
    uint64_t metalDeviceUsed = 0;
    get_metal_gpu_info(&metalDeviceTotal, &metalDeviceUsed);

    total += metalDeviceTotal;
    used += metalDeviceUsed;
#endif

    Napi::Object result = Napi::Object::New(env);
    result.Set("total", Napi::Number::From(env, total));
    result.Set("used", Napi::Number::From(env, used));

    return result;
}
|
|
89
|
+
|
|
90
|
+
// Wraps `token` as a JS number, or returns -1 when the model has no usable token
// for that id.
//
// -1 is returned when the id lies outside [0, llama_n_vocab(model)) or when its
// type is LLAMA_TOKEN_TYPE_UNDEFINED or LLAMA_TOKEN_TYPE_UNKNOWN.
static Napi::Value getNapiToken(const Napi::CallbackInfo& info, llama_model* model, llama_token token) {
    // Validate the id before querying its type, so an id the model does not
    // define (for example a negative "no such token" value) is never used as a
    // vocabulary lookup key.
    // NOTE(review): llama_token_get_type is not shown here; presumably it does
    // not range-check the id itself - confirm against the bundled llama.h.
    if (token < 0 || token >= llama_n_vocab(model)) {
        return Napi::Number::From(info.Env(), -1);
    }

    auto tokenType = llama_token_get_type(model, token);

    if (tokenType == LLAMA_TOKEN_TYPE_UNDEFINED || tokenType == LLAMA_TOKEN_TYPE_UNKNOWN) {
        return Napi::Number::From(info.Env(), -1);
    }

    return Napi::Number::From(info.Env(), token);
}
|
|
99
|
+
|
|
100
|
+
// Wraps `token` as a JS number only when it is a control token of `model`;
// otherwise returns -1.
//
// -1 is returned when the id lies outside [0, llama_n_vocab(model)) or when its
// type is anything other than LLAMA_TOKEN_TYPE_CONTROL.
static Napi::Value getNapiControlToken(const Napi::CallbackInfo& info, llama_model* model, llama_token token) {
    // Validate the id before querying its type, so an id the model does not
    // define (for example a negative "no such token" value) is never used as a
    // vocabulary lookup key.
    // NOTE(review): llama_token_get_type is not shown here; presumably it does
    // not range-check the id itself - confirm against the bundled llama.h.
    if (token < 0 || token >= llama_n_vocab(model)) {
        return Napi::Number::From(info.Env(), -1);
    }

    auto tokenType = llama_token_get_type(model, token);

    if (tokenType != LLAMA_TOKEN_TYPE_CONTROL) {
        return Napi::Number::From(info.Env(), -1);
    }

    return Napi::Number::From(info.Env(), token);
}
|
|
109
|
+
|
|
27
110
|
class AddonModel : public Napi::ObjectWrap<AddonModel> {
|
|
28
111
|
public:
|
|
29
112
|
llama_model_params model_params;
|
|
@@ -56,7 +139,6 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
|
|
|
56
139
|
}
|
|
57
140
|
}
|
|
58
141
|
|
|
59
|
-
llama_backend_init(false);
|
|
60
142
|
model = llama_load_model_from_file(modelPath.c_str(), model_params);
|
|
61
143
|
|
|
62
144
|
if (model == NULL) {
|
|
@@ -95,8 +177,9 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
|
|
|
95
177
|
}
|
|
96
178
|
|
|
97
179
|
std::string text = info[0].As<Napi::String>().Utf8Value();
|
|
180
|
+
bool specialTokens = info[1].As<Napi::Boolean>().Value();
|
|
98
181
|
|
|
99
|
-
std::vector<llama_token> tokens = llama_tokenize(model, text,
|
|
182
|
+
std::vector<llama_token> tokens = llama_tokenize(model, text, false, specialTokens);
|
|
100
183
|
|
|
101
184
|
Napi::Uint32Array result = Napi::Uint32Array::New(info.Env(), tokens.size());
|
|
102
185
|
for (size_t i = 0; i < tokens.size(); ++i) {
|
|
@@ -139,6 +222,15 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
|
|
|
139
222
|
return Napi::Number::From(info.Env(), llama_n_ctx_train(model));
|
|
140
223
|
}
|
|
141
224
|
|
|
225
|
+
// JS method `getEmbeddingVectorSize`: returns llama_n_embd(model) as a JS number.
// Throws a JS error and returns undefined once the model has been disposed.
Napi::Value GetEmbeddingVectorSize(const Napi::CallbackInfo& info) {
    Napi::Env env = info.Env();

    if (!disposed) {
        return Napi::Number::From(env, llama_n_embd(model));
    }

    Napi::Error::New(env, "Context is disposed").ThrowAsJavaScriptException();
    return env.Undefined();
}
|
|
233
|
+
|
|
142
234
|
Napi::Value GetTotalSize(const Napi::CallbackInfo& info) {
|
|
143
235
|
if (disposed) {
|
|
144
236
|
Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
|
|
@@ -162,7 +254,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
|
|
|
162
254
|
Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
|
|
163
255
|
return info.Env().Undefined();
|
|
164
256
|
}
|
|
165
|
-
|
|
257
|
+
|
|
166
258
|
char model_desc[128];
|
|
167
259
|
int actual_length = llama_model_desc(model, model_desc, sizeof(model_desc));
|
|
168
260
|
|
|
@@ -175,7 +267,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
|
|
|
175
267
|
return info.Env().Undefined();
|
|
176
268
|
}
|
|
177
269
|
|
|
178
|
-
return
|
|
270
|
+
return getNapiControlToken(info, model, llama_token_bos(model));
|
|
179
271
|
}
|
|
180
272
|
Napi::Value TokenEos(const Napi::CallbackInfo& info) {
|
|
181
273
|
if (disposed) {
|
|
@@ -183,7 +275,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
|
|
|
183
275
|
return info.Env().Undefined();
|
|
184
276
|
}
|
|
185
277
|
|
|
186
|
-
return
|
|
278
|
+
return getNapiControlToken(info, model, llama_token_eos(model));
|
|
187
279
|
}
|
|
188
280
|
Napi::Value TokenNl(const Napi::CallbackInfo& info) {
|
|
189
281
|
if (disposed) {
|
|
@@ -191,7 +283,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
|
|
|
191
283
|
return info.Env().Undefined();
|
|
192
284
|
}
|
|
193
285
|
|
|
194
|
-
return
|
|
286
|
+
return getNapiToken(info, model, llama_token_nl(model));
|
|
195
287
|
}
|
|
196
288
|
Napi::Value PrefixToken(const Napi::CallbackInfo& info) {
|
|
197
289
|
if (disposed) {
|
|
@@ -199,7 +291,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
|
|
|
199
291
|
return info.Env().Undefined();
|
|
200
292
|
}
|
|
201
293
|
|
|
202
|
-
return
|
|
294
|
+
return getNapiControlToken(info, model, llama_token_prefix(model));
|
|
203
295
|
}
|
|
204
296
|
Napi::Value MiddleToken(const Napi::CallbackInfo& info) {
|
|
205
297
|
if (disposed) {
|
|
@@ -207,7 +299,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
|
|
|
207
299
|
return info.Env().Undefined();
|
|
208
300
|
}
|
|
209
301
|
|
|
210
|
-
return
|
|
302
|
+
return getNapiControlToken(info, model, llama_token_middle(model));
|
|
211
303
|
}
|
|
212
304
|
Napi::Value SuffixToken(const Napi::CallbackInfo& info) {
|
|
213
305
|
if (disposed) {
|
|
@@ -215,7 +307,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
|
|
|
215
307
|
return info.Env().Undefined();
|
|
216
308
|
}
|
|
217
309
|
|
|
218
|
-
return
|
|
310
|
+
return getNapiControlToken(info, model, llama_token_suffix(model));
|
|
219
311
|
}
|
|
220
312
|
Napi::Value EotToken(const Napi::CallbackInfo& info) {
|
|
221
313
|
if (disposed) {
|
|
@@ -223,7 +315,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
|
|
|
223
315
|
return info.Env().Undefined();
|
|
224
316
|
}
|
|
225
317
|
|
|
226
|
-
return
|
|
318
|
+
return getNapiControlToken(info, model, llama_token_eot(model));
|
|
227
319
|
}
|
|
228
320
|
Napi::Value GetTokenString(const Napi::CallbackInfo& info) {
|
|
229
321
|
if (disposed) {
|
|
@@ -244,6 +336,29 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
|
|
|
244
336
|
return Napi::String::New(info.Env(), ss.str());
|
|
245
337
|
}
|
|
246
338
|
|
|
339
|
+
// JS method `getTokenType(token)`: returns the llama token type of `token` as an
// int32 JS number.
//
// Returns LLAMA_TOKEN_TYPE_UNDEFINED when the argument is not a number or when
// the id lies outside [0, llama_n_vocab(model)).
// Throws a JS error and returns undefined once the model has been disposed.
Napi::Value GetTokenType(const Napi::CallbackInfo& info) {
    if (disposed) {
        Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
        return info.Env().Undefined();
    }

    if (info[0].IsNumber() == false) {
        return Napi::Number::From(info.Env(), int32_t(LLAMA_TOKEN_TYPE_UNDEFINED));
    }

    int token = info[0].As<Napi::Number>().Int32Value();

    // The id comes straight from JS, so range-check it before it is used as a
    // vocabulary lookup key.
    // NOTE(review): llama_token_get_type is not shown here; presumably it does
    // not range-check the id itself - confirm against the bundled llama.h.
    if (token < 0 || token >= llama_n_vocab(model)) {
        return Napi::Number::From(info.Env(), int32_t(LLAMA_TOKEN_TYPE_UNDEFINED));
    }

    auto tokenType = llama_token_get_type(model, token);

    return Napi::Number::From(info.Env(), int32_t(tokenType));
}
|
|
354
|
+
// JS method `shouldPrependBosToken`: returns a JS boolean telling whether a BOS
// token should be prepended for this model.
//
// The value of llama_add_bos_token(model) is used when it is not -1; on -1 the
// answer is true only when the model's vocabulary type is LLAMA_VOCAB_TYPE_SPM.
// Throws a JS error and returns undefined once the model has been disposed.
Napi::Value ShouldPrependBosToken(const Napi::CallbackInfo& info) {
    // Same guard as the other accessors of this class: do not touch `model`
    // after disposal.
    if (disposed) {
        Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
        return info.Env().Undefined();
    }

    const int addBos = llama_add_bos_token(model);

    bool shouldPrependBos = addBos != -1 ? bool(addBos) : (llama_vocab_type(model) == LLAMA_VOCAB_TYPE_SPM);

    return Napi::Boolean::New(info.Env(), shouldPrependBos);
}
|
|
361
|
+
|
|
247
362
|
static void init(Napi::Object exports) {
|
|
248
363
|
exports.Set(
|
|
249
364
|
"AddonModel",
|
|
@@ -254,6 +369,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
|
|
|
254
369
|
InstanceMethod("tokenize", &AddonModel::Tokenize),
|
|
255
370
|
InstanceMethod("detokenize", &AddonModel::Detokenize),
|
|
256
371
|
InstanceMethod("getTrainContextSize", &AddonModel::GetTrainContextSize),
|
|
372
|
+
InstanceMethod("getEmbeddingVectorSize", &AddonModel::GetEmbeddingVectorSize),
|
|
257
373
|
InstanceMethod("getTotalSize", &AddonModel::GetTotalSize),
|
|
258
374
|
InstanceMethod("getTotalParameters", &AddonModel::GetTotalParameters),
|
|
259
375
|
InstanceMethod("getModelDescription", &AddonModel::GetModelDescription),
|
|
@@ -265,7 +381,9 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
|
|
|
265
381
|
InstanceMethod("suffixToken", &AddonModel::SuffixToken),
|
|
266
382
|
InstanceMethod("eotToken", &AddonModel::EotToken),
|
|
267
383
|
InstanceMethod("getTokenString", &AddonModel::GetTokenString),
|
|
268
|
-
InstanceMethod("
|
|
384
|
+
InstanceMethod("getTokenType", &AddonModel::GetTokenType),
|
|
385
|
+
InstanceMethod("shouldPrependBosToken", &AddonModel::ShouldPrependBosToken),
|
|
386
|
+
InstanceMethod("dispose", &AddonModel::Dispose),
|
|
269
387
|
}
|
|
270
388
|
)
|
|
271
389
|
);
|
|
@@ -352,29 +470,23 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
|
|
|
352
470
|
context_params.seed = -1;
|
|
353
471
|
context_params.n_ctx = 4096;
|
|
354
472
|
context_params.n_threads = 6;
|
|
355
|
-
context_params.n_threads_batch
|
|
473
|
+
context_params.n_threads_batch = context_params.n_threads;
|
|
356
474
|
|
|
357
475
|
if (info.Length() > 1 && info[1].IsObject()) {
|
|
358
476
|
Napi::Object options = info[1].As<Napi::Object>();
|
|
359
477
|
|
|
360
|
-
if (options.Has("
|
|
361
|
-
context_params.seed =
|
|
478
|
+
if (options.Has("noSeed")) {
|
|
479
|
+
context_params.seed = time(NULL);
|
|
480
|
+
} else if (options.Has("seed")) {
|
|
481
|
+
context_params.seed = options.Get("seed").As<Napi::Number>().Uint32Value();
|
|
362
482
|
}
|
|
363
483
|
|
|
364
484
|
if (options.Has("contextSize")) {
|
|
365
|
-
context_params.n_ctx = options.Get("contextSize").As<Napi::Number>().
|
|
485
|
+
context_params.n_ctx = options.Get("contextSize").As<Napi::Number>().Uint32Value();
|
|
366
486
|
}
|
|
367
487
|
|
|
368
488
|
if (options.Has("batchSize")) {
|
|
369
|
-
context_params.n_batch = options.Get("batchSize").As<Napi::Number>().
|
|
370
|
-
}
|
|
371
|
-
|
|
372
|
-
if (options.Has("f16Kv")) {
|
|
373
|
-
context_params.f16_kv = options.Get("f16Kv").As<Napi::Boolean>().Value();
|
|
374
|
-
}
|
|
375
|
-
|
|
376
|
-
if (options.Has("logitsAll")) {
|
|
377
|
-
context_params.logits_all = options.Get("logitsAll").As<Napi::Boolean>().Value();
|
|
489
|
+
context_params.n_batch = options.Get("batchSize").As<Napi::Number>().Uint32Value();
|
|
378
490
|
}
|
|
379
491
|
|
|
380
492
|
if (options.Has("embedding")) {
|
|
@@ -382,8 +494,11 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
|
|
|
382
494
|
}
|
|
383
495
|
|
|
384
496
|
if (options.Has("threads")) {
|
|
385
|
-
|
|
386
|
-
|
|
497
|
+
const auto n_threads = options.Get("threads").As<Napi::Number>().Uint32Value();
|
|
498
|
+
const auto resolved_n_threads = n_threads == 0 ? std::thread::hardware_concurrency() : n_threads;
|
|
499
|
+
|
|
500
|
+
context_params.n_threads = resolved_n_threads;
|
|
501
|
+
context_params.n_threads_batch = resolved_n_threads;
|
|
387
502
|
}
|
|
388
503
|
}
|
|
389
504
|
|
|
@@ -533,6 +648,41 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
|
|
|
533
648
|
Napi::Value DecodeBatch(const Napi::CallbackInfo& info);
|
|
534
649
|
Napi::Value SampleToken(const Napi::CallbackInfo& info);
|
|
535
650
|
|
|
651
|
+
// Feeds an already-sampled token into a grammar evaluation state.
//
// info[0]: an AddonGrammarEvaluationState object.
// info[1]: the token id to accept.
//
// Does nothing when the evaluation state has no grammar attached.
// Throws a JS error and returns undefined when the context is already disposed,
// so `ctx` is never handed to llama.cpp after disposal.
Napi::Value AcceptGrammarEvaluationStateToken(const Napi::CallbackInfo& info) {
    if (disposed) {
        Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
        return info.Env().Undefined();
    }

    AddonGrammarEvaluationState* grammar_evaluation_state =
        Napi::ObjectWrap<AddonGrammarEvaluationState>::Unwrap(info[0].As<Napi::Object>());
    llama_token tokenId = info[1].As<Napi::Number>().Int32Value();

    if (grammar_evaluation_state->grammar != nullptr) {
        llama_grammar_accept_token(ctx, grammar_evaluation_state->grammar, tokenId);
    }

    return info.Env().Undefined();
}
|
|
662
|
+
|
|
663
|
+
// Copies the context's current embedding vector into a new Float64Array.
//
// The array length is the model's embedding size (llama_n_embd).
//
// Throws a JS error and returns undefined when:
//   - the context is already disposed, or
//   - llama.cpp returns no embedding buffer although the model reports a
//     non-zero embedding size (presumably because embeddings were not enabled
//     or nothing was evaluated yet - confirm against the llama.cpp version in use).
Napi::Value GetEmbedding(const Napi::CallbackInfo& info) {
    if (disposed) {
        Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
        return info.Env().Undefined();
    }

    const int n_embd = llama_n_embd(model->model);
    const auto* embeddings = llama_get_embeddings(ctx);

    // never read through a null buffer
    if (n_embd > 0 && embeddings == nullptr) {
        Napi::Error::New(info.Env(), "Failed to get embeddings").ThrowAsJavaScriptException();
        return info.Env().Undefined();
    }

    // use an unsigned length so the loop does not mix signed and unsigned operands
    const size_t embeddingSize = n_embd > 0 ? static_cast<size_t>(n_embd) : 0;

    Napi::Float64Array result = Napi::Float64Array::New(info.Env(), embeddingSize);
    for (size_t i = 0; i < embeddingSize; ++i) {
        result[i] = embeddings[i];
    }

    return result;
}
|
|
679
|
+
|
|
680
|
+
// Prints llama.cpp's timing statistics for this context and then resets them.
//
// Throws a JS error and returns undefined when the context is already disposed,
// so `ctx` is never handed to llama.cpp after disposal.
Napi::Value PrintTimings(const Napi::CallbackInfo& info) {
    if (disposed) {
        Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
        return info.Env().Undefined();
    }

    llama_print_timings(ctx);
    llama_reset_timings(ctx);

    return info.Env().Undefined();
}
|
|
685
|
+
|
|
536
686
|
static void init(Napi::Object exports) {
|
|
537
687
|
exports.Set(
|
|
538
688
|
"AddonContext",
|
|
@@ -548,7 +698,10 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
|
|
|
548
698
|
InstanceMethod("shiftSequenceTokenCells", &AddonContext::ShiftSequenceTokenCells),
|
|
549
699
|
InstanceMethod("decodeBatch", &AddonContext::DecodeBatch),
|
|
550
700
|
InstanceMethod("sampleToken", &AddonContext::SampleToken),
|
|
551
|
-
InstanceMethod("
|
|
701
|
+
InstanceMethod("acceptGrammarEvaluationStateToken", &AddonContext::AcceptGrammarEvaluationStateToken),
|
|
702
|
+
InstanceMethod("getEmbedding", &AddonContext::GetEmbedding),
|
|
703
|
+
InstanceMethod("printTimings", &AddonContext::PrintTimings),
|
|
704
|
+
InstanceMethod("dispose", &AddonContext::Dispose),
|
|
552
705
|
}
|
|
553
706
|
)
|
|
554
707
|
);
|
|
@@ -610,6 +763,7 @@ class AddonContextSampleTokenWorker : Napi::AsyncWorker, Napi::Promise::Deferred
|
|
|
610
763
|
bool use_grammar = false;
|
|
611
764
|
llama_token result;
|
|
612
765
|
float temperature = 0.0f;
|
|
766
|
+
float min_p = 0;
|
|
613
767
|
int32_t top_k = 40;
|
|
614
768
|
float top_p = 0.95f;
|
|
615
769
|
float repeat_penalty = 1.10f; // 1.0 = disabled
|
|
@@ -633,6 +787,10 @@ class AddonContextSampleTokenWorker : Napi::AsyncWorker, Napi::Promise::Deferred
|
|
|
633
787
|
temperature = options.Get("temperature").As<Napi::Number>().FloatValue();
|
|
634
788
|
}
|
|
635
789
|
|
|
790
|
+
if (options.Has("minP")) {
|
|
791
|
+
min_p = options.Get("minP").As<Napi::Number>().FloatValue();
|
|
792
|
+
}
|
|
793
|
+
|
|
636
794
|
if (options.Has("topK")) {
|
|
637
795
|
top_k = options.Get("topK").As<Napi::Number>().Int32Value();
|
|
638
796
|
}
|
|
@@ -734,6 +892,7 @@ class AddonContextSampleTokenWorker : Napi::AsyncWorker, Napi::Promise::Deferred
|
|
|
734
892
|
llama_sample_tail_free(ctx->ctx, &candidates_p, tfs_z, min_keep);
|
|
735
893
|
llama_sample_typical(ctx->ctx, &candidates_p, typical_p, min_keep);
|
|
736
894
|
llama_sample_top_p(ctx->ctx, &candidates_p, resolved_top_p, min_keep);
|
|
895
|
+
llama_sample_min_p(ctx->ctx, &candidates_p, min_p, min_keep);
|
|
737
896
|
llama_sample_temp(ctx->ctx, &candidates_p, temperature);
|
|
738
897
|
new_token_id = llama_sample_token(ctx->ctx, &candidates_p);
|
|
739
898
|
}
|
|
@@ -764,15 +923,144 @@ Napi::Value systemInfo(const Napi::CallbackInfo& info) {
|
|
|
764
923
|
return Napi::String::From(info.Env(), llama_print_system_info());
|
|
765
924
|
}
|
|
766
925
|
|
|
926
|
+
// Translates a ggml log level into the numeric level used by this addon:
// error -> 2, warn -> 3, info -> 4, debug -> 5; anything else -> 1.
int addonGetGgmlLogLevelNumber(ggml_log_level level) {
    if (level == GGML_LOG_LEVEL_ERROR) {
        return 2;
    }

    if (level == GGML_LOG_LEVEL_WARN) {
        return 3;
    }

    if (level == GGML_LOG_LEVEL_INFO) {
        return 4;
    }

    if (level == GGML_LOG_LEVEL_DEBUG) {
        return 5;
    }

    // any level that is not listed above
    return 1;
}
|
|
936
|
+
|
|
937
|
+
// Delivers one queued log record to the JS logger callback and frees the record.
//
// env / callback: the JS environment and function to call; when either one is
//                 null, or no JS logger is currently set, the JS call is skipped.
// context:        unused here.
// data:           heap-allocated record (level number + message stream). This
//                 function takes ownership and always frees it. A null record
//                 is ignored.
//
// When the JS callback is not called (or throws a Napi::Error), the message is
// written to stderr for level 2 and to stdout for every other level.
void addonCallJsLogCallback(
    Napi::Env env, Napi::Function callback, AddonThreadSafeLogCallbackFunctionContext* context, addon_logger_log* data
) {
    if (data == nullptr) {
        return;
    }

    // copy what is needed and free the record right away, so nothing that
    // happens while calling into JS can leak it
    const auto logLevelNumber = data->logLevelNumber;
    std::string message;
    if (data->stringStream != nullptr) {
        message = data->stringStream->str();
    }

    delete data->stringStream;
    delete data;

    bool called = false;

    if (env != nullptr && callback != nullptr && addonJsLoggerCallbackSet) {
        try {
            callback.Call({
                Napi::Number::New(env, logLevelNumber),
                Napi::String::New(env, message),
            });
            called = true;
        } catch (const Napi::Error&) {
            // fall back to printing the message below
            called = false;
        }
    }

    if (!called) {
        FILE* target = logLevelNumber == 2 ? stderr : stdout;
        fputs(message.c_str(), target);
        fflush(target);
    }
}
|
|
969
|
+
|
|
970
|
+
// Log sink passed to llama.cpp.
//
// Messages whose mapped level number is above addonLoggerLogLevel are dropped.
// When a JS logger is set, the message is queued for it; if queueing fails, or
// no JS logger is set, the message is written to stderr for level 2 and to
// stdout for every other level.
//
// level:     ggml log level of the message.
// text:      message text; may be null, in which case nothing is printed.
// user_data: unused.
static void addonLlamaCppLogCallback(ggml_log_level level, const char* text, void* user_data) {
    int logLevelNumber = addonGetGgmlLogLevelNumber(level);

    if (logLevelNumber > addonLoggerLogLevel) {
        return;
    }

    if (addonJsLoggerCallbackSet) {
        std::stringstream* stringStream = new std::stringstream();
        if (text != nullptr) {
            *stringStream << text;
        }

        addon_logger_log* data = new addon_logger_log {
            logLevelNumber,
            stringStream,
        };

        auto status = addonThreadSafeLoggerCallback.NonBlockingCall(data);

        if (status == napi_ok) {
            // the record is now owned by the queued call
            return;
        }

        // the record was not queued, so nobody else will free it
        delete stringStream;
        delete data;
    }

    // passing a null string to fputs is undefined behavior
    if (text == nullptr) {
        return;
    }

    // decide on the mapped level number rather than on the raw enum value
    FILE* target = logLevelNumber == 2 ? stderr : stdout;
    fputs(text, target);
    fflush(target);
}
|
|
1003
|
+
|
|
1004
|
+
// Installs or removes the JS logger callback.
//
// info[0]: a JS function to receive log records. When it is missing or not a
//          function, the currently installed logger (if any) is released.
//
// Always returns undefined.
Napi::Value setLogger(const Napi::CallbackInfo& info) {
    Napi::Env env = info.Env();
    const bool hasCallback = info.Length() >= 1 && info[0].IsFunction();

    if (!hasCallback) {
        if (addonJsLoggerCallbackSet) {
            addonJsLoggerCallbackSet = false;
            addonThreadSafeLoggerCallback.Release();
        }

        return env.Undefined();
    }

    // NOTE(review): a logger that is already installed is overwritten here
    // without being released first - confirm whether that is intended

    Napi::Function jsCallback = info[0].As<Napi::Function>();

    // freed by the finalizer below
    AddonThreadSafeLogCallbackFunctionContext* context =
        new Napi::Reference<Napi::Value>(Napi::Persistent(info.This()));

    addonThreadSafeLoggerCallback = AddonThreadSafeLogCallbackFunction::New(
        env,
        jsCallback,
        "loggerCallback",
        0,
        1,
        context,
        [](Napi::Env, void*, AddonThreadSafeLogCallbackFunctionContext* ctx) {
            addonJsLoggerCallbackSet = false;

            delete ctx;
        }
    );
    addonJsLoggerCallbackSet = true;

    // prevent blocking the main node process from exiting due to active resources
    addonThreadSafeLoggerCallback.Unref(env);

    return env.Undefined();
}
|
|
1036
|
+
|
|
1037
|
+
// Sets the maximum log level number that is forwarded by the addon's logger.
//
// info[0]: the level number. When it is missing or not a number, the level is
//          set to 5.
//
// Always returns undefined.
Napi::Value setLoggerLogLevel(const Napi::CallbackInfo& info) {
    const bool hasLevel = info.Length() >= 1 && info[0].IsNumber();

    if (hasLevel) {
        addonLoggerLogLevel = info[0].As<Napi::Number>().Int32Value();
    } else {
        addonLoggerLogLevel = 5;
    }

    return info.Env().Undefined();
}
|
|
1048
|
+
|
|
767
1049
|
Napi::Object registerCallback(Napi::Env env, Napi::Object exports) {
|
|
768
|
-
llama_backend_init(
|
|
1050
|
+
llama_backend_init();
|
|
769
1051
|
exports.DefineProperties({
|
|
770
1052
|
Napi::PropertyDescriptor::Function("systemInfo", systemInfo),
|
|
1053
|
+
Napi::PropertyDescriptor::Function("setLogger", setLogger),
|
|
1054
|
+
Napi::PropertyDescriptor::Function("setLoggerLogLevel", setLoggerLogLevel),
|
|
1055
|
+
Napi::PropertyDescriptor::Function("getGpuVramInfo", getGpuVramInfo),
|
|
771
1056
|
});
|
|
772
1057
|
AddonModel::init(exports);
|
|
773
1058
|
AddonGrammar::init(exports);
|
|
774
1059
|
AddonGrammarEvaluationState::init(exports);
|
|
775
1060
|
AddonContext::init(exports);
|
|
1061
|
+
|
|
1062
|
+
llama_log_set(addonLlamaCppLogCallback, nullptr);
|
|
1063
|
+
|
|
776
1064
|
return exports;
|
|
777
1065
|
}
|
|
778
1066
|
|
package/llama/gitRelease.bundle
CHANGED
|
Binary file
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
#include <stddef.h>
|
|
2
|
+
|
|
3
|
+
#if defined(GPU_INFO_USE_HIPBLAS)
|
|
4
|
+
#include <hip/hip_runtime.h>
|
|
5
|
+
#include <hipblas/hipblas.h>
|
|
6
|
+
#define cudaGetDevice hipGetDevice
|
|
7
|
+
#define cudaGetDeviceCount hipGetDeviceCount
|
|
8
|
+
#define cudaGetErrorString hipGetErrorString
|
|
9
|
+
#define cudaMemGetInfo hipMemGetInfo
|
|
10
|
+
#define cudaSetDevice hipSetDevice
|
|
11
|
+
#define cudaSuccess hipSuccess
|
|
12
|
+
#else
|
|
13
|
+
#include <cuda_runtime.h>
|
|
14
|
+
#include <cuda.h>
|
|
15
|
+
#endif
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
typedef void (*gpuInfoErrorLogCallback_t)(const char* message);
|
|
19
|
+
|
|
20
|
+
bool gpuInfoSetCudaDevice(const int device, gpuInfoErrorLogCallback_t errorLogCallback) {
|
|
21
|
+
int current_device;
|
|
22
|
+
auto getDeviceResult = cudaGetDevice(¤t_device);
|
|
23
|
+
|
|
24
|
+
if (getDeviceResult != cudaSuccess) {
|
|
25
|
+
errorLogCallback(cudaGetErrorString(getDeviceResult));
|
|
26
|
+
return false;
|
|
27
|
+
}
|
|
28
|
+
|
|
29
|
+
if (device == current_device) {
|
|
30
|
+
return true;
|
|
31
|
+
}
|
|
32
|
+
|
|
33
|
+
const auto setDeviceResult = cudaSetDevice(device);
|
|
34
|
+
|
|
35
|
+
if (setDeviceResult != cudaSuccess) {
|
|
36
|
+
errorLogCallback(cudaGetErrorString(setDeviceResult));
|
|
37
|
+
return false;
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
return true;
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
bool gpuInfoGetCudaDeviceInfo(int device, size_t * total, size_t * used, gpuInfoErrorLogCallback_t errorLogCallback) {
|
|
44
|
+
gpuInfoSetCudaDevice(device, errorLogCallback);
|
|
45
|
+
|
|
46
|
+
size_t freeMem;
|
|
47
|
+
size_t totalMem;
|
|
48
|
+
auto getMemInfoResult = cudaMemGetInfo(&freeMem, &totalMem);
|
|
49
|
+
|
|
50
|
+
if (getMemInfoResult != cudaSuccess) {
|
|
51
|
+
errorLogCallback(cudaGetErrorString(getMemInfoResult));
|
|
52
|
+
return false;
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
*total = totalMem;
|
|
56
|
+
*used = totalMem - freeMem;
|
|
57
|
+
|
|
58
|
+
return true;
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
// Returns the number of CUDA (or HIP) devices, or -1 when the count could not
// be read; in that case the runtime's error string is passed to errorLogCallback.
int gpuInfoGetCudaDeviceCount(gpuInfoErrorLogCallback_t errorLogCallback) {
    int count;
    const auto status = cudaGetDeviceCount(&count);

    if (status == cudaSuccess) {
        return count;
    }

    errorLogCallback(cudaGetErrorString(status));
    return -1;
}
|
|
72
|
+
|
|
73
|
+
bool gpuInfoGetTotalCudaDevicesInfo(size_t * total, size_t * used, gpuInfoErrorLogCallback_t errorLogCallback) {
|
|
74
|
+
int deviceCount = gpuInfoGetCudaDeviceCount(errorLogCallback);
|
|
75
|
+
|
|
76
|
+
if (deviceCount < 0) {
|
|
77
|
+
return false;
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
size_t usedMem = 0;
|
|
81
|
+
size_t totalMem = 0;
|
|
82
|
+
|
|
83
|
+
for (int i = 0; i < deviceCount; i++) {
|
|
84
|
+
size_t deviceUsedMem;
|
|
85
|
+
size_t deviceTotalMem;
|
|
86
|
+
|
|
87
|
+
if (!gpuInfoGetCudaDeviceInfo(i, &deviceTotalMem, &deviceUsedMem, errorLogCallback)) {
|
|
88
|
+
return false;
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
usedMem += deviceUsedMem;
|
|
92
|
+
totalMem += deviceTotalMem;
|
|
93
|
+
}
|
|
94
|
+
|
|
95
|
+
*total = totalMem;
|
|
96
|
+
*used = usedMem;
|
|
97
|
+
|
|
98
|
+
return true;
|
|
99
|
+
}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
#include <stdint.h>
|
|
2
|
+
#import <Metal/Metal.h>
|
|
3
|
+
|
|
4
|
+
// Reports memory figures of the system default Metal device.
//
// total: receives the device's recommendedMaxWorkingSetSize.
// used:  receives the device's currentAllocatedSize.
//
// Both outputs are set to 0 when there is no default Metal device.
void get_metal_gpu_info(uint64_t * total, uint64_t * used) {
    id<MTLDevice> gpu = MTLCreateSystemDefaultDevice();

    if (!gpu) {
        *total = 0;
        *used = 0;
    } else {
        *total = gpu.recommendedMaxWorkingSetSize;
        *used = gpu.currentAllocatedSize;
    }

    // NOTE(review): the explicit release presumably means this file is built without ARC - confirm
    [gpu release];
    gpu = nil;
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"buildOptions":{"customCmakeOptions":{},"progressLogs":true,"platform":"linux","arch":"arm64","computeLayers":{"metal":false,"cuda":false},"llamaCpp":{"repo":"ggerganov/llama.cpp","release":"b2174"}}}
|
|
Binary file
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"buildOptions":{"customCmakeOptions":{},"progressLogs":true,"platform":"linux","arch":"armv7l","computeLayers":{"metal":false,"cuda":false},"llamaCpp":{"repo":"ggerganov/llama.cpp","release":"b2174"}}}
|