node-llama-cpp 3.0.0-beta.1 → 3.0.0-beta.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -0
- package/dist/ChatWrapper.d.ts +49 -0
- package/dist/ChatWrapper.js +120 -0
- package/dist/ChatWrapper.js.map +1 -0
- package/dist/{utils/getBin.d.ts → bindings/AddonTypes.d.ts} +11 -4
- package/dist/bindings/AddonTypes.js +2 -0
- package/dist/bindings/AddonTypes.js.map +1 -0
- package/dist/bindings/Llama.d.ts +23 -0
- package/dist/bindings/Llama.js +225 -0
- package/dist/bindings/Llama.js.map +1 -0
- package/dist/bindings/getLlama.d.ts +86 -0
- package/dist/bindings/getLlama.js +225 -0
- package/dist/bindings/getLlama.js.map +1 -0
- package/dist/bindings/types.d.ts +33 -0
- package/dist/bindings/types.js +30 -0
- package/dist/bindings/types.js.map +1 -0
- package/dist/bindings/utils/NoBinaryFoundError.d.ts +2 -0
- package/dist/bindings/utils/NoBinaryFoundError.js +7 -0
- package/dist/bindings/utils/NoBinaryFoundError.js.map +1 -0
- package/dist/{utils → bindings/utils}/binariesGithubRelease.js +1 -1
- package/dist/bindings/utils/binariesGithubRelease.js.map +1 -0
- package/dist/bindings/utils/clearAllLocalBuilds.d.ts +1 -0
- package/dist/bindings/utils/clearAllLocalBuilds.js +47 -0
- package/dist/bindings/utils/clearAllLocalBuilds.js.map +1 -0
- package/dist/bindings/utils/cloneLlamaCppRepo.d.ts +11 -0
- package/dist/bindings/utils/cloneLlamaCppRepo.js +155 -0
- package/dist/bindings/utils/cloneLlamaCppRepo.js.map +1 -0
- package/dist/bindings/utils/compileLLamaCpp.d.ts +12 -0
- package/dist/bindings/utils/compileLLamaCpp.js +157 -0
- package/dist/bindings/utils/compileLLamaCpp.js.map +1 -0
- package/dist/bindings/utils/getBuildFolderNameForBuildOptions.d.ts +5 -0
- package/dist/bindings/utils/getBuildFolderNameForBuildOptions.js +85 -0
- package/dist/bindings/utils/getBuildFolderNameForBuildOptions.js.map +1 -0
- package/dist/bindings/utils/getCanUsePrebuiltBinaries.d.ts +1 -0
- package/dist/bindings/utils/getCanUsePrebuiltBinaries.js +8 -0
- package/dist/bindings/utils/getCanUsePrebuiltBinaries.js.map +1 -0
- package/dist/bindings/utils/getExampleUsageCodeOfGetLlama.d.ts +2 -0
- package/dist/bindings/utils/getExampleUsageCodeOfGetLlama.js +21 -0
- package/dist/bindings/utils/getExampleUsageCodeOfGetLlama.js.map +1 -0
- package/dist/bindings/utils/getPlatform.d.ts +2 -0
- package/dist/bindings/utils/getPlatform.js +15 -0
- package/dist/bindings/utils/getPlatform.js.map +1 -0
- package/dist/bindings/utils/lastBuildInfo.d.ts +6 -0
- package/dist/bindings/utils/lastBuildInfo.js +17 -0
- package/dist/bindings/utils/lastBuildInfo.js.map +1 -0
- package/dist/bindings/utils/logBinaryUsageExampleToConsole.d.ts +2 -0
- package/dist/bindings/utils/logBinaryUsageExampleToConsole.js +28 -0
- package/dist/bindings/utils/logBinaryUsageExampleToConsole.js.map +1 -0
- package/dist/bindings/utils/resolveCustomCmakeOptions.d.ts +1 -0
- package/dist/bindings/utils/resolveCustomCmakeOptions.js +43 -0
- package/dist/bindings/utils/resolveCustomCmakeOptions.js.map +1 -0
- package/dist/chatWrappers/AlpacaChatWrapper.d.ts +12 -0
- package/dist/chatWrappers/AlpacaChatWrapper.js +21 -0
- package/dist/chatWrappers/AlpacaChatWrapper.js.map +1 -0
- package/dist/chatWrappers/ChatMLChatWrapper.d.ts +18 -0
- package/dist/chatWrappers/ChatMLChatWrapper.js +83 -0
- package/dist/chatWrappers/ChatMLChatWrapper.js.map +1 -0
- package/dist/chatWrappers/EmptyChatWrapper.d.ts +4 -0
- package/dist/chatWrappers/EmptyChatWrapper.js +5 -0
- package/dist/chatWrappers/EmptyChatWrapper.js.map +1 -0
- package/dist/chatWrappers/FalconChatWrapper.d.ts +21 -0
- package/dist/chatWrappers/FalconChatWrapper.js +104 -0
- package/dist/chatWrappers/FalconChatWrapper.js.map +1 -0
- package/dist/chatWrappers/FunctionaryChatWrapper.d.ts +41 -0
- package/dist/chatWrappers/FunctionaryChatWrapper.js +200 -0
- package/dist/chatWrappers/FunctionaryChatWrapper.js.map +1 -0
- package/dist/chatWrappers/GeneralChatWrapper.d.ts +21 -0
- package/dist/chatWrappers/GeneralChatWrapper.js +112 -0
- package/dist/chatWrappers/GeneralChatWrapper.js.map +1 -0
- package/dist/chatWrappers/LlamaChatWrapper.d.ts +13 -0
- package/dist/chatWrappers/LlamaChatWrapper.js +78 -0
- package/dist/chatWrappers/LlamaChatWrapper.js.map +1 -0
- package/dist/chatWrappers/resolveChatWrapperBasedOnModel.d.ts +5 -5
- package/dist/chatWrappers/resolveChatWrapperBasedOnModel.js +28 -17
- package/dist/chatWrappers/resolveChatWrapperBasedOnModel.js.map +1 -1
- package/dist/cli/cli.js +4 -0
- package/dist/cli/cli.js.map +1 -1
- package/dist/cli/commands/BuildCommand.d.ts +2 -1
- package/dist/cli/commands/BuildCommand.js +50 -10
- package/dist/cli/commands/BuildCommand.js.map +1 -1
- package/dist/cli/commands/ChatCommand.d.ts +10 -3
- package/dist/cli/commands/ChatCommand.js +152 -42
- package/dist/cli/commands/ChatCommand.js.map +1 -1
- package/dist/cli/commands/ClearCommand.js +4 -6
- package/dist/cli/commands/ClearCommand.js.map +1 -1
- package/dist/cli/commands/DebugCommand.d.ts +7 -0
- package/dist/cli/commands/DebugCommand.js +59 -0
- package/dist/cli/commands/DebugCommand.js.map +1 -0
- package/dist/cli/commands/DownloadCommand.d.ts +2 -1
- package/dist/cli/commands/DownloadCommand.js +47 -40
- package/dist/cli/commands/DownloadCommand.js.map +1 -1
- package/dist/cli/commands/OnPostInstallCommand.js +7 -10
- package/dist/cli/commands/OnPostInstallCommand.js.map +1 -1
- package/dist/config.d.ts +10 -3
- package/dist/config.js +18 -7
- package/dist/config.js.map +1 -1
- package/dist/evaluator/LlamaChat/LlamaChat.d.ts +185 -0
- package/dist/evaluator/LlamaChat/LlamaChat.js +705 -0
- package/dist/evaluator/LlamaChat/LlamaChat.js.map +1 -0
- package/dist/evaluator/LlamaChat/utils/FunctionCallGrammar.d.ts +22 -0
- package/dist/evaluator/LlamaChat/utils/FunctionCallGrammar.js +121 -0
- package/dist/evaluator/LlamaChat/utils/FunctionCallGrammar.js.map +1 -0
- package/dist/evaluator/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.d.ts +16 -0
- package/dist/evaluator/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.js +135 -0
- package/dist/evaluator/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.js.map +1 -0
- package/dist/{llamaEvaluator → evaluator/LlamaChatSession}/LlamaChatSession.d.ts +59 -25
- package/dist/evaluator/LlamaChatSession/LlamaChatSession.js +219 -0
- package/dist/evaluator/LlamaChatSession/LlamaChatSession.js.map +1 -0
- package/dist/evaluator/LlamaChatSession/utils/defineChatSessionFunction.d.ts +7 -0
- package/dist/evaluator/LlamaChatSession/utils/defineChatSessionFunction.js +8 -0
- package/dist/evaluator/LlamaChatSession/utils/defineChatSessionFunction.js.map +1 -0
- package/dist/{llamaEvaluator → evaluator}/LlamaContext/LlamaContext.d.ts +20 -23
- package/dist/{llamaEvaluator → evaluator}/LlamaContext/LlamaContext.js +71 -105
- package/dist/evaluator/LlamaContext/LlamaContext.js.map +1 -0
- package/dist/{llamaEvaluator → evaluator}/LlamaContext/types.d.ts +6 -14
- package/dist/evaluator/LlamaContext/types.js.map +1 -0
- package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/firstInFirstOutStrategy.js.map +1 -0
- package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/maximumParallelismStrategy.js.map +1 -0
- package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizingStrategy.js.map +1 -0
- package/dist/evaluator/LlamaEmbeddingContext.d.ts +37 -0
- package/dist/evaluator/LlamaEmbeddingContext.js +78 -0
- package/dist/evaluator/LlamaEmbeddingContext.js.map +1 -0
- package/dist/evaluator/LlamaGrammar.d.ts +30 -0
- package/dist/{llamaEvaluator → evaluator}/LlamaGrammar.js +14 -18
- package/dist/evaluator/LlamaGrammar.js.map +1 -0
- package/dist/{llamaEvaluator → evaluator}/LlamaGrammarEvaluationState.js +4 -4
- package/dist/evaluator/LlamaGrammarEvaluationState.js.map +1 -0
- package/dist/{llamaEvaluator → evaluator}/LlamaJsonSchemaGrammar.d.ts +2 -1
- package/dist/{llamaEvaluator → evaluator}/LlamaJsonSchemaGrammar.js +4 -2
- package/dist/evaluator/LlamaJsonSchemaGrammar.js.map +1 -0
- package/dist/{llamaEvaluator → evaluator}/LlamaModel.d.ts +14 -5
- package/dist/{llamaEvaluator → evaluator}/LlamaModel.js +18 -9
- package/dist/evaluator/LlamaModel.js.map +1 -0
- package/dist/index.d.ts +27 -16
- package/dist/index.js +26 -14
- package/dist/index.js.map +1 -1
- package/dist/state.d.ts +2 -0
- package/dist/state.js +7 -0
- package/dist/state.js.map +1 -1
- package/dist/types.d.ts +41 -3
- package/dist/types.js +5 -1
- package/dist/types.js.map +1 -1
- package/dist/utils/LlamaText.d.ts +42 -0
- package/dist/utils/LlamaText.js +207 -0
- package/dist/utils/LlamaText.js.map +1 -0
- package/dist/utils/StopGenerationDetector.d.ts +28 -0
- package/dist/utils/StopGenerationDetector.js +205 -0
- package/dist/utils/StopGenerationDetector.js.map +1 -0
- package/dist/utils/TokenStreamRegulator.d.ts +30 -0
- package/dist/utils/TokenStreamRegulator.js +96 -0
- package/dist/utils/TokenStreamRegulator.js.map +1 -0
- package/dist/utils/appendUserMessageToChatHistory.d.ts +2 -0
- package/dist/utils/appendUserMessageToChatHistory.js +18 -0
- package/dist/utils/appendUserMessageToChatHistory.js.map +1 -0
- package/dist/utils/cmake.js +16 -11
- package/dist/utils/cmake.js.map +1 -1
- package/dist/utils/compareTokens.d.ts +2 -0
- package/dist/utils/compareTokens.js +4 -0
- package/dist/utils/compareTokens.js.map +1 -0
- package/dist/utils/findCharacterRemovalCountToFitChatHistoryInContext.d.ts +18 -0
- package/dist/utils/findCharacterRemovalCountToFitChatHistoryInContext.js +61 -0
- package/dist/utils/findCharacterRemovalCountToFitChatHistoryInContext.js.map +1 -0
- package/dist/utils/gbnfJson/GbnfGrammarGenerator.d.ts +1 -0
- package/dist/utils/gbnfJson/GbnfGrammarGenerator.js +17 -0
- package/dist/utils/gbnfJson/GbnfGrammarGenerator.js.map +1 -1
- package/dist/utils/gbnfJson/GbnfTerminal.d.ts +1 -1
- package/dist/utils/gbnfJson/GbnfTerminal.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfVerbatimText.d.ts +6 -0
- package/dist/utils/gbnfJson/terminals/GbnfVerbatimText.js +21 -0
- package/dist/utils/gbnfJson/terminals/GbnfVerbatimText.js.map +1 -0
- package/dist/utils/gbnfJson/types.d.ts +1 -1
- package/dist/utils/gbnfJson/types.js.map +1 -1
- package/dist/utils/gbnfJson/utils/validateObjectAgainstGbnfSchema.d.ts +1 -0
- package/dist/utils/gbnfJson/utils/validateObjectAgainstGbnfSchema.js.map +1 -1
- package/dist/utils/getConsoleLogPrefix.d.ts +1 -0
- package/dist/utils/getConsoleLogPrefix.js +9 -0
- package/dist/utils/getConsoleLogPrefix.js.map +1 -0
- package/dist/utils/getGbnfGrammarForGbnfJsonSchema.js +1 -15
- package/dist/utils/getGbnfGrammarForGbnfJsonSchema.js.map +1 -1
- package/dist/utils/getGrammarsFolder.d.ts +2 -1
- package/dist/utils/getGrammarsFolder.js +8 -7
- package/dist/utils/getGrammarsFolder.js.map +1 -1
- package/dist/utils/getModuleVersion.d.ts +1 -0
- package/dist/utils/getModuleVersion.js +13 -0
- package/dist/utils/getModuleVersion.js.map +1 -0
- package/dist/utils/getTypeScriptTypeStringForGbnfJsonSchema.d.ts +2 -0
- package/dist/utils/getTypeScriptTypeStringForGbnfJsonSchema.js +49 -0
- package/dist/utils/getTypeScriptTypeStringForGbnfJsonSchema.js.map +1 -0
- package/dist/utils/gitReleaseBundles.js +6 -5
- package/dist/utils/gitReleaseBundles.js.map +1 -1
- package/dist/utils/hashString.d.ts +1 -0
- package/dist/utils/hashString.js +8 -0
- package/dist/utils/hashString.js.map +1 -0
- package/dist/utils/isLockfileActive.d.ts +4 -0
- package/dist/utils/isLockfileActive.js +12 -0
- package/dist/utils/isLockfileActive.js.map +1 -0
- package/dist/utils/parseModelTypeDescription.d.ts +1 -1
- package/dist/utils/prettyPrintObject.d.ts +1 -0
- package/dist/utils/prettyPrintObject.js +40 -0
- package/dist/utils/prettyPrintObject.js.map +1 -0
- package/dist/utils/removeNullFields.d.ts +1 -0
- package/dist/utils/removeNullFields.js +8 -0
- package/dist/utils/removeNullFields.js.map +1 -1
- package/dist/utils/resolveChatWrapper.d.ts +4 -0
- package/dist/utils/resolveChatWrapper.js +16 -0
- package/dist/utils/resolveChatWrapper.js.map +1 -0
- package/dist/utils/resolveGithubRelease.d.ts +2 -0
- package/dist/utils/resolveGithubRelease.js +36 -0
- package/dist/utils/resolveGithubRelease.js.map +1 -0
- package/dist/utils/spawnCommand.d.ts +1 -1
- package/dist/utils/spawnCommand.js +4 -2
- package/dist/utils/spawnCommand.js.map +1 -1
- package/dist/utils/tokenizeInput.d.ts +3 -0
- package/dist/utils/tokenizeInput.js +9 -0
- package/dist/utils/tokenizeInput.js.map +1 -0
- package/dist/utils/truncateTextAndRoundToWords.d.ts +8 -0
- package/dist/utils/truncateTextAndRoundToWords.js +27 -0
- package/dist/utils/truncateTextAndRoundToWords.js.map +1 -0
- package/dist/utils/waitForLockfileRelease.d.ts +5 -0
- package/dist/utils/waitForLockfileRelease.js +20 -0
- package/dist/utils/waitForLockfileRelease.js.map +1 -0
- package/dist/utils/withLockfile.d.ts +7 -0
- package/dist/utils/withLockfile.js +44 -0
- package/dist/utils/withLockfile.js.map +1 -0
- package/dist/utils/withOra.js +11 -1
- package/dist/utils/withOra.js.map +1 -1
- package/dist/utils/withStatusLogs.d.ts +2 -1
- package/dist/utils/withStatusLogs.js +11 -8
- package/dist/utils/withStatusLogs.js.map +1 -1
- package/llama/.clang-format +1 -2
- package/llama/CMakeLists.txt +87 -2
- package/llama/addon.cpp +256 -22
- package/llama/binariesGithubRelease.json +1 -1
- package/llama/gitRelease.bundle +0 -0
- package/llama/gpuInfo/cuda-gpu-info.cu +99 -0
- package/llama/gpuInfo/cuda-gpu-info.h +7 -0
- package/llama/gpuInfo/metal-gpu-info.h +5 -0
- package/llama/gpuInfo/metal-gpu-info.mm +17 -0
- package/llama/llama.cpp.info.json +4 -0
- package/llamaBins/linux-arm64/.buildMetadata.json +1 -0
- package/llamaBins/linux-arm64/llama-addon.node +0 -0
- package/llamaBins/linux-armv7l/.buildMetadata.json +1 -0
- package/llamaBins/linux-armv7l/llama-addon.node +0 -0
- package/llamaBins/linux-x64/.buildMetadata.json +1 -0
- package/llamaBins/linux-x64/llama-addon.node +0 -0
- package/llamaBins/linux-x64-cuda/.buildMetadata.json +1 -0
- package/llamaBins/linux-x64-cuda/llama-addon.node +0 -0
- package/llamaBins/mac-arm64-metal/.buildMetadata.json +1 -0
- package/llamaBins/mac-arm64-metal/ggml-metal.metal +6119 -0
- package/llamaBins/mac-arm64-metal/llama-addon.node +0 -0
- package/llamaBins/mac-x64/.buildMetadata.json +1 -0
- package/llamaBins/mac-x64/llama-addon.node +0 -0
- package/llamaBins/win-x64/.buildMetadata.json +1 -0
- package/llamaBins/win-x64/llama-addon.exp +0 -0
- package/llamaBins/win-x64/llama-addon.node +0 -0
- package/llamaBins/win-x64-cuda/.buildMetadata.json +1 -0
- package/llamaBins/win-x64-cuda/llama-addon.exp +0 -0
- package/llamaBins/win-x64-cuda/llama-addon.lib +0 -0
- package/llamaBins/win-x64-cuda/llama-addon.node +0 -0
- package/package.json +37 -15
- package/dist/ChatPromptWrapper.d.ts +0 -11
- package/dist/ChatPromptWrapper.js +0 -20
- package/dist/ChatPromptWrapper.js.map +0 -1
- package/dist/chatWrappers/ChatMLChatPromptWrapper.d.ts +0 -12
- package/dist/chatWrappers/ChatMLChatPromptWrapper.js +0 -22
- package/dist/chatWrappers/ChatMLChatPromptWrapper.js.map +0 -1
- package/dist/chatWrappers/EmptyChatPromptWrapper.d.ts +0 -4
- package/dist/chatWrappers/EmptyChatPromptWrapper.js +0 -5
- package/dist/chatWrappers/EmptyChatPromptWrapper.js.map +0 -1
- package/dist/chatWrappers/FalconChatPromptWrapper.d.ts +0 -19
- package/dist/chatWrappers/FalconChatPromptWrapper.js +0 -33
- package/dist/chatWrappers/FalconChatPromptWrapper.js.map +0 -1
- package/dist/chatWrappers/GeneralChatPromptWrapper.d.ts +0 -19
- package/dist/chatWrappers/GeneralChatPromptWrapper.js +0 -38
- package/dist/chatWrappers/GeneralChatPromptWrapper.js.map +0 -1
- package/dist/chatWrappers/LlamaChatPromptWrapper.d.ts +0 -12
- package/dist/chatWrappers/LlamaChatPromptWrapper.js +0 -23
- package/dist/chatWrappers/LlamaChatPromptWrapper.js.map +0 -1
- package/dist/chatWrappers/generateContextTextFromConversationHistory.d.ts +0 -15
- package/dist/chatWrappers/generateContextTextFromConversationHistory.js +0 -39
- package/dist/chatWrappers/generateContextTextFromConversationHistory.js.map +0 -1
- package/dist/llamaEvaluator/LlamaBins.d.ts +0 -19
- package/dist/llamaEvaluator/LlamaBins.js +0 -5
- package/dist/llamaEvaluator/LlamaBins.js.map +0 -1
- package/dist/llamaEvaluator/LlamaChatSession.js +0 -290
- package/dist/llamaEvaluator/LlamaChatSession.js.map +0 -1
- package/dist/llamaEvaluator/LlamaContext/LlamaContext.js.map +0 -1
- package/dist/llamaEvaluator/LlamaContext/types.js.map +0 -1
- package/dist/llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/firstInFirstOutStrategy.js.map +0 -1
- package/dist/llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/maximumParallelismStrategy.js.map +0 -1
- package/dist/llamaEvaluator/LlamaContext/utils/resolveBatchItemsPrioritizingStrategy.js.map +0 -1
- package/dist/llamaEvaluator/LlamaGrammar.d.ts +0 -32
- package/dist/llamaEvaluator/LlamaGrammar.js.map +0 -1
- package/dist/llamaEvaluator/LlamaGrammarEvaluationState.js.map +0 -1
- package/dist/llamaEvaluator/LlamaJsonSchemaGrammar.js.map +0 -1
- package/dist/llamaEvaluator/LlamaModel.js.map +0 -1
- package/dist/utils/binariesGithubRelease.js.map +0 -1
- package/dist/utils/clearLlamaBuild.d.ts +0 -1
- package/dist/utils/clearLlamaBuild.js +0 -12
- package/dist/utils/clearLlamaBuild.js.map +0 -1
- package/dist/utils/cloneLlamaCppRepo.d.ts +0 -2
- package/dist/utils/cloneLlamaCppRepo.js +0 -102
- package/dist/utils/cloneLlamaCppRepo.js.map +0 -1
- package/dist/utils/compileLLamaCpp.d.ts +0 -8
- package/dist/utils/compileLLamaCpp.js +0 -127
- package/dist/utils/compileLLamaCpp.js.map +0 -1
- package/dist/utils/getBin.js +0 -78
- package/dist/utils/getBin.js.map +0 -1
- package/dist/utils/getReleaseInfo.d.ts +0 -7
- package/dist/utils/getReleaseInfo.js +0 -30
- package/dist/utils/getReleaseInfo.js.map +0 -1
- package/dist/utils/getTextCompletion.d.ts +0 -3
- package/dist/utils/getTextCompletion.js +0 -12
- package/dist/utils/getTextCompletion.js.map +0 -1
- package/dist/utils/usedBinFlag.d.ts +0 -6
- package/dist/utils/usedBinFlag.js +0 -15
- package/dist/utils/usedBinFlag.js.map +0 -1
- package/llama/usedBin.json +0 -3
- package/llamaBins/mac-arm64/ggml-metal.metal +0 -2929
- package/llamaBins/mac-arm64/llama-addon.node +0 -0
- package/llamaBins/mac-x64/ggml-metal.metal +0 -2929
- /package/dist/{utils → bindings/utils}/binariesGithubRelease.d.ts +0 -0
- /package/dist/{llamaEvaluator → evaluator}/LlamaContext/types.js +0 -0
- /package/dist/{llamaEvaluator → evaluator}/LlamaContext/utils/batchItemsPrioritizingStrategies/firstInFirstOutStrategy.d.ts +0 -0
- /package/dist/{llamaEvaluator → evaluator}/LlamaContext/utils/batchItemsPrioritizingStrategies/firstInFirstOutStrategy.js +0 -0
- /package/dist/{llamaEvaluator → evaluator}/LlamaContext/utils/batchItemsPrioritizingStrategies/maximumParallelismStrategy.d.ts +0 -0
- /package/dist/{llamaEvaluator → evaluator}/LlamaContext/utils/batchItemsPrioritizingStrategies/maximumParallelismStrategy.js +0 -0
- /package/dist/{llamaEvaluator → evaluator}/LlamaContext/utils/resolveBatchItemsPrioritizingStrategy.d.ts +0 -0
- /package/dist/{llamaEvaluator → evaluator}/LlamaContext/utils/resolveBatchItemsPrioritizingStrategy.js +0 -0
- /package/dist/{llamaEvaluator → evaluator}/LlamaGrammarEvaluationState.d.ts +0 -0
package/llama/addon.cpp
CHANGED
|
@@ -9,21 +9,84 @@
|
|
|
9
9
|
#include "llama.h"
|
|
10
10
|
#include "napi.h"
|
|
11
11
|
|
|
12
|
-
|
|
12
|
+
#ifdef GPU_INFO_USE_CUBLAS
|
|
13
|
+
# include "gpuInfo/cuda-gpu-info.h"
|
|
14
|
+
#endif
|
|
15
|
+
#ifdef GPU_INFO_USE_METAL
|
|
16
|
+
# include "gpuInfo/metal-gpu-info.h"
|
|
17
|
+
#endif
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
struct addon_logger_log {
|
|
21
|
+
public:
|
|
22
|
+
const int logLevelNumber;
|
|
23
|
+
const std::stringstream* stringStream;
|
|
24
|
+
};
|
|
25
|
+
|
|
26
|
+
static void addonLlamaCppLogCallback(ggml_log_level level, const char* text, void* user_data);
|
|
27
|
+
|
|
28
|
+
using AddonThreadSafeLogCallbackFunctionContext = Napi::Reference<Napi::Value>;
|
|
29
|
+
void addonCallJsLogCallback(
|
|
30
|
+
Napi::Env env, Napi::Function callback, AddonThreadSafeLogCallbackFunctionContext* context, addon_logger_log* data
|
|
31
|
+
);
|
|
32
|
+
using AddonThreadSafeLogCallbackFunction =
|
|
33
|
+
Napi::TypedThreadSafeFunction<AddonThreadSafeLogCallbackFunctionContext, addon_logger_log, addonCallJsLogCallback>;
|
|
34
|
+
|
|
35
|
+
AddonThreadSafeLogCallbackFunction addonThreadSafeLoggerCallback;
|
|
36
|
+
bool addonJsLoggerCallbackSet = false;
|
|
37
|
+
int addonLoggerLogLevel = 5;
|
|
38
|
+
|
|
39
|
+
std::string addon_model_token_to_piece(const struct llama_model* model, llama_token token) {
|
|
13
40
|
std::vector<char> result(8, 0);
|
|
14
41
|
const int n_tokens = llama_token_to_piece(model, token, result.data(), result.size());
|
|
15
42
|
if (n_tokens < 0) {
|
|
16
43
|
result.resize(-n_tokens);
|
|
17
44
|
int check = llama_token_to_piece(model, token, result.data(), result.size());
|
|
18
45
|
GGML_ASSERT(check == -n_tokens);
|
|
19
|
-
}
|
|
20
|
-
else {
|
|
46
|
+
} else {
|
|
21
47
|
result.resize(n_tokens);
|
|
22
48
|
}
|
|
23
49
|
|
|
24
50
|
return std::string(result.data(), result.size());
|
|
25
51
|
}
|
|
26
52
|
|
|
53
|
+
#ifdef GPU_INFO_USE_CUBLAS
|
|
54
|
+
void lodCudaError(const char* message) {
|
|
55
|
+
addonLlamaCppLogCallback(GGML_LOG_LEVEL_ERROR, (std::string("CUDA error: ") + std::string(message)).c_str(), nullptr);
|
|
56
|
+
}
|
|
57
|
+
#endif
|
|
58
|
+
|
|
59
|
+
Napi::Value getGpuVramInfo(const Napi::CallbackInfo& info) {
|
|
60
|
+
uint64_t total = 0;
|
|
61
|
+
uint64_t used = 0;
|
|
62
|
+
|
|
63
|
+
#ifdef GPU_INFO_USE_CUBLAS
|
|
64
|
+
size_t cudaDeviceTotal = 0;
|
|
65
|
+
size_t cudaDeviceUsed = 0;
|
|
66
|
+
bool cudeGetInfoSuccess = gpuInfoGetTotalCudaDevicesInfo(&cudaDeviceTotal, &cudaDeviceUsed, lodCudaError);
|
|
67
|
+
|
|
68
|
+
if (cudeGetInfoSuccess) {
|
|
69
|
+
total += cudaDeviceTotal;
|
|
70
|
+
used += cudaDeviceUsed;
|
|
71
|
+
}
|
|
72
|
+
#endif
|
|
73
|
+
|
|
74
|
+
#ifdef GPU_INFO_USE_METAL
|
|
75
|
+
uint64_t metalDeviceTotal = 0;
|
|
76
|
+
uint64_t metalDeviceUsed = 0;
|
|
77
|
+
get_metal_gpu_info(&metalDeviceTotal, &metalDeviceUsed);
|
|
78
|
+
|
|
79
|
+
total += metalDeviceTotal;
|
|
80
|
+
used += metalDeviceUsed;
|
|
81
|
+
#endif
|
|
82
|
+
|
|
83
|
+
Napi::Object result = Napi::Object::New(info.Env());
|
|
84
|
+
result.Set("total", Napi::Number::From(info.Env(), total));
|
|
85
|
+
result.Set("used", Napi::Number::From(info.Env(), used));
|
|
86
|
+
|
|
87
|
+
return result;
|
|
88
|
+
}
|
|
89
|
+
|
|
27
90
|
class AddonModel : public Napi::ObjectWrap<AddonModel> {
|
|
28
91
|
public:
|
|
29
92
|
llama_model_params model_params;
|
|
@@ -95,8 +158,9 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
|
|
|
95
158
|
}
|
|
96
159
|
|
|
97
160
|
std::string text = info[0].As<Napi::String>().Utf8Value();
|
|
161
|
+
bool specialTokens = info[1].As<Napi::Boolean>().Value();
|
|
98
162
|
|
|
99
|
-
std::vector<llama_token> tokens = llama_tokenize(model, text,
|
|
163
|
+
std::vector<llama_token> tokens = llama_tokenize(model, text, false, specialTokens);
|
|
100
164
|
|
|
101
165
|
Napi::Uint32Array result = Napi::Uint32Array::New(info.Env(), tokens.size());
|
|
102
166
|
for (size_t i = 0; i < tokens.size(); ++i) {
|
|
@@ -162,7 +226,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
|
|
|
162
226
|
Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
|
|
163
227
|
return info.Env().Undefined();
|
|
164
228
|
}
|
|
165
|
-
|
|
229
|
+
|
|
166
230
|
char model_desc[128];
|
|
167
231
|
int actual_length = llama_model_desc(model, model_desc, sizeof(model_desc));
|
|
168
232
|
|
|
@@ -265,7 +329,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
|
|
|
265
329
|
InstanceMethod("suffixToken", &AddonModel::SuffixToken),
|
|
266
330
|
InstanceMethod("eotToken", &AddonModel::EotToken),
|
|
267
331
|
InstanceMethod("getTokenString", &AddonModel::GetTokenString),
|
|
268
|
-
InstanceMethod("dispose", &AddonModel::Dispose)
|
|
332
|
+
InstanceMethod("dispose", &AddonModel::Dispose),
|
|
269
333
|
}
|
|
270
334
|
)
|
|
271
335
|
);
|
|
@@ -352,29 +416,23 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
|
|
|
352
416
|
context_params.seed = -1;
|
|
353
417
|
context_params.n_ctx = 4096;
|
|
354
418
|
context_params.n_threads = 6;
|
|
355
|
-
context_params.n_threads_batch
|
|
419
|
+
context_params.n_threads_batch = context_params.n_threads;
|
|
356
420
|
|
|
357
421
|
if (info.Length() > 1 && info[1].IsObject()) {
|
|
358
422
|
Napi::Object options = info[1].As<Napi::Object>();
|
|
359
423
|
|
|
360
|
-
if (options.Has("
|
|
361
|
-
context_params.seed =
|
|
424
|
+
if (options.Has("noSeed")) {
|
|
425
|
+
context_params.seed = time(NULL);
|
|
426
|
+
} else if (options.Has("seed")) {
|
|
427
|
+
context_params.seed = options.Get("seed").As<Napi::Number>().Uint32Value();
|
|
362
428
|
}
|
|
363
429
|
|
|
364
430
|
if (options.Has("contextSize")) {
|
|
365
|
-
context_params.n_ctx = options.Get("contextSize").As<Napi::Number>().
|
|
431
|
+
context_params.n_ctx = options.Get("contextSize").As<Napi::Number>().Uint32Value();
|
|
366
432
|
}
|
|
367
433
|
|
|
368
434
|
if (options.Has("batchSize")) {
|
|
369
|
-
context_params.n_batch = options.Get("batchSize").As<Napi::Number>().
|
|
370
|
-
}
|
|
371
|
-
|
|
372
|
-
if (options.Has("f16Kv")) {
|
|
373
|
-
context_params.f16_kv = options.Get("f16Kv").As<Napi::Boolean>().Value();
|
|
374
|
-
}
|
|
375
|
-
|
|
376
|
-
if (options.Has("logitsAll")) {
|
|
377
|
-
context_params.logits_all = options.Get("logitsAll").As<Napi::Boolean>().Value();
|
|
435
|
+
context_params.n_batch = options.Get("batchSize").As<Napi::Number>().Uint32Value();
|
|
378
436
|
}
|
|
379
437
|
|
|
380
438
|
if (options.Has("embedding")) {
|
|
@@ -382,8 +440,11 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
|
|
|
382
440
|
}
|
|
383
441
|
|
|
384
442
|
if (options.Has("threads")) {
|
|
385
|
-
|
|
386
|
-
|
|
443
|
+
const auto n_threads = options.Get("threads").As<Napi::Number>().Uint32Value();
|
|
444
|
+
const auto resolved_n_threads = n_threads == 0 ? std::thread::hardware_concurrency() : n_threads;
|
|
445
|
+
|
|
446
|
+
context_params.n_threads = resolved_n_threads;
|
|
447
|
+
context_params.n_threads_batch = resolved_n_threads;
|
|
387
448
|
}
|
|
388
449
|
}
|
|
389
450
|
|
|
@@ -533,6 +594,41 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
|
|
|
533
594
|
Napi::Value DecodeBatch(const Napi::CallbackInfo& info);
|
|
534
595
|
Napi::Value SampleToken(const Napi::CallbackInfo& info);
|
|
535
596
|
|
|
597
|
+
Napi::Value AcceptGrammarEvaluationStateToken(const Napi::CallbackInfo& info) {
|
|
598
|
+
AddonGrammarEvaluationState* grammar_evaluation_state =
|
|
599
|
+
Napi::ObjectWrap<AddonGrammarEvaluationState>::Unwrap(info[0].As<Napi::Object>());
|
|
600
|
+
llama_token tokenId = info[1].As<Napi::Number>().Int32Value();
|
|
601
|
+
|
|
602
|
+
if ((grammar_evaluation_state)->grammar != nullptr) {
|
|
603
|
+
llama_grammar_accept_token(ctx, (grammar_evaluation_state)->grammar, tokenId);
|
|
604
|
+
}
|
|
605
|
+
|
|
606
|
+
return info.Env().Undefined();
|
|
607
|
+
}
|
|
608
|
+
|
|
609
|
+
Napi::Value GetEmbedding(const Napi::CallbackInfo& info) {
|
|
610
|
+
if (disposed) {
|
|
611
|
+
Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
|
|
612
|
+
return info.Env().Undefined();
|
|
613
|
+
}
|
|
614
|
+
|
|
615
|
+
const int n_embd = llama_n_embd(model->model);
|
|
616
|
+
const auto* embeddings = llama_get_embeddings(ctx);
|
|
617
|
+
|
|
618
|
+
Napi::Float64Array result = Napi::Float64Array::New(info.Env(), n_embd);
|
|
619
|
+
for (size_t i = 0; i < n_embd; ++i) {
|
|
620
|
+
result[i] = embeddings[i];
|
|
621
|
+
}
|
|
622
|
+
|
|
623
|
+
return result;
|
|
624
|
+
}
|
|
625
|
+
|
|
626
|
+
Napi::Value PrintTimings(const Napi::CallbackInfo& info) {
|
|
627
|
+
llama_print_timings(ctx);
|
|
628
|
+
llama_reset_timings(ctx);
|
|
629
|
+
return info.Env().Undefined();
|
|
630
|
+
}
|
|
631
|
+
|
|
536
632
|
static void init(Napi::Object exports) {
|
|
537
633
|
exports.Set(
|
|
538
634
|
"AddonContext",
|
|
@@ -548,7 +644,10 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
|
|
|
548
644
|
InstanceMethod("shiftSequenceTokenCells", &AddonContext::ShiftSequenceTokenCells),
|
|
549
645
|
InstanceMethod("decodeBatch", &AddonContext::DecodeBatch),
|
|
550
646
|
InstanceMethod("sampleToken", &AddonContext::SampleToken),
|
|
551
|
-
InstanceMethod("
|
|
647
|
+
InstanceMethod("acceptGrammarEvaluationStateToken", &AddonContext::AcceptGrammarEvaluationStateToken),
|
|
648
|
+
InstanceMethod("getEmbedding", &AddonContext::GetEmbedding),
|
|
649
|
+
InstanceMethod("printTimings", &AddonContext::PrintTimings),
|
|
650
|
+
InstanceMethod("dispose", &AddonContext::Dispose),
|
|
552
651
|
}
|
|
553
652
|
)
|
|
554
653
|
);
|
|
@@ -610,6 +709,7 @@ class AddonContextSampleTokenWorker : Napi::AsyncWorker, Napi::Promise::Deferred
|
|
|
610
709
|
bool use_grammar = false;
|
|
611
710
|
llama_token result;
|
|
612
711
|
float temperature = 0.0f;
|
|
712
|
+
float min_p = 0;
|
|
613
713
|
int32_t top_k = 40;
|
|
614
714
|
float top_p = 0.95f;
|
|
615
715
|
float repeat_penalty = 1.10f; // 1.0 = disabled
|
|
@@ -633,6 +733,10 @@ class AddonContextSampleTokenWorker : Napi::AsyncWorker, Napi::Promise::Deferred
|
|
|
633
733
|
temperature = options.Get("temperature").As<Napi::Number>().FloatValue();
|
|
634
734
|
}
|
|
635
735
|
|
|
736
|
+
if (options.Has("minP")) {
|
|
737
|
+
min_p = options.Get("minP").As<Napi::Number>().FloatValue();
|
|
738
|
+
}
|
|
739
|
+
|
|
636
740
|
if (options.Has("topK")) {
|
|
637
741
|
top_k = options.Get("topK").As<Napi::Number>().Int32Value();
|
|
638
742
|
}
|
|
@@ -734,6 +838,7 @@ class AddonContextSampleTokenWorker : Napi::AsyncWorker, Napi::Promise::Deferred
|
|
|
734
838
|
llama_sample_tail_free(ctx->ctx, &candidates_p, tfs_z, min_keep);
|
|
735
839
|
llama_sample_typical(ctx->ctx, &candidates_p, typical_p, min_keep);
|
|
736
840
|
llama_sample_top_p(ctx->ctx, &candidates_p, resolved_top_p, min_keep);
|
|
841
|
+
llama_sample_min_p(ctx->ctx, &candidates_p, min_p, min_keep);
|
|
737
842
|
llama_sample_temp(ctx->ctx, &candidates_p, temperature);
|
|
738
843
|
new_token_id = llama_sample_token(ctx->ctx, &candidates_p);
|
|
739
844
|
}
|
|
@@ -764,15 +869,144 @@ Napi::Value systemInfo(const Napi::CallbackInfo& info) {
|
|
|
764
869
|
return Napi::String::From(info.Env(), llama_print_system_info());
|
|
765
870
|
}
|
|
766
871
|
|
|
872
|
+
int addonGetGgmlLogLevelNumber(ggml_log_level level) {
|
|
873
|
+
switch (level) {
|
|
874
|
+
case GGML_LOG_LEVEL_ERROR: return 2;
|
|
875
|
+
case GGML_LOG_LEVEL_WARN: return 3;
|
|
876
|
+
case GGML_LOG_LEVEL_INFO: return 4;
|
|
877
|
+
case GGML_LOG_LEVEL_DEBUG: return 5;
|
|
878
|
+
}
|
|
879
|
+
|
|
880
|
+
return 1;
|
|
881
|
+
}
|
|
882
|
+
|
|
883
|
+
// Invoked on the JS thread by the thread-safe logger function: forwards a
// queued log record to the JS logger callback, falls back to stdout/stderr
// when the callback cannot be (or fails to be) called, and always frees the
// heap-allocated record.
void addonCallJsLogCallback(
    Napi::Env env, Napi::Function callback, AddonThreadSafeLogCallbackFunctionContext* context, addon_logger_log* data
) {
    bool called = false;

    // only call into JS while the environment is alive and a logger is still registered
    if (env != nullptr && callback != nullptr && addonJsLoggerCallbackSet) {
        try {
            callback.Call({
                Napi::Number::New(env, data->logLevelNumber),
                Napi::String::New(env, data->stringStream->str()),
            });
            called = true;
        } catch (const Napi::Error& e) {
            // a throwing JS callback routes the message to the stdio fallback below
            called = false;
        }
    }

    if (!called && data != nullptr) {
        // level number 2 is the error level — send it to stderr, everything else to stdout
        if (data->logLevelNumber == 2) {
            fputs(data->stringStream->str().c_str(), stderr);
            fflush(stderr);
        } else {
            fputs(data->stringStream->str().c_str(), stdout);
            fflush(stdout);
        }
    }

    // the record is allocated in addonLlamaCppLogCallback; this is its release point
    if (data != nullptr) {
        delete data->stringStream;
        delete data;
    }
}
|
|
915
|
+
|
|
916
|
+
// llama.cpp log hook (installed via llama_log_set in registerCallback).
// Maps the ggml log level to the addon's numeric scale, drops messages above
// the configured verbosity, and forwards the text to the JS logger when one is
// set; otherwise (or when queueing fails) writes directly to stdout/stderr.
static void addonLlamaCppLogCallback(ggml_log_level level, const char* text, void* user_data) {
    int logLevelNumber = addonGetGgmlLogLevelNumber(level);

    // drop messages more verbose than the configured threshold
    if (logLevelNumber > addonLoggerLogLevel) {
        return;
    }

    if (addonJsLoggerCallbackSet) {
        std::stringstream* stringStream = new std::stringstream();
        if (text != nullptr) {
            *stringStream << text;
        }

        // on a successful NonBlockingCall, ownership of `data` (and its stream)
        // passes to addonCallJsLogCallback, which deletes both
        addon_logger_log* data = new addon_logger_log {
            logLevelNumber,
            stringStream,
        };

        auto status = addonThreadSafeLoggerCallback.NonBlockingCall(data);

        if (status == napi_ok) {
            return;
        }

        // fix: the call was not queued, so the JS side will never free the record —
        // delete it here instead of leaking on every dropped log line
        delete stringStream;
        delete data;
    }

    // fix: guard against a null message before handing it to fputs
    if (text == nullptr) {
        return;
    }

    // fix: route by the mapped level number (2 = error), consistent with
    // addonCallJsLogCallback, instead of comparing the raw enum value to 2
    if (logLevelNumber == 2) {
        fputs(text, stderr);
        fflush(stderr);
    } else {
        fputs(text, stdout);
        fflush(stdout);
    }
}
|
|
949
|
+
|
|
950
|
+
// Install (or clear) the JS logger callback used by addonLlamaCppLogCallback.
// Called with a function: wraps it in a thread-safe function so the native
// llama.cpp log hook can invoke it from any thread. Called without a function
// argument: releases any existing logger so output falls back to stdout/stderr.
Napi::Value setLogger(const Napi::CallbackInfo& info) {
    if (info.Length() < 1 || !info[0].IsFunction()) {
        if (addonJsLoggerCallbackSet) {
            // flip the flag before releasing so in-flight log calls stop using the TSFN
            addonJsLoggerCallbackSet = false;
            addonThreadSafeLoggerCallback.Release();
        }

        return info.Env().Undefined();
    }

    auto addonLoggerJSCallback = info[0].As<Napi::Function>();
    // context holds a persistent reference to `this`; it is freed in the finalizer below
    AddonThreadSafeLogCallbackFunctionContext* context = new Napi::Reference<Napi::Value>(Napi::Persistent(info.This()));
    addonThreadSafeLoggerCallback = AddonThreadSafeLogCallbackFunction::New(
        info.Env(),
        addonLoggerJSCallback,
        "loggerCallback",
        0, // max queue size; 0 means unlimited per the Node-API docs
        1, // initial thread count using this thread-safe function
        context,
        [](Napi::Env, void*, AddonThreadSafeLogCallbackFunctionContext* ctx) {
            // finalizer: mark the callback unusable before freeing its context
            addonJsLoggerCallbackSet = false;

            delete ctx;
        }
    );
    addonJsLoggerCallbackSet = true;

    // prevent blocking the main node process from exiting due to active resources
    addonThreadSafeLoggerCallback.Unref(info.Env());

    return info.Env().Undefined();
}
|
|
982
|
+
|
|
983
|
+
// Set the maximum log level number forwarded by the native log hook.
// Calling without a numeric argument resets the threshold to 5 (most verbose).
Napi::Value setLoggerLogLevel(const Napi::CallbackInfo& info) {
    const bool hasNumericArg = info.Length() >= 1 && info[0].IsNumber();

    addonLoggerLogLevel = hasNumericArg
        ? info[0].As<Napi::Number>().Int32Value()
        : 5;

    return info.Env().Undefined();
}
|
|
994
|
+
|
|
767
995
|
// Module entry point: initializes the llama.cpp backend, registers the exported
// functions and addon classes, and wires llama.cpp's internal logging into the
// addon's logger. The init order matters: the backend is brought up first, and
// the log hook is installed only after everything is registered.
Napi::Object registerCallback(Napi::Env env, Napi::Object exports) {
    llama_backend_init(false); // NOTE(review): `false` is presumably the NUMA flag — confirm against the pinned llama.cpp release
    exports.DefineProperties({
        Napi::PropertyDescriptor::Function("systemInfo", systemInfo),
        Napi::PropertyDescriptor::Function("setLogger", setLogger),
        Napi::PropertyDescriptor::Function("setLoggerLogLevel", setLoggerLogLevel),
        Napi::PropertyDescriptor::Function("getGpuVramInfo", getGpuVramInfo),
    });
    AddonModel::init(exports);
    AddonGrammar::init(exports);
    AddonGrammarEvaluationState::init(exports);
    AddonContext::init(exports);

    // route llama.cpp's internal log output through addonLlamaCppLogCallback
    llama_log_set(addonLlamaCppLogCallback, nullptr);

    return exports;
}
|
|
778
1012
|
|
package/llama/gitRelease.bundle
CHANGED
|
Binary file
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
#include <stddef.h>
|
|
2
|
+
|
|
3
|
+
#if defined(GPU_INFO_USE_HIPBLAS)
|
|
4
|
+
#include <hip/hip_runtime.h>
|
|
5
|
+
#include <hipblas/hipblas.h>
|
|
6
|
+
#define cudaGetDevice hipGetDevice
|
|
7
|
+
#define cudaGetDeviceCount hipGetDeviceCount
|
|
8
|
+
#define cudaGetErrorString hipGetErrorString
|
|
9
|
+
#define cudaMemGetInfo hipMemGetInfo
|
|
10
|
+
#define cudaSetDevice hipSetDevice
|
|
11
|
+
#define cudaSuccess hipSuccess
|
|
12
|
+
#else
|
|
13
|
+
#include <cuda_runtime.h>
|
|
14
|
+
#include <cuda.h>
|
|
15
|
+
#endif
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
typedef void (*gpuInfoErrorLogCallback_t)(const char* message);
|
|
19
|
+
|
|
20
|
+
bool gpuInfoSetCudaDevice(const int device, gpuInfoErrorLogCallback_t errorLogCallback) {
|
|
21
|
+
int current_device;
|
|
22
|
+
auto getDeviceResult = cudaGetDevice(¤t_device);
|
|
23
|
+
|
|
24
|
+
if (getDeviceResult != cudaSuccess) {
|
|
25
|
+
errorLogCallback(cudaGetErrorString(getDeviceResult));
|
|
26
|
+
return false;
|
|
27
|
+
}
|
|
28
|
+
|
|
29
|
+
if (device == current_device) {
|
|
30
|
+
return true;
|
|
31
|
+
}
|
|
32
|
+
|
|
33
|
+
const auto setDeviceResult = cudaSetDevice(device);
|
|
34
|
+
|
|
35
|
+
if (setDeviceResult != cudaSuccess) {
|
|
36
|
+
errorLogCallback(cudaGetErrorString(setDeviceResult));
|
|
37
|
+
return false;
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
return true;
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
bool gpuInfoGetCudaDeviceInfo(int device, size_t * total, size_t * used, gpuInfoErrorLogCallback_t errorLogCallback) {
|
|
44
|
+
gpuInfoSetCudaDevice(device, errorLogCallback);
|
|
45
|
+
|
|
46
|
+
size_t freeMem;
|
|
47
|
+
size_t totalMem;
|
|
48
|
+
auto getMemInfoResult = cudaMemGetInfo(&freeMem, &totalMem);
|
|
49
|
+
|
|
50
|
+
if (getMemInfoResult != cudaSuccess) {
|
|
51
|
+
errorLogCallback(cudaGetErrorString(getMemInfoResult));
|
|
52
|
+
return false;
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
*total = totalMem;
|
|
56
|
+
*used = totalMem - freeMem;
|
|
57
|
+
|
|
58
|
+
return true;
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
// Number of CUDA devices visible to the runtime, or -1 on error
// (the CUDA error text is forwarded to errorLogCallback).
int gpuInfoGetCudaDeviceCount(gpuInfoErrorLogCallback_t errorLogCallback) {
    int deviceCount = 0;
    const auto countResult = cudaGetDeviceCount(&deviceCount);

    if (countResult == cudaSuccess) {
        return deviceCount;
    }

    errorLogCallback(cudaGetErrorString(countResult));
    return -1;
}
|
|
72
|
+
|
|
73
|
+
bool gpuInfoGetTotalCudaDevicesInfo(size_t * total, size_t * used, gpuInfoErrorLogCallback_t errorLogCallback) {
|
|
74
|
+
int deviceCount = gpuInfoGetCudaDeviceCount(errorLogCallback);
|
|
75
|
+
|
|
76
|
+
if (deviceCount < 0) {
|
|
77
|
+
return false;
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
size_t usedMem = 0;
|
|
81
|
+
size_t totalMem = 0;
|
|
82
|
+
|
|
83
|
+
for (int i = 0; i < deviceCount; i++) {
|
|
84
|
+
size_t deviceUsedMem;
|
|
85
|
+
size_t deviceTotalMem;
|
|
86
|
+
|
|
87
|
+
if (!gpuInfoGetCudaDeviceInfo(i, &deviceTotalMem, &deviceUsedMem, errorLogCallback)) {
|
|
88
|
+
return false;
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
usedMem += deviceUsedMem;
|
|
92
|
+
totalMem += deviceTotalMem;
|
|
93
|
+
}
|
|
94
|
+
|
|
95
|
+
*total = totalMem;
|
|
96
|
+
*used = usedMem;
|
|
97
|
+
|
|
98
|
+
return true;
|
|
99
|
+
}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
#include <stdint.h>
|
|
2
|
+
#import <Metal/Metal.h>
|
|
3
|
+
|
|
4
|
+
// Report the default Metal device's recommended working-set size and current
// allocation. When no Metal device is available, both outputs are zeroed.
void get_metal_gpu_info(uint64_t * total, uint64_t * used) {
    id<MTLDevice> device = MTLCreateSystemDefaultDevice();

    if (device == nil) {
        *total = 0;
        *used = 0;
    } else {
        *total = device.recommendedMaxWorkingSetSize;
        *used = device.currentAllocatedSize;
    }

    // MTLCreateSystemDefaultDevice follows the Create rule (+1 reference under MRC);
    // messaging nil is a safe no-op if no device was found
    [device release];
    device = nil;
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"buildOptions":{"customCmakeOptions":{},"progressLogs":true,"platform":"linux","arch":"arm64","computeLayers":{"metal":false,"cuda":false},"llamaCpp":{"repo":"ggerganov/llama.cpp","release":"b2127"}}}
|
|
Binary file
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"buildOptions":{"customCmakeOptions":{},"progressLogs":true,"platform":"linux","arch":"armv7l","computeLayers":{"metal":false,"cuda":false},"llamaCpp":{"repo":"ggerganov/llama.cpp","release":"b2127"}}}
|
|
Binary file
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"buildOptions":{"customCmakeOptions":{},"progressLogs":true,"platform":"linux","arch":"x64","computeLayers":{"metal":false,"cuda":false},"llamaCpp":{"repo":"ggerganov/llama.cpp","release":"b2127"}}}
|
|
Binary file
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"buildOptions":{"customCmakeOptions":{},"progressLogs":true,"platform":"linux","arch":"x64","computeLayers":{"metal":false,"cuda":true},"llamaCpp":{"repo":"ggerganov/llama.cpp","release":"b2127"}}}
|
|
Binary file
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"buildOptions":{"customCmakeOptions":{},"progressLogs":true,"platform":"mac","arch":"arm64","computeLayers":{"metal":true,"cuda":false},"llamaCpp":{"repo":"ggerganov/llama.cpp","release":"b2127"}}}
|