node-llama-cpp 3.0.0-beta.14 → 3.0.0-beta.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/dist/ChatWrapper.js +4 -0
- package/dist/ChatWrapper.js.map +1 -1
- package/dist/bindings/AddonTypes.d.ts +23 -0
- package/dist/bindings/Llama.d.ts +11 -0
- package/dist/bindings/Llama.js +56 -4
- package/dist/bindings/Llama.js.map +1 -1
- package/dist/bindings/getLlama.d.ts +20 -2
- package/dist/bindings/getLlama.js +15 -5
- package/dist/bindings/getLlama.js.map +1 -1
- package/dist/bindings/types.d.ts +15 -0
- package/dist/bindings/types.js +27 -2
- package/dist/bindings/types.js.map +1 -1
- package/dist/bindings/utils/MemoryOrchestrator.d.ts +21 -0
- package/dist/bindings/utils/MemoryOrchestrator.js +49 -0
- package/dist/bindings/utils/MemoryOrchestrator.js.map +1 -0
- package/dist/bindings/utils/cloneLlamaCppRepo.d.ts +1 -1
- package/dist/bindings/utils/cloneLlamaCppRepo.js +26 -25
- package/dist/bindings/utils/cloneLlamaCppRepo.js.map +1 -1
- package/dist/bindings/utils/compileLLamaCpp.js +2 -2
- package/dist/bindings/utils/compileLLamaCpp.js.map +1 -1
- package/dist/bindings/utils/getLlamaWithoutBackend.d.ts +5 -0
- package/dist/bindings/utils/getLlamaWithoutBackend.js +27 -0
- package/dist/bindings/utils/getLlamaWithoutBackend.js.map +1 -0
- package/dist/bindings/utils/resolveCustomCmakeOptions.js +2 -2
- package/dist/bindings/utils/resolveCustomCmakeOptions.js.map +1 -1
- package/dist/chatWrappers/AlpacaChatWrapper.d.ts +2 -1
- package/dist/chatWrappers/AlpacaChatWrapper.js +9 -2
- package/dist/chatWrappers/AlpacaChatWrapper.js.map +1 -1
- package/dist/chatWrappers/ChatMLChatWrapper.js +12 -10
- package/dist/chatWrappers/ChatMLChatWrapper.js.map +1 -1
- package/dist/chatWrappers/FalconChatWrapper.d.ts +2 -1
- package/dist/chatWrappers/FalconChatWrapper.js +28 -11
- package/dist/chatWrappers/FalconChatWrapper.js.map +1 -1
- package/dist/chatWrappers/FunctionaryChatWrapper.js +59 -45
- package/dist/chatWrappers/FunctionaryChatWrapper.js.map +1 -1
- package/dist/chatWrappers/GemmaChatWrapper.js +9 -7
- package/dist/chatWrappers/GemmaChatWrapper.js.map +1 -1
- package/dist/chatWrappers/GeneralChatWrapper.d.ts +2 -1
- package/dist/chatWrappers/GeneralChatWrapper.js +35 -12
- package/dist/chatWrappers/GeneralChatWrapper.js.map +1 -1
- package/dist/chatWrappers/LlamaChatWrapper.d.ts +7 -0
- package/dist/chatWrappers/LlamaChatWrapper.js +26 -8
- package/dist/chatWrappers/LlamaChatWrapper.js.map +1 -1
- package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.d.ts +73 -0
- package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.js +355 -0
- package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.js.map +1 -0
- package/dist/{TemplateChatWrapper.d.ts → chatWrappers/generic/TemplateChatWrapper.d.ts} +6 -9
- package/dist/{TemplateChatWrapper.js → chatWrappers/generic/TemplateChatWrapper.js} +31 -69
- package/dist/chatWrappers/generic/TemplateChatWrapper.js.map +1 -0
- package/dist/chatWrappers/generic/utils/chatHistoryFunctionCallMessageTemplate.d.ts +33 -0
- package/dist/chatWrappers/generic/utils/chatHistoryFunctionCallMessageTemplate.js +45 -0
- package/dist/chatWrappers/generic/utils/chatHistoryFunctionCallMessageTemplate.js.map +1 -0
- package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.d.ts +4 -0
- package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.js +206 -0
- package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.js.map +1 -0
- package/dist/chatWrappers/utils/resolveChatWrapper.d.ts +67 -0
- package/dist/chatWrappers/utils/resolveChatWrapper.js +208 -0
- package/dist/chatWrappers/utils/resolveChatWrapper.js.map +1 -0
- package/dist/cli/cli.js +1 -1
- package/dist/cli/cli.js.map +1 -1
- package/dist/cli/commands/BuildCommand.js +1 -1
- package/dist/cli/commands/BuildCommand.js.map +1 -1
- package/dist/cli/commands/ChatCommand.d.ts +9 -5
- package/dist/cli/commands/ChatCommand.js +203 -118
- package/dist/cli/commands/ChatCommand.js.map +1 -1
- package/dist/cli/commands/ClearCommand.d.ts +1 -1
- package/dist/cli/commands/ClearCommand.js +5 -5
- package/dist/cli/commands/ClearCommand.js.map +1 -1
- package/dist/cli/commands/CompleteCommand.d.ts +5 -3
- package/dist/cli/commands/CompleteCommand.js +136 -85
- package/dist/cli/commands/CompleteCommand.js.map +1 -1
- package/dist/cli/commands/DebugCommand.js +4 -4
- package/dist/cli/commands/DownloadCommand.js +3 -4
- package/dist/cli/commands/DownloadCommand.js.map +1 -1
- package/dist/cli/commands/InfillCommand.d.ts +5 -3
- package/dist/cli/commands/InfillCommand.js +138 -89
- package/dist/cli/commands/InfillCommand.js.map +1 -1
- package/dist/cli/commands/{InspectCommand.d.ts → inspect/InspectCommand.d.ts} +1 -4
- package/dist/cli/commands/inspect/InspectCommand.js +17 -0
- package/dist/cli/commands/inspect/InspectCommand.js.map +1 -0
- package/dist/cli/commands/inspect/commands/InspectGgufCommand.d.ts +11 -0
- package/dist/cli/commands/inspect/commands/InspectGgufCommand.js +121 -0
- package/dist/cli/commands/inspect/commands/InspectGgufCommand.js.map +1 -0
- package/dist/cli/commands/inspect/commands/InspectGpuCommand.d.ts +4 -0
- package/dist/cli/commands/inspect/commands/InspectGpuCommand.js +136 -0
- package/dist/cli/commands/inspect/commands/InspectGpuCommand.js.map +1 -0
- package/dist/cli/commands/inspect/commands/InspectMeasureCommand.d.ts +15 -0
- package/dist/cli/commands/inspect/commands/InspectMeasureCommand.js +579 -0
- package/dist/cli/commands/inspect/commands/InspectMeasureCommand.js.map +1 -0
- package/dist/cli/recommendedModels.d.ts +2 -0
- package/dist/cli/recommendedModels.js +281 -0
- package/dist/cli/recommendedModels.js.map +1 -0
- package/dist/cli/utils/ConsoleInteraction.d.ts +23 -0
- package/dist/cli/utils/ConsoleInteraction.js +122 -0
- package/dist/cli/utils/ConsoleInteraction.js.map +1 -0
- package/dist/cli/utils/ConsoleTable.d.ts +23 -0
- package/dist/cli/utils/ConsoleTable.js +86 -0
- package/dist/cli/utils/ConsoleTable.js.map +1 -0
- package/dist/cli/utils/basicChooseFromListConsoleInteraction.d.ts +13 -0
- package/dist/cli/utils/basicChooseFromListConsoleInteraction.js +111 -0
- package/dist/cli/utils/basicChooseFromListConsoleInteraction.js.map +1 -0
- package/dist/cli/utils/consolePromptQuestion.d.ts +5 -0
- package/dist/cli/utils/consolePromptQuestion.js +80 -0
- package/dist/cli/utils/consolePromptQuestion.js.map +1 -0
- package/dist/cli/utils/getReadablePath.d.ts +1 -0
- package/dist/cli/utils/getReadablePath.js +14 -0
- package/dist/cli/utils/getReadablePath.js.map +1 -0
- package/dist/cli/utils/printCommonInfoLines.d.ts +9 -0
- package/dist/cli/utils/printCommonInfoLines.js +70 -0
- package/dist/cli/utils/printCommonInfoLines.js.map +1 -0
- package/dist/cli/utils/printInfoLine.d.ts +12 -0
- package/dist/cli/utils/printInfoLine.js +54 -0
- package/dist/cli/utils/printInfoLine.js.map +1 -0
- package/dist/cli/utils/resolveCommandGgufPath.d.ts +2 -0
- package/dist/cli/utils/resolveCommandGgufPath.js +494 -0
- package/dist/cli/utils/resolveCommandGgufPath.js.map +1 -0
- package/dist/cli/utils/resolveHeaderFlag.d.ts +1 -0
- package/dist/cli/utils/resolveHeaderFlag.js +21 -0
- package/dist/cli/utils/resolveHeaderFlag.js.map +1 -0
- package/dist/cli/utils/resolveModelRecommendationFileOptions.d.ts +19 -0
- package/dist/cli/utils/resolveModelRecommendationFileOptions.js +7 -0
- package/dist/cli/utils/resolveModelRecommendationFileOptions.js.map +1 -0
- package/dist/cli/utils/splitAnsiToLines.d.ts +1 -0
- package/dist/cli/utils/splitAnsiToLines.js +17 -0
- package/dist/cli/utils/splitAnsiToLines.js.map +1 -0
- package/dist/config.d.ts +5 -0
- package/dist/config.js +11 -2
- package/dist/config.js.map +1 -1
- package/dist/consts.d.ts +2 -0
- package/dist/consts.js +8 -0
- package/dist/consts.js.map +1 -1
- package/dist/evaluator/LlamaChat/LlamaChat.d.ts +8 -1
- package/dist/evaluator/LlamaChat/LlamaChat.js +15 -6
- package/dist/evaluator/LlamaChat/LlamaChat.js.map +1 -1
- package/dist/evaluator/LlamaChatSession/LlamaChatSession.d.ts +9 -2
- package/dist/evaluator/LlamaChatSession/LlamaChatSession.js +5 -3
- package/dist/evaluator/LlamaChatSession/LlamaChatSession.js.map +1 -1
- package/dist/evaluator/LlamaCompletion.d.ts +9 -2
- package/dist/evaluator/LlamaCompletion.js +11 -6
- package/dist/evaluator/LlamaCompletion.js.map +1 -1
- package/dist/evaluator/LlamaContext/LlamaContext.d.ts +30 -3
- package/dist/evaluator/LlamaContext/LlamaContext.js +227 -102
- package/dist/evaluator/LlamaContext/LlamaContext.js.map +1 -1
- package/dist/evaluator/LlamaContext/types.d.ts +57 -6
- package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies/firstInFirstOutStrategy.js.map +1 -0
- package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies/maximumParallelismStrategy.js.map +1 -0
- package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizationStrategy.d.ts +2 -0
- package/dist/evaluator/LlamaContext/utils/{resolveBatchItemsPrioritizingStrategy.js → resolveBatchItemsPrioritizationStrategy.js} +4 -4
- package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizationStrategy.js.map +1 -0
- package/dist/evaluator/LlamaEmbeddingContext.d.ts +23 -2
- package/dist/evaluator/LlamaEmbeddingContext.js +4 -5
- package/dist/evaluator/LlamaEmbeddingContext.js.map +1 -1
- package/dist/evaluator/LlamaGrammar.d.ts +3 -2
- package/dist/evaluator/LlamaGrammar.js +3 -2
- package/dist/evaluator/LlamaGrammar.js.map +1 -1
- package/dist/evaluator/LlamaModel.d.ts +56 -6
- package/dist/evaluator/LlamaModel.js +99 -7
- package/dist/evaluator/LlamaModel.js.map +1 -1
- package/dist/evaluator/TokenBias.d.ts +22 -0
- package/dist/evaluator/TokenBias.js +33 -0
- package/dist/evaluator/TokenBias.js.map +1 -0
- package/dist/evaluator/TokenMeter.d.ts +54 -0
- package/dist/evaluator/TokenMeter.js +86 -0
- package/dist/evaluator/TokenMeter.js.map +1 -0
- package/dist/gguf/consts.d.ts +3 -0
- package/dist/gguf/consts.js +8 -0
- package/dist/gguf/consts.js.map +1 -0
- package/dist/gguf/errors/InvalidGgufMagicError.d.ts +3 -0
- package/dist/gguf/errors/InvalidGgufMagicError.js +6 -0
- package/dist/gguf/errors/InvalidGgufMagicError.js.map +1 -0
- package/dist/gguf/errors/UnsupportedGgufValueTypeError.d.ts +4 -0
- package/dist/gguf/errors/UnsupportedGgufValueTypeError.js +9 -0
- package/dist/gguf/errors/UnsupportedGgufValueTypeError.js.map +1 -0
- package/dist/gguf/fileReaders/GgufFileReader.d.ts +33 -0
- package/dist/gguf/fileReaders/GgufFileReader.js +76 -0
- package/dist/gguf/fileReaders/GgufFileReader.js.map +1 -0
- package/dist/gguf/fileReaders/GgufFsFileReader.d.ts +17 -0
- package/dist/gguf/fileReaders/GgufFsFileReader.js +45 -0
- package/dist/gguf/fileReaders/GgufFsFileReader.js.map +1 -0
- package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.d.ts +22 -0
- package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.js +63 -0
- package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.js.map +1 -0
- package/dist/gguf/insights/GgufInsights.d.ts +42 -0
- package/dist/gguf/insights/GgufInsights.js +361 -0
- package/dist/gguf/insights/GgufInsights.js.map +1 -0
- package/dist/gguf/insights/GgufInsightsConfigurationResolver.d.ts +87 -0
- package/dist/gguf/insights/GgufInsightsConfigurationResolver.js +136 -0
- package/dist/gguf/insights/GgufInsightsConfigurationResolver.js.map +1 -0
- package/dist/gguf/insights/utils/resolveContextContextSizeOption.d.ts +18 -0
- package/dist/gguf/insights/utils/resolveContextContextSizeOption.js +52 -0
- package/dist/gguf/insights/utils/resolveContextContextSizeOption.js.map +1 -0
- package/dist/gguf/insights/utils/resolveModelGpuLayersOption.d.ts +14 -0
- package/dist/gguf/insights/utils/resolveModelGpuLayersOption.js +177 -0
- package/dist/gguf/insights/utils/resolveModelGpuLayersOption.js.map +1 -0
- package/dist/gguf/insights/utils/scoreLevels.d.ts +5 -0
- package/dist/gguf/insights/utils/scoreLevels.js +16 -0
- package/dist/gguf/insights/utils/scoreLevels.js.map +1 -0
- package/dist/gguf/parser/GgufV2Parser.d.ts +19 -0
- package/dist/gguf/parser/GgufV2Parser.js +115 -0
- package/dist/gguf/parser/GgufV2Parser.js.map +1 -0
- package/dist/gguf/parser/GgufV3Parser.d.ts +3 -0
- package/dist/gguf/parser/GgufV3Parser.js +4 -0
- package/dist/gguf/parser/GgufV3Parser.js.map +1 -0
- package/dist/gguf/parser/parseGguf.d.ts +8 -0
- package/dist/gguf/parser/parseGguf.js +58 -0
- package/dist/gguf/parser/parseGguf.js.map +1 -0
- package/dist/gguf/readGgufFileInfo.d.ts +30 -0
- package/dist/gguf/readGgufFileInfo.js +38 -0
- package/dist/gguf/readGgufFileInfo.js.map +1 -0
- package/dist/gguf/types/GgufFileInfoTypes.d.ts +52 -0
- package/dist/gguf/types/GgufFileInfoTypes.js +18 -0
- package/dist/gguf/types/GgufFileInfoTypes.js.map +1 -0
- package/dist/gguf/types/GgufMetadataTypes.d.ts +330 -0
- package/dist/gguf/types/GgufMetadataTypes.js +86 -0
- package/dist/gguf/types/GgufMetadataTypes.js.map +1 -0
- package/dist/gguf/types/GgufTensorInfoTypes.d.ts +37 -0
- package/dist/gguf/types/GgufTensorInfoTypes.js +33 -0
- package/dist/gguf/types/GgufTensorInfoTypes.js.map +1 -0
- package/dist/gguf/utils/GgufReadOffset.d.ts +6 -0
- package/dist/gguf/utils/GgufReadOffset.js +18 -0
- package/dist/gguf/utils/GgufReadOffset.js.map +1 -0
- package/dist/gguf/utils/convertMetadataKeyValueRecordToNestedObject.d.ts +5 -0
- package/dist/gguf/utils/convertMetadataKeyValueRecordToNestedObject.js +38 -0
- package/dist/gguf/utils/convertMetadataKeyValueRecordToNestedObject.js.map +1 -0
- package/dist/gguf/utils/getGgufFileTypeName.d.ts +4 -0
- package/dist/gguf/utils/getGgufFileTypeName.js +13 -0
- package/dist/gguf/utils/getGgufFileTypeName.js.map +1 -0
- package/dist/gguf/utils/getGgufMetadataArchitectureData.d.ts +3 -0
- package/dist/gguf/utils/getGgufMetadataArchitectureData.js +4 -0
- package/dist/gguf/utils/getGgufMetadataArchitectureData.js.map +1 -0
- package/dist/gguf/utils/normalizeGgufDownloadUrl.d.ts +1 -0
- package/dist/gguf/utils/normalizeGgufDownloadUrl.js +16 -0
- package/dist/gguf/utils/normalizeGgufDownloadUrl.js.map +1 -0
- package/dist/index.d.ts +14 -7
- package/dist/index.js +12 -6
- package/dist/index.js.map +1 -1
- package/dist/types.d.ts +1 -1
- package/dist/utils/InsufficientMemoryError.d.ts +3 -0
- package/dist/utils/InsufficientMemoryError.js +6 -0
- package/dist/utils/InsufficientMemoryError.js.map +1 -0
- package/dist/utils/LlamaText.d.ts +25 -10
- package/dist/utils/LlamaText.js +205 -23
- package/dist/utils/LlamaText.js.map +1 -1
- package/dist/utils/StopGenerationDetector.js +3 -1
- package/dist/utils/StopGenerationDetector.js.map +1 -1
- package/dist/utils/findBestOption.d.ts +4 -0
- package/dist/utils/findBestOption.js +15 -0
- package/dist/utils/findBestOption.js.map +1 -0
- package/dist/utils/getConsoleLogPrefix.js +1 -1
- package/dist/utils/getQueuedTokensBeforeStopTrigger.js +3 -3
- package/dist/utils/getQueuedTokensBeforeStopTrigger.js.map +1 -1
- package/dist/utils/getReadableContextSize.d.ts +1 -0
- package/dist/utils/getReadableContextSize.js +7 -0
- package/dist/utils/getReadableContextSize.js.map +1 -0
- package/dist/utils/gitReleaseBundles.js +68 -1
- package/dist/utils/gitReleaseBundles.js.map +1 -1
- package/dist/utils/isToken.d.ts +2 -0
- package/dist/utils/isToken.js +4 -0
- package/dist/utils/isToken.js.map +1 -0
- package/dist/utils/isUrl.d.ts +1 -0
- package/dist/utils/isUrl.js +15 -0
- package/dist/utils/isUrl.js.map +1 -0
- package/dist/utils/mergeUnionTypes.d.ts +4 -0
- package/dist/utils/parseModelFileName.d.ts +1 -0
- package/dist/utils/parseModelFileName.js +6 -1
- package/dist/utils/parseModelFileName.js.map +1 -1
- package/dist/utils/prettyPrintObject.d.ts +10 -1
- package/dist/utils/prettyPrintObject.js +57 -13
- package/dist/utils/prettyPrintObject.js.map +1 -1
- package/dist/utils/spawnCommand.js.map +1 -1
- package/dist/utils/tokenizeInput.d.ts +1 -1
- package/dist/utils/tokenizeInput.js +6 -3
- package/dist/utils/tokenizeInput.js.map +1 -1
- package/dist/utils/withOra.d.ts +2 -0
- package/dist/utils/withOra.js +14 -8
- package/dist/utils/withOra.js.map +1 -1
- package/dist/utils/withProgressLog.d.ts +23 -0
- package/dist/utils/withProgressLog.js +211 -0
- package/dist/utils/withProgressLog.js.map +1 -0
- package/dist/utils/withStatusLogs.js +1 -1
- package/dist/utils/withStatusLogs.js.map +1 -1
- package/llama/CMakeLists.txt +5 -5
- package/llama/addon.cpp +159 -9
- package/llama/binariesGithubRelease.json +1 -1
- package/llama/gitRelease.bundle +0 -0
- package/llama/gpuInfo/cuda-gpu-info.cu +21 -0
- package/llama/gpuInfo/cuda-gpu-info.h +3 -0
- package/llama/gpuInfo/metal-gpu-info.h +4 -1
- package/llama/gpuInfo/metal-gpu-info.mm +14 -1
- package/llama/gpuInfo/vulkan-gpu-info.cpp +20 -2
- package/llama/gpuInfo/vulkan-gpu-info.h +2 -0
- package/llama/grammars/README.md +10 -0
- package/llama/llama.cpp.info.json +1 -1
- package/llama/toolchains/win32.host-x64.target-arm64.cmake +41 -0
- package/llamaBins/linux-arm64/_nlcBuildMetadata.json +1 -1
- package/llamaBins/linux-arm64/llama-addon.node +0 -0
- package/llamaBins/linux-armv7l/_nlcBuildMetadata.json +1 -1
- package/llamaBins/linux-armv7l/llama-addon.node +0 -0
- package/llamaBins/linux-x64/_nlcBuildMetadata.json +1 -1
- package/llamaBins/linux-x64/llama-addon.node +0 -0
- package/llamaBins/linux-x64-cuda/_nlcBuildMetadata.json +1 -1
- package/llamaBins/linux-x64-cuda/llama-addon.node +0 -0
- package/llamaBins/linux-x64-vulkan/_nlcBuildMetadata.json +1 -1
- package/llamaBins/linux-x64-vulkan/llama-addon.node +0 -0
- package/llamaBins/mac-arm64-metal/_nlcBuildMetadata.json +1 -1
- package/llamaBins/mac-arm64-metal/default.metallib +0 -0
- package/llamaBins/mac-arm64-metal/llama-addon.node +0 -0
- package/llamaBins/mac-x64/_nlcBuildMetadata.json +1 -1
- package/llamaBins/mac-x64/llama-addon.node +0 -0
- package/llamaBins/win-arm64/_nlcBuildMetadata.json +1 -0
- package/llamaBins/win-arm64/llama-addon.exp +0 -0
- package/llamaBins/win-arm64/llama-addon.lib +0 -0
- package/llamaBins/win-arm64/llama-addon.node +0 -0
- package/llamaBins/win-x64/_nlcBuildMetadata.json +1 -1
- package/llamaBins/win-x64/llama-addon.node +0 -0
- package/llamaBins/win-x64-cuda/_nlcBuildMetadata.json +1 -1
- package/llamaBins/win-x64-cuda/llama-addon.node +0 -0
- package/llamaBins/win-x64-vulkan/_nlcBuildMetadata.json +1 -1
- package/llamaBins/win-x64-vulkan/llama-addon.node +0 -0
- package/package.json +15 -12
- package/dist/TemplateChatWrapper.js.map +0 -1
- package/dist/bindings/utils/resolveChatWrapperBasedOnWrapperTypeName.d.ts +0 -33
- package/dist/bindings/utils/resolveChatWrapperBasedOnWrapperTypeName.js +0 -49
- package/dist/bindings/utils/resolveChatWrapperBasedOnWrapperTypeName.js.map +0 -1
- package/dist/chatWrappers/resolveChatWrapperBasedOnModel.d.ts +0 -13
- package/dist/chatWrappers/resolveChatWrapperBasedOnModel.js +0 -63
- package/dist/chatWrappers/resolveChatWrapperBasedOnModel.js.map +0 -1
- package/dist/cli/commands/InspectCommand.js +0 -113
- package/dist/cli/commands/InspectCommand.js.map +0 -1
- package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/firstInFirstOutStrategy.js.map +0 -1
- package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/maximumParallelismStrategy.js.map +0 -1
- package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizingStrategy.d.ts +0 -2
- package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizingStrategy.js.map +0 -1
- package/dist/gguf/GGUFInsights.d.ts +0 -28
- package/dist/gguf/GGUFInsights.js +0 -58
- package/dist/gguf/GGUFInsights.js.map +0 -1
- package/dist/gguf/GGUFMetadata.d.ts +0 -19
- package/dist/gguf/GGUFMetadata.js +0 -38
- package/dist/gguf/GGUFMetadata.js.map +0 -1
- package/dist/gguf/errors/InvalidGGUFMagicError.d.ts +0 -3
- package/dist/gguf/errors/InvalidGGUFMagicError.js +0 -6
- package/dist/gguf/errors/InvalidGGUFMagicError.js.map +0 -1
- package/dist/gguf/errors/MetadataNotParsedYetError.d.ts +0 -3
- package/dist/gguf/errors/MetadataNotParsedYetError.js +0 -6
- package/dist/gguf/errors/MetadataNotParsedYetError.js.map +0 -1
- package/dist/gguf/errors/MissingNodeLlamaError.d.ts +0 -3
- package/dist/gguf/errors/MissingNodeLlamaError.js +0 -6
- package/dist/gguf/errors/MissingNodeLlamaError.js.map +0 -1
- package/dist/gguf/errors/ModelScore/NotEnoughVRamError.d.ts +0 -5
- package/dist/gguf/errors/ModelScore/NotEnoughVRamError.js +0 -11
- package/dist/gguf/errors/ModelScore/NotEnoughVRamError.js.map +0 -1
- package/dist/gguf/errors/UnsupportedMetadataTypeError.d.ts +0 -4
- package/dist/gguf/errors/UnsupportedMetadataTypeError.js +0 -8
- package/dist/gguf/errors/UnsupportedMetadataTypeError.js.map +0 -1
- package/dist/gguf/ggufParser/GGUFParser.d.ts +0 -18
- package/dist/gguf/ggufParser/GGUFParser.js +0 -123
- package/dist/gguf/ggufParser/GGUFParser.js.map +0 -1
- package/dist/gguf/ggufParser/GGUFTypes.d.ts +0 -257
- package/dist/gguf/ggufParser/GGUFTypes.js +0 -2
- package/dist/gguf/ggufParser/GGUFTypes.js.map +0 -1
- package/dist/gguf/ggufParser/checkArchitecture.d.ts +0 -14
- package/dist/gguf/ggufParser/checkArchitecture.js +0 -74
- package/dist/gguf/ggufParser/checkArchitecture.js.map +0 -1
- package/dist/gguf/ggufParser/stream/GGUFBaseStream.d.ts +0 -38
- package/dist/gguf/ggufParser/stream/GGUFBaseStream.js +0 -83
- package/dist/gguf/ggufParser/stream/GGUFBaseStream.js.map +0 -1
- package/dist/gguf/ggufParser/stream/GGUFFetchStream.d.ts +0 -14
- package/dist/gguf/ggufParser/stream/GGUFFetchStream.js +0 -35
- package/dist/gguf/ggufParser/stream/GGUFFetchStream.js.map +0 -1
- package/dist/gguf/ggufParser/stream/GGUFReadStream.d.ts +0 -15
- package/dist/gguf/ggufParser/stream/GGUFReadStream.js +0 -40
- package/dist/gguf/ggufParser/stream/GGUFReadStream.js.map +0 -1
- package/dist/utils/parseModelTypeDescription.d.ts +0 -6
- package/dist/utils/parseModelTypeDescription.js +0 -9
- package/dist/utils/parseModelTypeDescription.js.map +0 -1
- package/dist/utils/resolveChatWrapper.d.ts +0 -4
- package/dist/utils/resolveChatWrapper.js +0 -16
- package/dist/utils/resolveChatWrapper.js.map +0 -1
- /package/dist/evaluator/LlamaContext/utils/{batchItemsPrioritizingStrategies → batchItemsPrioritizationStrategies}/firstInFirstOutStrategy.d.ts +0 -0
- /package/dist/evaluator/LlamaContext/utils/{batchItemsPrioritizingStrategies → batchItemsPrioritizationStrategies}/firstInFirstOutStrategy.js +0 -0
- /package/dist/evaluator/LlamaContext/utils/{batchItemsPrioritizingStrategies → batchItemsPrioritizationStrategies}/maximumParallelismStrategy.d.ts +0 -0
- /package/dist/evaluator/LlamaContext/utils/{batchItemsPrioritizingStrategies → batchItemsPrioritizationStrategies}/maximumParallelismStrategy.js +0 -0
package/llama/addon.cpp
CHANGED
|
@@ -3,13 +3,14 @@
|
|
|
3
3
|
#include <algorithm>
|
|
4
4
|
#include <sstream>
|
|
5
5
|
#include <vector>
|
|
6
|
+
#include <unordered_map>
|
|
6
7
|
|
|
7
8
|
#include "common.h"
|
|
8
9
|
#include "common/grammar-parser.h"
|
|
9
10
|
#include "llama.h"
|
|
10
11
|
#include "napi.h"
|
|
11
12
|
|
|
12
|
-
#ifdef
|
|
13
|
+
#ifdef GPU_INFO_USE_CUDA
|
|
13
14
|
# include "gpuInfo/cuda-gpu-info.h"
|
|
14
15
|
#endif
|
|
15
16
|
#ifdef GPU_INFO_USE_VULKAN
|
|
@@ -121,7 +122,7 @@ std::string addon_model_token_to_piece(const struct llama_model* model, llama_to
|
|
|
121
122
|
return std::string(result.data(), result.size());
|
|
122
123
|
}
|
|
123
124
|
|
|
124
|
-
#ifdef
|
|
125
|
+
#ifdef GPU_INFO_USE_CUDA
|
|
125
126
|
void logCudaError(const char* message) {
|
|
126
127
|
addonLlamaCppLogCallback(GGML_LOG_LEVEL_ERROR, (std::string("CUDA error: ") + std::string(message)).c_str(), nullptr);
|
|
127
128
|
}
|
|
@@ -136,7 +137,7 @@ Napi::Value getGpuVramInfo(const Napi::CallbackInfo& info) {
|
|
|
136
137
|
uint64_t total = 0;
|
|
137
138
|
uint64_t used = 0;
|
|
138
139
|
|
|
139
|
-
#ifdef
|
|
140
|
+
#ifdef GPU_INFO_USE_CUDA
|
|
140
141
|
size_t cudaDeviceTotal = 0;
|
|
141
142
|
size_t cudaDeviceUsed = 0;
|
|
142
143
|
bool cudeGetInfoSuccess = gpuInfoGetTotalCudaDevicesInfo(&cudaDeviceTotal, &cudaDeviceUsed, logCudaError);
|
|
@@ -161,7 +162,7 @@ Napi::Value getGpuVramInfo(const Napi::CallbackInfo& info) {
|
|
|
161
162
|
#ifdef GPU_INFO_USE_METAL
|
|
162
163
|
uint64_t metalDeviceTotal = 0;
|
|
163
164
|
uint64_t metalDeviceUsed = 0;
|
|
164
|
-
|
|
165
|
+
getMetalGpuInfo(&metalDeviceTotal, &metalDeviceUsed);
|
|
165
166
|
|
|
166
167
|
total += metalDeviceTotal;
|
|
167
168
|
used += metalDeviceUsed;
|
|
@@ -174,8 +175,34 @@ Napi::Value getGpuVramInfo(const Napi::CallbackInfo& info) {
|
|
|
174
175
|
return result;
|
|
175
176
|
}
|
|
176
177
|
|
|
178
|
+
Napi::Value getGpuDeviceInfo(const Napi::CallbackInfo& info) {
|
|
179
|
+
std::vector<std::string> deviceNames;
|
|
180
|
+
|
|
181
|
+
#ifdef GPU_INFO_USE_CUDA
|
|
182
|
+
gpuInfoGetCudaDeviceNames(&deviceNames, logCudaError);
|
|
183
|
+
#endif
|
|
184
|
+
|
|
185
|
+
#ifdef GPU_INFO_USE_VULKAN
|
|
186
|
+
gpuInfoGetVulkanDeviceNames(&deviceNames, logVulkanWarning);
|
|
187
|
+
#endif
|
|
188
|
+
|
|
189
|
+
#ifdef GPU_INFO_USE_METAL
|
|
190
|
+
getMetalGpuDeviceNames(&deviceNames);
|
|
191
|
+
#endif
|
|
192
|
+
|
|
193
|
+
Napi::Object result = Napi::Object::New(info.Env());
|
|
194
|
+
|
|
195
|
+
Napi::Array deviceNamesNapiArray = Napi::Array::New(info.Env(), deviceNames.size());
|
|
196
|
+
for (size_t i = 0; i < deviceNames.size(); ++i) {
|
|
197
|
+
deviceNamesNapiArray[i] = Napi::String::New(info.Env(), deviceNames[i]);
|
|
198
|
+
}
|
|
199
|
+
result.Set("deviceNames", deviceNamesNapiArray);
|
|
200
|
+
|
|
201
|
+
return result;
|
|
202
|
+
}
|
|
203
|
+
|
|
177
204
|
Napi::Value getGpuType(const Napi::CallbackInfo& info) {
|
|
178
|
-
#ifdef
|
|
205
|
+
#ifdef GPU_INFO_USE_CUDA
|
|
179
206
|
return Napi::String::New(info.Env(), "cuda");
|
|
180
207
|
#endif
|
|
181
208
|
|
|
@@ -507,6 +534,16 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
|
|
|
507
534
|
|
|
508
535
|
return Napi::Number::From(info.Env(), int32_t(tokenType));
|
|
509
536
|
}
|
|
537
|
+
Napi::Value GetVocabularyType(const Napi::CallbackInfo& info) {
|
|
538
|
+
if (disposed) {
|
|
539
|
+
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
|
|
540
|
+
return info.Env().Undefined();
|
|
541
|
+
}
|
|
542
|
+
|
|
543
|
+
auto vocabularyType = llama_vocab_type(model);
|
|
544
|
+
|
|
545
|
+
return Napi::Number::From(info.Env(), int32_t(vocabularyType));
|
|
546
|
+
}
|
|
510
547
|
Napi::Value ShouldPrependBosToken(const Napi::CallbackInfo& info) {
|
|
511
548
|
const int addBos = llama_add_bos_token(model);
|
|
512
549
|
|
|
@@ -515,6 +552,10 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
|
|
|
515
552
|
return Napi::Boolean::New(info.Env(), shouldPrependBos);
|
|
516
553
|
}
|
|
517
554
|
|
|
555
|
+
Napi::Value GetModelSize(const Napi::CallbackInfo& info) {
|
|
556
|
+
return Napi::Number::From(info.Env(), llama_model_size(model));
|
|
557
|
+
}
|
|
558
|
+
|
|
518
559
|
static void init(Napi::Object exports) {
|
|
519
560
|
exports.Set(
|
|
520
561
|
"AddonModel",
|
|
@@ -540,7 +581,9 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
|
|
|
540
581
|
InstanceMethod("eotToken", &AddonModel::EotToken),
|
|
541
582
|
InstanceMethod("getTokenString", &AddonModel::GetTokenString),
|
|
542
583
|
InstanceMethod("getTokenType", &AddonModel::GetTokenType),
|
|
584
|
+
InstanceMethod("getVocabularyType", &AddonModel::GetVocabularyType),
|
|
543
585
|
InstanceMethod("shouldPrependBosToken", &AddonModel::ShouldPrependBosToken),
|
|
586
|
+
InstanceMethod("getModelSize", &AddonModel::GetModelSize),
|
|
544
587
|
InstanceMethod("dispose", &AddonModel::Dispose),
|
|
545
588
|
}
|
|
546
589
|
)
|
|
@@ -822,6 +865,10 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
|
|
|
822
865
|
context_params.n_ubatch = context_params.n_batch; // the batch queue is managed in the JS side, so there's no need for managing it on the C++ side
|
|
823
866
|
}
|
|
824
867
|
|
|
868
|
+
if (options.Has("sequences")) {
|
|
869
|
+
context_params.n_seq_max = options.Get("sequences").As<Napi::Number>().Uint32Value();
|
|
870
|
+
}
|
|
871
|
+
|
|
825
872
|
if (options.Has("embeddings")) {
|
|
826
873
|
context_params.embeddings = options.Get("embeddings").As<Napi::Boolean>().Value();
|
|
827
874
|
}
|
|
@@ -1039,6 +1086,15 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
|
|
|
1039
1086
|
return result;
|
|
1040
1087
|
}
|
|
1041
1088
|
|
|
1089
|
+
Napi::Value GetStateSize(const Napi::CallbackInfo& info) {
|
|
1090
|
+
if (disposed) {
|
|
1091
|
+
Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
|
|
1092
|
+
return info.Env().Undefined();
|
|
1093
|
+
}
|
|
1094
|
+
|
|
1095
|
+
return Napi::Number::From(info.Env(), llama_state_get_size(ctx));
|
|
1096
|
+
}
|
|
1097
|
+
|
|
1042
1098
|
Napi::Value PrintTimings(const Napi::CallbackInfo& info) {
|
|
1043
1099
|
llama_print_timings(ctx);
|
|
1044
1100
|
llama_reset_timings(ctx);
|
|
@@ -1063,6 +1119,7 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
|
|
|
1063
1119
|
InstanceMethod("sampleToken", &AddonContext::SampleToken),
|
|
1064
1120
|
InstanceMethod("acceptGrammarEvaluationStateToken", &AddonContext::AcceptGrammarEvaluationStateToken),
|
|
1065
1121
|
InstanceMethod("getEmbedding", &AddonContext::GetEmbedding),
|
|
1122
|
+
InstanceMethod("getStateSize", &AddonContext::GetStateSize),
|
|
1066
1123
|
InstanceMethod("printTimings", &AddonContext::PrintTimings),
|
|
1067
1124
|
InstanceMethod("dispose", &AddonContext::Dispose),
|
|
1068
1125
|
}
|
|
@@ -1163,7 +1220,7 @@ class AddonContextLoadContextWorker : public Napi::AsyncWorker {
|
|
|
1163
1220
|
}
|
|
1164
1221
|
void OnOK() {
|
|
1165
1222
|
if (context->contextLoaded) {
|
|
1166
|
-
uint64_t contextMemorySize =
|
|
1223
|
+
uint64_t contextMemorySize = llama_state_get_size(context->ctx);
|
|
1167
1224
|
adjustNapiExternalMemoryAdd(Env(), contextMemorySize);
|
|
1168
1225
|
context->loadedContextMemorySize = contextMemorySize;
|
|
1169
1226
|
}
|
|
@@ -1278,6 +1335,8 @@ class AddonContextSampleTokenWorker : public Napi::AsyncWorker {
|
|
|
1278
1335
|
float repeat_penalty_presence_penalty = 0.00f; // 0.0 = disabled
|
|
1279
1336
|
float repeat_penalty_frequency_penalty = 0.00f; // 0.0 = disabled
|
|
1280
1337
|
std::vector<llama_token> repeat_penalty_tokens;
|
|
1338
|
+
std::unordered_map<llama_token, float> tokenBiases;
|
|
1339
|
+
bool useTokenBiases = false;
|
|
1281
1340
|
bool use_repeat_penalty = false;
|
|
1282
1341
|
|
|
1283
1342
|
AddonContextSampleTokenWorker(const Napi::CallbackInfo& info, AddonContext* ctx)
|
|
@@ -1322,6 +1381,19 @@ class AddonContextSampleTokenWorker : public Napi::AsyncWorker {
|
|
|
1322
1381
|
use_repeat_penalty = true;
|
|
1323
1382
|
}
|
|
1324
1383
|
|
|
1384
|
+
if (options.Has("tokenBiasKeys") && options.Has("tokenBiasValues")) {
|
|
1385
|
+
Napi::Uint32Array tokenBiasKeys = options.Get("tokenBiasKeys").As<Napi::Uint32Array>();
|
|
1386
|
+
Napi::Float32Array tokenBiasValues = options.Get("tokenBiasValues").As<Napi::Float32Array>();
|
|
1387
|
+
|
|
1388
|
+
if (tokenBiasKeys.ElementLength() == tokenBiasValues.ElementLength()) {
|
|
1389
|
+
for (size_t i = 0; i < tokenBiasKeys.ElementLength(); i++) {
|
|
1390
|
+
tokenBiases[static_cast<llama_token>(tokenBiasKeys[i])] = tokenBiasValues[i];
|
|
1391
|
+
}
|
|
1392
|
+
|
|
1393
|
+
useTokenBiases = true;
|
|
1394
|
+
}
|
|
1395
|
+
}
|
|
1396
|
+
|
|
1325
1397
|
if (options.Has("repeatPenaltyPresencePenalty")) {
|
|
1326
1398
|
repeat_penalty_presence_penalty = options.Get("repeatPenaltyPresencePenalty").As<Napi::Number>().FloatValue();
|
|
1327
1399
|
}
|
|
@@ -1370,18 +1442,33 @@ class AddonContextSampleTokenWorker : public Napi::AsyncWorker {
|
|
|
1370
1442
|
// Select the best prediction.
|
|
1371
1443
|
auto logits = llama_get_logits_ith(ctx->ctx, batchLogitIndex);
|
|
1372
1444
|
auto n_vocab = llama_n_vocab(ctx->model->model);
|
|
1445
|
+
auto eos_token = llama_token_eos(ctx->model->model);
|
|
1373
1446
|
|
|
1374
1447
|
std::vector<llama_token_data> candidates;
|
|
1375
1448
|
candidates.reserve(n_vocab);
|
|
1376
1449
|
|
|
1377
1450
|
for (llama_token token_id = 0; token_id < n_vocab; token_id++) {
|
|
1378
|
-
|
|
1451
|
+
auto logit = logits[token_id];
|
|
1452
|
+
|
|
1453
|
+
if (useTokenBiases) {
|
|
1454
|
+
bool hasTokenBias = tokenBiases.find(token_id) != tokenBiases.end();
|
|
1455
|
+
if (hasTokenBias) {
|
|
1456
|
+
auto logitBias = tokenBiases.at(token_id);
|
|
1457
|
+
if (logitBias == -INFINITY || logitBias < -INFINITY) {
|
|
1458
|
+
if (token_id != eos_token) {
|
|
1459
|
+
logit = -INFINITY;
|
|
1460
|
+
}
|
|
1461
|
+
} else {
|
|
1462
|
+
logit += logitBias;
|
|
1463
|
+
}
|
|
1464
|
+
}
|
|
1465
|
+
}
|
|
1466
|
+
|
|
1467
|
+
candidates.emplace_back(llama_token_data { token_id, logit, 0.0f });
|
|
1379
1468
|
}
|
|
1380
1469
|
|
|
1381
1470
|
llama_token_data_array candidates_p = { candidates.data(), candidates.size(), false };
|
|
1382
1471
|
|
|
1383
|
-
auto eos_token = llama_token_eos(ctx->model->model);
|
|
1384
|
-
|
|
1385
1472
|
if (use_repeat_penalty && !repeat_penalty_tokens.empty()) {
|
|
1386
1473
|
llama_sample_repetition_penalties(
|
|
1387
1474
|
ctx->ctx,
|
|
@@ -1396,6 +1483,13 @@ class AddonContextSampleTokenWorker : public Napi::AsyncWorker {
|
|
|
1396
1483
|
|
|
1397
1484
|
if (use_grammar && (grammar_evaluation_state)->grammar != nullptr) {
|
|
1398
1485
|
llama_sample_grammar(ctx->ctx, &candidates_p, (grammar_evaluation_state)->grammar);
|
|
1486
|
+
|
|
1487
|
+
if ((candidates_p.size == 0 || candidates_p.data[0].logit == -INFINITY) && useTokenBiases) {
|
|
1488
|
+
// logit biases caused grammar sampling to fail, so sampling again without logit biases
|
|
1489
|
+
useTokenBiases = false;
|
|
1490
|
+
SampleToken();
|
|
1491
|
+
return;
|
|
1492
|
+
}
|
|
1399
1493
|
}
|
|
1400
1494
|
|
|
1401
1495
|
if (temperature <= 0) {
|
|
@@ -1444,6 +1538,55 @@ Napi::Value systemInfo(const Napi::CallbackInfo& info) {
|
|
|
1444
1538
|
return Napi::String::From(info.Env(), llama_print_system_info());
|
|
1445
1539
|
}
|
|
1446
1540
|
|
|
1541
|
+
// Returns a JS boolean carrying the result of llama_supports_gpu_offload().
Napi::Value addonGetSupportsGpuOffloading(const Napi::CallbackInfo& info) {
    const bool supportsGpuOffloading = llama_supports_gpu_offload();

    return Napi::Boolean::New(info.Env(), supportsGpuOffloading);
}
|
|
1544
|
+
|
|
1545
|
+
// Returns a JS boolean carrying the result of llama_supports_mmap().
Napi::Value addonGetSupportsMmap(const Napi::CallbackInfo& info) {
    const bool supportsMmap = llama_supports_mmap();

    return Napi::Boolean::New(info.Env(), supportsMmap);
}
|
|
1548
|
+
|
|
1549
|
+
// Returns a JS boolean carrying the result of llama_supports_mlock().
Napi::Value addonGetSupportsMlock(const Napi::CallbackInfo& info) {
    const bool supportsMlock = llama_supports_mlock();

    return Napi::Boolean::New(info.Env(), supportsMlock);
}
|
|
1552
|
+
|
|
1553
|
+
// Returns the ggml block size for the ggml type id passed as the first JS argument.
//
// info[0]: a JS number holding the numeric value of a `ggml_type`.
// Returns: a JS number with the result of ggml_blck_size(), or `undefined` when the
//          id is outside the range [0, GGML_TYPE_COUNT).
Napi::Value addonGetBlockSizeForGgmlType(const Napi::CallbackInfo& info) {
    const int ggmlType = info[0].As<Napi::Number>().Int32Value();

    // GGML_TYPE_COUNT is the enum's sentinel, not a real type, so it must be rejected too
    if (ggmlType < 0 || ggmlType >= GGML_TYPE_COUNT) {
        return info.Env().Undefined();
    }

    const auto blockSize = ggml_blck_size(static_cast<ggml_type>(ggmlType));

    return Napi::Number::New(info.Env(), blockSize);
}
|
|
1564
|
+
|
|
1565
|
+
// Returns the ggml type size for the ggml type id passed as the first JS argument.
//
// info[0]: a JS number holding the numeric value of a `ggml_type`.
// Returns: a JS number with the result of ggml_type_size(), or `undefined` when the
//          id is outside the range [0, GGML_TYPE_COUNT).
Napi::Value addonGetTypeSizeForGgmlType(const Napi::CallbackInfo& info) {
    const int ggmlType = info[0].As<Napi::Number>().Int32Value();

    // GGML_TYPE_COUNT is the enum's sentinel, not a real type, so it must be rejected too
    if (ggmlType < 0 || ggmlType >= GGML_TYPE_COUNT) {
        return info.Env().Undefined();
    }

    const auto typeSize = ggml_type_size(static_cast<ggml_type>(ggmlType));

    return Napi::Number::New(info.Env(), typeSize);
}
|
|
1576
|
+
|
|
1577
|
+
// Builds a plain JS object exposing ggml / llama.cpp constants and type sizes
// under fixed property names.
Napi::Value addonGetConsts(const Napi::CallbackInfo& info) {
    Napi::Env env = info.Env();
    Napi::Object result = Napi::Object::New(env);

    // ggml limits and element sizes
    result.Set("ggmlMaxDims", Napi::Number::New(env, GGML_MAX_DIMS));
    result.Set("ggmlTypeF16Size", Napi::Number::New(env, ggml_type_size(GGML_TYPE_F16)));
    result.Set("ggmlTypeF32Size", Napi::Number::New(env, ggml_type_size(GGML_TYPE_F32)));
    result.Set("ggmlTensorOverhead", Napi::Number::New(env, ggml_tensor_overhead()));

    // llama.cpp limits and the byte sizes of its position / sequence id types
    result.Set("llamaMaxRngState", Napi::Number::New(env, LLAMA_MAX_RNG_STATE));
    result.Set("llamaPosSize", Napi::Number::New(env, sizeof(llama_pos)));
    result.Set("llamaSeqIdSize", Napi::Number::New(env, sizeof(llama_seq_id)));

    return result;
}
|
|
1589
|
+
|
|
1447
1590
|
int addonGetGgmlLogLevelNumber(ggml_log_level level) {
|
|
1448
1591
|
switch (level) {
|
|
1449
1592
|
case GGML_LOG_LEVEL_ERROR: return 2;
|
|
@@ -1693,9 +1836,16 @@ static void addonFreeLlamaBackend(Napi::Env env, int* data) {
|
|
|
1693
1836
|
Napi::Object registerCallback(Napi::Env env, Napi::Object exports) {
|
|
1694
1837
|
exports.DefineProperties({
|
|
1695
1838
|
Napi::PropertyDescriptor::Function("systemInfo", systemInfo),
|
|
1839
|
+
Napi::PropertyDescriptor::Function("getSupportsGpuOffloading", addonGetSupportsGpuOffloading),
|
|
1840
|
+
Napi::PropertyDescriptor::Function("getSupportsMmap", addonGetSupportsMmap),
|
|
1841
|
+
Napi::PropertyDescriptor::Function("getSupportsMlock", addonGetSupportsMlock),
|
|
1842
|
+
Napi::PropertyDescriptor::Function("getBlockSizeForGgmlType", addonGetBlockSizeForGgmlType),
|
|
1843
|
+
Napi::PropertyDescriptor::Function("getTypeSizeForGgmlType", addonGetTypeSizeForGgmlType),
|
|
1844
|
+
Napi::PropertyDescriptor::Function("getConsts", addonGetConsts),
|
|
1696
1845
|
Napi::PropertyDescriptor::Function("setLogger", setLogger),
|
|
1697
1846
|
Napi::PropertyDescriptor::Function("setLoggerLogLevel", setLoggerLogLevel),
|
|
1698
1847
|
Napi::PropertyDescriptor::Function("getGpuVramInfo", getGpuVramInfo),
|
|
1848
|
+
Napi::PropertyDescriptor::Function("getGpuDeviceInfo", getGpuDeviceInfo),
|
|
1699
1849
|
Napi::PropertyDescriptor::Function("getGpuType", getGpuType),
|
|
1700
1850
|
Napi::PropertyDescriptor::Function("init", addonInit),
|
|
1701
1851
|
Napi::PropertyDescriptor::Function("dispose", addonDispose),
|
package/llama/gitRelease.bundle
CHANGED
|
Binary file
|
|
@@ -1,4 +1,6 @@
|
|
|
1
1
|
#include <stddef.h>
|
|
2
|
+
#include <vector>
|
|
3
|
+
#include <string>
|
|
2
4
|
|
|
3
5
|
#if defined(GPU_INFO_USE_HIPBLAS)
|
|
4
6
|
#include <hip/hip_runtime.h>
|
|
@@ -97,3 +99,22 @@ bool gpuInfoGetTotalCudaDevicesInfo(size_t * total, size_t * used, gpuInfoCudaEr
|
|
|
97
99
|
|
|
98
100
|
return true;
|
|
99
101
|
}
|
|
102
|
+
|
|
103
|
+
void gpuInfoGetCudaDeviceNames(std::vector<std::string> * deviceNames, gpuInfoCudaErrorLogCallback_t errorLogCallback) {
|
|
104
|
+
int deviceCount = gpuInfoGetCudaDeviceCount(errorLogCallback);
|
|
105
|
+
|
|
106
|
+
if (deviceCount < 0) {
|
|
107
|
+
return;
|
|
108
|
+
}
|
|
109
|
+
|
|
110
|
+
for (int i = 0; i < deviceCount; i++) {
|
|
111
|
+
cudaDeviceProp prop;
|
|
112
|
+
auto getDevicePropertiesResult = cudaGetDeviceProperties(&prop, i);
|
|
113
|
+
|
|
114
|
+
if (getDevicePropertiesResult != cudaSuccess) {
|
|
115
|
+
errorLogCallback(cudaGetErrorString(getDevicePropertiesResult));
|
|
116
|
+
} else {
|
|
117
|
+
(*deviceNames).push_back(std::string(prop.name));
|
|
118
|
+
}
|
|
119
|
+
}
|
|
120
|
+
}
|
|
@@ -1,7 +1,10 @@
|
|
|
1
1
|
#pragma once
|
|
2
2
|
|
|
3
3
|
#include <stddef.h>
|
|
4
|
+
#include <vector>
|
|
5
|
+
#include <string>
|
|
4
6
|
|
|
5
7
|
typedef void (*gpuInfoCudaErrorLogCallback_t)(const char* message);
|
|
6
8
|
|
|
7
9
|
bool gpuInfoGetTotalCudaDevicesInfo(size_t * total, size_t * used, gpuInfoCudaErrorLogCallback_t errorLogCallback);
|
|
10
|
+
void gpuInfoGetCudaDeviceNames(std::vector<std::string> * deviceNames, gpuInfoCudaErrorLogCallback_t errorLogCallback);
|
|
@@ -1,5 +1,8 @@
|
|
|
1
1
|
#pragma once
|
|
2
2
|
|
|
3
3
|
#include <stdint.h>
|
|
4
|
+
#include <string>
|
|
5
|
+
#include <vector>
|
|
4
6
|
|
|
5
|
-
void get_metal_gpu_info(uint64_t * total, uint64_t * used);
|
|
7
|
+
void getMetalGpuInfo(uint64_t * total, uint64_t * used);
|
|
8
|
+
void getMetalGpuDeviceNames(std::vector<std::string> * deviceNames);
|
|
@@ -1,7 +1,9 @@
|
|
|
1
1
|
#include <stdint.h>
|
|
2
|
+
#include <vector>
|
|
3
|
+
#include <string>
|
|
2
4
|
#import <Metal/Metal.h>
|
|
3
5
|
|
|
4
|
-
void get_metal_gpu_info(uint64_t * total, uint64_t * used) {
|
|
6
|
+
void getMetalGpuInfo(uint64_t * total, uint64_t * used) {
|
|
5
7
|
id<MTLDevice> device = MTLCreateSystemDefaultDevice();
|
|
6
8
|
|
|
7
9
|
if (device) {
|
|
@@ -15,3 +17,14 @@ void get_metal_gpu_info(uint64_t * total, uint64_t * used) {
|
|
|
15
17
|
[device release];
|
|
16
18
|
device = nil;
|
|
17
19
|
}
|
|
20
|
+
|
|
21
|
+
// Appends the name of every Metal device returned by MTLCopyAllDevices() to `deviceNames`.
//
// deviceNames: output vector; names are appended, existing entries are kept.
void getMetalGpuDeviceNames(std::vector<std::string> * deviceNames) {
    NSArray<id<MTLDevice>> *devices = MTLCopyAllDevices();

    for (id<MTLDevice> device in devices) {
        // device.name is already an NSString, so its UTF-8 bytes can be copied directly
        const char * deviceName = device.name.UTF8String;

        // constructing a std::string from a null pointer is undefined behavior
        if (deviceName != nullptr) {
            deviceNames->push_back(std::string(deviceName));
        }
    }

    // the array is released manually here, matching the original code
    [devices release];
    devices = nil;
}
|
|
@@ -1,10 +1,11 @@
|
|
|
1
1
|
#include <stddef.h>
|
|
2
|
+
#include <vector>
|
|
2
3
|
|
|
3
4
|
#include <vulkan/vulkan.hpp>
|
|
4
5
|
|
|
5
6
|
typedef void (*gpuInfoVulkanWarningLogCallback_t)(const char* message);
|
|
6
7
|
|
|
7
|
-
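bool gpuInfoGetTotalVulkanDevicesInfo(size_t* total, size_t* used, gpuInfoVulkanWarningLogCallback_t warningLogCallback) {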
|
|
8
|
+
static bool enumerateVulkanDevices(size_t* total, size_t* used, bool addDeviceNames, std::vector<std::string> * deviceNames, gpuInfoVulkanWarningLogCallback_t warningLogCallback) {
|
|
8
9
|
vk::ApplicationInfo appInfo("node-llama-cpp GPU info", 1, "llama.cpp", 1, VK_API_VERSION_1_2);
|
|
9
10
|
vk::InstanceCreateInfo createInfo(vk::InstanceCreateFlags(), &appInfo, {}, {});
|
|
10
11
|
vk::Instance instance = vk::createInstance(createInfo);
|
|
@@ -41,8 +42,14 @@ bool gpuInfoGetTotalVulkanDevicesInfo(size_t* total, size_t* used, gpuInfoVulkan
|
|
|
41
42
|
|
|
42
43
|
for (uint32_t i = 0; i < memProps.memoryHeapCount; ++i) {
|
|
43
44
|
if (memProps.memoryHeaps[i].flags & vk::MemoryHeapFlagBits::eDeviceLocal) {
|
|
44
|
-
|
|
45
|
+
const auto size = memProps.memoryHeaps[i].size;
|
|
46
|
+
totalMem += size;
|
|
45
47
|
usedMem += memoryBudgetProperties.heapUsage[i];
|
|
48
|
+
|
|
49
|
+
if (size > 0 && addDeviceNames) {
|
|
50
|
+
(*deviceNames).push_back(std::string(deviceProps.deviceName.data()));
|
|
51
|
+
}
|
|
52
|
+
|
|
46
53
|
break;
|
|
47
54
|
}
|
|
48
55
|
}
|
|
@@ -63,3 +70,14 @@ bool gpuInfoGetTotalVulkanDevicesInfo(size_t* total, size_t* used, gpuInfoVulkan
|
|
|
63
70
|
*used = usedMem;
|
|
64
71
|
return true;
|
|
65
72
|
}
|
|
73
|
+
|
|
74
|
+
bool gpuInfoGetTotalVulkanDevicesInfo(size_t* total, size_t* used, gpuInfoVulkanWarningLogCallback_t warningLogCallback) {
|
|
75
|
+
return enumerateVulkanDevices(total, used, false, nullptr, warningLogCallback);
|
|
76
|
+
}
|
|
77
|
+
|
|
78
|
+
bool gpuInfoGetVulkanDeviceNames(std::vector<std::string> * deviceNames, gpuInfoVulkanWarningLogCallback_t warningLogCallback) {
|
|
79
|
+
size_t vulkanDeviceTotal = 0;
|
|
80
|
+
size_t vulkanDeviceUsed = 0;
|
|
81
|
+
|
|
82
|
+
return enumerateVulkanDevices(&vulkanDeviceTotal, &vulkanDeviceUsed, true, deviceNames, warningLogCallback);
|
|
83
|
+
}
|
|
@@ -1,7 +1,9 @@
|
|
|
1
1
|
#pragma once

#include <stddef.h>
#include <string>
#include <vector>

// Callback that receives warning messages as C strings.
typedef void (*gpuInfoVulkanWarningLogCallback_t)(const char* message);

// Writes memory figures to `total` and `used`; the bool result is the status returned by the implementation.
bool gpuInfoGetTotalVulkanDevicesInfo(size_t* total, size_t* used, gpuInfoVulkanWarningLogCallback_t warningLogCallback);

// Appends Vulkan device names to `deviceNames`; the bool result is the status returned by the implementation.
bool gpuInfoGetVulkanDeviceNames(std::vector<std::string> * deviceNames, gpuInfoVulkanWarningLogCallback_t warningLogCallback);
|
package/llama/grammars/README.md
CHANGED
|
@@ -89,3 +89,13 @@ This guide provides a brief overview. Check out the GBNF files in this directory
|
|
|
89
89
|
```
|
|
90
90
|
./main -m <model> --grammar-file grammars/some-grammar.gbnf -p 'Some prompt'
|
|
91
91
|
```
|
|
92
|
+
|
|
93
|
+
## Troubleshooting
|
|
94
|
+
|
|
95
|
+
Grammars currently have performance gotchas (see https://github.com/ggerganov/llama.cpp/issues/4218).
|
|
96
|
+
|
|
97
|
+
### Efficient optional repetitions
|
|
98
|
+
|
|
99
|
+
A common pattern is to allow repetitions of a pattern `x` up to N times.
|
|
100
|
+
|
|
101
|
+
While semantically correct, the syntax `x? x? x? ... x?` (with N repetitions) will result in extremely slow inference. Instead, you can write `(x (x (x ... (x)?...)?)?)?` (with N-deep nesting).
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
# Toolchain file for building Windows ARM64 binaries with the MSVC x64-hosted ARM64 compiler.
set(CMAKE_SYSTEM_NAME Windows)
set(CMAKE_SYSTEM_PROCESSOR ARM64)

# Look for cl.exe in the Visual Studio installation directories
set(PROGRAMFILES "$ENV{ProgramFiles}")
set(PROGRAMFILES_X86 "$ENV{ProgramFiles\(x86\)}")

# Candidate roots: the environment-provided locations first, then the conventional defaults
set(VS_INSTALL_PATHS
    "${PROGRAMFILES_X86}/Microsoft Visual Studio"
    "${PROGRAMFILES}/Microsoft Visual Studio"
    "C:/Program Files (x86)/Microsoft Visual Studio"
    "C:/Program Files/Microsoft Visual Studio"
)
foreach(PATH IN LISTS VS_INSTALL_PATHS)
    # Skip the search entirely when CL_EXE_PATH was already provided
    if(CL_EXE_PATH)
        break()
    endif()

    # First attempt: a single directory level between the install root and VC/
    file(GLOB_RECURSE FOUND_CL_EXE "${PATH}/*/VC/Tools/MSVC/*/bin/Hostx64/arm64/cl.exe")
    if(FOUND_CL_EXE)
        list(GET FOUND_CL_EXE 0 CL_EXE_PATH)
        break()
    endif()

    # Second attempt: a deeper layout between the install root and VC/
    file(GLOB_RECURSE FOUND_CL_EXE "${PATH}/**/*/VC/Tools/MSVC/*/bin/Hostx64/arm64/cl.exe")
    if(FOUND_CL_EXE)
        list(GET FOUND_CL_EXE 0 CL_EXE_PATH)
        break()
    endif()
endforeach()

if(NOT CL_EXE_PATH)
    message(FATAL_ERROR "cl.exe not found for ARM architecture.")
else()
    # The same cl.exe is used as both the C and the C++ compiler
    set(CMAKE_C_COMPILER "${CL_EXE_PATH}")
    set(CMAKE_CXX_COMPILER "${CL_EXE_PATH}")
endif()
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"buildOptions":{"customCmakeOptions":{},"progressLogs":true,"platform":"linux","platformInfo":{"name":"Ubuntu","version":"22.04"},"arch":"arm64","gpu":false,"llamaCpp":{"repo":"ggerganov/llama.cpp","release":"
|
|
1
|
+
{"buildOptions":{"customCmakeOptions":{},"progressLogs":true,"platform":"linux","platformInfo":{"name":"Ubuntu","version":"22.04"},"arch":"arm64","gpu":false,"llamaCpp":{"repo":"ggerganov/llama.cpp","release":"b2665"}}}
|
|
Binary file
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"buildOptions":{"customCmakeOptions":{},"progressLogs":true,"platform":"linux","platformInfo":{"name":"Ubuntu","version":"22.04"},"arch":"armv7l","gpu":false,"llamaCpp":{"repo":"ggerganov/llama.cpp","release":"
|
|
1
|
+
{"buildOptions":{"customCmakeOptions":{},"progressLogs":true,"platform":"linux","platformInfo":{"name":"Ubuntu","version":"22.04"},"arch":"armv7l","gpu":false,"llamaCpp":{"repo":"ggerganov/llama.cpp","release":"b2665"}}}
|
|
Binary file
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"buildOptions":{"customCmakeOptions":{},"progressLogs":true,"platform":"linux","platformInfo":{"name":"Ubuntu","version":"22.04"},"arch":"x64","gpu":false,"llamaCpp":{"repo":"ggerganov/llama.cpp","release":"
|
|
1
|
+
{"buildOptions":{"customCmakeOptions":{},"progressLogs":true,"platform":"linux","platformInfo":{"name":"Ubuntu","version":"22.04"},"arch":"x64","gpu":false,"llamaCpp":{"repo":"ggerganov/llama.cpp","release":"b2665"}}}
|
|
Binary file
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"buildOptions":{"customCmakeOptions":{},"progressLogs":true,"platform":"linux","platformInfo":{"name":"Ubuntu","version":"22.04"},"arch":"x64","gpu":"cuda","llamaCpp":{"repo":"ggerganov/llama.cpp","release":"
|
|
1
|
+
{"buildOptions":{"customCmakeOptions":{},"progressLogs":true,"platform":"linux","platformInfo":{"name":"Ubuntu","version":"22.04"},"arch":"x64","gpu":"cuda","llamaCpp":{"repo":"ggerganov/llama.cpp","release":"b2665"}}}
|
|
Binary file
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"buildOptions":{"customCmakeOptions":{},"progressLogs":true,"platform":"linux","platformInfo":{"name":"Ubuntu","version":"22.04"},"arch":"x64","gpu":"vulkan","llamaCpp":{"repo":"ggerganov/llama.cpp","release":"
|
|
1
|
+
{"buildOptions":{"customCmakeOptions":{},"progressLogs":true,"platform":"linux","platformInfo":{"name":"Ubuntu","version":"22.04"},"arch":"x64","gpu":"vulkan","llamaCpp":{"repo":"ggerganov/llama.cpp","release":"b2665"}}}
|
|
Binary file
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"buildOptions":{"customCmakeOptions":{},"progressLogs":true,"platform":"mac","platformInfo":{"name":"macOS","version":"21.6.0"},"arch":"arm64","gpu":"metal","llamaCpp":{"repo":"ggerganov/llama.cpp","release":"
|
|
1
|
+
{"buildOptions":{"customCmakeOptions":{},"progressLogs":true,"platform":"mac","platformInfo":{"name":"macOS","version":"21.6.0"},"arch":"arm64","gpu":"metal","llamaCpp":{"repo":"ggerganov/llama.cpp","release":"b2665"}}}
|
|
Binary file
|
|
Binary file
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"buildOptions":{"customCmakeOptions":{},"progressLogs":true,"platform":"mac","platformInfo":{"name":"macOS","version":"21.6.0"},"arch":"x64","gpu":false,"llamaCpp":{"repo":"ggerganov/llama.cpp","release":"
|
|
1
|
+
{"buildOptions":{"customCmakeOptions":{},"progressLogs":true,"platform":"mac","platformInfo":{"name":"macOS","version":"21.6.0"},"arch":"x64","gpu":false,"llamaCpp":{"repo":"ggerganov/llama.cpp","release":"b2665"}}}
|
|
Binary file
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"buildOptions":{"customCmakeOptions":{},"progressLogs":true,"platform":"win","platformInfo":{"name":"Windows","version":"10.0.20348"},"arch":"arm64","gpu":false,"llamaCpp":{"repo":"ggerganov/llama.cpp","release":"b2665"}}}
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"buildOptions":{"customCmakeOptions":{},"progressLogs":true,"platform":"win","platformInfo":{"name":"Windows","version":"10.0.20348"},"arch":"x64","gpu":false,"llamaCpp":{"repo":"ggerganov/llama.cpp","release":"
|
|
1
|
+
{"buildOptions":{"customCmakeOptions":{},"progressLogs":true,"platform":"win","platformInfo":{"name":"Windows","version":"10.0.20348"},"arch":"x64","gpu":false,"llamaCpp":{"repo":"ggerganov/llama.cpp","release":"b2665"}}}
|
|
Binary file
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"buildOptions":{"customCmakeOptions":{},"progressLogs":true,"platform":"win","platformInfo":{"name":"Windows","version":"10.0.20348"},"arch":"x64","gpu":"cuda","llamaCpp":{"repo":"ggerganov/llama.cpp","release":"
|
|
1
|
+
{"buildOptions":{"customCmakeOptions":{},"progressLogs":true,"platform":"win","platformInfo":{"name":"Windows","version":"10.0.20348"},"arch":"x64","gpu":"cuda","llamaCpp":{"repo":"ggerganov/llama.cpp","release":"b2665"}}}
|
|
Binary file
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"buildOptions":{"customCmakeOptions":{},"progressLogs":true,"platform":"win","platformInfo":{"name":"Windows","version":"10.0.20348"},"arch":"x64","gpu":"vulkan","llamaCpp":{"repo":"ggerganov/llama.cpp","release":"
|
|
1
|
+
{"buildOptions":{"customCmakeOptions":{},"progressLogs":true,"platform":"win","platformInfo":{"name":"Windows","version":"10.0.20348"},"arch":"x64","gpu":"vulkan","llamaCpp":{"repo":"ggerganov/llama.cpp","release":"b2665"}}}
|
|
Binary file
|