node-llama-cpp 3.0.0-beta.14 → 3.0.0-beta.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/dist/ChatWrapper.js +4 -0
- package/dist/ChatWrapper.js.map +1 -1
- package/dist/bindings/AddonTypes.d.ts +23 -0
- package/dist/bindings/Llama.d.ts +11 -0
- package/dist/bindings/Llama.js +56 -4
- package/dist/bindings/Llama.js.map +1 -1
- package/dist/bindings/getLlama.d.ts +20 -2
- package/dist/bindings/getLlama.js +15 -5
- package/dist/bindings/getLlama.js.map +1 -1
- package/dist/bindings/types.d.ts +15 -0
- package/dist/bindings/types.js +27 -2
- package/dist/bindings/types.js.map +1 -1
- package/dist/bindings/utils/MemoryOrchestrator.d.ts +21 -0
- package/dist/bindings/utils/MemoryOrchestrator.js +49 -0
- package/dist/bindings/utils/MemoryOrchestrator.js.map +1 -0
- package/dist/bindings/utils/cloneLlamaCppRepo.d.ts +1 -1
- package/dist/bindings/utils/cloneLlamaCppRepo.js +26 -25
- package/dist/bindings/utils/cloneLlamaCppRepo.js.map +1 -1
- package/dist/bindings/utils/compileLLamaCpp.js +2 -2
- package/dist/bindings/utils/compileLLamaCpp.js.map +1 -1
- package/dist/bindings/utils/getLlamaWithoutBackend.d.ts +5 -0
- package/dist/bindings/utils/getLlamaWithoutBackend.js +27 -0
- package/dist/bindings/utils/getLlamaWithoutBackend.js.map +1 -0
- package/dist/bindings/utils/resolveCustomCmakeOptions.js +2 -2
- package/dist/bindings/utils/resolveCustomCmakeOptions.js.map +1 -1
- package/dist/chatWrappers/AlpacaChatWrapper.d.ts +2 -1
- package/dist/chatWrappers/AlpacaChatWrapper.js +9 -2
- package/dist/chatWrappers/AlpacaChatWrapper.js.map +1 -1
- package/dist/chatWrappers/ChatMLChatWrapper.js +12 -10
- package/dist/chatWrappers/ChatMLChatWrapper.js.map +1 -1
- package/dist/chatWrappers/FalconChatWrapper.d.ts +2 -1
- package/dist/chatWrappers/FalconChatWrapper.js +28 -11
- package/dist/chatWrappers/FalconChatWrapper.js.map +1 -1
- package/dist/chatWrappers/FunctionaryChatWrapper.js +59 -45
- package/dist/chatWrappers/FunctionaryChatWrapper.js.map +1 -1
- package/dist/chatWrappers/GemmaChatWrapper.js +9 -7
- package/dist/chatWrappers/GemmaChatWrapper.js.map +1 -1
- package/dist/chatWrappers/GeneralChatWrapper.d.ts +2 -1
- package/dist/chatWrappers/GeneralChatWrapper.js +35 -12
- package/dist/chatWrappers/GeneralChatWrapper.js.map +1 -1
- package/dist/chatWrappers/LlamaChatWrapper.d.ts +7 -0
- package/dist/chatWrappers/LlamaChatWrapper.js +26 -8
- package/dist/chatWrappers/LlamaChatWrapper.js.map +1 -1
- package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.d.ts +73 -0
- package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.js +355 -0
- package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.js.map +1 -0
- package/dist/{TemplateChatWrapper.d.ts → chatWrappers/generic/TemplateChatWrapper.d.ts} +6 -9
- package/dist/{TemplateChatWrapper.js → chatWrappers/generic/TemplateChatWrapper.js} +31 -69
- package/dist/chatWrappers/generic/TemplateChatWrapper.js.map +1 -0
- package/dist/chatWrappers/generic/utils/chatHistoryFunctionCallMessageTemplate.d.ts +33 -0
- package/dist/chatWrappers/generic/utils/chatHistoryFunctionCallMessageTemplate.js +45 -0
- package/dist/chatWrappers/generic/utils/chatHistoryFunctionCallMessageTemplate.js.map +1 -0
- package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.d.ts +4 -0
- package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.js +206 -0
- package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.js.map +1 -0
- package/dist/chatWrappers/utils/resolveChatWrapper.d.ts +67 -0
- package/dist/chatWrappers/utils/resolveChatWrapper.js +208 -0
- package/dist/chatWrappers/utils/resolveChatWrapper.js.map +1 -0
- package/dist/cli/cli.js +1 -1
- package/dist/cli/cli.js.map +1 -1
- package/dist/cli/commands/BuildCommand.js +1 -1
- package/dist/cli/commands/BuildCommand.js.map +1 -1
- package/dist/cli/commands/ChatCommand.d.ts +9 -5
- package/dist/cli/commands/ChatCommand.js +203 -118
- package/dist/cli/commands/ChatCommand.js.map +1 -1
- package/dist/cli/commands/ClearCommand.d.ts +1 -1
- package/dist/cli/commands/ClearCommand.js +5 -5
- package/dist/cli/commands/ClearCommand.js.map +1 -1
- package/dist/cli/commands/CompleteCommand.d.ts +5 -3
- package/dist/cli/commands/CompleteCommand.js +136 -85
- package/dist/cli/commands/CompleteCommand.js.map +1 -1
- package/dist/cli/commands/DebugCommand.js +4 -4
- package/dist/cli/commands/DownloadCommand.js +3 -4
- package/dist/cli/commands/DownloadCommand.js.map +1 -1
- package/dist/cli/commands/InfillCommand.d.ts +5 -3
- package/dist/cli/commands/InfillCommand.js +138 -89
- package/dist/cli/commands/InfillCommand.js.map +1 -1
- package/dist/cli/commands/{InspectCommand.d.ts → inspect/InspectCommand.d.ts} +1 -4
- package/dist/cli/commands/inspect/InspectCommand.js +17 -0
- package/dist/cli/commands/inspect/InspectCommand.js.map +1 -0
- package/dist/cli/commands/inspect/commands/InspectGgufCommand.d.ts +11 -0
- package/dist/cli/commands/inspect/commands/InspectGgufCommand.js +121 -0
- package/dist/cli/commands/inspect/commands/InspectGgufCommand.js.map +1 -0
- package/dist/cli/commands/inspect/commands/InspectGpuCommand.d.ts +4 -0
- package/dist/cli/commands/inspect/commands/InspectGpuCommand.js +136 -0
- package/dist/cli/commands/inspect/commands/InspectGpuCommand.js.map +1 -0
- package/dist/cli/commands/inspect/commands/InspectMeasureCommand.d.ts +15 -0
- package/dist/cli/commands/inspect/commands/InspectMeasureCommand.js +579 -0
- package/dist/cli/commands/inspect/commands/InspectMeasureCommand.js.map +1 -0
- package/dist/cli/recommendedModels.d.ts +2 -0
- package/dist/cli/recommendedModels.js +281 -0
- package/dist/cli/recommendedModels.js.map +1 -0
- package/dist/cli/utils/ConsoleInteraction.d.ts +23 -0
- package/dist/cli/utils/ConsoleInteraction.js +122 -0
- package/dist/cli/utils/ConsoleInteraction.js.map +1 -0
- package/dist/cli/utils/ConsoleTable.d.ts +23 -0
- package/dist/cli/utils/ConsoleTable.js +86 -0
- package/dist/cli/utils/ConsoleTable.js.map +1 -0
- package/dist/cli/utils/basicChooseFromListConsoleInteraction.d.ts +13 -0
- package/dist/cli/utils/basicChooseFromListConsoleInteraction.js +111 -0
- package/dist/cli/utils/basicChooseFromListConsoleInteraction.js.map +1 -0
- package/dist/cli/utils/consolePromptQuestion.d.ts +5 -0
- package/dist/cli/utils/consolePromptQuestion.js +80 -0
- package/dist/cli/utils/consolePromptQuestion.js.map +1 -0
- package/dist/cli/utils/getReadablePath.d.ts +1 -0
- package/dist/cli/utils/getReadablePath.js +14 -0
- package/dist/cli/utils/getReadablePath.js.map +1 -0
- package/dist/cli/utils/printCommonInfoLines.d.ts +9 -0
- package/dist/cli/utils/printCommonInfoLines.js +70 -0
- package/dist/cli/utils/printCommonInfoLines.js.map +1 -0
- package/dist/cli/utils/printInfoLine.d.ts +12 -0
- package/dist/cli/utils/printInfoLine.js +54 -0
- package/dist/cli/utils/printInfoLine.js.map +1 -0
- package/dist/cli/utils/resolveCommandGgufPath.d.ts +2 -0
- package/dist/cli/utils/resolveCommandGgufPath.js +494 -0
- package/dist/cli/utils/resolveCommandGgufPath.js.map +1 -0
- package/dist/cli/utils/resolveHeaderFlag.d.ts +1 -0
- package/dist/cli/utils/resolveHeaderFlag.js +21 -0
- package/dist/cli/utils/resolveHeaderFlag.js.map +1 -0
- package/dist/cli/utils/resolveModelRecommendationFileOptions.d.ts +19 -0
- package/dist/cli/utils/resolveModelRecommendationFileOptions.js +7 -0
- package/dist/cli/utils/resolveModelRecommendationFileOptions.js.map +1 -0
- package/dist/cli/utils/splitAnsiToLines.d.ts +1 -0
- package/dist/cli/utils/splitAnsiToLines.js +17 -0
- package/dist/cli/utils/splitAnsiToLines.js.map +1 -0
- package/dist/config.d.ts +5 -0
- package/dist/config.js +11 -2
- package/dist/config.js.map +1 -1
- package/dist/consts.d.ts +2 -0
- package/dist/consts.js +8 -0
- package/dist/consts.js.map +1 -1
- package/dist/evaluator/LlamaChat/LlamaChat.d.ts +8 -1
- package/dist/evaluator/LlamaChat/LlamaChat.js +15 -6
- package/dist/evaluator/LlamaChat/LlamaChat.js.map +1 -1
- package/dist/evaluator/LlamaChatSession/LlamaChatSession.d.ts +9 -2
- package/dist/evaluator/LlamaChatSession/LlamaChatSession.js +5 -3
- package/dist/evaluator/LlamaChatSession/LlamaChatSession.js.map +1 -1
- package/dist/evaluator/LlamaCompletion.d.ts +9 -2
- package/dist/evaluator/LlamaCompletion.js +11 -6
- package/dist/evaluator/LlamaCompletion.js.map +1 -1
- package/dist/evaluator/LlamaContext/LlamaContext.d.ts +30 -3
- package/dist/evaluator/LlamaContext/LlamaContext.js +227 -102
- package/dist/evaluator/LlamaContext/LlamaContext.js.map +1 -1
- package/dist/evaluator/LlamaContext/types.d.ts +57 -6
- package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies/firstInFirstOutStrategy.js.map +1 -0
- package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies/maximumParallelismStrategy.js.map +1 -0
- package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizationStrategy.d.ts +2 -0
- package/dist/evaluator/LlamaContext/utils/{resolveBatchItemsPrioritizingStrategy.js → resolveBatchItemsPrioritizationStrategy.js} +4 -4
- package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizationStrategy.js.map +1 -0
- package/dist/evaluator/LlamaEmbeddingContext.d.ts +23 -2
- package/dist/evaluator/LlamaEmbeddingContext.js +4 -5
- package/dist/evaluator/LlamaEmbeddingContext.js.map +1 -1
- package/dist/evaluator/LlamaGrammar.d.ts +3 -2
- package/dist/evaluator/LlamaGrammar.js +3 -2
- package/dist/evaluator/LlamaGrammar.js.map +1 -1
- package/dist/evaluator/LlamaModel.d.ts +56 -6
- package/dist/evaluator/LlamaModel.js +99 -7
- package/dist/evaluator/LlamaModel.js.map +1 -1
- package/dist/evaluator/TokenBias.d.ts +22 -0
- package/dist/evaluator/TokenBias.js +33 -0
- package/dist/evaluator/TokenBias.js.map +1 -0
- package/dist/evaluator/TokenMeter.d.ts +54 -0
- package/dist/evaluator/TokenMeter.js +86 -0
- package/dist/evaluator/TokenMeter.js.map +1 -0
- package/dist/gguf/consts.d.ts +3 -0
- package/dist/gguf/consts.js +8 -0
- package/dist/gguf/consts.js.map +1 -0
- package/dist/gguf/errors/InvalidGgufMagicError.d.ts +3 -0
- package/dist/gguf/errors/InvalidGgufMagicError.js +6 -0
- package/dist/gguf/errors/InvalidGgufMagicError.js.map +1 -0
- package/dist/gguf/errors/UnsupportedGgufValueTypeError.d.ts +4 -0
- package/dist/gguf/errors/UnsupportedGgufValueTypeError.js +9 -0
- package/dist/gguf/errors/UnsupportedGgufValueTypeError.js.map +1 -0
- package/dist/gguf/fileReaders/GgufFileReader.d.ts +33 -0
- package/dist/gguf/fileReaders/GgufFileReader.js +76 -0
- package/dist/gguf/fileReaders/GgufFileReader.js.map +1 -0
- package/dist/gguf/fileReaders/GgufFsFileReader.d.ts +17 -0
- package/dist/gguf/fileReaders/GgufFsFileReader.js +45 -0
- package/dist/gguf/fileReaders/GgufFsFileReader.js.map +1 -0
- package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.d.ts +22 -0
- package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.js +63 -0
- package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.js.map +1 -0
- package/dist/gguf/insights/GgufInsights.d.ts +42 -0
- package/dist/gguf/insights/GgufInsights.js +361 -0
- package/dist/gguf/insights/GgufInsights.js.map +1 -0
- package/dist/gguf/insights/GgufInsightsConfigurationResolver.d.ts +87 -0
- package/dist/gguf/insights/GgufInsightsConfigurationResolver.js +136 -0
- package/dist/gguf/insights/GgufInsightsConfigurationResolver.js.map +1 -0
- package/dist/gguf/insights/utils/resolveContextContextSizeOption.d.ts +18 -0
- package/dist/gguf/insights/utils/resolveContextContextSizeOption.js +52 -0
- package/dist/gguf/insights/utils/resolveContextContextSizeOption.js.map +1 -0
- package/dist/gguf/insights/utils/resolveModelGpuLayersOption.d.ts +14 -0
- package/dist/gguf/insights/utils/resolveModelGpuLayersOption.js +177 -0
- package/dist/gguf/insights/utils/resolveModelGpuLayersOption.js.map +1 -0
- package/dist/gguf/insights/utils/scoreLevels.d.ts +5 -0
- package/dist/gguf/insights/utils/scoreLevels.js +16 -0
- package/dist/gguf/insights/utils/scoreLevels.js.map +1 -0
- package/dist/gguf/parser/GgufV2Parser.d.ts +19 -0
- package/dist/gguf/parser/GgufV2Parser.js +115 -0
- package/dist/gguf/parser/GgufV2Parser.js.map +1 -0
- package/dist/gguf/parser/GgufV3Parser.d.ts +3 -0
- package/dist/gguf/parser/GgufV3Parser.js +4 -0
- package/dist/gguf/parser/GgufV3Parser.js.map +1 -0
- package/dist/gguf/parser/parseGguf.d.ts +8 -0
- package/dist/gguf/parser/parseGguf.js +58 -0
- package/dist/gguf/parser/parseGguf.js.map +1 -0
- package/dist/gguf/readGgufFileInfo.d.ts +30 -0
- package/dist/gguf/readGgufFileInfo.js +38 -0
- package/dist/gguf/readGgufFileInfo.js.map +1 -0
- package/dist/gguf/types/GgufFileInfoTypes.d.ts +52 -0
- package/dist/gguf/types/GgufFileInfoTypes.js +18 -0
- package/dist/gguf/types/GgufFileInfoTypes.js.map +1 -0
- package/dist/gguf/types/GgufMetadataTypes.d.ts +330 -0
- package/dist/gguf/types/GgufMetadataTypes.js +86 -0
- package/dist/gguf/types/GgufMetadataTypes.js.map +1 -0
- package/dist/gguf/types/GgufTensorInfoTypes.d.ts +37 -0
- package/dist/gguf/types/GgufTensorInfoTypes.js +33 -0
- package/dist/gguf/types/GgufTensorInfoTypes.js.map +1 -0
- package/dist/gguf/utils/GgufReadOffset.d.ts +6 -0
- package/dist/gguf/utils/GgufReadOffset.js +18 -0
- package/dist/gguf/utils/GgufReadOffset.js.map +1 -0
- package/dist/gguf/utils/convertMetadataKeyValueRecordToNestedObject.d.ts +5 -0
- package/dist/gguf/utils/convertMetadataKeyValueRecordToNestedObject.js +38 -0
- package/dist/gguf/utils/convertMetadataKeyValueRecordToNestedObject.js.map +1 -0
- package/dist/gguf/utils/getGgufFileTypeName.d.ts +4 -0
- package/dist/gguf/utils/getGgufFileTypeName.js +13 -0
- package/dist/gguf/utils/getGgufFileTypeName.js.map +1 -0
- package/dist/gguf/utils/getGgufMetadataArchitectureData.d.ts +3 -0
- package/dist/gguf/utils/getGgufMetadataArchitectureData.js +4 -0
- package/dist/gguf/utils/getGgufMetadataArchitectureData.js.map +1 -0
- package/dist/gguf/utils/normalizeGgufDownloadUrl.d.ts +1 -0
- package/dist/gguf/utils/normalizeGgufDownloadUrl.js +16 -0
- package/dist/gguf/utils/normalizeGgufDownloadUrl.js.map +1 -0
- package/dist/index.d.ts +14 -7
- package/dist/index.js +12 -6
- package/dist/index.js.map +1 -1
- package/dist/types.d.ts +1 -1
- package/dist/utils/InsufficientMemoryError.d.ts +3 -0
- package/dist/utils/InsufficientMemoryError.js +6 -0
- package/dist/utils/InsufficientMemoryError.js.map +1 -0
- package/dist/utils/LlamaText.d.ts +25 -10
- package/dist/utils/LlamaText.js +205 -23
- package/dist/utils/LlamaText.js.map +1 -1
- package/dist/utils/StopGenerationDetector.js +3 -1
- package/dist/utils/StopGenerationDetector.js.map +1 -1
- package/dist/utils/findBestOption.d.ts +4 -0
- package/dist/utils/findBestOption.js +15 -0
- package/dist/utils/findBestOption.js.map +1 -0
- package/dist/utils/getConsoleLogPrefix.js +1 -1
- package/dist/utils/getQueuedTokensBeforeStopTrigger.js +3 -3
- package/dist/utils/getQueuedTokensBeforeStopTrigger.js.map +1 -1
- package/dist/utils/getReadableContextSize.d.ts +1 -0
- package/dist/utils/getReadableContextSize.js +7 -0
- package/dist/utils/getReadableContextSize.js.map +1 -0
- package/dist/utils/gitReleaseBundles.js +68 -1
- package/dist/utils/gitReleaseBundles.js.map +1 -1
- package/dist/utils/isToken.d.ts +2 -0
- package/dist/utils/isToken.js +4 -0
- package/dist/utils/isToken.js.map +1 -0
- package/dist/utils/isUrl.d.ts +1 -0
- package/dist/utils/isUrl.js +15 -0
- package/dist/utils/isUrl.js.map +1 -0
- package/dist/utils/mergeUnionTypes.d.ts +4 -0
- package/dist/utils/parseModelFileName.d.ts +1 -0
- package/dist/utils/parseModelFileName.js +6 -1
- package/dist/utils/parseModelFileName.js.map +1 -1
- package/dist/utils/prettyPrintObject.d.ts +10 -1
- package/dist/utils/prettyPrintObject.js +57 -13
- package/dist/utils/prettyPrintObject.js.map +1 -1
- package/dist/utils/spawnCommand.js.map +1 -1
- package/dist/utils/tokenizeInput.d.ts +1 -1
- package/dist/utils/tokenizeInput.js +6 -3
- package/dist/utils/tokenizeInput.js.map +1 -1
- package/dist/utils/withOra.d.ts +2 -0
- package/dist/utils/withOra.js +14 -8
- package/dist/utils/withOra.js.map +1 -1
- package/dist/utils/withProgressLog.d.ts +23 -0
- package/dist/utils/withProgressLog.js +211 -0
- package/dist/utils/withProgressLog.js.map +1 -0
- package/dist/utils/withStatusLogs.js +1 -1
- package/dist/utils/withStatusLogs.js.map +1 -1
- package/llama/CMakeLists.txt +5 -5
- package/llama/addon.cpp +159 -9
- package/llama/binariesGithubRelease.json +1 -1
- package/llama/gitRelease.bundle +0 -0
- package/llama/gpuInfo/cuda-gpu-info.cu +21 -0
- package/llama/gpuInfo/cuda-gpu-info.h +3 -0
- package/llama/gpuInfo/metal-gpu-info.h +4 -1
- package/llama/gpuInfo/metal-gpu-info.mm +14 -1
- package/llama/gpuInfo/vulkan-gpu-info.cpp +20 -2
- package/llama/gpuInfo/vulkan-gpu-info.h +2 -0
- package/llama/grammars/README.md +10 -0
- package/llama/llama.cpp.info.json +1 -1
- package/llama/toolchains/win32.host-x64.target-arm64.cmake +41 -0
- package/llamaBins/linux-arm64/_nlcBuildMetadata.json +1 -1
- package/llamaBins/linux-arm64/llama-addon.node +0 -0
- package/llamaBins/linux-armv7l/_nlcBuildMetadata.json +1 -1
- package/llamaBins/linux-armv7l/llama-addon.node +0 -0
- package/llamaBins/linux-x64/_nlcBuildMetadata.json +1 -1
- package/llamaBins/linux-x64/llama-addon.node +0 -0
- package/llamaBins/linux-x64-cuda/_nlcBuildMetadata.json +1 -1
- package/llamaBins/linux-x64-cuda/llama-addon.node +0 -0
- package/llamaBins/linux-x64-vulkan/_nlcBuildMetadata.json +1 -1
- package/llamaBins/linux-x64-vulkan/llama-addon.node +0 -0
- package/llamaBins/mac-arm64-metal/_nlcBuildMetadata.json +1 -1
- package/llamaBins/mac-arm64-metal/default.metallib +0 -0
- package/llamaBins/mac-arm64-metal/llama-addon.node +0 -0
- package/llamaBins/mac-x64/_nlcBuildMetadata.json +1 -1
- package/llamaBins/mac-x64/llama-addon.node +0 -0
- package/llamaBins/win-arm64/_nlcBuildMetadata.json +1 -0
- package/llamaBins/win-arm64/llama-addon.exp +0 -0
- package/llamaBins/win-arm64/llama-addon.lib +0 -0
- package/llamaBins/win-arm64/llama-addon.node +0 -0
- package/llamaBins/win-x64/_nlcBuildMetadata.json +1 -1
- package/llamaBins/win-x64/llama-addon.node +0 -0
- package/llamaBins/win-x64-cuda/_nlcBuildMetadata.json +1 -1
- package/llamaBins/win-x64-cuda/llama-addon.node +0 -0
- package/llamaBins/win-x64-vulkan/_nlcBuildMetadata.json +1 -1
- package/llamaBins/win-x64-vulkan/llama-addon.node +0 -0
- package/package.json +15 -12
- package/dist/TemplateChatWrapper.js.map +0 -1
- package/dist/bindings/utils/resolveChatWrapperBasedOnWrapperTypeName.d.ts +0 -33
- package/dist/bindings/utils/resolveChatWrapperBasedOnWrapperTypeName.js +0 -49
- package/dist/bindings/utils/resolveChatWrapperBasedOnWrapperTypeName.js.map +0 -1
- package/dist/chatWrappers/resolveChatWrapperBasedOnModel.d.ts +0 -13
- package/dist/chatWrappers/resolveChatWrapperBasedOnModel.js +0 -63
- package/dist/chatWrappers/resolveChatWrapperBasedOnModel.js.map +0 -1
- package/dist/cli/commands/InspectCommand.js +0 -113
- package/dist/cli/commands/InspectCommand.js.map +0 -1
- package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/firstInFirstOutStrategy.js.map +0 -1
- package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/maximumParallelismStrategy.js.map +0 -1
- package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizingStrategy.d.ts +0 -2
- package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizingStrategy.js.map +0 -1
- package/dist/gguf/GGUFInsights.d.ts +0 -28
- package/dist/gguf/GGUFInsights.js +0 -58
- package/dist/gguf/GGUFInsights.js.map +0 -1
- package/dist/gguf/GGUFMetadata.d.ts +0 -19
- package/dist/gguf/GGUFMetadata.js +0 -38
- package/dist/gguf/GGUFMetadata.js.map +0 -1
- package/dist/gguf/errors/InvalidGGUFMagicError.d.ts +0 -3
- package/dist/gguf/errors/InvalidGGUFMagicError.js +0 -6
- package/dist/gguf/errors/InvalidGGUFMagicError.js.map +0 -1
- package/dist/gguf/errors/MetadataNotParsedYetError.d.ts +0 -3
- package/dist/gguf/errors/MetadataNotParsedYetError.js +0 -6
- package/dist/gguf/errors/MetadataNotParsedYetError.js.map +0 -1
- package/dist/gguf/errors/MissingNodeLlamaError.d.ts +0 -3
- package/dist/gguf/errors/MissingNodeLlamaError.js +0 -6
- package/dist/gguf/errors/MissingNodeLlamaError.js.map +0 -1
- package/dist/gguf/errors/ModelScore/NotEnoughVRamError.d.ts +0 -5
- package/dist/gguf/errors/ModelScore/NotEnoughVRamError.js +0 -11
- package/dist/gguf/errors/ModelScore/NotEnoughVRamError.js.map +0 -1
- package/dist/gguf/errors/UnsupportedMetadataTypeError.d.ts +0 -4
- package/dist/gguf/errors/UnsupportedMetadataTypeError.js +0 -8
- package/dist/gguf/errors/UnsupportedMetadataTypeError.js.map +0 -1
- package/dist/gguf/ggufParser/GGUFParser.d.ts +0 -18
- package/dist/gguf/ggufParser/GGUFParser.js +0 -123
- package/dist/gguf/ggufParser/GGUFParser.js.map +0 -1
- package/dist/gguf/ggufParser/GGUFTypes.d.ts +0 -257
- package/dist/gguf/ggufParser/GGUFTypes.js +0 -2
- package/dist/gguf/ggufParser/GGUFTypes.js.map +0 -1
- package/dist/gguf/ggufParser/checkArchitecture.d.ts +0 -14
- package/dist/gguf/ggufParser/checkArchitecture.js +0 -74
- package/dist/gguf/ggufParser/checkArchitecture.js.map +0 -1
- package/dist/gguf/ggufParser/stream/GGUFBaseStream.d.ts +0 -38
- package/dist/gguf/ggufParser/stream/GGUFBaseStream.js +0 -83
- package/dist/gguf/ggufParser/stream/GGUFBaseStream.js.map +0 -1
- package/dist/gguf/ggufParser/stream/GGUFFetchStream.d.ts +0 -14
- package/dist/gguf/ggufParser/stream/GGUFFetchStream.js +0 -35
- package/dist/gguf/ggufParser/stream/GGUFFetchStream.js.map +0 -1
- package/dist/gguf/ggufParser/stream/GGUFReadStream.d.ts +0 -15
- package/dist/gguf/ggufParser/stream/GGUFReadStream.js +0 -40
- package/dist/gguf/ggufParser/stream/GGUFReadStream.js.map +0 -1
- package/dist/utils/parseModelTypeDescription.d.ts +0 -6
- package/dist/utils/parseModelTypeDescription.js +0 -9
- package/dist/utils/parseModelTypeDescription.js.map +0 -1
- package/dist/utils/resolveChatWrapper.d.ts +0 -4
- package/dist/utils/resolveChatWrapper.js +0 -16
- package/dist/utils/resolveChatWrapper.js.map +0 -1
- /package/dist/evaluator/LlamaContext/utils/{batchItemsPrioritizingStrategies → batchItemsPrioritizationStrategies}/firstInFirstOutStrategy.d.ts +0 -0
- /package/dist/evaluator/LlamaContext/utils/{batchItemsPrioritizingStrategies → batchItemsPrioritizationStrategies}/firstInFirstOutStrategy.js +0 -0
- /package/dist/evaluator/LlamaContext/utils/{batchItemsPrioritizingStrategies → batchItemsPrioritizationStrategies}/maximumParallelismStrategy.d.ts +0 -0
- /package/dist/evaluator/LlamaContext/utils/{batchItemsPrioritizingStrategies → batchItemsPrioritizationStrategies}/maximumParallelismStrategy.js +0 -0
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
import { EventRelay } from "lifecycle-utils";
|
|
2
2
|
import { Token } from "../../types.js";
|
|
3
3
|
import { LlamaGrammarEvaluationState } from "../LlamaGrammarEvaluationState.js";
|
|
4
|
+
import { TokenMeter } from "../TokenMeter.js";
|
|
5
|
+
import { TokenBias } from "../TokenBias.js";
|
|
4
6
|
import { ContextShiftOptions, ContextTokensDeleteRange, EvaluationPriority, LlamaContextSequenceRepeatPenalty } from "./types.js";
|
|
5
7
|
import type { LlamaModel } from "../LlamaModel.js";
|
|
6
8
|
export declare class LlamaContext {
|
|
@@ -13,6 +15,11 @@ export declare class LlamaContext {
|
|
|
13
15
|
get model(): LlamaModel;
|
|
14
16
|
get contextSize(): number;
|
|
15
17
|
get batchSize(): number;
|
|
18
|
+
/**
|
|
19
|
+
* The actual size of the state in the memory in bytes.
|
|
20
|
+
* This value is provided by `llama.cpp` and doesn't include all the memory overhead of the context.
|
|
21
|
+
*/
|
|
22
|
+
get stateSize(): number;
|
|
16
23
|
getAllocatedContextSize(): number;
|
|
17
24
|
get totalSequences(): number;
|
|
18
25
|
get sequencesLeft(): number;
|
|
@@ -21,10 +28,15 @@ export declare class LlamaContext {
|
|
|
21
28
|
* When there are no sequences left, this method will throw an error.
|
|
22
29
|
* @param [options]
|
|
23
30
|
*/
|
|
24
|
-
getSequence({ contextShift: { size: contextShiftSize, strategy: contextShiftStrategy } }?: {
|
|
31
|
+
getSequence({ contextShift: { size: contextShiftSize, strategy: contextShiftStrategy }, _tokenMeter }?: {
|
|
25
32
|
contextShift?: ContextShiftOptions;
|
|
26
33
|
}): LlamaContextSequence;
|
|
27
34
|
dispatchPendingBatch(): void;
|
|
35
|
+
/**
|
|
36
|
+
* Print the timings of token evaluation since the last print for this context.
|
|
37
|
+
* > **Note:** it prints on the `LlamaLogLevel.info` level, so if you set the level of your `Llama` instance higher than that,
|
|
38
|
+
* it won't print anything.
|
|
39
|
+
*/
|
|
28
40
|
printTimings(): Promise<void>;
|
|
29
41
|
}
|
|
30
42
|
export declare class LlamaContextSequence {
|
|
@@ -38,6 +50,7 @@ export declare class LlamaContextSequence {
|
|
|
38
50
|
get model(): LlamaModel;
|
|
39
51
|
get nextTokenIndex(): number;
|
|
40
52
|
get contextTokens(): Token[];
|
|
53
|
+
get tokenMeter(): TokenMeter;
|
|
41
54
|
get isLoadedToMemory(): boolean;
|
|
42
55
|
compareContextTokens(tokens: Token[]): {
|
|
43
56
|
firstDifferentIndex: number;
|
|
@@ -49,7 +62,7 @@ export declare class LlamaContextSequence {
|
|
|
49
62
|
clearHistory(): Promise<void>;
|
|
50
63
|
/**
|
|
51
64
|
* Erase context tokens in the provided ranges to free up space for new tokens to be generated.
|
|
52
|
-
*
|
|
65
|
+
* The start of each range is inclusive, and the end of each range is exclusive.
|
|
53
66
|
* For example, the range `{start: 0, end: 1}` will remove the token at the `0` index only.
|
|
54
67
|
*/
|
|
55
68
|
eraseContextTokenRanges(ranges: ContextTokensDeleteRange[]): Promise<void>;
|
|
@@ -57,13 +70,19 @@ export declare class LlamaContextSequence {
|
|
|
57
70
|
* @param tokens
|
|
58
71
|
* @param [options]
|
|
59
72
|
*/
|
|
60
|
-
evaluate(tokens: Token[], { temperature, minP, topK, topP, grammarEvaluationState, repeatPenalty, evaluationPriority, contextShift: { size: contextShiftSize, strategy: contextShiftStrategy }, yieldEosToken }?: {
|
|
73
|
+
evaluate(tokens: Token[], { temperature, minP, topK, topP, grammarEvaluationState, repeatPenalty, tokenBias, evaluationPriority, contextShift: { size: contextShiftSize, strategy: contextShiftStrategy }, yieldEosToken }?: {
|
|
61
74
|
temperature?: number;
|
|
62
75
|
minP?: number;
|
|
63
76
|
topK?: number;
|
|
64
77
|
topP?: number;
|
|
65
78
|
grammarEvaluationState?: LlamaGrammarEvaluationState | (() => LlamaGrammarEvaluationState | undefined);
|
|
66
79
|
repeatPenalty?: LlamaContextSequenceRepeatPenalty;
|
|
80
|
+
/**
|
|
81
|
+
* Adjust the probability of tokens being generated.
|
|
82
|
+
* Can be used to bias the model to generate tokens that you want it to lean towards,
|
|
83
|
+
* or to avoid generating tokens that you want it to avoid.
|
|
84
|
+
*/
|
|
85
|
+
tokenBias?: TokenBias | (() => TokenBias);
|
|
67
86
|
/**
|
|
68
87
|
* When a lot of tokens are queued for the next batch, more than the configured `batchSize`, the tokens for each sequence will be
|
|
69
88
|
* evaluated based on the strategy chosen for the context.
|
|
@@ -104,3 +123,11 @@ export declare class LlamaContextSequence {
|
|
|
104
123
|
contextShift?: ContextShiftOptions;
|
|
105
124
|
}): Promise<void>;
|
|
106
125
|
}
|
|
126
|
+
export declare function getDefaultContextBatchSize({ contextSize, sequences }: {
|
|
127
|
+
contextSize: number;
|
|
128
|
+
sequences: number;
|
|
129
|
+
}): number;
|
|
130
|
+
export declare function getDefaultContextSequences(): number;
|
|
131
|
+
export declare function getDefaultModelContextSize({ trainContextSize }: {
|
|
132
|
+
trainContextSize?: number;
|
|
133
|
+
}): number;
|
|
@@ -2,7 +2,8 @@ import { DisposeAggregator, EventRelay, withLock, DisposedError, AsyncDisposeAgg
|
|
|
2
2
|
import { removeNullFields } from "../../utils/removeNullFields.js";
|
|
3
3
|
import { compareTokens } from "../../utils/compareTokens.js";
|
|
4
4
|
import { DisposeGuard } from "../../utils/DisposeGuard.js";
|
|
5
|
-
import {
|
|
5
|
+
import { TokenMeter } from "../TokenMeter.js";
|
|
6
|
+
import { resolveBatchItemsPrioritizationStrategy } from "./utils/resolveBatchItemsPrioritizationStrategy.js";
|
|
6
7
|
export class LlamaContext {
|
|
7
8
|
/** @internal */ _llama;
|
|
8
9
|
/** @internal */ _ctx;
|
|
@@ -25,7 +26,7 @@ export class LlamaContext {
|
|
|
25
26
|
/** @internal */ _allocatedContextSize;
|
|
26
27
|
/** @internal */ _disposed = false;
|
|
27
28
|
onDispose = new EventRelay();
|
|
28
|
-
constructor({ _model }, { sequences
|
|
29
|
+
constructor({ _model }, { sequences, seed = null, contextSize, batchSize, threads = 6, batching: { dispatchSchedule: batchingDispatchSchedule = "nextTick", itemPrioritizationStrategy: batchingItemsPrioritizationStrategy = "maximumParallelism" } = {}, _embeddings, _noSeed }) {
|
|
29
30
|
if (_model.disposed)
|
|
30
31
|
throw new DisposedError();
|
|
31
32
|
this._llama = _model._llama;
|
|
@@ -39,13 +40,14 @@ export class LlamaContext {
|
|
|
39
40
|
seed: seed != null ? Math.max(-1, Math.floor(seed)) : undefined,
|
|
40
41
|
contextSize: this._contextSize * this._totalSequences,
|
|
41
42
|
batchSize: this._batchSize,
|
|
43
|
+
sequences: this._totalSequences,
|
|
42
44
|
threads: Math.max(0, Math.floor(threads)),
|
|
43
45
|
embeddings: _embeddings,
|
|
44
46
|
noSeed: _noSeed
|
|
45
47
|
}));
|
|
46
48
|
this._batchingOptions = {
|
|
47
49
|
dispatchSchedule: batchingDispatchSchedule,
|
|
48
|
-
|
|
50
|
+
itemPrioritizationStrategy: batchingItemsPrioritizationStrategy
|
|
49
51
|
};
|
|
50
52
|
this._reclaimUnusedSequenceId = this._reclaimUnusedSequenceId.bind(this);
|
|
51
53
|
this._disposeAggregator.add(() => {
|
|
@@ -82,6 +84,14 @@ export class LlamaContext {
|
|
|
82
84
|
get batchSize() {
|
|
83
85
|
return this._batchSize;
|
|
84
86
|
}
|
|
87
|
+
/**
|
|
88
|
+
* The actual size of the state in the memory in bytes.
|
|
89
|
+
* This value is provided by `llama.cpp` and doesn't include all the memory overhead of the context.
|
|
90
|
+
*/
|
|
91
|
+
get stateSize() {
|
|
92
|
+
this._ensureNotDisposed();
|
|
93
|
+
return this._ctx.getStateSize();
|
|
94
|
+
}
|
|
85
95
|
getAllocatedContextSize() {
|
|
86
96
|
this._ensureNotDisposed();
|
|
87
97
|
if (this._allocatedContextSize == null)
|
|
@@ -99,7 +109,7 @@ export class LlamaContext {
|
|
|
99
109
|
* When there are no sequences left, this method will throw an error.
|
|
100
110
|
* @param [options]
|
|
101
111
|
*/
|
|
102
|
-
getSequence({ contextShift: { size: contextShiftSize = Math.min(100, Math.ceil(this.contextSize / 2)), strategy: contextShiftStrategy = "eraseBeginning" } = {} } = {}) {
|
|
112
|
+
getSequence({ contextShift: { size: contextShiftSize = Math.min(100, Math.ceil(this.contextSize / 2)), strategy: contextShiftStrategy = "eraseBeginning" } = {}, _tokenMeter } = {}) {
|
|
103
113
|
this._ensureNotDisposed();
|
|
104
114
|
const nextSequenceId = this._popSequenceId();
|
|
105
115
|
if (nextSequenceId == null)
|
|
@@ -107,6 +117,7 @@ export class LlamaContext {
|
|
|
107
117
|
return LlamaContextSequence._create({
|
|
108
118
|
sequenceId: nextSequenceId,
|
|
109
119
|
context: this,
|
|
120
|
+
tokenMeter: _tokenMeter,
|
|
110
121
|
contextShift: {
|
|
111
122
|
size: contextShiftSize,
|
|
112
123
|
strategy: contextShiftStrategy
|
|
@@ -123,17 +134,18 @@ export class LlamaContext {
|
|
|
123
134
|
this._currentDispatchBatchHandle = {};
|
|
124
135
|
this._dispatchDecodeScheduled = false;
|
|
125
136
|
this._batchDispatchPending = false;
|
|
126
|
-
let
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
+
let shouldHaveAnotherLoop = this._queuedDecodes.length > 0;
|
|
138
|
+
const resolvePrioritizationStrategy = () => {
|
|
139
|
+
try {
|
|
140
|
+
this._ensureNotDisposed();
|
|
141
|
+
return resolveBatchItemsPrioritizationStrategy(this._batchingOptions.itemPrioritizationStrategy);
|
|
142
|
+
}
|
|
143
|
+
catch (err) {
|
|
144
|
+
this._dispatchErrorForQueuedDecodesAndDequeue(new Set(this._queuedDecodes), err);
|
|
145
|
+
}
|
|
146
|
+
return null;
|
|
147
|
+
};
|
|
148
|
+
const getOrderedQueuedDecodes = (prioritizationStrategy) => {
|
|
137
149
|
const batchItemToQueuedDecodeMap = new Map();
|
|
138
150
|
const batchItemsList = [];
|
|
139
151
|
for (const queuedDecode of this._queuedDecodes) {
|
|
@@ -146,101 +158,132 @@ export class LlamaContext {
|
|
|
146
158
|
}
|
|
147
159
|
let prioritizedItems;
|
|
148
160
|
try {
|
|
149
|
-
prioritizedItems =
|
|
161
|
+
prioritizedItems = prioritizationStrategy({
|
|
150
162
|
items: batchItemsList,
|
|
151
163
|
size: this._batchSize
|
|
152
164
|
});
|
|
153
165
|
}
|
|
154
166
|
catch (err) {
|
|
155
167
|
this._dispatchErrorForQueuedDecodesAndDequeue(new Set(this._queuedDecodes), err);
|
|
156
|
-
return;
|
|
168
|
+
return null;
|
|
157
169
|
}
|
|
158
|
-
|
|
159
|
-
const afterDecodeActions = [];
|
|
160
|
-
const queuedDecodesToDelete = new Set();
|
|
161
|
-
const currentQueuedDecodeItems = new Set();
|
|
162
|
-
const currentBatchItems = [];
|
|
163
|
-
let currentBatchSize = 0;
|
|
164
|
-
for (const prioritizedItem of prioritizedItems) {
|
|
170
|
+
return prioritizedItems.map((prioritizedItem) => {
|
|
165
171
|
const queuedDecode = batchItemToQueuedDecodeMap.get(prioritizedItem.item);
|
|
166
172
|
if (queuedDecode == null)
|
|
167
173
|
throw new Error("Received invalid batch item. Make sure you keep the original object reference " +
|
|
168
174
|
"of the batch item on `item` on `PrioritizedBatchItem` in your custom prioritization strategy");
|
|
169
|
-
|
|
170
|
-
|
|
175
|
+
return {
|
|
176
|
+
queuedDecode,
|
|
177
|
+
processAmount: prioritizedItem.processAmount
|
|
178
|
+
};
|
|
179
|
+
});
|
|
180
|
+
};
|
|
181
|
+
const fitQueuedDecodesToABatch = (queuedDecodes, batchSize) => {
|
|
182
|
+
const currentBatchItems = [];
|
|
183
|
+
let currentBatchSize = 0;
|
|
184
|
+
let batchTokenSlotsLeft = batchSize;
|
|
185
|
+
for (const { queuedDecode, processAmount } of queuedDecodes) {
|
|
186
|
+
const resolvedProcessAmount = Math.min(processAmount <= 0 ? 1 : processAmount, queuedDecode.tokens.length, batchTokenSlotsLeft);
|
|
187
|
+
if (resolvedProcessAmount <= 0) {
|
|
188
|
+
if (batchTokenSlotsLeft === 0)
|
|
189
|
+
break;
|
|
171
190
|
continue;
|
|
172
|
-
|
|
191
|
+
}
|
|
192
|
+
batchTokenSlotsLeft -= resolvedProcessAmount;
|
|
193
|
+
currentBatchSize += resolvedProcessAmount;
|
|
173
194
|
currentBatchItems.push({
|
|
174
195
|
queuedDecode,
|
|
175
|
-
processAmount
|
|
196
|
+
processAmount: resolvedProcessAmount
|
|
176
197
|
});
|
|
177
|
-
currentBatchSize += processAmount;
|
|
178
198
|
}
|
|
179
|
-
|
|
199
|
+
return {
|
|
200
|
+
currentBatchItems,
|
|
201
|
+
currentBatchSize
|
|
202
|
+
};
|
|
203
|
+
};
|
|
204
|
+
const decodeTokenBatchItems = async (batchItems, currentBatchSize) => {
|
|
205
|
+
const afterDecodeActions = [];
|
|
206
|
+
const queuedDecodesToDelete = new Set();
|
|
207
|
+
const currentQueuedDecodeItems = new Set();
|
|
208
|
+
if (currentBatchSize !== 0)
|
|
209
|
+
this._ctx.initBatch(currentBatchSize);
|
|
210
|
+
for (const { queuedDecode, processAmount } of batchItems) {
|
|
211
|
+
let batchLogitIndex;
|
|
212
|
+
try {
|
|
213
|
+
const shouldGenerateLogitAtTheEnd = queuedDecode.generateLogitAtTheEnd &&
|
|
214
|
+
processAmount === queuedDecode.tokens.length;
|
|
215
|
+
const tokensToProcess = queuedDecode.tokens.slice(0, processAmount);
|
|
216
|
+
const numberOfOutputTokens = shouldGenerateLogitAtTheEnd ? 1 : 0;
|
|
217
|
+
TokenMeter.useTokens(queuedDecode.tokenMeter, Math.max(0, tokensToProcess.length - numberOfOutputTokens), "input");
|
|
218
|
+
TokenMeter.useTokens(queuedDecode.tokenMeter, numberOfOutputTokens, "output");
|
|
219
|
+
batchLogitIndex = this._ctx.addToBatch(queuedDecode.sequenceId, queuedDecode.firstTokenSequenceIndex, Uint32Array.from(tokensToProcess), shouldGenerateLogitAtTheEnd);
|
|
220
|
+
}
|
|
221
|
+
catch (err) {
|
|
222
|
+
this._dispatchErrorForQueuedDecodesAndDequeue(new Set([queuedDecode]), err);
|
|
223
|
+
continue;
|
|
224
|
+
}
|
|
225
|
+
currentQueuedDecodeItems.add(queuedDecode);
|
|
226
|
+
if (queuedDecode.tokens.length === processAmount) {
|
|
227
|
+
queuedDecodesToDelete.add(queuedDecode);
|
|
228
|
+
afterDecodeActions.push({
|
|
229
|
+
batchLogitIndex,
|
|
230
|
+
response: queuedDecode.response,
|
|
231
|
+
onDone: queuedDecode.onDone
|
|
232
|
+
});
|
|
233
|
+
}
|
|
234
|
+
else {
|
|
235
|
+
queuedDecode.tokens = queuedDecode.tokens.slice(processAmount);
|
|
236
|
+
queuedDecode.firstTokenSequenceIndex += processAmount;
|
|
237
|
+
}
|
|
238
|
+
}
|
|
239
|
+
for (let i = 0; i < this._queuedDecodes.length; i++) {
|
|
240
|
+
const queuedDecode = this._queuedDecodes[i];
|
|
241
|
+
if (queuedDecodesToDelete.has(queuedDecode)) {
|
|
242
|
+
this._queuedDecodes.splice(i, 1);
|
|
243
|
+
this._queuedDecodeSequenceIds.delete(queuedDecode.sequenceId);
|
|
244
|
+
i--;
|
|
245
|
+
}
|
|
246
|
+
}
|
|
180
247
|
try {
|
|
181
|
-
|
|
248
|
+
if (currentBatchSize !== 0)
|
|
249
|
+
await this._ctx.decodeBatch();
|
|
182
250
|
}
|
|
183
251
|
catch (err) {
|
|
184
|
-
this._dispatchErrorForQueuedDecodesAndDequeue(
|
|
252
|
+
this._dispatchErrorForQueuedDecodesAndDequeue(currentQueuedDecodeItems, err);
|
|
185
253
|
return;
|
|
186
254
|
}
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
for (const { queuedDecode, processAmount } of currentBatchItems) {
|
|
191
|
-
let batchLogitIndex;
|
|
255
|
+
for (const action of afterDecodeActions) {
|
|
256
|
+
const [accept, reject] = action.response;
|
|
257
|
+
if (action.onDone != null && action.batchLogitIndex != null) {
|
|
192
258
|
try {
|
|
193
|
-
|
|
259
|
+
accept(action.onDone(action.batchLogitIndex ?? null));
|
|
194
260
|
}
|
|
195
261
|
catch (err) {
|
|
196
|
-
|
|
197
|
-
continue;
|
|
198
|
-
}
|
|
199
|
-
currentQueuedDecodeItems.add(queuedDecode);
|
|
200
|
-
if (queuedDecode.tokens.length === processAmount) {
|
|
201
|
-
queuedDecodesToDelete.add(queuedDecode);
|
|
202
|
-
afterDecodeActions.push({
|
|
203
|
-
batchLogitIndex,
|
|
204
|
-
response: queuedDecode.response,
|
|
205
|
-
onDone: queuedDecode.onDone
|
|
206
|
-
});
|
|
207
|
-
}
|
|
208
|
-
else {
|
|
209
|
-
queuedDecode.tokens = queuedDecode.tokens.slice(processAmount);
|
|
210
|
-
queuedDecode.firstTokenSequenceIndex += processAmount;
|
|
211
|
-
}
|
|
212
|
-
if (batchTokenSlotsLeft === 0)
|
|
213
|
-
break;
|
|
214
|
-
}
|
|
215
|
-
for (let i = 0; i < this._queuedDecodes.length; i++) {
|
|
216
|
-
const queuedDecode = this._queuedDecodes[i];
|
|
217
|
-
if (queuedDecodesToDelete.has(queuedDecode)) {
|
|
218
|
-
this._queuedDecodes.splice(i, 1);
|
|
219
|
-
this._queuedDecodeSequenceIds.delete(queuedDecode.sequenceId);
|
|
220
|
-
i--;
|
|
262
|
+
reject(err);
|
|
221
263
|
}
|
|
222
264
|
}
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
265
|
+
accept(undefined);
|
|
266
|
+
}
|
|
267
|
+
};
|
|
268
|
+
const prioritizationStrategy = resolvePrioritizationStrategy();
|
|
269
|
+
if (prioritizationStrategy == null)
|
|
270
|
+
return; // all queued items are rejected and dequeued when we get here
|
|
271
|
+
while (shouldHaveAnotherLoop) {
|
|
272
|
+
const orderedQueuedDecodes = getOrderedQueuedDecodes(prioritizationStrategy);
|
|
273
|
+
if (orderedQueuedDecodes == null)
|
|
274
|
+
return; // all queued items are rejected and dequeued when we get here
|
|
275
|
+
const { currentBatchItems, currentBatchSize } = fitQueuedDecodesToABatch(orderedQueuedDecodes, this._batchSize);
|
|
276
|
+
let preventDisposalHandle;
|
|
277
|
+
try {
|
|
278
|
+
preventDisposalHandle = this._backendContextDisposeGuard.createPreventDisposalHandle();
|
|
279
|
+
}
|
|
280
|
+
catch (err) {
|
|
281
|
+
this._dispatchErrorForQueuedDecodesAndDequeue(new Set(this._queuedDecodes), err);
|
|
282
|
+
return;
|
|
283
|
+
}
|
|
284
|
+
try {
|
|
285
|
+
await decodeTokenBatchItems(currentBatchItems, currentBatchSize);
|
|
286
|
+
shouldHaveAnotherLoop = this._queuedDecodes.length > 0;
|
|
244
287
|
}
|
|
245
288
|
finally {
|
|
246
289
|
preventDisposalHandle.dispose();
|
|
@@ -248,13 +291,18 @@ export class LlamaContext {
|
|
|
248
291
|
}
|
|
249
292
|
});
|
|
250
293
|
}
|
|
294
|
+
/**
|

295
|
+
* Print the timings of token evaluation since the last print for this context.
|

296
|
+
* > **Note:** it prints on the `LlamaLogLevel.info` level, so if you set the level of your `Llama` instance higher than that,
|

297
|
+
* it won't print anything.
|

298
|
+
*/
|

251
299
|
async printTimings() {
|

252
300
|
this._ensureNotDisposed();
|

253
301
|
this._ctx.printTimings();
|

254
302
|
await new Promise((accept) => setTimeout(accept, 0)); // wait for the logs to finish printing
|

255
303
|
}
|
|
256
304
|
/** @internal */
|
|
257
|
-
async _decodeTokens({ sequenceId, firstTokenSequenceIndex, tokens, generateLogitAtTheEnd = false, evaluationPriority = 5 }, onDone) {
|
|
305
|
+
async _decodeTokens({ sequenceId, firstTokenSequenceIndex, tokens, generateLogitAtTheEnd = false, evaluationPriority = 5, tokenMeter }, onDone) {
|
|
258
306
|
return await new Promise((accept, reject) => {
|
|
259
307
|
this._queuedDecodes.push({
|
|
260
308
|
sequenceId,
|
|
@@ -262,6 +310,7 @@ export class LlamaContext {
|
|
|
262
310
|
firstTokenSequenceIndex,
|
|
263
311
|
generateLogitAtTheEnd,
|
|
264
312
|
evaluationPriority,
|
|
313
|
+
tokenMeter,
|
|
265
314
|
response: [accept, reject],
|
|
266
315
|
onDone
|
|
267
316
|
});
|
|
@@ -337,17 +386,44 @@ export class LlamaContext {
|
|
|
337
386
|
}
|
|
338
387
|
/** @internal */
|
|
339
388
|
static async _create(options, { _model }) {
|
|
340
|
-
const
|
|
389
|
+
const sequences = options.sequences ?? getDefaultContextSequences();
|
|
390
|
+
const contextSize = _model.fileInsights.configurationResolver.resolveContextContextSize(options.contextSize, {
|
|
391
|
+
batchSize: options.batchSize,
|
|
392
|
+
sequences: sequences,
|
|
393
|
+
modelGpuLayers: _model.gpuLayers,
|
|
394
|
+
modelTrainContextSize: _model.trainContextSize,
|
|
395
|
+
getVramState: () => _model._llama._vramOrchestrator.getMemoryState(),
|
|
396
|
+
llamaGpu: _model._llama.gpu,
|
|
397
|
+
ignoreMemorySafetyChecks: options.ignoreMemorySafetyChecks,
|
|
398
|
+
isEmbeddingContext: options._embeddings
|
|
399
|
+
});
|
|
400
|
+
const batchSize = options.batchSize ?? getDefaultContextBatchSize({ contextSize, sequences });
|
|
401
|
+
const vramRequiredEstimate = _model.fileInsights.estimateContextResourceRequirements({
|
|
402
|
+
contextSize,
|
|
403
|
+
sequences,
|
|
404
|
+
isEmbeddingContext: options._embeddings,
|
|
405
|
+
modelGpuLayers: _model.gpuLayers,
|
|
406
|
+
batchSize
|
|
407
|
+
}).gpuVram;
|
|
408
|
+
const context = new LlamaContext({ _model }, { ...options, contextSize, batchSize, sequences });
|
|
341
409
|
const { createSignal } = options;
|
|
342
|
-
const
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
410
|
+
const contextCreationMemoryReservation = options.ignoreMemorySafetyChecks
|
|
411
|
+
? null
|
|
412
|
+
: _model._llama._vramOrchestrator.reserveMemory(vramRequiredEstimate);
|
|
413
|
+
try {
|
|
414
|
+
const contextLoaded = await context._ctx.init();
|
|
415
|
+
if (createSignal?.aborted) {
|
|
416
|
+
if (contextLoaded)
|
|
417
|
+
await context._ctx.dispose();
|
|
418
|
+
throw createSignal.reason;
|
|
419
|
+
}
|
|
420
|
+
else if (!contextLoaded)
|
|
421
|
+
throw new Error("Failed to create context");
|
|
422
|
+
return context;
|
|
423
|
+
}
|
|
424
|
+
finally {
|
|
425
|
+
contextCreationMemoryReservation?.dispose?.();
|
|
347
426
|
}
|
|
348
|
-
else if (!contextLoaded)
|
|
349
|
-
throw new Error("Failed to create context");
|
|
350
|
-
return context;
|
|
351
427
|
}
|
|
352
428
|
}
|
|
353
429
|
export class LlamaContextSequence {
|
|
@@ -355,14 +431,16 @@ export class LlamaContextSequence {
|
|
|
355
431
|
/** @internal */ _gcRegistry;
|
|
356
432
|
/** @internal */ _context;
|
|
357
433
|
/** @internal */ _contextShift;
|
|
434
|
+
/** @internal */ _tokenMeter;
|
|
358
435
|
/** @internal */ _disposeAggregator = new DisposeAggregator();
|
|
359
436
|
/** @internal */ _contextTokens = [];
|
|
360
437
|
/** @internal */ _nextTokenIndex = 0;
|
|
361
438
|
/** @internal */ _disposed = false;
|
|
362
439
|
onDispose = new EventRelay();
|
|
363
|
-
constructor({ sequenceId, context, contextShift }) {
|
|
440
|
+
constructor({ sequenceId, context, tokenMeter, contextShift }) {
|
|
364
441
|
this._sequenceId = sequenceId;
|
|
365
442
|
this._context = context;
|
|
443
|
+
this._tokenMeter = tokenMeter ?? new TokenMeter();
|
|
366
444
|
this._contextShift = contextShift;
|
|
367
445
|
this._gcRegistry = new FinalizationRegistry(this._context._reclaimUnusedSequenceId);
|
|
368
446
|
this._gcRegistry.register(this, sequenceId);
|
|
@@ -399,6 +477,9 @@ export class LlamaContextSequence {
|
|
|
399
477
|
/** Returns a copy (`slice()`) of `_contextTokens`, so changes to the returned array do not touch the internal list. */
get contextTokens() {
|

400
478
|
return this._contextTokens.slice();
|

401
479
|
}
|
|
480
|
+
/** Returns the `_tokenMeter` of this sequence: the one passed to the constructor, or a new `TokenMeter` when none was given. */
get tokenMeter() {
|

481
|
+
return this._tokenMeter;
|

482
|
+
}
|
|
402
483
|
/** `true` as long as the `_disposed` flag of this sequence is not set. */
get isLoadedToMemory() {
|

403
484
|
return !this._disposed;
|

404
485
|
}
|
|
@@ -424,7 +505,7 @@ export class LlamaContextSequence {
|
|
|
424
505
|
}
|
|
425
506
|
/**
|
|
426
507
|
* Erase context tokens in the provided ranges to free up space for new tokens to be generated.
|
|
427
|
-
*
|
|
508
|
+
* The start of each range is inclusive, and the end of each range is exclusive.
|
|
428
509
|
* For example, the range `{start: 0, end: 1}` will remove the token at the `0` index only.
|
|
429
510
|
*/
|
|
430
511
|
async eraseContextTokenRanges(ranges) {
|
|
@@ -486,7 +567,7 @@ export class LlamaContextSequence {
|
|
|
486
567
|
* @param tokens
|
|
487
568
|
* @param [options]
|
|
488
569
|
*/
|
|
489
|
-
evaluate(tokens, { temperature = 0, minP = 0, topK = 40, topP = 0.95, grammarEvaluationState, repeatPenalty, evaluationPriority = 5, contextShift: { size: contextShiftSize = this._contextShift.size, strategy: contextShiftStrategy = this._contextShift.strategy } = {}, yieldEosToken = false } = {}) {
|
|
570
|
+
evaluate(tokens, { temperature = 0, minP = 0, topK = 40, topP = 0.95, grammarEvaluationState, repeatPenalty, tokenBias, evaluationPriority = 5, contextShift: { size: contextShiftSize = this._contextShift.size, strategy: contextShiftStrategy = this._contextShift.strategy } = {}, yieldEosToken = false } = {}) {
|
|
490
571
|
return this._evaluate(tokens, {
|
|
491
572
|
temperature,
|
|
492
573
|
minP,
|
|
@@ -494,6 +575,7 @@ export class LlamaContextSequence {
|
|
|
494
575
|
topP,
|
|
495
576
|
grammarEvaluationState,
|
|
496
577
|
repeatPenalty,
|
|
578
|
+
tokenBias,
|
|
497
579
|
evaluationPriority,
|
|
498
580
|
contextShiftOptions: {
|
|
499
581
|
size: contextShiftSize,
|
|
@@ -522,7 +604,7 @@ export class LlamaContextSequence {
|
|
|
522
604
|
}
|
|
523
605
|
}
|
|
524
606
|
/** @internal */
|
|
525
|
-
async *_evaluate(tokens, { temperature = 0, minP = 0, topK = 40, topP = 0.95, grammarEvaluationState, repeatPenalty, evaluationPriority = 5, generateNewTokens = true, contextShiftOptions, yieldEosToken = false }) {
|
|
607
|
+
async *_evaluate(tokens, { temperature = 0, minP = 0, topK = 40, topP = 0.95, grammarEvaluationState, repeatPenalty, tokenBias, evaluationPriority = 5, generateNewTokens = true, contextShiftOptions, yieldEosToken = false }) {
|
|
526
608
|
this._ensureNotDisposed();
|
|
527
609
|
let evalTokens = tokens;
|
|
528
610
|
if (evalTokens.length === 0)
|
|
@@ -531,7 +613,7 @@ export class LlamaContextSequence {
|
|
|
531
613
|
while (true) {
|
|
532
614
|
this._ensureNotDisposed();
|
|
533
615
|
// Evaluate to get the next token.
|
|
534
|
-
const nextToken = await this._decodeTokens(evalTokens, generateNewTokens, evaluationPriority, contextShiftOptions, (batchLogitIndex) => {
|
|
616
|
+
const nextToken = await this._decodeTokens(evalTokens, generateNewTokens, evaluationPriority, this._tokenMeter, contextShiftOptions, (batchLogitIndex) => {
|
|
535
617
|
const repeatPenaltyTokens = repeatPenalty?.punishTokens instanceof Function
|
|
536
618
|
? repeatPenalty.punishTokens()
|
|
537
619
|
: repeatPenalty?.punishTokens;
|
|
@@ -540,6 +622,7 @@ export class LlamaContextSequence {
|
|
|
540
622
|
: grammarEvaluationState;
|
|
541
623
|
if (resolvedGrammarEvaluationState != null && resolvedGrammarEvaluationState._llama !== this.model._llama)
|
|
542
624
|
throw new Error("The LlamaGrammar used by passed to this function was created with a different Llama instance than the one used by this sequence's model. Make sure you use the same Llama instance for both the model and the grammar.");
|
|
625
|
+
const { tokenBiasKeys, tokenBiasValues } = getTokenBiasesForAddon(tokenBias, this.model);
|
|
543
626
|
return this._context._ctx.sampleToken(batchLogitIndex, removeNullFields({
|
|
544
627
|
temperature,
|
|
545
628
|
minP,
|
|
@@ -551,6 +634,8 @@ export class LlamaContextSequence {
|
|
|
551
634
|
: undefined,
|
|
552
635
|
repeatPenaltyPresencePenalty: repeatPenalty?.presencePenalty,
|
|
553
636
|
repeatPenaltyFrequencyPenalty: repeatPenalty?.frequencyPenalty,
|
|
637
|
+
tokenBiasKeys,
|
|
638
|
+
tokenBiasValues,
|
|
554
639
|
grammarEvaluationState: resolvedGrammarEvaluationState?._state
|
|
555
640
|
}));
|
|
556
641
|
});
|
|
@@ -565,7 +650,7 @@ export class LlamaContextSequence {
|
|
|
565
650
|
}
|
|
566
651
|
}
|
|
567
652
|
/** @internal */
|
|
568
|
-
async _decodeTokens(tokens, generateLogit, evaluationPriority, contextShiftOptions, onDecodeDone) {
|
|
653
|
+
async _decodeTokens(tokens, generateLogit, evaluationPriority, tokenMeter, contextShiftOptions, onDecodeDone) {
|
|
569
654
|
this._ensureNotDisposed();
|
|
570
655
|
const tokensLeftToDecode = tokens.slice();
|
|
571
656
|
return await withLock(this, "evaluate", async () => {
|
|
@@ -585,7 +670,8 @@ export class LlamaContextSequence {
|
|
|
585
670
|
tokens: tokensToDecode,
|
|
586
671
|
firstTokenSequenceIndex: this._nextTokenIndex,
|
|
587
672
|
generateLogitAtTheEnd,
|
|
588
|
-
evaluationPriority
|
|
673
|
+
evaluationPriority,
|
|
674
|
+
tokenMeter
|
|
589
675
|
}, !generateLogitAtTheEnd
|
|
590
676
|
? undefined
|
|
591
677
|
: onDecodeDone);
|
|
@@ -632,10 +718,11 @@ export class LlamaContextSequence {
|
|
|
632
718
|
* We need this to make it impossible to manually create instances of this class outside the code of this library
|
|
633
719
|
* @internal
|
|
634
720
|
*/
|
|
635
|
-
static _create({ sequenceId, context, contextShift: { size: contextShiftSize = Math.min(100, Math.ceil(context.contextSize / 2)), strategy: contextShiftStrategy = "eraseBeginning" } = {} }) {
|
|
721
|
+
static _create({ sequenceId, context, tokenMeter, contextShift: { size: contextShiftSize = Math.min(100, Math.ceil(context.contextSize / 2)), strategy: contextShiftStrategy = "eraseBeginning" } = {} }) {
|
|
636
722
|
return new LlamaContextSequence({
|
|
637
723
|
sequenceId,
|
|
638
724
|
context,
|
|
725
|
+
tokenMeter,
|
|
639
726
|
contextShift: {
|
|
640
727
|
size: contextShiftSize,
|
|
641
728
|
strategy: contextShiftStrategy
|
|
@@ -643,6 +730,34 @@ export class LlamaContextSequence {
|
|
|
643
730
|
});
|
|
644
731
|
}
|
|
645
732
|
}
|
|
733
|
+
/**
 * Converts a token bias into the two parallel typed arrays handed to the addon's `sampleToken` call.
 *
 * @param tokenBias - a token bias object (read through its `_model` and `_biases` fields),
 * a function returning such an object, or `null`/`undefined` for "no bias".
 * A function that returns `null`/`undefined` is also treated as "no bias".
 * @param currentModel - the model of the sequence being evaluated; must be the same instance as `tokenBias._model`
 * @returns `{tokenBiasKeys, tokenBiasValues}`: a `Uint32Array` of tokens and a `Float32Array` of biases
 * with matching indexes, or both `undefined` when there is nothing to apply
 * @throws {Error} when the token bias was created with a different model than `currentModel`
 */
function getTokenBiasesForAddon(tokenBias, currentModel) {
    // resolve lazily-provided biases without reassigning the parameter
    const resolvedTokenBias = tokenBias instanceof Function
        ? tokenBias()
        : tokenBias;
    // covers both a missing bias and a bias function that returned nothing
    if (resolvedTokenBias == null)
        return {
            tokenBiasKeys: undefined,
            tokenBiasValues: undefined
        };
    if (resolvedTokenBias._model !== currentModel)
        throw new Error("This TokenBias instance was created with a different model than the one used by this context. " +
            "Make sure you use the model instance of the context sequence for the TokenBias you use it with.");
    const tokenBiasKeys = [];
    const tokenBiasValues = [];
    // `_biases` is iterated as [token, bias] pairs; both arrays are filled in lockstep
    for (const [token, bias] of resolvedTokenBias._biases) {
        tokenBiasKeys.push(token);
        tokenBiasValues.push(bias);
    }
    // the arrays always have the same length, so one check is enough
    if (tokenBiasKeys.length === 0) {
        return {
            tokenBiasKeys: undefined,
            tokenBiasValues: undefined
        };
    }
    return {
        tokenBiasKeys: Uint32Array.from(tokenBiasKeys),
        tokenBiasValues: Float32Array.from(tokenBiasValues)
    };
}
|
|
646
761
|
function disposeContextIfReferenced(contextRef) {
|
|
647
762
|
const context = contextRef.deref();
|
|
648
763
|
if (context != null)
|
|
@@ -653,4 +768,14 @@ function disposeContextSequenceIfReferenced(contextRef) {
|
|
|
653
768
|
if (context != null)
|
|
654
769
|
context.dispose();
|
|
655
770
|
}
|
|
771
|
+
/**
 * Default batch size for a context: `contextSize * sequences`, capped at 512.
 * @param options - object with the `contextSize` and `sequences` numbers
 * @returns the smaller of `contextSize * sequences` and 512
 */
export function getDefaultContextBatchSize({ contextSize, sequences }) {
|

772
|
+
return Math.min(contextSize * sequences, 512);
|

773
|
+
}
|
|
774
|
+
/** Default number of sequences for a context; always 1. Used by `LlamaContext._create` when `options.sequences` is nullish. */
export function getDefaultContextSequences() {
|

775
|
+
return 1;
|

776
|
+
}
|
|
777
|
+
// Context size used when a model does not provide a train context size.
const defaultFallbackContextSize = 4096;
|

778
|
+
/**
 * Default context size for a model.
 * @param options - object with the model's `trainContextSize` (may be nullish)
 * @returns `trainContextSize` when it is not `null`/`undefined`, otherwise `defaultFallbackContextSize` (4096)
 */
export function getDefaultModelContextSize({ trainContextSize }) {
|

779
|
+
return trainContextSize ?? defaultFallbackContextSize;
|

780
|
+
}
|
|
656
781
|
//# sourceMappingURL=LlamaContext.js.map
|