node-llama-cpp 3.0.0-beta.13 → 3.0.0-beta.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/dist/ChatWrapper.js +4 -0
- package/dist/ChatWrapper.js.map +1 -1
- package/dist/bindings/AddonTypes.d.ts +35 -6
- package/dist/bindings/Llama.d.ts +12 -0
- package/dist/bindings/Llama.js +100 -7
- package/dist/bindings/Llama.js.map +1 -1
- package/dist/bindings/getLlama.d.ts +19 -1
- package/dist/bindings/getLlama.js +16 -6
- package/dist/bindings/getLlama.js.map +1 -1
- package/dist/bindings/types.d.ts +18 -0
- package/dist/bindings/types.js +31 -2
- package/dist/bindings/types.js.map +1 -1
- package/dist/bindings/utils/MemoryOrchestrator.d.ts +21 -0
- package/dist/bindings/utils/MemoryOrchestrator.js +49 -0
- package/dist/bindings/utils/MemoryOrchestrator.js.map +1 -0
- package/dist/bindings/utils/cloneLlamaCppRepo.d.ts +1 -1
- package/dist/bindings/utils/cloneLlamaCppRepo.js +4 -3
- package/dist/bindings/utils/cloneLlamaCppRepo.js.map +1 -1
- package/dist/bindings/utils/compileLLamaCpp.d.ts +4 -1
- package/dist/bindings/utils/compileLLamaCpp.js +133 -97
- package/dist/bindings/utils/compileLLamaCpp.js.map +1 -1
- package/dist/bindings/utils/detectAvailableComputeLayers.d.ts +3 -0
- package/dist/bindings/utils/detectAvailableComputeLayers.js +155 -13
- package/dist/bindings/utils/detectAvailableComputeLayers.js.map +1 -1
- package/dist/bindings/utils/getLlamaWithoutBackend.d.ts +5 -0
- package/dist/bindings/utils/getLlamaWithoutBackend.js +27 -0
- package/dist/bindings/utils/getLlamaWithoutBackend.js.map +1 -0
- package/dist/bindings/utils/logDistroInstallInstruction.d.ts +1 -0
- package/dist/bindings/utils/logDistroInstallInstruction.js +16 -6
- package/dist/bindings/utils/logDistroInstallInstruction.js.map +1 -1
- package/dist/bindings/utils/resolveCustomCmakeOptions.js +2 -2
- package/dist/bindings/utils/resolveCustomCmakeOptions.js.map +1 -1
- package/dist/bindings/utils/testBindingBinary.js +2 -2
- package/dist/bindings/utils/testBindingBinary.js.map +1 -1
- package/dist/bindings/utils/testCmakeBinary.d.ts +5 -0
- package/dist/bindings/utils/testCmakeBinary.js +32 -0
- package/dist/bindings/utils/testCmakeBinary.js.map +1 -0
- package/dist/chatWrappers/AlpacaChatWrapper.d.ts +2 -1
- package/dist/chatWrappers/AlpacaChatWrapper.js +9 -2
- package/dist/chatWrappers/AlpacaChatWrapper.js.map +1 -1
- package/dist/chatWrappers/ChatMLChatWrapper.js +12 -10
- package/dist/chatWrappers/ChatMLChatWrapper.js.map +1 -1
- package/dist/chatWrappers/FalconChatWrapper.d.ts +2 -1
- package/dist/chatWrappers/FalconChatWrapper.js +28 -11
- package/dist/chatWrappers/FalconChatWrapper.js.map +1 -1
- package/dist/chatWrappers/FunctionaryChatWrapper.js +59 -45
- package/dist/chatWrappers/FunctionaryChatWrapper.js.map +1 -1
- package/dist/chatWrappers/GemmaChatWrapper.js +9 -7
- package/dist/chatWrappers/GemmaChatWrapper.js.map +1 -1
- package/dist/chatWrappers/GeneralChatWrapper.d.ts +2 -1
- package/dist/chatWrappers/GeneralChatWrapper.js +35 -12
- package/dist/chatWrappers/GeneralChatWrapper.js.map +1 -1
- package/dist/chatWrappers/LlamaChatWrapper.d.ts +7 -0
- package/dist/chatWrappers/LlamaChatWrapper.js +26 -8
- package/dist/chatWrappers/LlamaChatWrapper.js.map +1 -1
- package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.d.ts +73 -0
- package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.js +355 -0
- package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.js.map +1 -0
- package/dist/{TemplateChatWrapper.d.ts → chatWrappers/generic/TemplateChatWrapper.d.ts} +16 -18
- package/dist/{TemplateChatWrapper.js → chatWrappers/generic/TemplateChatWrapper.js} +31 -69
- package/dist/chatWrappers/generic/TemplateChatWrapper.js.map +1 -0
- package/dist/chatWrappers/generic/utils/chatHistoryFunctionCallMessageTemplate.d.ts +33 -0
- package/dist/chatWrappers/generic/utils/chatHistoryFunctionCallMessageTemplate.js +45 -0
- package/dist/chatWrappers/generic/utils/chatHistoryFunctionCallMessageTemplate.js.map +1 -0
- package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.d.ts +4 -0
- package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.js +206 -0
- package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.js.map +1 -0
- package/dist/chatWrappers/utils/resolveChatWrapper.d.ts +67 -0
- package/dist/chatWrappers/utils/resolveChatWrapper.js +206 -0
- package/dist/chatWrappers/utils/resolveChatWrapper.js.map +1 -0
- package/dist/cli/cli.js +1 -1
- package/dist/cli/cli.js.map +1 -1
- package/dist/cli/commands/ChatCommand.d.ts +7 -4
- package/dist/cli/commands/ChatCommand.js +177 -70
- package/dist/cli/commands/ChatCommand.js.map +1 -1
- package/dist/cli/commands/ClearCommand.d.ts +1 -1
- package/dist/cli/commands/ClearCommand.js +5 -5
- package/dist/cli/commands/ClearCommand.js.map +1 -1
- package/dist/cli/commands/CompleteCommand.d.ts +3 -2
- package/dist/cli/commands/CompleteCommand.js +115 -51
- package/dist/cli/commands/CompleteCommand.js.map +1 -1
- package/dist/cli/commands/InfillCommand.d.ts +3 -2
- package/dist/cli/commands/InfillCommand.js +115 -51
- package/dist/cli/commands/InfillCommand.js.map +1 -1
- package/dist/cli/commands/OnPostInstallCommand.js +2 -0
- package/dist/cli/commands/OnPostInstallCommand.js.map +1 -1
- package/dist/cli/commands/{InspectCommand.d.ts → inspect/InspectCommand.d.ts} +1 -4
- package/dist/cli/commands/inspect/InspectCommand.js +17 -0
- package/dist/cli/commands/inspect/InspectCommand.js.map +1 -0
- package/dist/cli/commands/inspect/commands/InspectGgufCommand.d.ts +10 -0
- package/dist/cli/commands/inspect/commands/InspectGgufCommand.js +108 -0
- package/dist/cli/commands/inspect/commands/InspectGgufCommand.js.map +1 -0
- package/dist/cli/commands/inspect/commands/InspectGpuCommand.d.ts +4 -0
- package/dist/cli/commands/inspect/commands/InspectGpuCommand.js +98 -0
- package/dist/cli/commands/inspect/commands/InspectGpuCommand.js.map +1 -0
- package/dist/cli/commands/inspect/commands/InspectMeasureCommand.d.ts +14 -0
- package/dist/cli/commands/inspect/commands/InspectMeasureCommand.js +577 -0
- package/dist/cli/commands/inspect/commands/InspectMeasureCommand.js.map +1 -0
- package/dist/cli/utils/ConsoleTable.d.ts +23 -0
- package/dist/cli/utils/ConsoleTable.js +86 -0
- package/dist/cli/utils/ConsoleTable.js.map +1 -0
- package/dist/cli/utils/printCommonInfoLines.d.ts +9 -0
- package/dist/cli/utils/printCommonInfoLines.js +70 -0
- package/dist/cli/utils/printCommonInfoLines.js.map +1 -0
- package/dist/cli/utils/printInfoLine.d.ts +10 -0
- package/dist/cli/utils/printInfoLine.js +45 -0
- package/dist/cli/utils/printInfoLine.js.map +1 -0
- package/dist/cli/utils/resolveCommandGgufPath.d.ts +1 -0
- package/dist/cli/utils/resolveCommandGgufPath.js +6 -0
- package/dist/cli/utils/resolveCommandGgufPath.js.map +1 -0
- package/dist/config.d.ts +3 -1
- package/dist/config.js +7 -1
- package/dist/config.js.map +1 -1
- package/dist/evaluator/LlamaChat/LlamaChat.js +13 -5
- package/dist/evaluator/LlamaChat/LlamaChat.js.map +1 -1
- package/dist/evaluator/LlamaCompletion.js +5 -3
- package/dist/evaluator/LlamaCompletion.js.map +1 -1
- package/dist/evaluator/LlamaContext/LlamaContext.d.ts +43 -9
- package/dist/evaluator/LlamaContext/LlamaContext.js +251 -60
- package/dist/evaluator/LlamaContext/LlamaContext.js.map +1 -1
- package/dist/evaluator/LlamaContext/types.d.ts +68 -10
- package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies/firstInFirstOutStrategy.js.map +1 -0
- package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies/maximumParallelismStrategy.js.map +1 -0
- package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizationStrategy.d.ts +2 -0
- package/dist/evaluator/LlamaContext/utils/{resolveBatchItemsPrioritizingStrategy.js → resolveBatchItemsPrioritizationStrategy.js} +4 -4
- package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizationStrategy.js.map +1 -0
- package/dist/evaluator/LlamaEmbeddingContext.d.ts +29 -7
- package/dist/evaluator/LlamaEmbeddingContext.js +31 -22
- package/dist/evaluator/LlamaEmbeddingContext.js.map +1 -1
- package/dist/evaluator/LlamaGrammar.js +1 -0
- package/dist/evaluator/LlamaGrammar.js.map +1 -1
- package/dist/evaluator/LlamaModel.d.ts +78 -20
- package/dist/evaluator/LlamaModel.js +385 -21
- package/dist/evaluator/LlamaModel.js.map +1 -1
- package/dist/evaluator/TokenMeter.d.ts +54 -0
- package/dist/evaluator/TokenMeter.js +86 -0
- package/dist/evaluator/TokenMeter.js.map +1 -0
- package/dist/gguf/GgufInsights.d.ts +40 -0
- package/dist/gguf/GgufInsights.js +350 -0
- package/dist/gguf/GgufInsights.js.map +1 -0
- package/dist/gguf/consts.d.ts +3 -0
- package/dist/gguf/consts.js +8 -0
- package/dist/gguf/consts.js.map +1 -0
- package/dist/gguf/errors/InvalidGgufMagicError.d.ts +3 -0
- package/dist/gguf/errors/InvalidGgufMagicError.js +6 -0
- package/dist/gguf/errors/InvalidGgufMagicError.js.map +1 -0
- package/dist/gguf/errors/UnsupportedGgufValueTypeError.d.ts +4 -0
- package/dist/gguf/errors/UnsupportedGgufValueTypeError.js +9 -0
- package/dist/gguf/errors/UnsupportedGgufValueTypeError.js.map +1 -0
- package/dist/gguf/fileReaders/GgufFileReader.d.ts +33 -0
- package/dist/gguf/fileReaders/GgufFileReader.js +76 -0
- package/dist/gguf/fileReaders/GgufFileReader.js.map +1 -0
- package/dist/gguf/fileReaders/GgufFsFileReader.d.ts +17 -0
- package/dist/gguf/fileReaders/GgufFsFileReader.js +45 -0
- package/dist/gguf/fileReaders/GgufFsFileReader.js.map +1 -0
- package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.d.ts +22 -0
- package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.js +63 -0
- package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.js.map +1 -0
- package/dist/gguf/parser/GgufV2Parser.d.ts +19 -0
- package/dist/gguf/parser/GgufV2Parser.js +115 -0
- package/dist/gguf/parser/GgufV2Parser.js.map +1 -0
- package/dist/gguf/parser/GgufV3Parser.d.ts +3 -0
- package/dist/gguf/parser/GgufV3Parser.js +4 -0
- package/dist/gguf/parser/GgufV3Parser.js.map +1 -0
- package/dist/gguf/parser/parseGguf.d.ts +8 -0
- package/dist/gguf/parser/parseGguf.js +58 -0
- package/dist/gguf/parser/parseGguf.js.map +1 -0
- package/dist/gguf/readGgufFileInfo.d.ts +30 -0
- package/dist/gguf/readGgufFileInfo.js +37 -0
- package/dist/gguf/readGgufFileInfo.js.map +1 -0
- package/dist/gguf/types/GgufFileInfoTypes.d.ts +52 -0
- package/dist/gguf/types/GgufFileInfoTypes.js +18 -0
- package/dist/gguf/types/GgufFileInfoTypes.js.map +1 -0
- package/dist/gguf/types/GgufMetadataTypes.d.ts +330 -0
- package/dist/gguf/types/GgufMetadataTypes.js +86 -0
- package/dist/gguf/types/GgufMetadataTypes.js.map +1 -0
- package/dist/gguf/types/GgufTensorInfoTypes.d.ts +37 -0
- package/dist/gguf/types/GgufTensorInfoTypes.js +33 -0
- package/dist/gguf/types/GgufTensorInfoTypes.js.map +1 -0
- package/dist/gguf/utils/GgufReadOffset.d.ts +6 -0
- package/dist/gguf/utils/GgufReadOffset.js +18 -0
- package/dist/gguf/utils/GgufReadOffset.js.map +1 -0
- package/dist/gguf/utils/convertMetadataKeyValueRecordToNestedObject.d.ts +5 -0
- package/dist/gguf/utils/convertMetadataKeyValueRecordToNestedObject.js +38 -0
- package/dist/gguf/utils/convertMetadataKeyValueRecordToNestedObject.js.map +1 -0
- package/dist/gguf/utils/getGgufFileTypeName.d.ts +4 -0
- package/dist/gguf/utils/getGgufFileTypeName.js +13 -0
- package/dist/gguf/utils/getGgufFileTypeName.js.map +1 -0
- package/dist/gguf/utils/getGgufMetadataArchitectureData.d.ts +3 -0
- package/dist/gguf/utils/getGgufMetadataArchitectureData.js +4 -0
- package/dist/gguf/utils/getGgufMetadataArchitectureData.js.map +1 -0
- package/dist/gguf/utils/normalizeGgufDownloadUrl.d.ts +1 -0
- package/dist/gguf/utils/normalizeGgufDownloadUrl.js +16 -0
- package/dist/gguf/utils/normalizeGgufDownloadUrl.js.map +1 -0
- package/dist/index.d.ts +13 -7
- package/dist/index.js +11 -6
- package/dist/index.js.map +1 -1
- package/dist/state.d.ts +2 -0
- package/dist/state.js +7 -0
- package/dist/state.js.map +1 -1
- package/dist/types.d.ts +1 -1
- package/dist/utils/DisposeGuard.d.ts +13 -0
- package/dist/utils/DisposeGuard.js +120 -0
- package/dist/utils/DisposeGuard.js.map +1 -0
- package/dist/utils/InsufficientMemoryError.d.ts +3 -0
- package/dist/utils/InsufficientMemoryError.js +6 -0
- package/dist/utils/InsufficientMemoryError.js.map +1 -0
- package/dist/utils/LlamaText.d.ts +25 -10
- package/dist/utils/LlamaText.js +205 -23
- package/dist/utils/LlamaText.js.map +1 -1
- package/dist/utils/StopGenerationDetector.js +3 -1
- package/dist/utils/StopGenerationDetector.js.map +1 -1
- package/dist/utils/cmake.js +1 -1
- package/dist/utils/cmake.js.map +1 -1
- package/dist/utils/findBestOption.d.ts +4 -0
- package/dist/utils/findBestOption.js +15 -0
- package/dist/utils/findBestOption.js.map +1 -0
- package/dist/utils/getConsoleLogPrefix.js +3 -2
- package/dist/utils/getConsoleLogPrefix.js.map +1 -1
- package/dist/utils/getQueuedTokensBeforeStopTrigger.js +3 -3
- package/dist/utils/getQueuedTokensBeforeStopTrigger.js.map +1 -1
- package/dist/utils/gitReleaseBundles.js +68 -1
- package/dist/utils/gitReleaseBundles.js.map +1 -1
- package/dist/utils/mergeUnionTypes.d.ts +4 -0
- package/dist/utils/parseModelFileName.d.ts +1 -0
- package/dist/utils/parseModelFileName.js +6 -1
- package/dist/utils/parseModelFileName.js.map +1 -1
- package/dist/utils/prettyPrintObject.d.ts +10 -1
- package/dist/utils/prettyPrintObject.js +57 -13
- package/dist/utils/prettyPrintObject.js.map +1 -1
- package/dist/utils/removeNullFields.d.ts +2 -2
- package/dist/utils/removeNullFields.js.map +1 -1
- package/dist/utils/spawnCommand.d.ts +11 -1
- package/dist/utils/spawnCommand.js +55 -7
- package/dist/utils/spawnCommand.js.map +1 -1
- package/dist/utils/tokenizeInput.d.ts +1 -1
- package/dist/utils/tokenizeInput.js +3 -3
- package/dist/utils/tokenizeInput.js.map +1 -1
- package/dist/utils/withOra.d.ts +1 -0
- package/dist/utils/withOra.js +2 -2
- package/dist/utils/withOra.js.map +1 -1
- package/llama/CMakeLists.txt +5 -5
- package/llama/addon.cpp +793 -88
- package/llama/binariesGithubRelease.json +1 -1
- package/llama/gitRelease.bundle +0 -0
- package/llama/gpuInfo/cuda-gpu-info.cu +21 -0
- package/llama/gpuInfo/cuda-gpu-info.h +3 -0
- package/llama/gpuInfo/metal-gpu-info.h +4 -1
- package/llama/gpuInfo/metal-gpu-info.mm +14 -1
- package/llama/gpuInfo/vulkan-gpu-info.cpp +20 -2
- package/llama/gpuInfo/vulkan-gpu-info.h +2 -0
- package/llama/grammars/json.gbnf +1 -1
- package/llama/grammars/json_arr.gbnf +1 -1
- package/llama/llama.cpp.info.json +1 -1
- package/llama/toolchains/win32.host-x64.target-arm64.cmake +41 -0
- package/llamaBins/linux-arm64/{.buildMetadata.json → _nlcBuildMetadata.json} +1 -1
- package/llamaBins/linux-arm64/llama-addon.node +0 -0
- package/llamaBins/linux-armv7l/{.buildMetadata.json → _nlcBuildMetadata.json} +1 -1
- package/llamaBins/linux-armv7l/llama-addon.node +0 -0
- package/llamaBins/linux-x64/{.buildMetadata.json → _nlcBuildMetadata.json} +1 -1
- package/llamaBins/linux-x64/llama-addon.node +0 -0
- package/llamaBins/linux-x64-cuda/{.buildMetadata.json → _nlcBuildMetadata.json} +1 -1
- package/llamaBins/linux-x64-cuda/llama-addon.node +0 -0
- package/llamaBins/linux-x64-vulkan/{.buildMetadata.json → _nlcBuildMetadata.json} +1 -1
- package/llamaBins/linux-x64-vulkan/llama-addon.node +0 -0
- package/llamaBins/mac-arm64-metal/{.buildMetadata.json → _nlcBuildMetadata.json} +1 -1
- package/llamaBins/mac-arm64-metal/default.metallib +0 -0
- package/llamaBins/mac-arm64-metal/llama-addon.node +0 -0
- package/llamaBins/mac-x64/{.buildMetadata.json → _nlcBuildMetadata.json} +1 -1
- package/llamaBins/mac-x64/llama-addon.node +0 -0
- package/llamaBins/win-arm64/_nlcBuildMetadata.json +1 -0
- package/llamaBins/win-arm64/llama-addon.exp +0 -0
- package/llamaBins/win-arm64/llama-addon.lib +0 -0
- package/llamaBins/win-arm64/llama-addon.node +0 -0
- package/llamaBins/win-x64/{.buildMetadata.json → _nlcBuildMetadata.json} +1 -1
- package/llamaBins/win-x64/llama-addon.exp +0 -0
- package/llamaBins/win-x64/llama-addon.lib +0 -0
- package/llamaBins/win-x64/llama-addon.node +0 -0
- package/llamaBins/win-x64-cuda/{.buildMetadata.json → _nlcBuildMetadata.json} +1 -1
- package/llamaBins/win-x64-cuda/llama-addon.exp +0 -0
- package/llamaBins/win-x64-cuda/llama-addon.lib +0 -0
- package/llamaBins/win-x64-cuda/llama-addon.node +0 -0
- package/llamaBins/win-x64-vulkan/{.buildMetadata.json → _nlcBuildMetadata.json} +1 -1
- package/llamaBins/win-x64-vulkan/llama-addon.exp +0 -0
- package/llamaBins/win-x64-vulkan/llama-addon.lib +0 -0
- package/llamaBins/win-x64-vulkan/llama-addon.node +0 -0
- package/package.json +16 -11
- package/dist/TemplateChatWrapper.js.map +0 -1
- package/dist/bindings/utils/resolveChatWrapperBasedOnWrapperTypeName.d.ts +0 -33
- package/dist/bindings/utils/resolveChatWrapperBasedOnWrapperTypeName.js +0 -49
- package/dist/bindings/utils/resolveChatWrapperBasedOnWrapperTypeName.js.map +0 -1
- package/dist/chatWrappers/resolveChatWrapperBasedOnModel.d.ts +0 -13
- package/dist/chatWrappers/resolveChatWrapperBasedOnModel.js +0 -63
- package/dist/chatWrappers/resolveChatWrapperBasedOnModel.js.map +0 -1
- package/dist/cli/commands/InspectCommand.js +0 -113
- package/dist/cli/commands/InspectCommand.js.map +0 -1
- package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/firstInFirstOutStrategy.js.map +0 -1
- package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/maximumParallelismStrategy.js.map +0 -1
- package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizingStrategy.d.ts +0 -2
- package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizingStrategy.js.map +0 -1
- package/dist/gguf/GGUFInsights.d.ts +0 -28
- package/dist/gguf/GGUFInsights.js +0 -58
- package/dist/gguf/GGUFInsights.js.map +0 -1
- package/dist/gguf/GGUFMetadata.d.ts +0 -19
- package/dist/gguf/GGUFMetadata.js +0 -38
- package/dist/gguf/GGUFMetadata.js.map +0 -1
- package/dist/gguf/errors/InvalidGGUFMagicError.d.ts +0 -3
- package/dist/gguf/errors/InvalidGGUFMagicError.js +0 -6
- package/dist/gguf/errors/InvalidGGUFMagicError.js.map +0 -1
- package/dist/gguf/errors/MetadataNotParsedYetError.d.ts +0 -3
- package/dist/gguf/errors/MetadataNotParsedYetError.js +0 -6
- package/dist/gguf/errors/MetadataNotParsedYetError.js.map +0 -1
- package/dist/gguf/errors/MissingNodeLlamaError.d.ts +0 -3
- package/dist/gguf/errors/MissingNodeLlamaError.js +0 -6
- package/dist/gguf/errors/MissingNodeLlamaError.js.map +0 -1
- package/dist/gguf/errors/ModelScore/NotEnoughVRamError.d.ts +0 -5
- package/dist/gguf/errors/ModelScore/NotEnoughVRamError.js +0 -12
- package/dist/gguf/errors/ModelScore/NotEnoughVRamError.js.map +0 -1
- package/dist/gguf/errors/UnsupportedMetadataTypeError.d.ts +0 -4
- package/dist/gguf/errors/UnsupportedMetadataTypeError.js +0 -8
- package/dist/gguf/errors/UnsupportedMetadataTypeError.js.map +0 -1
- package/dist/gguf/ggufParser/GGUFParser.d.ts +0 -18
- package/dist/gguf/ggufParser/GGUFParser.js +0 -123
- package/dist/gguf/ggufParser/GGUFParser.js.map +0 -1
- package/dist/gguf/ggufParser/GGUFTypes.d.ts +0 -257
- package/dist/gguf/ggufParser/GGUFTypes.js +0 -2
- package/dist/gguf/ggufParser/GGUFTypes.js.map +0 -1
- package/dist/gguf/ggufParser/checkArchitecture.d.ts +0 -14
- package/dist/gguf/ggufParser/checkArchitecture.js +0 -74
- package/dist/gguf/ggufParser/checkArchitecture.js.map +0 -1
- package/dist/gguf/ggufParser/stream/GGUFBaseStream.d.ts +0 -38
- package/dist/gguf/ggufParser/stream/GGUFBaseStream.js +0 -83
- package/dist/gguf/ggufParser/stream/GGUFBaseStream.js.map +0 -1
- package/dist/gguf/ggufParser/stream/GGUFFetchStream.d.ts +0 -14
- package/dist/gguf/ggufParser/stream/GGUFFetchStream.js +0 -35
- package/dist/gguf/ggufParser/stream/GGUFFetchStream.js.map +0 -1
- package/dist/gguf/ggufParser/stream/GGUFReadStream.d.ts +0 -15
- package/dist/gguf/ggufParser/stream/GGUFReadStream.js +0 -40
- package/dist/gguf/ggufParser/stream/GGUFReadStream.js.map +0 -1
- package/dist/utils/parseModelTypeDescription.d.ts +0 -6
- package/dist/utils/parseModelTypeDescription.js +0 -9
- package/dist/utils/parseModelTypeDescription.js.map +0 -1
- package/dist/utils/resolveChatWrapper.d.ts +0 -4
- package/dist/utils/resolveChatWrapper.js +0 -16
- package/dist/utils/resolveChatWrapper.js.map +0 -1
- package/llamaBins/mac-arm64-metal/ggml-metal.metal +0 -7731
- /package/dist/evaluator/LlamaContext/utils/{batchItemsPrioritizingStrategies → batchItemsPrioritizationStrategies}/firstInFirstOutStrategy.d.ts +0 -0
- /package/dist/evaluator/LlamaContext/utils/{batchItemsPrioritizingStrategies → batchItemsPrioritizationStrategies}/firstInFirstOutStrategy.js +0 -0
- /package/dist/evaluator/LlamaContext/utils/{batchItemsPrioritizingStrategies → batchItemsPrioritizationStrategies}/maximumParallelismStrategy.d.ts +0 -0
- /package/dist/evaluator/LlamaContext/utils/{batchItemsPrioritizingStrategies → batchItemsPrioritizationStrategies}/maximumParallelismStrategy.js +0 -0
|
@@ -0,0 +1,206 @@
|
|
|
1
|
+
import { parseModelFileName } from "../../utils/parseModelFileName.js";
|
|
2
|
+
import { LlamaChatWrapper } from "../LlamaChatWrapper.js";
|
|
3
|
+
import { ChatMLChatWrapper } from "../ChatMLChatWrapper.js";
|
|
4
|
+
import { GeneralChatWrapper } from "../GeneralChatWrapper.js";
|
|
5
|
+
import { FalconChatWrapper } from "../FalconChatWrapper.js";
|
|
6
|
+
import { FunctionaryChatWrapper } from "../FunctionaryChatWrapper.js";
|
|
7
|
+
import { AlpacaChatWrapper } from "../AlpacaChatWrapper.js";
|
|
8
|
+
import { GemmaChatWrapper } from "../GemmaChatWrapper.js";
|
|
9
|
+
import { JinjaTemplateChatWrapper } from "../generic/JinjaTemplateChatWrapper.js";
|
|
10
|
+
import { TemplateChatWrapper } from "../generic/TemplateChatWrapper.js";
|
|
11
|
+
import { getConsoleLogPrefix } from "../../utils/getConsoleLogPrefix.js";
|
|
12
|
+
import { isJinjaTemplateEquivalentToSpecializedChatWrapper } from "./isJinjaTemplateEquivalentToSpecializedChatWrapper.js";
|
|
13
|
+
/** Type names of the chat wrappers that are mapped to a dedicated wrapper class. */
export const specializedChatWrapperTypeNames = Object.freeze([
    "general",
    "llamaChat",
    "alpacaChat",
    "functionary",
    "chatML",
    "falconChat",
    "gemma"
]);

/** Type names of the generic, template-driven chat wrappers. */
export const templateChatWrapperTypeNames = Object.freeze([
    "template",
    "jinjaTemplate"
]);

/** Every type name accepted for resolution: `"auto"` followed by the specialized and template type names. */
export const resolvableChatWrapperTypeNames = Object.freeze(
    ["auto"].concat(specializedChatWrapperTypeNames, templateChatWrapperTypeNames)
);
|
|
24
|
+
// Lookup from a chat wrapper type name to the class that implements it
const chatWrappers = {
    general: GeneralChatWrapper,
    llamaChat: LlamaChatWrapper,
    alpacaChat: AlpacaChatWrapper,
    functionary: FunctionaryChatWrapper,
    chatML: ChatMLChatWrapper,
    falconChat: FalconChatWrapper,
    gemma: GemmaChatWrapper,
    template: TemplateChatWrapper,
    jinjaTemplate: JinjaTemplateChatWrapper
};

// Reverse lookup: from a wrapper class back to its type name in `chatWrappers`
const chatWrapperToConfigType = new Map();
for (const [typeName, WrapperClass] of Object.entries(chatWrappers))
    chatWrapperToConfigType.set(WrapperClass, typeName);
|
|
37
|
+
/**
 * Resolve to a chat wrapper instance based on the provided information.
 * The more information provided, the better the resolution will be (except for `type`).
 *
 * It's recommended to not set `type` to a specific chat wrapper in order for the resolution to be more flexible, but it is useful for when
 * you need to provide the ability to force a specific chat wrapper type.
 * Note that when setting `type` to a generic chat wrapper type (such as `"template"` or `"jinjaTemplate"`), the `customWrapperSettings`
 * must contain the necessary settings for that chat wrapper to be created.
 *
 * When loading a Jinja chat template from either `fileInfo` or `customWrapperSettings.jinjaTemplate.template`,
 * if the chat template format is invalid, it falls back to resolving other chat wrappers,
 * unless `fallbackToOtherWrappersOnJinjaError` is set to `false` (in which case, it will throw an error).
 *
 * Resolution order: the explicitly requested `type`, then the Jinja chat template, then known markers inside the Jinja template,
 * then the model file name, then the architecture in `fileInfo`, and finally the `bosString`.
 *
 * @param {object} options
 * @param {string | null} [options.type="auto"] - the chat wrapper type to force, or `"auto"` to resolve it from the other options
 * @param {string | null} [options.bosString] - matched against the beginning of known chat formats as a last resort
 * @param {string} [options.filename] - model file name; only used when it parses as a `gguf` file
 * @param {object} [options.fileInfo] - its `metadata.tokenizer.chat_template` and `metadata.general.architecture` are read
 * @param {*} [options.tokenizer] - forwarded to `isJinjaTemplateEquivalentToSpecializedChatWrapper`
 * @param {object} [options.customWrapperSettings] - settings per chat wrapper type name
 * @param {boolean} [options.warningLogs=true] - whether to log warnings and Jinja fallback errors to the console
 * @param {boolean} [options.fallbackToOtherWrappersOnJinjaError=true] - when `false`, an error creating the Jinja wrapper is thrown
 * @param {boolean} [options.noJinja=false] - skip creating a Jinja based wrapper on automatic resolution
 *   (known markers in the Jinja template are still used to pick a specialized wrapper)
 * @returns a chat wrapper instance, or `null` when nothing could be resolved
 */
export function resolveChatWrapper({ type = "auto", bosString, filename, fileInfo, tokenizer, customWrapperSettings, warningLogs = true, fallbackToOtherWrappersOnJinjaError = true, noJinja = false }) {
    // Instantiate a specialized wrapper; the custom settings for its type take precedence over `defaultSettings`
    function createSpecializedChatWrapper(specializedChatWrapper, defaultSettings = {}) {
        const chatWrapperConfigType = chatWrapperToConfigType.get(specializedChatWrapper);
        const chatWrapperSettings = customWrapperSettings?.[chatWrapperConfigType];

        return new specializedChatWrapper({
            ...(defaultSettings ?? {}),
            ...(chatWrapperSettings ?? {})
        });
    }

    if (type !== "auto" && type != null) {
        if (isTemplateChatWrapperType(type)) {
            const Wrapper = chatWrappers[type];

            if (isClassReference(Wrapper, TemplateChatWrapper)) {
                const wrapperSettings = customWrapperSettings?.template;

                if (wrapperSettings == null || wrapperSettings?.template == null || wrapperSettings?.historyTemplate == null ||
                    wrapperSettings?.modelRoleName == null || wrapperSettings?.userRoleName == null) {
                    if (warningLogs)
                        console.warn(getConsoleLogPrefix() + "Template chat wrapper settings must have a template, historyTemplate, modelRoleName, and userRoleName. Falling back to resolve other chat wrapper types.");
                }
                else
                    return new TemplateChatWrapper(wrapperSettings);
            }
            else if (isClassReference(Wrapper, JinjaTemplateChatWrapper)) {
                const jinjaTemplate = customWrapperSettings?.jinjaTemplate?.template ?? fileInfo?.metadata?.tokenizer?.chat_template;

                if (jinjaTemplate == null) {
                    if (warningLogs)
                        console.warn(getConsoleLogPrefix() + "Jinja template chat wrapper received no template. Falling back to resolve other chat wrapper types.");
                }
                else {
                    try {
                        return new JinjaTemplateChatWrapper({
                            ...(customWrapperSettings?.jinjaTemplate ?? {}),
                            template: jinjaTemplate
                        });
                    }
                    catch (err) {
                        if (!fallbackToOtherWrappersOnJinjaError)
                            throw err;
                        else if (warningLogs)
                            console.error(getConsoleLogPrefix() + "Error creating Jinja template chat wrapper. Falling back to resolve other chat wrappers. Error:", err);
                    }
                }
            }
        }
        else if (Object.hasOwn(chatWrappers, type)) {
            const Wrapper = chatWrappers[type];
            const wrapperSettings = customWrapperSettings?.[type];

            return new Wrapper(wrapperSettings);
        }
    }

    const modelJinjaTemplate = customWrapperSettings?.jinjaTemplate?.template ?? fileInfo?.metadata?.tokenizer?.chat_template;
    const hasModelJinjaTemplate = modelJinjaTemplate != null && modelJinjaTemplate.trim() !== "";

    if (!noJinja && hasModelJinjaTemplate) {
        const jinjaTemplateChatWrapperOptions = {
            ...(customWrapperSettings?.jinjaTemplate ?? {}),
            template: modelJinjaTemplate
        };

        // prefer a specialized chat wrapper when it is found to be equivalent to the Jinja template
        for (const specializedChatWrapperTypeName of specializedChatWrapperTypeNames) {
            const Wrapper = chatWrappers[specializedChatWrapperTypeName];
            const wrapperSettings = customWrapperSettings?.[specializedChatWrapperTypeName];

            // don't push into the returned array, since it may be owned by the wrapper class
            const declaredTestConfigurations = Wrapper._getOptionConfigurationsToTestIfCanSupersedeJinjaTemplate?.() ?? [];
            const testOptionConfigurations = declaredTestConfigurations.length === 0
                ? [{}]
                : declaredTestConfigurations;

            for (const testConfiguration of testOptionConfigurations) {
                const testChatWrapperSettings = {
                    ...(wrapperSettings ?? {}),
                    ...(testConfiguration ?? {})
                };
                const chatWrapper = new Wrapper(testChatWrapperSettings);

                if (isJinjaTemplateEquivalentToSpecializedChatWrapper(jinjaTemplateChatWrapperOptions, chatWrapper, tokenizer))
                    return new Wrapper(testChatWrapperSettings);
            }
        }

        try {
            return new JinjaTemplateChatWrapper(jinjaTemplateChatWrapperOptions);
        }
        catch (err) {
            if (!fallbackToOtherWrappersOnJinjaError)
                throw err;
            else if (warningLogs) // honor `warningLogs` here as well, like when the Jinja wrapper type is explicitly requested
                console.error(getConsoleLogPrefix() + "Error creating Jinja template chat wrapper. Falling back to resolve other chat wrappers. Error:", err);
        }
    }

    // try to find a pattern in the Jinja template to resolve to a specialized chat wrapper,
    // with a logic similar to `llama.cpp`'s `llama_chat_apply_template_internal` function
    if (hasModelJinjaTemplate) {
        if (modelJinjaTemplate.includes("<|im_start|>"))
            return createSpecializedChatWrapper(ChatMLChatWrapper);
        else if (modelJinjaTemplate.includes("[INST]"))
            return createSpecializedChatWrapper(LlamaChatWrapper, {
                addSpaceBeforeEos: modelJinjaTemplate.includes("' ' + eos_token")
            });
        else if (modelJinjaTemplate.includes("<start_of_turn>"))
            return createSpecializedChatWrapper(GemmaChatWrapper);
    }

    // guess based on the model file name
    if (filename != null) {
        const { name, subType, fileType, otherInfo } = parseModelFileName(filename);

        if (fileType?.toLowerCase() === "gguf") {
            const lowercaseName = name?.toLowerCase();
            const lowercaseSubType = subType?.toLowerCase();
            const splitLowercaseSubType = (lowercaseSubType?.split("-") ?? []).concat(otherInfo.map(info => info.toLowerCase()));
            const firstSplitLowercaseSubType = splitLowercaseSubType[0];

            if (lowercaseName === "llama") {
                if (splitLowercaseSubType.includes("chat"))
                    return createSpecializedChatWrapper(LlamaChatWrapper);

                return createSpecializedChatWrapper(GeneralChatWrapper);
            }
            else if (lowercaseName === "yarn" && firstSplitLowercaseSubType === "llama")
                return createSpecializedChatWrapper(LlamaChatWrapper);
            else if (lowercaseName === "orca")
                return createSpecializedChatWrapper(ChatMLChatWrapper);
            else if (lowercaseName === "phind" && lowercaseSubType === "codellama")
                return createSpecializedChatWrapper(LlamaChatWrapper);
            else if (lowercaseName === "mistral")
                return createSpecializedChatWrapper(GeneralChatWrapper);
            else if (firstSplitLowercaseSubType === "llama")
                return createSpecializedChatWrapper(LlamaChatWrapper);
            else if (lowercaseSubType === "alpaca")
                return createSpecializedChatWrapper(AlpacaChatWrapper);
            else if (lowercaseName === "functionary")
                return createSpecializedChatWrapper(FunctionaryChatWrapper);
            else if (lowercaseName === "dolphin" && splitLowercaseSubType.includes("mistral"))
                return createSpecializedChatWrapper(ChatMLChatWrapper);
            else if (lowercaseName === "gemma")
                return createSpecializedChatWrapper(GemmaChatWrapper);
            else if (splitLowercaseSubType.includes("chatml"))
                return createSpecializedChatWrapper(ChatMLChatWrapper);
        }
    }

    // guess based on the model architecture
    if (fileInfo != null) {
        // `metadata` is read defensively, consistent with the chat template lookups above
        const arch = fileInfo.metadata?.general?.architecture;

        if (arch === "llama")
            return createSpecializedChatWrapper(LlamaChatWrapper);
        else if (arch === "falcon")
            return createSpecializedChatWrapper(FalconChatWrapper);
    }

    if (bosString === "" || bosString == null)
        return null;

    // last resort: check whether a known chat format begins with the BOS string
    if ("<s>[INST] <<SYS>>\n".startsWith(bosString)) {
        return createSpecializedChatWrapper(LlamaChatWrapper);
    }
    else if ("<|im_start|>system\n".startsWith(bosString)) {
        return createSpecializedChatWrapper(ChatMLChatWrapper);
    }

    return null;
}
|
|
196
|
+
export function isSpecializedChatWrapperType(type) {
|
|
197
|
+
return specializedChatWrapperTypeNames.includes(type);
|
|
198
|
+
}
|
|
199
|
+
export function isTemplateChatWrapperType(type) {
|
|
200
|
+
return templateChatWrapperTypeNames.includes(type);
|
|
201
|
+
}
|
|
202
|
+
// this is needed because TypeScript guards don't work automatically with class references
|
|
203
|
+
function isClassReference(value, classReference) {
|
|
204
|
+
return value === classReference;
|
|
205
|
+
}
|
|
206
|
+
//# sourceMappingURL=resolveChatWrapper.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"resolveChatWrapper.js","sourceRoot":"","sources":["../../../src/chatWrappers/utils/resolveChatWrapper.ts"],"names":[],"mappings":"AAAA,OAAO,EAAC,kBAAkB,EAAC,MAAM,mCAAmC,CAAC;AACrE,OAAO,EAAC,gBAAgB,EAAC,MAAM,wBAAwB,CAAC;AACxD,OAAO,EAAC,iBAAiB,EAAC,MAAM,yBAAyB,CAAC;AAC1D,OAAO,EAAC,kBAAkB,EAAC,MAAM,0BAA0B,CAAC;AAC5D,OAAO,EAAC,iBAAiB,EAAC,MAAM,yBAAyB,CAAC;AAC1D,OAAO,EAAC,sBAAsB,EAAC,MAAM,8BAA8B,CAAC;AACpE,OAAO,EAAC,iBAAiB,EAAC,MAAM,yBAAyB,CAAC;AAC1D,OAAO,EAAC,gBAAgB,EAAC,MAAM,wBAAwB,CAAC;AACxD,OAAO,EAAC,wBAAwB,EAAkC,MAAM,wCAAwC,CAAC;AACjH,OAAO,EAAC,mBAAmB,EAAC,MAAM,mCAAmC,CAAC;AACtE,OAAO,EAAC,mBAAmB,EAAC,MAAM,oCAAoC,CAAC;AAEvE,OAAO,EAAC,iDAAiD,EAAC,MAAM,wDAAwD,CAAC;AAIzH,MAAM,CAAC,MAAM,+BAA+B,GAAG,MAAM,CAAC,MAAM,CAAC;IACzD,SAAS,EAAE,WAAW,EAAE,YAAY,EAAE,aAAa,EAAE,QAAQ,EAAE,YAAY,EAAE,OAAO;CAC9E,CAAC,CAAC;AAGZ,MAAM,CAAC,MAAM,4BAA4B,GAAG,MAAM,CAAC,MAAM,CAAC;IACtD,UAAU,EAAE,eAAe;CACrB,CAAC,CAAC;AAGZ,MAAM,CAAC,MAAM,8BAA8B,GAAG,MAAM,CAAC,MAAM,CAAC;IACxD,MAAM;IACN,GAAG,+BAA+B;IAClC,GAAG,4BAA4B;CACzB,CAAC,CAAC;AAGZ,MAAM,YAAY,GAAG;IACjB,SAAS,EAAE,kBAAkB;IAC7B,WAAW,EAAE,gBAAgB;IAC7B,YAAY,EAAE,iBAAiB;IAC/B,aAAa,EAAE,sBAAsB;IACrC,QAAQ,EAAE,iBAAiB;IAC3B,YAAY,EAAE,iBAAiB;IAC/B,OAAO,EAAE,gBAAgB;IACzB,UAAU,EAAE,mBAAmB;IAC/B,eAAe,EAAE,wBAAwB;CACiD,CAAC;AAC/F,MAAM,uBAAuB,GAAG,IAAI,GAAG,CACnC,MAAM,CAAC,OAAO,CAAC,YAAY,CAAC;KACvB,GAAG,CAAC,CAAC,CAAC,UAAU,EAAE,OAAO,CAAC,EAAE,EAAE,CAAC,CAC5B,CAAC,OAAO,EAAE,UAAuC,CAAC,CACrD,CAAC,CACT,CAAC;AA2BF;;;;;;;;;;;;GAYG;AACH,MAAM,UAAU,kBAAkB,CAAC,EAC/B,IAAI,GAAG,MAAM,EACb,SAAS,EACT,QAAQ,EACR,QAAQ,EACR,SAAS,EACT,qBAAqB,EACrB,WAAW,GAAG,IAAI,EAClB,mCAAmC,GAAG,IAAI,EAC1C,OAAO,GAAG,KAAK,EACS;IACxB,SAAS,4BAA4B,CACjC,sBAAyB,EACzB,kBAA+C,EAAE;QAEjD,MAAM,qBAAqB,GAAG,uBAAuB,CAAC,GAAG,CAAC,sBAAsB,CAAmC,CAAC;QACpH,MAAM,mBAAmB,GAAG,qBAAqB,EAAE,CAAC,qBAAqB,CAAC,CAAC;QAE3E,OAAO,IAAK,sBAA8B,CAAC;YACvC,GAAG,CAAC,eAAe,IAAI,EAAE,CAAC;YAC1B,GAAG,CAAC,mBAAmB,IAAI,EAAE,CAAC;SACjC,CAAC,CAAC;IACP,CAAC;IAED,IAAI,IAAI,KAAK,MAAM,IAAI,IAAI,IAAI,IAAI,EAAE;QACjC,
IAAI,yBAAyB,CAAC,IAAI,CAAC,EAAE;YACjC,MAAM,OAAO,GAAG,YAAY,CAAC,IAAI,CAAC,CAAC;YAEnC,IAAI,gBAAgB,CAAC,OAAO,EAAE,mBAAmB,CAAC,EAAE;gBAChD,MAAM,eAAe,GAAG,qBAAqB,EAAE,QAAQ,CAAC;gBACxD,IAAI,eAAe,IAAI,IAAI,IAAI,eAAe,EAAE,QAAQ,IAAI,IAAI,IAAI,eAAe,EAAE,eAAe,IAAI,IAAI;oBACxG,eAAe,EAAE,aAAa,IAAI,IAAI,IAAI,eAAe,EAAE,YAAY,IAAI,IAAI,EACjF;oBACE,IAAI,WAAW;wBACX,OAAO,CAAC,IAAI,CAAC,mBAAmB,EAAE,GAAG,0JAA0J,CAAC,CAAC;iBACxM;;oBACG,OAAO,IAAI,mBAAmB,CAAC,eAAe,CAAC,CAAC;aACvD;iBAAM,IAAI,gBAAgB,CAAC,OAAO,EAAE,wBAAwB,CAAC,EAAE;gBAC5D,MAAM,aAAa,GAAG,qBAAqB,EAAE,aAAa,EAAE,QAAQ,IAAI,QAAQ,EAAE,QAAQ,EAAE,SAAS,EAAE,aAAa,CAAC;gBAErH,IAAI,aAAa,IAAI,IAAI,EAAE;oBACvB,IAAI,WAAW;wBACX,OAAO,CAAC,IAAI,CAAC,mBAAmB,EAAE,GAAG,qGAAqG,CAAC,CAAC;iBACnJ;qBAAM;oBACH,IAAI;wBACA,OAAO,IAAI,wBAAwB,CAAC;4BAChC,GAAG,CAAC,qBAAqB,EAAE,aAAa,IAAI,EAAE,CAAC;4BAC/C,QAAQ,EAAE,aAAa;yBAC1B,CAAC,CAAC;qBACN;oBAAC,OAAO,GAAG,EAAE;wBACV,IAAI,CAAC,mCAAmC;4BACpC,MAAM,GAAG,CAAC;6BACT,IAAI,WAAW;4BAChB,OAAO,CAAC,KAAK,CAAC,mBAAmB,EAAE,GAAG,iGAAiG,EAAE,GAAG,CAAC,CAAC;qBACrJ;iBACJ;aACJ;;gBACG,KAAK,CAAC,OAAuB,CAAC,CAAC;SACtC;aAAM,IAAI,MAAM,CAAC,MAAM,CAAC,YAAY,EAAE,IAAI,CAAC,EAAE;YAC1C,MAAM,OAAO,GAAG,YAAY,CAAC,IAAI,CAAC,CAAC;YACnC,MAAM,eAAe,GACjB,qBAAqB,EAAE,CAAC,IAAI,CAAC,CAAC;YAElC,OAAO,IAAK,OAAe,CAAC,eAAe,CAAC,CAAC;SAChD;KACJ;IAED,MAAM,kBAAkB,GAAG,qBAAqB,EAAE,aAAa,EAAE,QAAQ,IAAI,QAAQ,EAAE,QAAQ,EAAE,SAAS,EAAE,aAAa,CAAC;IAE1H,IAAI,CAAC,OAAO,IAAI,kBAAkB,IAAI,IAAI,IAAI,kBAAkB,CAAC,IAAI,EAAE,KAAK,EAAE,EAAE;QAC5E,MAAM,+BAA+B,GAAoC;YACrE,GAAG,CAAC,qBAAqB,EAAE,aAAa,IAAI,EAAE,CAAC;YAC/C,QAAQ,EAAE,kBAAkB;SAC/B,CAAC;QAEF,KAAK,MAAM,8BAA8B,IAAI,+BAA+B,EAAE;YAC1E,MAAM,OAAO,GAAG,YAAY,CAAC,8BAA8B,CAAC,CAAC;YAC7D,MAAM,eAAe,GAAG,qBAAqB,EAAE,CAAC,8BAA8B,CAAC,CAAC;YAEhF,MAAM,wBAAwB,GAAG,OAAO,CAAC,yDAAyD,EAAE,EAAE,IAAI,EAAE,CAAC;YAC7G,IAAI,wBAAwB,CAAC,MAAM,KAAK,CAAC;gBACrC,wBAAwB,CAAC,IAAI,CAAC,EAAS,CAAC,CAAC;YAE7C,KAAK,MAAM,iBAAiB,IAAI,wBAAwB,EAAE;gBACtD,MAAM,uBAAuB,GAAG;oBAC5B,GAAG,CAAC,eAAe,IAAI,EAAE,CAAC;oBAC1B,GAAG,CAAC,iBAAiB,IAAI,EAAE,CAAC;iBAC/B,CAA
C;gBACF,MAAM,WAAW,GAAG,IAAK,OAAe,CAAC,uBAAuB,CAAC,CAAC;gBAElE,IAAI,iDAAiD,CAAC,+BAA+B,EAAE,WAAW,EAAE,SAAS,CAAC;oBAC1G,OAAO,IAAK,OAAe,CAAC,uBAAuB,CAAC,CAAC;aAC5D;SACJ;QAED,IAAI,CAAC,mCAAmC;YACpC,OAAO,IAAI,wBAAwB,CAAC,+BAA+B,CAAC,CAAC;QAEzE,IAAI;YACA,OAAO,IAAI,wBAAwB,CAAC,+BAA+B,CAAC,CAAC;SACxE;QAAC,OAAO,GAAG,EAAE;YACV,OAAO,CAAC,KAAK,CAAC,mBAAmB,EAAE,GAAG,iGAAiG,EAAE,GAAG,CAAC,CAAC;SACjJ;KACJ;IAED,wFAAwF;IACxF,sFAAsF;IACtF,IAAI,kBAAkB,IAAI,IAAI,IAAI,kBAAkB,CAAC,IAAI,EAAE,KAAK,EAAE,EAAE;QAChE,IAAI,kBAAkB,CAAC,QAAQ,CAAC,cAAc,CAAC;YAC3C,OAAO,4BAA4B,CAAC,iBAAiB,CAAC,CAAC;aACtD,IAAI,kBAAkB,CAAC,QAAQ,CAAC,QAAQ,CAAC;YAC1C,OAAO,4BAA4B,CAAC,gBAAgB,EAAE;gBAClD,iBAAiB,EAAE,kBAAkB,CAAC,QAAQ,CAAC,iBAAiB,CAAC;aACpE,CAAC,CAAC;aACF,IAAI,kBAAkB,CAAC,QAAQ,CAAC,iBAAiB,CAAC;YACnD,OAAO,4BAA4B,CAAC,gBAAgB,CAAC,CAAC;KAC7D;IAED,IAAI,QAAQ,IAAI,IAAI,EAAE;QAClB,MAAM,EAAC,IAAI,EAAE,OAAO,EAAE,QAAQ,EAAE,SAAS,EAAC,GAAG,kBAAkB,CAAC,QAAQ,CAAC,CAAC;QAE1E,IAAI,QAAQ,EAAE,WAAW,EAAE,KAAK,MAAM,EAAE;YACpC,MAAM,aAAa,GAAG,IAAI,EAAE,WAAW,EAAE,CAAC;YAC1C,MAAM,gBAAgB,GAAG,OAAO,EAAE,WAAW,EAAE,CAAC;YAChD,MAAM,qBAAqB,GAAG,CAAC,gBAAgB,EAAE,KAAK,CAAC,GAAG,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CACrE,SAAS,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,IAAI,CAAC,WAAW,EAAE,CAAC,CAC5C,CAAC;YACF,MAAM,0BAA0B,GAAG,qBAAqB,CAAC,CAAC,CAAC,CAAC;YAE5D,IAAI,aAAa,KAAK,OAAO,EAAE;gBAC3B,IAAI,qBAAqB,CAAC,QAAQ,CAAC,MAAM,CAAC;oBACtC,OAAO,4BAA4B,CAAC,gBAAgB,CAAC,CAAC;gBAE1D,OAAO,4BAA4B,CAAC,kBAAkB,CAAC,CAAC;aAC3D;iBAAM,IAAI,aAAa,KAAK,MAAM,IAAI,0BAA0B,KAAK,OAAO;gBACzE,OAAO,4BAA4B,CAAC,gBAAgB,CAAC,CAAC;iBACrD,IAAI,aAAa,KAAK,MAAM;gBAC7B,OAAO,4BAA4B,CAAC,iBAAiB,CAAC,CAAC;iBACtD,IAAI,aAAa,KAAK,OAAO,IAAI,gBAAgB,KAAK,WAAW;gBAClE,OAAO,4BAA4B,CAAC,gBAAgB,CAAC,CAAC;iBACrD,IAAI,aAAa,KAAK,SAAS;gBAChC,OAAO,4BAA4B,CAAC,kBAAkB,CAAC,CAAC;iBACvD,IAAI,0BAA0B,KAAK,OAAO;gBAC3C,OAAO,4BAA4B,CAAC,gBAAgB,CAAC,CAAC;iBACrD,IAAI,gBAAgB,KAAK,QAAQ;gBAClC,OAAO,4BAA4B,CAAC,iBAAiB,CAAC,CAAC;iBACtD,IAAI,aAAa,KAAK,aAAa;gBACpC,OAAO,4BAA4B,CAAC,sBAAsB,CAAC,CAAC;iBAC3D,IAAI,aAAa,KAAK,SAAS,IAAI
,qBAAqB,CAAC,QAAQ,CAAC,SAAS,CAAC;gBAC7E,OAAO,4BAA4B,CAAC,iBAAiB,CAAC,CAAC;iBACtD,IAAI,aAAa,KAAK,OAAO;gBAC9B,OAAO,4BAA4B,CAAC,gBAAgB,CAAC,CAAC;iBACrD,IAAI,qBAAqB,CAAC,QAAQ,CAAC,QAAQ,CAAC;gBAC7C,OAAO,4BAA4B,CAAC,iBAAiB,CAAC,CAAC;SAC9D;KACJ;IAED,IAAI,QAAQ,IAAI,IAAI,EAAE;QAClB,MAAM,IAAI,GAAG,QAAQ,CAAC,QAAQ,CAAC,OAAO,EAAE,YAAY,CAAC;QAErD,IAAI,IAAI,KAAK,OAAO;YAChB,OAAO,4BAA4B,CAAC,gBAAgB,CAAC,CAAC;aACrD,IAAI,IAAI,KAAK,QAAQ;YACtB,OAAO,4BAA4B,CAAC,iBAAiB,CAAC,CAAC;KAC9D;IAED,IAAI,SAAS,KAAK,EAAE,IAAI,SAAS,IAAI,IAAI;QACrC,OAAO,IAAI,CAAC;IAEhB,IAAI,qBAAqB,CAAC,UAAU,CAAC,SAAS,CAAC,EAAE;QAC7C,OAAO,4BAA4B,CAAC,gBAAgB,CAAC,CAAC;KACzD;SAAM,IAAI,sBAAsB,CAAC,UAAU,CAAC,SAAS,CAAC,EAAE;QACrD,OAAO,4BAA4B,CAAC,iBAAiB,CAAC,CAAC;KAC1D;IAED,OAAO,IAAI,CAAC;AAChB,CAAC;AAED,MAAM,UAAU,4BAA4B,CAAC,IAAY;IACrD,OAAO,+BAA+B,CAAC,QAAQ,CAAC,IAAW,CAAC,CAAC;AACjE,CAAC;AAED,MAAM,UAAU,yBAAyB,CAAC,IAAY;IAClD,OAAO,4BAA4B,CAAC,QAAQ,CAAC,IAAW,CAAC,CAAC;AAC9D,CAAC;AAED,0FAA0F;AAC1F,SAAS,gBAAgB,CAAI,KAAU,EAAE,cAAiB;IACtD,OAAO,KAAK,KAAK,cAAc,CAAC;AACpC,CAAC"}
|
package/dist/cli/cli.js
CHANGED
|
@@ -14,7 +14,7 @@ import { ClearCommand } from "./commands/ClearCommand.js";
|
|
|
14
14
|
import { ChatCommand } from "./commands/ChatCommand.js";
|
|
15
15
|
import { CompleteCommand } from "./commands/CompleteCommand.js";
|
|
16
16
|
import { InfillCommand } from "./commands/InfillCommand.js";
|
|
17
|
-
import { InspectCommand } from "./commands/InspectCommand.js";
|
|
17
|
+
import { InspectCommand } from "./commands/inspect/InspectCommand.js";
|
|
18
18
|
import { DebugCommand } from "./commands/DebugCommand.js";
|
|
19
19
|
const __dirname = path.dirname(fileURLToPath(import.meta.url));
|
|
20
20
|
const packageJson = fs.readJSONSync(path.join(__dirname, "..", "..", "package.json"));
|
package/dist/cli/cli.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"cli.js","sourceRoot":"","sources":["../../src/cli/cli.ts"],"names":[],"mappings":";AAEA,OAAO,EAAC,aAAa,EAAC,MAAM,KAAK,CAAC;AAClC,OAAO,IAAI,MAAM,MAAM,CAAC;AACxB,OAAO,KAAK,MAAM,OAAO,CAAC;AAC1B,sDAAsD;AACtD,OAAO,EAAC,OAAO,EAAC,MAAM,eAAe,CAAC;AACtC,OAAO,EAAE,MAAM,UAAU,CAAC;AAC1B,OAAO,EAAC,UAAU,EAAC,MAAM,cAAc,CAAC;AACxC,OAAO,EAAC,mBAAmB,EAAC,MAAM,aAAa,CAAC;AAChD,OAAO,EAAC,eAAe,EAAC,MAAM,+BAA+B,CAAC;AAC9D,OAAO,EAAC,YAAY,EAAC,MAAM,4BAA4B,CAAC;AACxD,OAAO,EAAC,oBAAoB,EAAC,MAAM,oCAAoC,CAAC;AACxE,OAAO,EAAC,YAAY,EAAC,MAAM,4BAA4B,CAAC;AACxD,OAAO,EAAC,WAAW,EAAC,MAAM,2BAA2B,CAAC;AACtD,OAAO,EAAC,eAAe,EAAC,MAAM,+BAA+B,CAAC;AAC9D,OAAO,EAAC,aAAa,EAAC,MAAM,6BAA6B,CAAC;AAC1D,OAAO,EAAC,cAAc,EAAC,MAAM,
|
|
1
|
+
{"version":3,"file":"cli.js","sourceRoot":"","sources":["../../src/cli/cli.ts"],"names":[],"mappings":";AAEA,OAAO,EAAC,aAAa,EAAC,MAAM,KAAK,CAAC;AAClC,OAAO,IAAI,MAAM,MAAM,CAAC;AACxB,OAAO,KAAK,MAAM,OAAO,CAAC;AAC1B,sDAAsD;AACtD,OAAO,EAAC,OAAO,EAAC,MAAM,eAAe,CAAC;AACtC,OAAO,EAAE,MAAM,UAAU,CAAC;AAC1B,OAAO,EAAC,UAAU,EAAC,MAAM,cAAc,CAAC;AACxC,OAAO,EAAC,mBAAmB,EAAC,MAAM,aAAa,CAAC;AAChD,OAAO,EAAC,eAAe,EAAC,MAAM,+BAA+B,CAAC;AAC9D,OAAO,EAAC,YAAY,EAAC,MAAM,4BAA4B,CAAC;AACxD,OAAO,EAAC,oBAAoB,EAAC,MAAM,oCAAoC,CAAC;AACxE,OAAO,EAAC,YAAY,EAAC,MAAM,4BAA4B,CAAC;AACxD,OAAO,EAAC,WAAW,EAAC,MAAM,2BAA2B,CAAC;AACtD,OAAO,EAAC,eAAe,EAAC,MAAM,+BAA+B,CAAC;AAC9D,OAAO,EAAC,aAAa,EAAC,MAAM,6BAA6B,CAAC;AAC1D,OAAO,EAAC,cAAc,EAAC,MAAM,sCAAsC,CAAC;AACpE,OAAO,EAAC,YAAY,EAAC,MAAM,4BAA4B,CAAC;AAExD,MAAM,SAAS,GAAG,IAAI,CAAC,OAAO,CAAC,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;AAE/D,MAAM,WAAW,GAAG,EAAE,CAAC,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,IAAI,EAAE,IAAI,EAAE,cAAc,CAAC,CAAC,CAAC;AAEtF,mBAAmB,CAAC,IAAI,CAAC,CAAC;AAE1B,MAAM,IAAI,GAAG,KAAK,CAAC,OAAO,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC;AAE1C,IAAI;KACC,UAAU,CAAC,UAAU,CAAC;KACtB,KAAK,CAAC,+BAA+B,CAAC;KACtC,OAAO,CAAC,eAAe,CAAC;KACxB,OAAO,CAAC,YAAY,CAAC;KACrB,OAAO,CAAC,YAAY,CAAC;KACrB,OAAO,CAAC,WAAW,CAAC;KACpB,OAAO,CAAC,eAAe,CAAC;KACxB,OAAO,CAAC,aAAa,CAAC;KACtB,OAAO,CAAC,cAAc,CAAC;KACvB,OAAO,CAAC,oBAAoB,CAAC;KAC7B,OAAO,CAAC,YAAY,CAAC;KACrB,iBAAiB,EAAE;KACnB,aAAa,CAAC,CAAC,CAAC;KAChB,MAAM,EAAE;KACR,cAAc,EAAE;KAChB,KAAK,CAAC,GAAG,EAAE,SAAS,CAAC;KACrB,IAAI,CAAC,GAAG,CAAC;KACT,KAAK,CAAC,GAAG,EAAE,MAAM,CAAC;KAClB,OAAO,CAAC,WAAW,CAAC,OAAO,CAAC;KAC5B,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,EAAE,IAAI,CAAC,aAAa,EAAE,CAAC,CAAC;KACzC,KAAK,EAAE,CAAC"}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { CommandModule } from "yargs";
|
|
2
2
|
import { LlamaGrammar } from "../../evaluator/LlamaGrammar.js";
|
|
3
|
-
import {
|
|
3
|
+
import { SpecializedChatWrapperTypeName } from "../../chatWrappers/utils/resolveChatWrapper.js";
|
|
4
4
|
type ChatCommand = {
|
|
5
5
|
model: string;
|
|
6
6
|
systemInfo: boolean;
|
|
@@ -8,9 +8,11 @@ type ChatCommand = {
|
|
|
8
8
|
systemPromptFile?: string;
|
|
9
9
|
prompt?: string;
|
|
10
10
|
promptFile?: string;
|
|
11
|
-
wrapper:
|
|
12
|
-
|
|
11
|
+
wrapper: SpecializedChatWrapperTypeName | "auto";
|
|
12
|
+
noJinja?: boolean;
|
|
13
|
+
contextSize?: number;
|
|
13
14
|
batchSize?: number;
|
|
15
|
+
noTrimWhitespace: boolean;
|
|
14
16
|
grammar: "text" | Parameters<typeof LlamaGrammar.getFor>[1];
|
|
15
17
|
jsonSchemaGrammarFile?: string;
|
|
16
18
|
threads: number;
|
|
@@ -27,7 +29,8 @@ type ChatCommand = {
|
|
|
27
29
|
maxTokens: number;
|
|
28
30
|
noHistory: boolean;
|
|
29
31
|
environmentFunctions: boolean;
|
|
30
|
-
|
|
32
|
+
debug: boolean;
|
|
33
|
+
meter: boolean;
|
|
31
34
|
printTimings: boolean;
|
|
32
35
|
};
|
|
33
36
|
export declare const ChatCommand: CommandModule<object, ChatCommand>;
|
|
@@ -6,24 +6,27 @@ import fs from "fs-extra";
|
|
|
6
6
|
import { chatCommandHistoryFilePath, defaultChatSystemPrompt } from "../../config.js";
|
|
7
7
|
import { getIsInDocumentationMode } from "../../state.js";
|
|
8
8
|
import { ReplHistory } from "../../utils/ReplHistory.js";
|
|
9
|
-
import withStatusLogs from "../../utils/withStatusLogs.js";
|
|
10
9
|
import { defineChatSessionFunction } from "../../evaluator/LlamaChatSession/utils/defineChatSessionFunction.js";
|
|
11
10
|
import { getLlama } from "../../bindings/getLlama.js";
|
|
12
11
|
import { LlamaGrammar } from "../../evaluator/LlamaGrammar.js";
|
|
13
12
|
import { LlamaChatSession } from "../../evaluator/LlamaChatSession/LlamaChatSession.js";
|
|
14
|
-
import { LlamaModel } from "../../evaluator/LlamaModel.js";
|
|
15
|
-
import { LlamaContext } from "../../evaluator/LlamaContext/LlamaContext.js";
|
|
16
13
|
import { LlamaJsonSchemaGrammar } from "../../evaluator/LlamaJsonSchemaGrammar.js";
|
|
17
|
-
import { LlamaLogLevel } from "../../bindings/types.js";
|
|
18
|
-
import
|
|
14
|
+
import { LlamaLogLevel, LlamaLogLevelGreaterThan } from "../../bindings/types.js";
|
|
15
|
+
import withOra from "../../utils/withOra.js";
|
|
16
|
+
import { TokenMeter } from "../../evaluator/TokenMeter.js";
|
|
17
|
+
import { printInfoLine } from "../utils/printInfoLine.js";
|
|
18
|
+
import { resolveChatWrapper, specializedChatWrapperTypeNames } from "../../chatWrappers/utils/resolveChatWrapper.js";
|
|
19
|
+
import { GeneralChatWrapper } from "../../chatWrappers/GeneralChatWrapper.js";
|
|
20
|
+
import { printCommonInfoLines } from "../utils/printCommonInfoLines.js";
|
|
21
|
+
import { resolveCommandGgufPath } from "../utils/resolveCommandGgufPath.js";
|
|
19
22
|
export const ChatCommand = {
|
|
20
|
-
command: "chat",
|
|
23
|
+
command: "chat [modelPath]",
|
|
21
24
|
describe: "Chat with a Llama model",
|
|
22
25
|
builder(yargs) {
|
|
23
26
|
const isInDocumentationMode = getIsInDocumentationMode();
|
|
24
27
|
return yargs
|
|
25
28
|
.option("model", {
|
|
26
|
-
alias: "m",
|
|
29
|
+
alias: ["m", "modelPath"],
|
|
27
30
|
type: "string",
|
|
28
31
|
demandOption: true,
|
|
29
32
|
description: "Llama model file to use for the chat",
|
|
@@ -64,15 +67,22 @@ export const ChatCommand = {
|
|
|
64
67
|
alias: "w",
|
|
65
68
|
type: "string",
|
|
66
69
|
default: "auto",
|
|
67
|
-
choices:
|
|
70
|
+
choices: ["auto", ...specializedChatWrapperTypeNames],
|
|
68
71
|
description: "Chat wrapper to use. Use `auto` to automatically select a wrapper based on the model's BOS token",
|
|
69
72
|
group: "Optional:"
|
|
73
|
+
})
|
|
74
|
+
.option("noJinja", {
|
|
75
|
+
type: "boolean",
|
|
76
|
+
default: false,
|
|
77
|
+
description: "Don't use a Jinja wrapper, even if it's the best option for the model",
|
|
78
|
+
group: "Optional:"
|
|
70
79
|
})
|
|
71
80
|
.option("contextSize", {
|
|
72
81
|
alias: "c",
|
|
73
82
|
type: "number",
|
|
74
|
-
default: 1024 * 4,
|
|
75
83
|
description: "Context size to use for the model context",
|
|
84
|
+
default: -1,
|
|
85
|
+
defaultDescription: "Automatically determined based on the available VRAM",
|
|
76
86
|
group: "Optional:"
|
|
77
87
|
})
|
|
78
88
|
.option("batchSize", {
|
|
@@ -80,6 +90,13 @@ export const ChatCommand = {
|
|
|
80
90
|
type: "number",
|
|
81
91
|
description: "Batch size to use for the model context. The default value is the context size",
|
|
82
92
|
group: "Optional:"
|
|
93
|
+
})
|
|
94
|
+
.option("noTrimWhitespace", {
|
|
95
|
+
type: "boolean",
|
|
96
|
+
alias: ["noTrim"],
|
|
97
|
+
default: false,
|
|
98
|
+
description: "Don't trim whitespaces from the model response",
|
|
99
|
+
group: "Optional:"
|
|
83
100
|
})
|
|
84
101
|
.option("grammar", {
|
|
85
102
|
alias: "g",
|
|
@@ -133,6 +150,8 @@ export const ChatCommand = {
|
|
|
133
150
|
alias: "gl",
|
|
134
151
|
type: "number",
|
|
135
152
|
description: "number of layers to store in VRAM",
|
|
153
|
+
default: -1,
|
|
154
|
+
defaultDescription: "Automatically determined based on the available VRAM",
|
|
136
155
|
group: "Optional:"
|
|
137
156
|
})
|
|
138
157
|
.option("repeatPenalty", {
|
|
@@ -189,11 +208,17 @@ export const ChatCommand = {
|
|
|
189
208
|
description: "Provide access to environment functions like `getDate` and `getTime`",
|
|
190
209
|
group: "Optional:"
|
|
191
210
|
})
|
|
192
|
-
.option("
|
|
193
|
-
alias: "
|
|
211
|
+
.option("debug", {
|
|
212
|
+
alias: "d",
|
|
194
213
|
type: "boolean",
|
|
195
214
|
default: false,
|
|
196
|
-
description: "
|
|
215
|
+
description: "Print llama.cpp info and debug logs",
|
|
216
|
+
group: "Optional:"
|
|
217
|
+
})
|
|
218
|
+
.option("meter", {
|
|
219
|
+
type: "boolean",
|
|
220
|
+
default: false,
|
|
221
|
+
description: "Log how many tokens were used as input and output for each response",
|
|
197
222
|
group: "Optional:"
|
|
198
223
|
})
|
|
199
224
|
.option("printTimings", {
|
|
@@ -204,28 +229,36 @@ export const ChatCommand = {
|
|
|
204
229
|
group: "Optional:"
|
|
205
230
|
});
|
|
206
231
|
},
|
|
207
|
-
async handler({ model, systemInfo, systemPrompt, systemPromptFile, prompt, promptFile, wrapper, contextSize, batchSize, grammar, jsonSchemaGrammarFile, threads, temperature, minP, topK, topP, gpuLayers, repeatPenalty, lastTokensRepeatPenalty, penalizeRepeatingNewLine, repeatFrequencyPenalty, repeatPresencePenalty, maxTokens, noHistory, environmentFunctions,
|
|
232
|
+
async handler({ model, systemInfo, systemPrompt, systemPromptFile, prompt, promptFile, wrapper, noJinja, contextSize, batchSize, noTrimWhitespace, grammar, jsonSchemaGrammarFile, threads, temperature, minP, topK, topP, gpuLayers, repeatPenalty, lastTokensRepeatPenalty, penalizeRepeatingNewLine, repeatFrequencyPenalty, repeatPresencePenalty, maxTokens, noHistory, environmentFunctions, debug, meter, printTimings }) {
|
|
208
233
|
try {
|
|
209
234
|
await RunChat({
|
|
210
|
-
model, systemInfo, systemPrompt, systemPromptFile, prompt, promptFile, wrapper, contextSize, batchSize,
|
|
211
|
-
grammar, jsonSchemaGrammarFile, threads, temperature, minP, topK, topP, gpuLayers,
|
|
212
|
-
repeatPenalty, penalizeRepeatingNewLine, repeatFrequencyPenalty, repeatPresencePenalty, maxTokens,
|
|
213
|
-
noHistory, environmentFunctions,
|
|
235
|
+
model, systemInfo, systemPrompt, systemPromptFile, prompt, promptFile, wrapper, noJinja, contextSize, batchSize,
|
|
236
|
+
noTrimWhitespace, grammar, jsonSchemaGrammarFile, threads, temperature, minP, topK, topP, gpuLayers,
|
|
237
|
+
lastTokensRepeatPenalty, repeatPenalty, penalizeRepeatingNewLine, repeatFrequencyPenalty, repeatPresencePenalty, maxTokens,
|
|
238
|
+
noHistory, environmentFunctions, debug, meter, printTimings
|
|
214
239
|
});
|
|
215
240
|
}
|
|
216
241
|
catch (err) {
|
|
242
|
+
await new Promise((accept) => setTimeout(accept, 0)); // wait for logs to finish printing
|
|
217
243
|
console.error(err);
|
|
218
244
|
process.exit(1);
|
|
219
245
|
}
|
|
220
246
|
}
|
|
221
247
|
};
|
|
222
|
-
async function RunChat({ model: modelArg, systemInfo, systemPrompt, systemPromptFile, prompt, promptFile, wrapper, contextSize, batchSize, grammar: grammarArg, jsonSchemaGrammarFile: jsonSchemaGrammarFilePath, threads, temperature, minP, topK, topP, gpuLayers, lastTokensRepeatPenalty, repeatPenalty, penalizeRepeatingNewLine, repeatFrequencyPenalty, repeatPresencePenalty, maxTokens, noHistory, environmentFunctions,
|
|
223
|
-
if (
|
|
224
|
-
|
|
248
|
+
async function RunChat({ model: modelArg, systemInfo, systemPrompt, systemPromptFile, prompt, promptFile, wrapper, noJinja, contextSize, batchSize, noTrimWhitespace, grammar: grammarArg, jsonSchemaGrammarFile: jsonSchemaGrammarFilePath, threads, temperature, minP, topK, topP, gpuLayers, lastTokensRepeatPenalty, repeatPenalty, penalizeRepeatingNewLine, repeatFrequencyPenalty, repeatPresencePenalty, maxTokens, noHistory, environmentFunctions, debug, meter, printTimings }) {
|
|
249
|
+
if (contextSize === -1)
|
|
250
|
+
contextSize = undefined;
|
|
251
|
+
if (gpuLayers === -1)
|
|
252
|
+
gpuLayers = undefined;
|
|
253
|
+
const trimWhitespace = !noTrimWhitespace;
|
|
254
|
+
if (debug)
|
|
255
|
+
console.info(`${chalk.yellow("Log level:")} debug`);
|
|
256
|
+
const resolvedModelPath = await resolveCommandGgufPath(modelArg);
|
|
257
|
+
const llamaLogLevel = debug
|
|
258
|
+
? LlamaLogLevel.debug
|
|
259
|
+
: LlamaLogLevel.warn;
|
|
225
260
|
const llama = await getLlama("lastBuild", {
|
|
226
|
-
logLevel:
|
|
227
|
-
? LlamaLogLevel.warn
|
|
228
|
-
: LlamaLogLevel.debug
|
|
261
|
+
logLevel: llamaLogLevel
|
|
229
262
|
});
|
|
230
263
|
const logBatchSize = batchSize != null;
|
|
231
264
|
if (systemInfo)
|
|
@@ -240,75 +273,121 @@ async function RunChat({ model: modelArg, systemInfo, systemPrompt, systemPrompt
|
|
|
240
273
|
console.warn(chalk.yellow("Both `prompt` and `promptFile` were specified. `promptFile` will be used."));
|
|
241
274
|
prompt = await fs.readFile(path.resolve(process.cwd(), promptFile), "utf8");
|
|
242
275
|
}
|
|
243
|
-
if (batchSize
|
|
244
|
-
batchSize = contextSize;
|
|
245
|
-
else if (batchSize > contextSize) {
|
|
276
|
+
if (batchSize != null && contextSize != null && batchSize > contextSize) {
|
|
246
277
|
console.warn(chalk.yellow("Batch size is greater than the context size. Batch size will be set to the context size."));
|
|
247
278
|
batchSize = contextSize;
|
|
248
279
|
}
|
|
249
280
|
let initialPrompt = prompt ?? null;
|
|
250
|
-
const model = await
|
|
281
|
+
const model = await withOra({
|
|
251
282
|
loading: chalk.blue("Loading model"),
|
|
252
283
|
success: chalk.blue("Model loaded"),
|
|
253
|
-
fail: chalk.blue("Failed to load model")
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
284
|
+
fail: chalk.blue("Failed to load model"),
|
|
285
|
+
useStatusLogs: debug
|
|
286
|
+
}, async () => {
|
|
287
|
+
try {
|
|
288
|
+
return await llama.loadModel({
|
|
289
|
+
modelPath: resolvedModelPath,
|
|
290
|
+
gpuLayers: gpuLayers != null ? gpuLayers : undefined
|
|
291
|
+
});
|
|
292
|
+
}
|
|
293
|
+
finally {
|
|
294
|
+
if (llama.logLevel === LlamaLogLevel.debug) {
|
|
295
|
+
await new Promise((accept) => setTimeout(accept, 0)); // wait for logs to finish printing
|
|
296
|
+
console.info();
|
|
297
|
+
}
|
|
298
|
+
}
|
|
299
|
+
});
|
|
300
|
+
const context = await withOra({
|
|
260
301
|
loading: chalk.blue("Creating context"),
|
|
261
302
|
success: chalk.blue("Context created"),
|
|
262
|
-
fail: chalk.blue("Failed to create context")
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
303
|
+
fail: chalk.blue("Failed to create context"),
|
|
304
|
+
useStatusLogs: debug
|
|
305
|
+
}, async () => {
|
|
306
|
+
try {
|
|
307
|
+
return await model.createContext({
|
|
308
|
+
contextSize: contextSize != null ? contextSize : undefined,
|
|
309
|
+
batchSize: batchSize != null ? batchSize : undefined,
|
|
310
|
+
threads
|
|
311
|
+
});
|
|
312
|
+
}
|
|
313
|
+
finally {
|
|
314
|
+
if (llama.logLevel === LlamaLogLevel.debug) {
|
|
315
|
+
await new Promise((accept) => setTimeout(accept, 0)); // wait for logs to finish printing
|
|
316
|
+
console.info();
|
|
317
|
+
}
|
|
318
|
+
}
|
|
319
|
+
});
|
|
269
320
|
const grammar = jsonSchemaGrammarFilePath != null
|
|
270
321
|
? new LlamaJsonSchemaGrammar(llama, await fs.readJson(path.resolve(process.cwd(), jsonSchemaGrammarFilePath)))
|
|
271
322
|
: grammarArg !== "text"
|
|
272
323
|
? await LlamaGrammar.getFor(llama, grammarArg)
|
|
273
324
|
: undefined;
|
|
274
|
-
const
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
bosString: bos,
|
|
325
|
+
const chatWrapper = resolveChatWrapper({
|
|
326
|
+
type: wrapper,
|
|
327
|
+
bosString: model.tokens.bosString,
|
|
278
328
|
filename: model.filename,
|
|
279
|
-
|
|
280
|
-
|
|
329
|
+
fileInfo: model.fileInfo,
|
|
330
|
+
tokenizer: model.tokenize,
|
|
331
|
+
noJinja
|
|
332
|
+
}) ?? new GeneralChatWrapper();
|
|
333
|
+
const contextSequence = context.getSequence();
|
|
281
334
|
const session = new LlamaChatSession({
|
|
282
|
-
contextSequence
|
|
335
|
+
contextSequence,
|
|
283
336
|
systemPrompt,
|
|
284
337
|
chatWrapper: chatWrapper
|
|
285
338
|
});
|
|
339
|
+
let lastTokenMeterState = contextSequence.tokenMeter.getState();
|
|
286
340
|
await new Promise((accept) => setTimeout(accept, 0)); // wait for logs to finish printing
|
|
287
341
|
if (grammarArg != "text" && jsonSchemaGrammarFilePath != null)
|
|
288
342
|
console.warn(chalk.yellow("Both `grammar` and `jsonSchemaGrammarFile` were specified. `jsonSchemaGrammarFile` will be used."));
|
|
289
|
-
console.info(`${chalk.yellow("Context size:")} ${context.contextSize}`);
|
|
290
|
-
if (logBatchSize)
|
|
291
|
-
console.info(`${chalk.yellow("Batch size:")} ${context.batchSize}`);
|
|
292
|
-
console.info(`${chalk.yellow("Train context size:")} ${model.trainContextSize}`);
|
|
293
|
-
console.info(`${chalk.yellow("Model type:")} ${model.typeDescription}`);
|
|
294
|
-
console.info(`${chalk.yellow("BOS:")} ${bos}`);
|
|
295
|
-
console.info(`${chalk.yellow("EOS:")} ${eos}`);
|
|
296
|
-
console.info(`${chalk.yellow("Chat wrapper:")} ${chatWrapper.wrapperName}`);
|
|
297
|
-
console.info(`${chalk.yellow("Repeat penalty:")} ${repeatPenalty} (apply to last ${lastTokensRepeatPenalty} tokens)`);
|
|
298
|
-
if (repeatFrequencyPenalty != null)
|
|
299
|
-
console.info(`${chalk.yellow("Repeat frequency penalty:")} ${repeatFrequencyPenalty}`);
|
|
300
|
-
if (repeatPresencePenalty != null)
|
|
301
|
-
console.info(`${chalk.yellow("Repeat presence penalty:")} ${repeatPresencePenalty}`);
|
|
302
|
-
if (!penalizeRepeatingNewLine)
|
|
303
|
-
console.info(`${chalk.yellow("Penalize repeating new line:")} disabled`);
|
|
304
|
-
if (jsonSchemaGrammarFilePath != null)
|
|
305
|
-
console.info(`${chalk.yellow("JSON schema grammar file:")} ${path.relative(process.cwd(), path.resolve(process.cwd(), jsonSchemaGrammarFilePath))}`);
|
|
306
|
-
else if (grammarArg !== "text")
|
|
307
|
-
console.info(`${chalk.yellow("Grammar:")} ${grammarArg}`);
|
|
308
343
|
if (environmentFunctions && grammar != null) {
|
|
309
344
|
console.warn(chalk.yellow("Environment functions are disabled since a grammar is already specified"));
|
|
310
345
|
environmentFunctions = false;
|
|
311
346
|
}
|
|
347
|
+
const padTitle = "Context".length + 1;
|
|
348
|
+
printCommonInfoLines({
|
|
349
|
+
context,
|
|
350
|
+
minTitleLength: padTitle,
|
|
351
|
+
printBos: true,
|
|
352
|
+
printEos: true,
|
|
353
|
+
logBatchSize,
|
|
354
|
+
tokenMeterEnabled: meter
|
|
355
|
+
});
|
|
356
|
+
printInfoLine({
|
|
357
|
+
title: "Chat",
|
|
358
|
+
padTitle: padTitle,
|
|
359
|
+
info: [{
|
|
360
|
+
title: "Wrapper",
|
|
361
|
+
value: chatWrapper.wrapperName
|
|
362
|
+
}, {
|
|
363
|
+
title: "Repeat penalty",
|
|
364
|
+
value: `${repeatPenalty} (apply to last ${lastTokensRepeatPenalty} tokens)`
|
|
365
|
+
}, {
|
|
366
|
+
show: repeatFrequencyPenalty != null,
|
|
367
|
+
title: "Repeat frequency penalty",
|
|
368
|
+
value: String(repeatFrequencyPenalty)
|
|
369
|
+
}, {
|
|
370
|
+
show: repeatPresencePenalty != null,
|
|
371
|
+
title: "Repeat presence penalty",
|
|
372
|
+
value: String(repeatPresencePenalty)
|
|
373
|
+
}, {
|
|
374
|
+
show: !penalizeRepeatingNewLine,
|
|
375
|
+
title: "Penalize repeating new line",
|
|
376
|
+
value: "disabled"
|
|
377
|
+
}, {
|
|
378
|
+
show: jsonSchemaGrammarFilePath != null,
|
|
379
|
+
title: "JSON schema grammar file",
|
|
380
|
+
value: () => path.relative(process.cwd(), path.resolve(process.cwd(), jsonSchemaGrammarFilePath ?? ""))
|
|
381
|
+
}, {
|
|
382
|
+
show: jsonSchemaGrammarFilePath == null && grammarArg !== "text",
|
|
383
|
+
title: "Grammar",
|
|
384
|
+
value: grammarArg
|
|
385
|
+
}, {
|
|
386
|
+
show: environmentFunctions,
|
|
387
|
+
title: "Environment functions",
|
|
388
|
+
value: "enabled"
|
|
389
|
+
}]
|
|
390
|
+
});
|
|
312
391
|
// this is for ora to not interfere with readline
|
|
313
392
|
await new Promise(resolve => setTimeout(resolve, 1));
|
|
314
393
|
const replHistory = await ReplHistory.load(chatCommandHistoryFilePath, !noHistory);
|
|
@@ -324,6 +403,8 @@ async function RunChat({ model: modelArg, systemInfo, systemPrompt, systemPrompt
|
|
|
324
403
|
}
|
|
325
404
|
// eslint-disable-next-line no-constant-condition
|
|
326
405
|
while (true) {
|
|
406
|
+
let hadNoWhitespaceTextInThisIteration = false;
|
|
407
|
+
let nextPrintLeftovers = "";
|
|
327
408
|
const input = initialPrompt != null
|
|
328
409
|
? initialPrompt
|
|
329
410
|
: await getPrompt();
|
|
@@ -357,16 +438,42 @@ async function RunChat({ model: modelArg, systemInfo, systemPrompt, systemPrompt
|
|
|
357
438
|
? undefined
|
|
358
439
|
: maxTokens,
|
|
359
440
|
onToken(chunk) {
|
|
360
|
-
|
|
441
|
+
let text = nextPrintLeftovers + model.detokenize(chunk);
|
|
442
|
+
nextPrintLeftovers = "";
|
|
443
|
+
if (trimWhitespace) {
|
|
444
|
+
if (!hadNoWhitespaceTextInThisIteration) {
|
|
445
|
+
text = text.trimStart();
|
|
446
|
+
if (text.length > 0)
|
|
447
|
+
hadNoWhitespaceTextInThisIteration = true;
|
|
448
|
+
}
|
|
449
|
+
const textWithTrimmedEnd = text.trimEnd();
|
|
450
|
+
if (textWithTrimmedEnd.length < text.length) {
|
|
451
|
+
nextPrintLeftovers = text.slice(textWithTrimmedEnd.length);
|
|
452
|
+
text = textWithTrimmedEnd;
|
|
453
|
+
}
|
|
454
|
+
}
|
|
455
|
+
process.stdout.write(text);
|
|
361
456
|
},
|
|
362
457
|
functions: (grammar == null && environmentFunctions)
|
|
363
458
|
? defaultEnvironmentFunctions
|
|
364
|
-
: undefined
|
|
459
|
+
: undefined,
|
|
460
|
+
trimWhitespaceSuffix: trimWhitespace
|
|
365
461
|
});
|
|
366
462
|
process.stdout.write(endColor);
|
|
367
463
|
console.log();
|
|
368
|
-
if (printTimings)
|
|
464
|
+
if (printTimings) {
|
|
465
|
+
if (LlamaLogLevelGreaterThan(llama.logLevel, LlamaLogLevel.info))
|
|
466
|
+
llama.logLevel = LlamaLogLevel.info;
|
|
369
467
|
await context.printTimings();
|
|
468
|
+
await new Promise((accept) => setTimeout(accept, 0)); // wait for logs to finish printing
|
|
469
|
+
llama.logLevel = llamaLogLevel;
|
|
470
|
+
}
|
|
471
|
+
if (meter) {
|
|
472
|
+
const newTokenMeterState = contextSequence.tokenMeter.getState();
|
|
473
|
+
const tokenMeterDiff = TokenMeter.diff(newTokenMeterState, lastTokenMeterState);
|
|
474
|
+
lastTokenMeterState = newTokenMeterState;
|
|
475
|
+
console.info(`${chalk.dim("Input tokens:")} ${String(tokenMeterDiff.usedInputTokens).padEnd(5, " ")} ${chalk.dim("Output tokens:")} ${tokenMeterDiff.usedOutputTokens}`);
|
|
476
|
+
}
|
|
370
477
|
}
|
|
371
478
|
}
|
|
372
479
|
const defaultEnvironmentFunctions = {
|