node-llama-cpp 3.0.0-beta.9 → 3.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +42 -27
- package/bins/_linux-arm64.moved.txt +1 -0
- package/bins/_linux-armv7l.moved.txt +1 -0
- package/bins/_linux-x64-cuda.moved.txt +1 -0
- package/bins/_linux-x64-vulkan.moved.txt +1 -0
- package/bins/_linux-x64.moved.txt +1 -0
- package/bins/_mac-arm64-metal.moved.txt +1 -0
- package/bins/_mac-x64.moved.txt +1 -0
- package/bins/_win-arm64.moved.txt +1 -0
- package/bins/_win-x64-cuda.moved.txt +1 -0
- package/bins/_win-x64-vulkan.moved.txt +1 -0
- package/bins/_win-x64.moved.txt +1 -0
- package/dist/ChatWrapper.d.ts +19 -39
- package/dist/ChatWrapper.js +129 -72
- package/dist/ChatWrapper.js.map +1 -1
- package/dist/apiDocsIndex.d.ts +1 -0
- package/dist/apiDocsIndex.js +7 -0
- package/dist/apiDocsIndex.js.map +1 -0
- package/dist/bindings/AddonTypes.d.ts +88 -20
- package/dist/bindings/Llama.d.ts +43 -3
- package/dist/bindings/Llama.js +193 -23
- package/dist/bindings/Llama.js.map +1 -1
- package/dist/bindings/consts.d.ts +2 -0
- package/dist/bindings/consts.js +13 -0
- package/dist/bindings/consts.js.map +1 -0
- package/dist/bindings/getLlama.d.ts +123 -18
- package/dist/bindings/getLlama.js +264 -75
- package/dist/bindings/getLlama.js.map +1 -1
- package/dist/bindings/types.d.ts +29 -5
- package/dist/bindings/types.js +51 -2
- package/dist/bindings/types.js.map +1 -1
- package/dist/bindings/utils/MemoryOrchestrator.d.ts +21 -0
- package/dist/bindings/utils/MemoryOrchestrator.js +49 -0
- package/dist/bindings/utils/MemoryOrchestrator.js.map +1 -0
- package/dist/bindings/utils/asyncEvery.d.ts +5 -0
- package/dist/bindings/utils/asyncEvery.js +15 -0
- package/dist/bindings/utils/asyncEvery.js.map +1 -0
- package/dist/bindings/utils/asyncSome.d.ts +5 -0
- package/dist/bindings/utils/asyncSome.js +27 -0
- package/dist/bindings/utils/asyncSome.js.map +1 -0
- package/dist/bindings/utils/clearAllLocalBuilds.js.map +1 -1
- package/dist/bindings/utils/cloneLlamaCppRepo.d.ts +1 -1
- package/dist/bindings/utils/cloneLlamaCppRepo.js +39 -28
- package/dist/bindings/utils/cloneLlamaCppRepo.js.map +1 -1
- package/dist/bindings/utils/compileLLamaCpp.d.ts +11 -3
- package/dist/bindings/utils/compileLLamaCpp.js +250 -81
- package/dist/bindings/utils/compileLLamaCpp.js.map +1 -1
- package/dist/bindings/utils/detectAvailableComputeLayers.d.ts +14 -0
- package/dist/bindings/utils/detectAvailableComputeLayers.js +305 -0
- package/dist/bindings/utils/detectAvailableComputeLayers.js.map +1 -0
- package/dist/bindings/utils/detectGlibc.d.ts +4 -0
- package/dist/bindings/utils/detectGlibc.js +46 -0
- package/dist/bindings/utils/detectGlibc.js.map +1 -0
- package/dist/bindings/utils/getBestComputeLayersAvailable.d.ts +9 -0
- package/dist/bindings/utils/getBestComputeLayersAvailable.js +29 -0
- package/dist/bindings/utils/getBestComputeLayersAvailable.js.map +1 -0
- package/dist/bindings/utils/getBuildFolderNameForBuildOptions.js +14 -6
- package/dist/bindings/utils/getBuildFolderNameForBuildOptions.js.map +1 -1
- package/dist/bindings/utils/getExampleUsageCodeOfGetLlama.js.map +1 -1
- package/dist/bindings/utils/getGpuTypesToUseForOption.d.ts +12 -0
- package/dist/bindings/utils/getGpuTypesToUseForOption.js +39 -0
- package/dist/bindings/utils/getGpuTypesToUseForOption.js.map +1 -0
- package/dist/bindings/utils/getLinuxDistroInfo.d.ts +9 -0
- package/dist/bindings/utils/getLinuxDistroInfo.js +46 -0
- package/dist/bindings/utils/getLinuxDistroInfo.js.map +1 -0
- package/dist/bindings/utils/getLlamaWithoutBackend.d.ts +5 -0
- package/dist/bindings/utils/getLlamaWithoutBackend.js +27 -0
- package/dist/bindings/utils/getLlamaWithoutBackend.js.map +1 -0
- package/dist/bindings/utils/getPlatform.js.map +1 -1
- package/dist/bindings/utils/getPlatformInfo.d.ts +5 -0
- package/dist/bindings/utils/getPlatformInfo.js +28 -0
- package/dist/bindings/utils/getPlatformInfo.js.map +1 -0
- package/dist/bindings/utils/hasBuildingFromSourceDependenciesInstalled.d.ts +3 -0
- package/dist/bindings/utils/hasBuildingFromSourceDependenciesInstalled.js +27 -0
- package/dist/bindings/utils/hasBuildingFromSourceDependenciesInstalled.js.map +1 -0
- package/dist/bindings/utils/hasFileInPath.d.ts +2 -0
- package/dist/bindings/utils/hasFileInPath.js +34 -0
- package/dist/bindings/utils/hasFileInPath.js.map +1 -0
- package/dist/bindings/utils/lastBuildInfo.js.map +1 -1
- package/dist/bindings/utils/logBinaryUsageExampleToConsole.d.ts +1 -1
- package/dist/bindings/utils/logBinaryUsageExampleToConsole.js +3 -9
- package/dist/bindings/utils/logBinaryUsageExampleToConsole.js.map +1 -1
- package/dist/bindings/utils/logDistroInstallInstruction.d.ts +14 -0
- package/dist/bindings/utils/logDistroInstallInstruction.js +48 -0
- package/dist/bindings/utils/logDistroInstallInstruction.js.map +1 -0
- package/dist/bindings/utils/resolveCustomCmakeOptions.js +26 -26
- package/dist/bindings/utils/resolveCustomCmakeOptions.js.map +1 -1
- package/dist/bindings/utils/testBindingBinary.d.ts +1 -0
- package/dist/bindings/utils/testBindingBinary.js +100 -0
- package/dist/bindings/utils/testBindingBinary.js.map +1 -0
- package/dist/bindings/utils/testCmakeBinary.d.ts +5 -0
- package/dist/bindings/utils/testCmakeBinary.js +32 -0
- package/dist/bindings/utils/testCmakeBinary.js.map +1 -0
- package/dist/chatWrappers/AlpacaChatWrapper.d.ts +2 -1
- package/dist/chatWrappers/AlpacaChatWrapper.js +10 -2
- package/dist/chatWrappers/AlpacaChatWrapper.js.map +1 -1
- package/dist/chatWrappers/ChatMLChatWrapper.d.ts +2 -14
- package/dist/chatWrappers/ChatMLChatWrapper.js +23 -21
- package/dist/chatWrappers/ChatMLChatWrapper.js.map +1 -1
- package/dist/chatWrappers/FalconChatWrapper.d.ts +4 -10
- package/dist/chatWrappers/FalconChatWrapper.js +39 -21
- package/dist/chatWrappers/FalconChatWrapper.js.map +1 -1
- package/dist/chatWrappers/FunctionaryChatWrapper.d.ts +8 -32
- package/dist/chatWrappers/FunctionaryChatWrapper.js +514 -118
- package/dist/chatWrappers/FunctionaryChatWrapper.js.map +1 -1
- package/dist/chatWrappers/GemmaChatWrapper.d.ts +7 -0
- package/dist/chatWrappers/GemmaChatWrapper.js +96 -0
- package/dist/chatWrappers/GemmaChatWrapper.js.map +1 -0
- package/dist/chatWrappers/GeneralChatWrapper.d.ts +4 -10
- package/dist/chatWrappers/GeneralChatWrapper.js +46 -22
- package/dist/chatWrappers/GeneralChatWrapper.js.map +1 -1
- package/dist/chatWrappers/Llama2ChatWrapper.d.ts +12 -0
- package/dist/chatWrappers/{LlamaChatWrapper.js → Llama2ChatWrapper.js} +37 -20
- package/dist/chatWrappers/Llama2ChatWrapper.js.map +1 -0
- package/dist/chatWrappers/Llama3ChatWrapper.d.ts +16 -0
- package/dist/chatWrappers/Llama3ChatWrapper.js +173 -0
- package/dist/chatWrappers/Llama3ChatWrapper.js.map +1 -0
- package/dist/chatWrappers/Llama3_1ChatWrapper.d.ts +35 -0
- package/dist/chatWrappers/Llama3_1ChatWrapper.js +277 -0
- package/dist/chatWrappers/Llama3_1ChatWrapper.js.map +1 -0
- package/dist/chatWrappers/MistralChatWrapper.d.ts +15 -0
- package/dist/chatWrappers/MistralChatWrapper.js +169 -0
- package/dist/chatWrappers/MistralChatWrapper.js.map +1 -0
- package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.d.ts +100 -0
- package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.js +409 -0
- package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.js.map +1 -0
- package/dist/chatWrappers/generic/TemplateChatWrapper.d.ts +60 -0
- package/dist/chatWrappers/generic/TemplateChatWrapper.js +204 -0
- package/dist/chatWrappers/generic/TemplateChatWrapper.js.map +1 -0
- package/dist/chatWrappers/generic/utils/chatHistoryFunctionCallMessageTemplate.d.ts +23 -0
- package/dist/chatWrappers/generic/utils/chatHistoryFunctionCallMessageTemplate.js +45 -0
- package/dist/chatWrappers/generic/utils/chatHistoryFunctionCallMessageTemplate.js.map +1 -0
- package/dist/chatWrappers/utils/ChatModelFunctionsDocumentationGenerator.d.ts +57 -0
- package/dist/chatWrappers/utils/ChatModelFunctionsDocumentationGenerator.js +119 -0
- package/dist/chatWrappers/utils/ChatModelFunctionsDocumentationGenerator.js.map +1 -0
- package/dist/chatWrappers/utils/chunkChatItems.d.ts +10 -0
- package/dist/chatWrappers/utils/chunkChatItems.js +44 -0
- package/dist/chatWrappers/utils/chunkChatItems.js.map +1 -0
- package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.d.ts +4 -0
- package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.js +221 -0
- package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.js.map +1 -0
- package/dist/chatWrappers/utils/jsonDumps.d.ts +7 -0
- package/dist/chatWrappers/utils/jsonDumps.js +18 -0
- package/dist/chatWrappers/utils/jsonDumps.js.map +1 -0
- package/dist/chatWrappers/utils/resolveChatWrapper.d.ts +95 -0
- package/dist/chatWrappers/utils/resolveChatWrapper.js +335 -0
- package/dist/chatWrappers/utils/resolveChatWrapper.js.map +1 -0
- package/dist/cli/cli.js +19 -11
- package/dist/cli/cli.js.map +1 -1
- package/dist/cli/commands/ChatCommand.d.ts +16 -7
- package/dist/cli/commands/ChatCommand.js +321 -190
- package/dist/cli/commands/ChatCommand.js.map +1 -1
- package/dist/cli/commands/CompleteCommand.d.ts +31 -0
- package/dist/cli/commands/CompleteCommand.js +402 -0
- package/dist/cli/commands/CompleteCommand.js.map +1 -0
- package/dist/cli/commands/DebugCommand.d.ts +7 -0
- package/dist/cli/commands/DebugCommand.js +54 -0
- package/dist/cli/commands/DebugCommand.js.map +1 -0
- package/dist/cli/commands/InfillCommand.d.ts +33 -0
- package/dist/cli/commands/InfillCommand.js +438 -0
- package/dist/cli/commands/InfillCommand.js.map +1 -0
- package/dist/cli/commands/InitCommand.d.ts +11 -0
- package/dist/cli/commands/InitCommand.js +195 -0
- package/dist/cli/commands/InitCommand.js.map +1 -0
- package/dist/cli/commands/OnPostInstallCommand.js +6 -2
- package/dist/cli/commands/OnPostInstallCommand.js.map +1 -1
- package/dist/cli/commands/PullCommand.d.ts +13 -0
- package/dist/cli/commands/PullCommand.js +158 -0
- package/dist/cli/commands/PullCommand.js.map +1 -0
- package/dist/cli/commands/inspect/InspectCommand.d.ts +4 -0
- package/dist/cli/commands/inspect/InspectCommand.js +21 -0
- package/dist/cli/commands/inspect/InspectCommand.js.map +1 -0
- package/dist/cli/commands/inspect/commands/InspectEstimateCommand.d.ts +12 -0
- package/dist/cli/commands/inspect/commands/InspectEstimateCommand.js +225 -0
- package/dist/cli/commands/inspect/commands/InspectEstimateCommand.js.map +1 -0
- package/dist/cli/commands/inspect/commands/InspectGgufCommand.d.ts +12 -0
- package/dist/cli/commands/inspect/commands/InspectGgufCommand.js +149 -0
- package/dist/cli/commands/inspect/commands/InspectGgufCommand.js.map +1 -0
- package/dist/cli/commands/inspect/commands/InspectGpuCommand.d.ts +4 -0
- package/dist/cli/commands/inspect/commands/InspectGpuCommand.js +202 -0
- package/dist/cli/commands/inspect/commands/InspectGpuCommand.js.map +1 -0
- package/dist/cli/commands/inspect/commands/InspectMeasureCommand.d.ts +18 -0
- package/dist/cli/commands/inspect/commands/InspectMeasureCommand.js +629 -0
- package/dist/cli/commands/inspect/commands/InspectMeasureCommand.js.map +1 -0
- package/dist/cli/commands/source/SourceCommand.d.ts +4 -0
- package/dist/cli/commands/source/SourceCommand.js +19 -0
- package/dist/cli/commands/source/SourceCommand.js.map +1 -0
- package/dist/cli/commands/source/commands/BuildCommand.d.ts +16 -0
- package/dist/cli/commands/source/commands/BuildCommand.js +148 -0
- package/dist/cli/commands/source/commands/BuildCommand.js.map +1 -0
- package/dist/cli/commands/{ClearCommand.d.ts → source/commands/ClearCommand.d.ts} +1 -1
- package/dist/cli/commands/{ClearCommand.js → source/commands/ClearCommand.js} +11 -10
- package/dist/cli/commands/source/commands/ClearCommand.js.map +1 -0
- package/dist/cli/commands/{DownloadCommand.d.ts → source/commands/DownloadCommand.d.ts} +5 -4
- package/dist/cli/commands/source/commands/DownloadCommand.js +217 -0
- package/dist/cli/commands/source/commands/DownloadCommand.js.map +1 -0
- package/dist/cli/projectTemplates.d.ts +7 -0
- package/dist/cli/projectTemplates.js +10 -0
- package/dist/cli/projectTemplates.js.map +1 -0
- package/dist/cli/recommendedModels.d.ts +2 -0
- package/dist/cli/recommendedModels.js +585 -0
- package/dist/cli/recommendedModels.js.map +1 -0
- package/dist/cli/startCreateCli.d.ts +2 -0
- package/dist/cli/startCreateCli.js +26 -0
- package/dist/cli/startCreateCli.js.map +1 -0
- package/dist/cli/utils/ConsoleInteraction.d.ts +22 -0
- package/dist/cli/utils/ConsoleInteraction.js +122 -0
- package/dist/cli/utils/ConsoleInteraction.js.map +1 -0
- package/dist/cli/utils/ConsoleTable.d.ts +23 -0
- package/dist/cli/utils/ConsoleTable.js +86 -0
- package/dist/cli/utils/ConsoleTable.js.map +1 -0
- package/dist/cli/utils/basicChooseFromListConsoleInteraction.d.ts +13 -0
- package/dist/cli/utils/basicChooseFromListConsoleInteraction.js +111 -0
- package/dist/cli/utils/basicChooseFromListConsoleInteraction.js.map +1 -0
- package/dist/cli/utils/consolePromptQuestion.d.ts +6 -0
- package/dist/cli/utils/consolePromptQuestion.js +82 -0
- package/dist/cli/utils/consolePromptQuestion.js.map +1 -0
- package/dist/cli/utils/getReadablePath.d.ts +1 -0
- package/dist/cli/utils/getReadablePath.js +14 -0
- package/dist/cli/utils/getReadablePath.js.map +1 -0
- package/dist/cli/utils/interactivelyAskForModel.d.ts +8 -0
- package/dist/cli/utils/interactivelyAskForModel.js +450 -0
- package/dist/cli/utils/interactivelyAskForModel.js.map +1 -0
- package/dist/cli/utils/logUsedGpuTypeOption.d.ts +2 -0
- package/dist/cli/utils/logUsedGpuTypeOption.js +9 -0
- package/dist/cli/utils/logUsedGpuTypeOption.js.map +1 -0
- package/dist/cli/utils/printCommonInfoLines.d.ts +9 -0
- package/dist/cli/utils/printCommonInfoLines.js +82 -0
- package/dist/cli/utils/printCommonInfoLines.js.map +1 -0
- package/dist/cli/utils/printInfoLine.d.ts +12 -0
- package/dist/cli/utils/printInfoLine.js +54 -0
- package/dist/cli/utils/printInfoLine.js.map +1 -0
- package/dist/cli/utils/projectTemplates.d.ts +19 -0
- package/dist/cli/utils/projectTemplates.js +47 -0
- package/dist/cli/utils/projectTemplates.js.map +1 -0
- package/dist/cli/utils/renderModelCompatibilityPercentageWithColors.d.ts +6 -0
- package/dist/cli/utils/renderModelCompatibilityPercentageWithColors.js +14 -0
- package/dist/cli/utils/renderModelCompatibilityPercentageWithColors.js.map +1 -0
- package/dist/cli/utils/resolveCommandGgufPath.d.ts +5 -0
- package/dist/cli/utils/resolveCommandGgufPath.js +72 -0
- package/dist/cli/utils/resolveCommandGgufPath.js.map +1 -0
- package/dist/cli/utils/resolveHeaderFlag.d.ts +1 -0
- package/dist/cli/utils/resolveHeaderFlag.js +21 -0
- package/dist/cli/utils/resolveHeaderFlag.js.map +1 -0
- package/dist/cli/utils/resolveModelRecommendationFileOptions.d.ts +19 -0
- package/dist/cli/utils/resolveModelRecommendationFileOptions.js +7 -0
- package/dist/cli/utils/resolveModelRecommendationFileOptions.js.map +1 -0
- package/dist/cli/utils/splitAnsiToLines.d.ts +1 -0
- package/dist/cli/utils/splitAnsiToLines.js +32 -0
- package/dist/cli/utils/splitAnsiToLines.js.map +1 -0
- package/dist/cli/utils/withCliCommandDescriptionDocsUrl.d.ts +2 -0
- package/dist/cli/utils/withCliCommandDescriptionDocsUrl.js +23 -0
- package/dist/cli/utils/withCliCommandDescriptionDocsUrl.js.map +1 -0
- package/dist/commands.d.ts +4 -3
- package/dist/commands.js +6 -3
- package/dist/commands.js.map +1 -1
- package/dist/config.d.ts +35 -4
- package/dist/config.js +58 -17
- package/dist/config.js.map +1 -1
- package/dist/consts.d.ts +4 -0
- package/dist/consts.js +11 -0
- package/dist/consts.js.map +1 -0
- package/dist/evaluator/LlamaChat/LlamaChat.d.ts +151 -41
- package/dist/evaluator/LlamaChat/LlamaChat.js +1289 -437
- package/dist/evaluator/LlamaChat/LlamaChat.js.map +1 -1
- package/dist/evaluator/LlamaChat/utils/FunctionCallNameGrammar.d.ts +11 -0
- package/dist/evaluator/LlamaChat/utils/FunctionCallNameGrammar.js +55 -0
- package/dist/evaluator/LlamaChat/utils/FunctionCallNameGrammar.js.map +1 -0
- package/dist/evaluator/LlamaChat/utils/FunctionCallParamsGrammar.d.ts +16 -0
- package/dist/evaluator/LlamaChat/utils/FunctionCallParamsGrammar.js +45 -0
- package/dist/evaluator/LlamaChat/utils/FunctionCallParamsGrammar.js.map +1 -0
- package/dist/evaluator/LlamaChat/utils/LlamaFunctionCallValidationError.d.ts +8 -0
- package/dist/evaluator/LlamaChat/utils/LlamaFunctionCallValidationError.js +12 -0
- package/dist/evaluator/LlamaChat/utils/LlamaFunctionCallValidationError.js.map +1 -0
- package/dist/evaluator/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.js +27 -17
- package/dist/evaluator/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.js.map +1 -1
- package/dist/evaluator/LlamaChatSession/LlamaChatSession.d.ts +187 -13
- package/dist/evaluator/LlamaChatSession/LlamaChatSession.js +280 -53
- package/dist/evaluator/LlamaChatSession/LlamaChatSession.js.map +1 -1
- package/dist/evaluator/LlamaChatSession/utils/LlamaChatSessionPromptCompletionEngine.d.ts +40 -0
- package/dist/evaluator/LlamaChatSession/utils/LlamaChatSessionPromptCompletionEngine.js +186 -0
- package/dist/evaluator/LlamaChatSession/utils/LlamaChatSessionPromptCompletionEngine.js.map +1 -0
- package/dist/evaluator/LlamaChatSession/utils/defineChatSessionFunction.d.ts +10 -2
- package/dist/evaluator/LlamaChatSession/utils/defineChatSessionFunction.js +8 -0
- package/dist/evaluator/LlamaChatSession/utils/defineChatSessionFunction.js.map +1 -1
- package/dist/evaluator/LlamaCompletion.d.ts +168 -0
- package/dist/evaluator/LlamaCompletion.js +470 -0
- package/dist/evaluator/LlamaCompletion.js.map +1 -0
- package/dist/evaluator/LlamaContext/LlamaContext.d.ts +62 -21
- package/dist/evaluator/LlamaContext/LlamaContext.js +501 -120
- package/dist/evaluator/LlamaContext/LlamaContext.js.map +1 -1
- package/dist/evaluator/LlamaContext/LlamaSampler.d.ts +1 -0
- package/dist/evaluator/LlamaContext/LlamaSampler.js +31 -0
- package/dist/evaluator/LlamaContext/LlamaSampler.js.map +1 -0
- package/dist/evaluator/LlamaContext/types.d.ts +177 -16
- package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies/firstInFirstOutStrategy.js.map +1 -0
- package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies/maximumParallelismStrategy.js.map +1 -0
- package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizationStrategy.d.ts +2 -0
- package/dist/evaluator/LlamaContext/utils/{resolveBatchItemsPrioritizingStrategy.js → resolveBatchItemsPrioritizationStrategy.js} +5 -5
- package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizationStrategy.js.map +1 -0
- package/dist/evaluator/LlamaEmbedding.d.ts +21 -0
- package/dist/evaluator/LlamaEmbedding.js +53 -0
- package/dist/evaluator/LlamaEmbedding.js.map +1 -0
- package/dist/evaluator/LlamaEmbeddingContext.d.ts +29 -19
- package/dist/evaluator/LlamaEmbeddingContext.js +36 -43
- package/dist/evaluator/LlamaEmbeddingContext.js.map +1 -1
- package/dist/evaluator/LlamaGrammar.d.ts +16 -13
- package/dist/evaluator/LlamaGrammar.js +17 -10
- package/dist/evaluator/LlamaGrammar.js.map +1 -1
- package/dist/evaluator/LlamaGrammarEvaluationState.d.ts +7 -3
- package/dist/evaluator/LlamaGrammarEvaluationState.js +8 -4
- package/dist/evaluator/LlamaGrammarEvaluationState.js.map +1 -1
- package/dist/evaluator/LlamaJsonSchemaGrammar.d.ts +3 -0
- package/dist/evaluator/LlamaJsonSchemaGrammar.js +5 -3
- package/dist/evaluator/LlamaJsonSchemaGrammar.js.map +1 -1
- package/dist/evaluator/LlamaModel/LlamaModel.d.ts +255 -0
- package/dist/evaluator/LlamaModel/LlamaModel.js +780 -0
- package/dist/evaluator/LlamaModel/LlamaModel.js.map +1 -0
- package/dist/evaluator/LlamaModel/utils/TokenAttributes.d.ts +29 -0
- package/dist/evaluator/LlamaModel/utils/TokenAttributes.js +65 -0
- package/dist/evaluator/LlamaModel/utils/TokenAttributes.js.map +1 -0
- package/dist/evaluator/TokenBias.d.ts +34 -0
- package/dist/evaluator/TokenBias.js +65 -0
- package/dist/evaluator/TokenBias.js.map +1 -0
- package/dist/evaluator/TokenMeter.d.ts +45 -0
- package/dist/evaluator/TokenMeter.js +74 -0
- package/dist/evaluator/TokenMeter.js.map +1 -0
- package/dist/gguf/consts.d.ts +4 -0
- package/dist/gguf/consts.js +12 -0
- package/dist/gguf/consts.js.map +1 -0
- package/dist/gguf/errors/InvalidGgufMagicError.d.ts +3 -0
- package/dist/gguf/errors/InvalidGgufMagicError.js +6 -0
- package/dist/gguf/errors/InvalidGgufMagicError.js.map +1 -0
- package/dist/gguf/errors/UnsupportedGgufValueTypeError.d.ts +4 -0
- package/dist/gguf/errors/UnsupportedGgufValueTypeError.js +9 -0
- package/dist/gguf/errors/UnsupportedGgufValueTypeError.js.map +1 -0
- package/dist/gguf/fileReaders/GgufFileReader.d.ts +36 -0
- package/dist/gguf/fileReaders/GgufFileReader.js +109 -0
- package/dist/gguf/fileReaders/GgufFileReader.js.map +1 -0
- package/dist/gguf/fileReaders/GgufFsFileReader.d.ts +16 -0
- package/dist/gguf/fileReaders/GgufFsFileReader.js +62 -0
- package/dist/gguf/fileReaders/GgufFsFileReader.js.map +1 -0
- package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.d.ts +25 -0
- package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.js +92 -0
- package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.js.map +1 -0
- package/dist/gguf/insights/GgufInsights.d.ts +50 -0
- package/dist/gguf/insights/GgufInsights.js +401 -0
- package/dist/gguf/insights/GgufInsights.js.map +1 -0
- package/dist/gguf/insights/GgufInsightsConfigurationResolver.d.ts +146 -0
- package/dist/gguf/insights/GgufInsightsConfigurationResolver.js +226 -0
- package/dist/gguf/insights/GgufInsightsConfigurationResolver.js.map +1 -0
- package/dist/gguf/insights/utils/resolveContextContextSizeOption.d.ts +19 -0
- package/dist/gguf/insights/utils/resolveContextContextSizeOption.js +78 -0
- package/dist/gguf/insights/utils/resolveContextContextSizeOption.js.map +1 -0
- package/dist/gguf/insights/utils/resolveModelGpuLayersOption.d.ts +15 -0
- package/dist/gguf/insights/utils/resolveModelGpuLayersOption.js +183 -0
- package/dist/gguf/insights/utils/resolveModelGpuLayersOption.js.map +1 -0
- package/dist/gguf/insights/utils/scoreLevels.d.ts +5 -0
- package/dist/gguf/insights/utils/scoreLevels.js +16 -0
- package/dist/gguf/insights/utils/scoreLevels.js.map +1 -0
- package/dist/gguf/parser/GgufV2Parser.d.ts +20 -0
- package/dist/gguf/parser/GgufV2Parser.js +156 -0
- package/dist/gguf/parser/GgufV2Parser.js.map +1 -0
- package/dist/gguf/parser/GgufV3Parser.d.ts +3 -0
- package/dist/gguf/parser/GgufV3Parser.js +4 -0
- package/dist/gguf/parser/GgufV3Parser.js.map +1 -0
- package/dist/gguf/parser/parseGguf.d.ts +8 -0
- package/dist/gguf/parser/parseGguf.js +61 -0
- package/dist/gguf/parser/parseGguf.js.map +1 -0
- package/dist/gguf/readGgufFileInfo.d.ts +45 -0
- package/dist/gguf/readGgufFileInfo.js +71 -0
- package/dist/gguf/readGgufFileInfo.js.map +1 -0
- package/dist/gguf/types/GgufFileInfoTypes.d.ts +84 -0
- package/dist/gguf/types/GgufFileInfoTypes.js +18 -0
- package/dist/gguf/types/GgufFileInfoTypes.js.map +1 -0
- package/dist/gguf/types/GgufMetadataTypes.d.ts +372 -0
- package/dist/gguf/types/GgufMetadataTypes.js +114 -0
- package/dist/gguf/types/GgufMetadataTypes.js.map +1 -0
- package/dist/gguf/types/GgufTensorInfoTypes.d.ts +37 -0
- package/dist/gguf/types/GgufTensorInfoTypes.js +33 -0
- package/dist/gguf/types/GgufTensorInfoTypes.js.map +1 -0
- package/dist/gguf/utils/GgufReadOffset.d.ts +6 -0
- package/dist/gguf/utils/GgufReadOffset.js +18 -0
- package/dist/gguf/utils/GgufReadOffset.js.map +1 -0
- package/dist/gguf/utils/convertMetadataKeyValueRecordToNestedObject.d.ts +6 -0
- package/dist/gguf/utils/convertMetadataKeyValueRecordToNestedObject.js +76 -0
- package/dist/gguf/utils/convertMetadataKeyValueRecordToNestedObject.js.map +1 -0
- package/dist/gguf/utils/getGgufFileTypeName.d.ts +4 -0
- package/dist/gguf/utils/getGgufFileTypeName.js +13 -0
- package/dist/gguf/utils/getGgufFileTypeName.js.map +1 -0
- package/dist/gguf/utils/getGgufMetadataArchitectureData.d.ts +3 -0
- package/dist/gguf/utils/getGgufMetadataArchitectureData.js +4 -0
- package/dist/gguf/utils/getGgufMetadataArchitectureData.js.map +1 -0
- package/dist/gguf/utils/normalizeGgufDownloadUrl.d.ts +1 -0
- package/dist/gguf/utils/normalizeGgufDownloadUrl.js +16 -0
- package/dist/gguf/utils/normalizeGgufDownloadUrl.js.map +1 -0
- package/dist/gguf/utils/resolveBinarySplitGgufPartUrls.d.ts +2 -0
- package/dist/gguf/utils/resolveBinarySplitGgufPartUrls.js +39 -0
- package/dist/gguf/utils/resolveBinarySplitGgufPartUrls.js.map +1 -0
- package/dist/gguf/utils/resolveSplitGgufParts.d.ts +7 -0
- package/dist/gguf/utils/resolveSplitGgufParts.js +55 -0
- package/dist/gguf/utils/resolveSplitGgufParts.js.map +1 -0
- package/dist/index.d.ts +39 -14
- package/dist/index.js +29 -8
- package/dist/index.js.map +1 -1
- package/dist/state.d.ts +2 -0
- package/dist/state.js +7 -0
- package/dist/state.js.map +1 -1
- package/dist/tsconfig.tsbuildinfo +1 -0
- package/dist/types.d.ts +131 -5
- package/dist/types.js.map +1 -1
- package/dist/utils/DisposeGuard.d.ts +13 -0
- package/dist/utils/DisposeGuard.js +120 -0
- package/dist/utils/DisposeGuard.js.map +1 -0
- package/dist/utils/InsufficientMemoryError.d.ts +3 -0
- package/dist/utils/InsufficientMemoryError.js +6 -0
- package/dist/utils/InsufficientMemoryError.js.map +1 -0
- package/dist/utils/LlamaText.d.ts +73 -26
- package/dist/utils/LlamaText.js +475 -157
- package/dist/utils/LlamaText.js.map +1 -1
- package/dist/utils/LruCache.d.ts +12 -0
- package/dist/utils/LruCache.js +44 -0
- package/dist/utils/LruCache.js.map +1 -0
- package/dist/utils/OverridesObject.d.ts +7 -0
- package/dist/utils/OverridesObject.js +2 -0
- package/dist/utils/OverridesObject.js.map +1 -0
- package/dist/utils/ReplHistory.js +5 -1
- package/dist/utils/ReplHistory.js.map +1 -1
- package/dist/utils/StopGenerationDetector.d.ts +27 -8
- package/dist/utils/StopGenerationDetector.js +108 -22
- package/dist/utils/StopGenerationDetector.js.map +1 -1
- package/dist/utils/ThreadsSplitter.d.ts +32 -0
- package/dist/utils/ThreadsSplitter.js +177 -0
- package/dist/utils/ThreadsSplitter.js.map +1 -0
- package/dist/utils/TokenStreamRegulator.d.ts +10 -4
- package/dist/utils/TokenStreamRegulator.js +102 -10
- package/dist/utils/TokenStreamRegulator.js.map +1 -1
- package/dist/utils/UnsupportedError.d.ts +2 -0
- package/dist/utils/UnsupportedError.js +7 -0
- package/dist/utils/UnsupportedError.js.map +1 -0
- package/dist/utils/appendUserMessageToChatHistory.d.ts +4 -0
- package/dist/utils/appendUserMessageToChatHistory.js +4 -0
- package/dist/utils/appendUserMessageToChatHistory.js.map +1 -1
- package/dist/utils/clearTempFolder.js.map +1 -1
- package/dist/utils/cmake.js +23 -10
- package/dist/utils/cmake.js.map +1 -1
- package/dist/utils/compareTokens.d.ts +1 -1
- package/dist/utils/compareTokens.js.map +1 -1
- package/dist/utils/createModelDownloader.d.ts +199 -0
- package/dist/utils/createModelDownloader.js +405 -0
- package/dist/utils/createModelDownloader.js.map +1 -0
- package/dist/utils/findBestOption.d.ts +4 -0
- package/dist/utils/findBestOption.js +15 -0
- package/dist/utils/findBestOption.js.map +1 -0
- package/dist/utils/findCharacterRemovalCountToFitChatHistoryInContext.d.ts +1 -0
- package/dist/utils/findCharacterRemovalCountToFitChatHistoryInContext.js +23 -12
- package/dist/utils/findCharacterRemovalCountToFitChatHistoryInContext.js.map +1 -1
- package/dist/utils/gbnfJson/GbnfGrammarGenerator.js.map +1 -1
- package/dist/utils/gbnfJson/getGbnfGrammarForGbnfJsonSchema.d.ts +5 -0
- package/dist/utils/gbnfJson/getGbnfGrammarForGbnfJsonSchema.js +11 -0
- package/dist/utils/gbnfJson/getGbnfGrammarForGbnfJsonSchema.js.map +1 -0
- package/dist/utils/gbnfJson/terminals/GbnfArray.d.ts +3 -1
- package/dist/utils/gbnfJson/terminals/GbnfArray.js +10 -5
- package/dist/utils/gbnfJson/terminals/GbnfArray.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfBoolean.d.ts +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfBoolean.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfBooleanValue.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfGrammar.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfNull.d.ts +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfNull.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfNumber.d.ts +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfNumber.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfNumberValue.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfObjectMap.d.ts +3 -1
- package/dist/utils/gbnfJson/terminals/GbnfObjectMap.js +9 -4
- package/dist/utils/gbnfJson/terminals/GbnfObjectMap.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfOr.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfRepetition.d.ts +9 -0
- package/dist/utils/gbnfJson/terminals/GbnfRepetition.js +37 -0
- package/dist/utils/gbnfJson/terminals/GbnfRepetition.js.map +1 -0
- package/dist/utils/gbnfJson/terminals/GbnfString.d.ts +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfString.js +23 -5
- package/dist/utils/gbnfJson/terminals/GbnfString.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfStringValue.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfVerbatimText.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfWhitespace.d.ts +7 -4
- package/dist/utils/gbnfJson/terminals/GbnfWhitespace.js +37 -9
- package/dist/utils/gbnfJson/terminals/GbnfWhitespace.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/gbnfConsts.d.ts +5 -4
- package/dist/utils/gbnfJson/terminals/gbnfConsts.js +14 -3
- package/dist/utils/gbnfJson/terminals/gbnfConsts.js.map +1 -1
- package/dist/utils/gbnfJson/types.d.ts +3 -0
- package/dist/utils/gbnfJson/types.js.map +1 -1
- package/dist/utils/gbnfJson/utils/GbnfJsonScopeState.d.ts +10 -0
- package/dist/utils/gbnfJson/utils/GbnfJsonScopeState.js +15 -0
- package/dist/utils/gbnfJson/utils/GbnfJsonScopeState.js.map +1 -0
- package/dist/utils/gbnfJson/utils/getGbnfJsonTerminalForGbnfJsonSchema.d.ts +2 -1
- package/dist/utils/gbnfJson/utils/getGbnfJsonTerminalForGbnfJsonSchema.js +6 -5
- package/dist/utils/gbnfJson/utils/getGbnfJsonTerminalForGbnfJsonSchema.js.map +1 -1
- package/dist/utils/gbnfJson/utils/validateObjectAgainstGbnfSchema.js +3 -3
- package/dist/utils/gbnfJson/utils/validateObjectAgainstGbnfSchema.js.map +1 -1
- package/dist/utils/getBuildDefaults.d.ts +1 -2
- package/dist/utils/getBuildDefaults.js +2 -3
- package/dist/utils/getBuildDefaults.js.map +1 -1
- package/dist/utils/getConsoleLogPrefix.d.ts +1 -1
- package/dist/utils/getConsoleLogPrefix.js +5 -4
- package/dist/utils/getConsoleLogPrefix.js.map +1 -1
- package/dist/utils/getGrammarsFolder.js +1 -1
- package/dist/utils/getGrammarsFolder.js.map +1 -1
- package/dist/utils/getQueuedTokensBeforeStopTrigger.d.ts +6 -0
- package/dist/utils/getQueuedTokensBeforeStopTrigger.js +22 -0
- package/dist/utils/getQueuedTokensBeforeStopTrigger.js.map +1 -0
- package/dist/utils/getReadableContextSize.d.ts +1 -0
- package/dist/utils/getReadableContextSize.js +7 -0
- package/dist/utils/getReadableContextSize.js.map +1 -0
- package/dist/utils/getTypeScriptTypeStringForGbnfJsonSchema.js +15 -11
- package/dist/utils/getTypeScriptTypeStringForGbnfJsonSchema.js.map +1 -1
- package/dist/utils/gitReleaseBundles.js +68 -1
- package/dist/utils/gitReleaseBundles.js.map +1 -1
- package/dist/utils/isToken.d.ts +2 -0
- package/dist/utils/isToken.js +4 -0
- package/dist/utils/isToken.js.map +1 -0
- package/dist/utils/isUrl.d.ts +1 -0
- package/dist/utils/isUrl.js +15 -0
- package/dist/utils/isUrl.js.map +1 -0
- package/dist/utils/mergeUnionTypes.d.ts +10 -0
- package/dist/utils/mergeUnionTypes.js +2 -0
- package/dist/utils/mergeUnionTypes.js.map +1 -0
- package/dist/utils/modelFileAccesTokens.d.ts +4 -0
- package/dist/utils/modelFileAccesTokens.js +40 -0
- package/dist/utils/modelFileAccesTokens.js.map +1 -0
- package/dist/utils/parseModelFileName.d.ts +1 -0
- package/dist/utils/parseModelFileName.js +6 -1
- package/dist/utils/parseModelFileName.js.map +1 -1
- package/dist/utils/parseTextTemplate.d.ts +66 -0
- package/dist/utils/parseTextTemplate.js +116 -0
- package/dist/utils/parseTextTemplate.js.map +1 -0
- package/dist/utils/prettyPrintObject.d.ts +10 -1
- package/dist/utils/prettyPrintObject.js +61 -15
- package/dist/utils/prettyPrintObject.js.map +1 -1
- package/dist/utils/pushAll.d.ts +6 -0
- package/dist/utils/pushAll.js +11 -0
- package/dist/utils/pushAll.js.map +1 -0
- package/dist/utils/removeNullFields.d.ts +2 -2
- package/dist/utils/removeNullFields.js.map +1 -1
- package/dist/utils/resolveGithubRelease.d.ts +2 -2
- package/dist/utils/resolveGithubRelease.js.map +1 -1
- package/dist/utils/resolveLastTokens.d.ts +2 -0
- package/dist/utils/resolveLastTokens.js +12 -0
- package/dist/utils/resolveLastTokens.js.map +1 -0
- package/dist/utils/runtime.d.ts +4 -0
- package/dist/utils/runtime.js +8 -0
- package/dist/utils/runtime.js.map +1 -0
- package/dist/utils/safeEventCallback.d.ts +6 -0
- package/dist/utils/safeEventCallback.js +29 -0
- package/dist/utils/safeEventCallback.js.map +1 -0
- package/dist/utils/spawnCommand.d.ts +11 -2
- package/dist/utils/spawnCommand.js +55 -7
- package/dist/utils/spawnCommand.js.map +1 -1
- package/dist/utils/tokenizeInput.d.ts +1 -1
- package/dist/utils/tokenizeInput.js +6 -3
- package/dist/utils/tokenizeInput.js.map +1 -1
- package/dist/utils/transformPromisable.d.ts +40 -0
- package/dist/utils/transformPromisable.js +64 -0
- package/dist/utils/transformPromisable.js.map +1 -0
- package/dist/utils/truncateTextAndRoundToWords.d.ts +2 -0
- package/dist/utils/truncateTextAndRoundToWords.js +32 -0
- package/dist/utils/truncateTextAndRoundToWords.js.map +1 -1
- package/dist/utils/utilTypes.d.ts +3 -0
- package/dist/utils/utilTypes.js +2 -0
- package/dist/utils/utilTypes.js.map +1 -0
- package/dist/utils/waitForLockfileRelease.js.map +1 -1
- package/dist/utils/withLockfile.js.map +1 -1
- package/dist/utils/withOra.d.ts +2 -0
- package/dist/utils/withOra.js +16 -6
- package/dist/utils/withOra.js.map +1 -1
- package/dist/utils/withProgressLog.d.ts +22 -0
- package/dist/utils/withProgressLog.js +211 -0
- package/dist/utils/withProgressLog.js.map +1 -0
- package/dist/utils/withStatusLogs.js +1 -1
- package/dist/utils/withStatusLogs.js.map +1 -1
- package/dist/utils/wrapAbortSignal.d.ts +1 -0
- package/dist/utils/wrapAbortSignal.js +9 -0
- package/dist/utils/wrapAbortSignal.js.map +1 -0
- package/llama/CMakeLists.txt +134 -5
- package/llama/addon/AddonContext.cpp +629 -0
- package/llama/addon/AddonContext.h +52 -0
- package/llama/addon/AddonGrammar.cpp +39 -0
- package/llama/addon/AddonGrammar.h +19 -0
- package/llama/addon/AddonGrammarEvaluationState.cpp +25 -0
- package/llama/addon/AddonGrammarEvaluationState.h +17 -0
- package/llama/addon/AddonModel.cpp +672 -0
- package/llama/addon/AddonModel.h +61 -0
- package/llama/addon/AddonModelData.cpp +25 -0
- package/llama/addon/AddonModelData.h +15 -0
- package/llama/addon/AddonModelLora.cpp +105 -0
- package/llama/addon/AddonModelLora.h +28 -0
- package/llama/addon/AddonSampler.cpp +513 -0
- package/llama/addon/AddonSampler.h +65 -0
- package/llama/addon/RingBuffer.h +109 -0
- package/llama/addon/addon.cpp +223 -0
- package/llama/addon/addonGlobals.cpp +22 -0
- package/llama/addon/addonGlobals.h +12 -0
- package/llama/addon/globals/addonLog.cpp +136 -0
- package/llama/addon/globals/addonLog.h +21 -0
- package/llama/addon/globals/addonProgress.cpp +15 -0
- package/llama/addon/globals/addonProgress.h +15 -0
- package/llama/addon/globals/getGpuInfo.cpp +108 -0
- package/llama/addon/globals/getGpuInfo.h +6 -0
- package/llama/binariesGithubRelease.json +1 -1
- package/llama/gitRelease.bundle +0 -0
- package/llama/gpuInfo/cuda-gpu-info.cu +120 -0
- package/llama/gpuInfo/cuda-gpu-info.h +10 -0
- package/llama/gpuInfo/metal-gpu-info.h +8 -0
- package/llama/gpuInfo/metal-gpu-info.mm +30 -0
- package/llama/gpuInfo/vulkan-gpu-info.cpp +83 -0
- package/llama/gpuInfo/vulkan-gpu-info.h +9 -0
- package/llama/grammars/README.md +297 -6
- package/llama/grammars/json.gbnf +4 -4
- package/llama/grammars/json_arr.gbnf +4 -4
- package/llama/llama.cpp.info.json +1 -1
- package/llama/toolchains/win32.host-x64.target-arm64.cmake +41 -0
- package/package.json +109 -59
- package/templates/packed/electron-typescript-react.json +1 -0
- package/templates/packed/node-typescript.json +1 -0
- package/dist/AbortError.d.ts +0 -2
- package/dist/AbortError.js +0 -7
- package/dist/AbortError.js.map +0 -1
- package/dist/chatWrappers/LlamaChatWrapper.d.ts +0 -13
- package/dist/chatWrappers/LlamaChatWrapper.js.map +0 -1
- package/dist/chatWrappers/resolveChatWrapperBasedOnModel.d.ts +0 -13
- package/dist/chatWrappers/resolveChatWrapperBasedOnModel.js +0 -57
- package/dist/chatWrappers/resolveChatWrapperBasedOnModel.js.map +0 -1
- package/dist/cli/commands/BuildCommand.d.ts +0 -11
- package/dist/cli/commands/BuildCommand.js +0 -106
- package/dist/cli/commands/BuildCommand.js.map +0 -1
- package/dist/cli/commands/ClearCommand.js.map +0 -1
- package/dist/cli/commands/DownloadCommand.js +0 -169
- package/dist/cli/commands/DownloadCommand.js.map +0 -1
- package/dist/evaluator/LlamaChat/utils/FunctionCallGrammar.d.ts +0 -22
- package/dist/evaluator/LlamaChat/utils/FunctionCallGrammar.js +0 -121
- package/dist/evaluator/LlamaChat/utils/FunctionCallGrammar.js.map +0 -1
- package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/firstInFirstOutStrategy.js.map +0 -1
- package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/maximumParallelismStrategy.js.map +0 -1
- package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizingStrategy.d.ts +0 -2
- package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizingStrategy.js.map +0 -1
- package/dist/evaluator/LlamaModel.d.ts +0 -120
- package/dist/evaluator/LlamaModel.js +0 -320
- package/dist/evaluator/LlamaModel.js.map +0 -1
- package/dist/utils/getGbnfGrammarForGbnfJsonSchema.d.ts +0 -2
- package/dist/utils/getGbnfGrammarForGbnfJsonSchema.js +0 -9
- package/dist/utils/getGbnfGrammarForGbnfJsonSchema.js.map +0 -1
- package/dist/utils/parseModelTypeDescription.d.ts +0 -6
- package/dist/utils/parseModelTypeDescription.js +0 -9
- package/dist/utils/parseModelTypeDescription.js.map +0 -1
- package/dist/utils/resolveChatWrapper.d.ts +0 -4
- package/dist/utils/resolveChatWrapper.js +0 -16
- package/dist/utils/resolveChatWrapper.js.map +0 -1
- package/llama/addon.cpp +0 -950
- package/llamaBins/linux-arm64/.buildMetadata.json +0 -1
- package/llamaBins/linux-arm64/llama-addon.node +0 -0
- package/llamaBins/linux-armv7l/.buildMetadata.json +0 -1
- package/llamaBins/linux-armv7l/llama-addon.node +0 -0
- package/llamaBins/linux-x64/.buildMetadata.json +0 -1
- package/llamaBins/linux-x64/llama-addon.node +0 -0
- package/llamaBins/linux-x64-cuda/.buildMetadata.json +0 -1
- package/llamaBins/linux-x64-cuda/llama-addon.node +0 -0
- package/llamaBins/mac-arm64-metal/.buildMetadata.json +0 -1
- package/llamaBins/mac-arm64-metal/ggml-metal.metal +0 -6119
- package/llamaBins/mac-arm64-metal/llama-addon.node +0 -0
- package/llamaBins/mac-x64/.buildMetadata.json +0 -1
- package/llamaBins/mac-x64/llama-addon.node +0 -0
- package/llamaBins/win-x64/.buildMetadata.json +0 -1
- package/llamaBins/win-x64/llama-addon.exp +0 -0
- package/llamaBins/win-x64/llama-addon.lib +0 -0
- package/llamaBins/win-x64/llama-addon.node +0 -0
- package/llamaBins/win-x64-cuda/.buildMetadata.json +0 -1
- package/llamaBins/win-x64-cuda/llama-addon.exp +0 -0
- package/llamaBins/win-x64-cuda/llama-addon.lib +0 -0
- package/llamaBins/win-x64-cuda/llama-addon.node +0 -0
- /package/dist/evaluator/LlamaContext/utils/{batchItemsPrioritizingStrategies → batchItemsPrioritizationStrategies}/firstInFirstOutStrategy.d.ts +0 -0
- /package/dist/evaluator/LlamaContext/utils/{batchItemsPrioritizingStrategies → batchItemsPrioritizationStrategies}/firstInFirstOutStrategy.js +0 -0
- /package/dist/evaluator/LlamaContext/utils/{batchItemsPrioritizingStrategies → batchItemsPrioritizationStrategies}/maximumParallelismStrategy.d.ts +0 -0
- /package/dist/evaluator/LlamaContext/utils/{batchItemsPrioritizingStrategies → batchItemsPrioritizationStrategies}/maximumParallelismStrategy.js +0 -0
|
@@ -3,231 +3,272 @@ import process from "process";
|
|
|
3
3
|
import path from "path";
|
|
4
4
|
import chalk from "chalk";
|
|
5
5
|
import fs from "fs-extra";
|
|
6
|
-
import { chatCommandHistoryFilePath, defaultChatSystemPrompt } from "../../config.js";
|
|
7
|
-
import { LlamaChatWrapper } from "../../chatWrappers/LlamaChatWrapper.js";
|
|
8
|
-
import { GeneralChatWrapper } from "../../chatWrappers/GeneralChatWrapper.js";
|
|
9
|
-
import { ChatMLChatWrapper } from "../../chatWrappers/ChatMLChatWrapper.js";
|
|
10
|
-
import { resolveChatWrapperBasedOnModel } from "../../chatWrappers/resolveChatWrapperBasedOnModel.js";
|
|
11
|
-
import { FalconChatWrapper } from "../../chatWrappers/FalconChatWrapper.js";
|
|
6
|
+
import { chatCommandHistoryFilePath, defaultChatSystemPrompt, documentationPageUrls } from "../../config.js";
|
|
12
7
|
import { getIsInDocumentationMode } from "../../state.js";
|
|
13
8
|
import { ReplHistory } from "../../utils/ReplHistory.js";
|
|
14
|
-
import withStatusLogs from "../../utils/withStatusLogs.js";
|
|
15
|
-
import { AlpacaChatWrapper } from "../../chatWrappers/AlpacaChatWrapper.js";
|
|
16
|
-
import { FunctionaryChatWrapper } from "../../chatWrappers/FunctionaryChatWrapper.js";
|
|
17
9
|
import { defineChatSessionFunction } from "../../evaluator/LlamaChatSession/utils/defineChatSessionFunction.js";
|
|
18
10
|
import { getLlama } from "../../bindings/getLlama.js";
|
|
19
11
|
import { LlamaGrammar } from "../../evaluator/LlamaGrammar.js";
|
|
20
12
|
import { LlamaChatSession } from "../../evaluator/LlamaChatSession/LlamaChatSession.js";
|
|
21
|
-
import { LlamaModel } from "../../evaluator/LlamaModel.js";
|
|
22
|
-
import { LlamaContext } from "../../evaluator/LlamaContext/LlamaContext.js";
|
|
23
13
|
import { LlamaJsonSchemaGrammar } from "../../evaluator/LlamaJsonSchemaGrammar.js";
|
|
24
|
-
import { LlamaLogLevel } from "../../bindings/types.js";
|
|
25
|
-
|
|
14
|
+
import { LlamaLogLevel, LlamaLogLevelGreaterThan, nodeLlamaCppGpuOptions, parseNodeLlamaCppGpuOption } from "../../bindings/types.js";
|
|
15
|
+
import withOra from "../../utils/withOra.js";
|
|
16
|
+
import { TokenMeter } from "../../evaluator/TokenMeter.js";
|
|
17
|
+
import { printInfoLine } from "../utils/printInfoLine.js";
|
|
18
|
+
import { resolveChatWrapper, specializedChatWrapperTypeNames } from "../../chatWrappers/utils/resolveChatWrapper.js";
|
|
19
|
+
import { GeneralChatWrapper } from "../../chatWrappers/GeneralChatWrapper.js";
|
|
20
|
+
import { printCommonInfoLines } from "../utils/printCommonInfoLines.js";
|
|
21
|
+
import { resolveCommandGgufPath } from "../utils/resolveCommandGgufPath.js";
|
|
22
|
+
import { withProgressLog } from "../../utils/withProgressLog.js";
|
|
23
|
+
import { resolveHeaderFlag } from "../utils/resolveHeaderFlag.js";
|
|
24
|
+
import { withCliCommandDescriptionDocsUrl } from "../utils/withCliCommandDescriptionDocsUrl.js";
|
|
25
|
+
import { ConsoleInteraction, ConsoleInteractionKey } from "../utils/ConsoleInteraction.js";
|
|
26
26
|
export const ChatCommand = {
|
|
27
|
-
command: "chat",
|
|
28
|
-
describe: "Chat with a
|
|
27
|
+
command: "chat [modelPath]",
|
|
28
|
+
describe: withCliCommandDescriptionDocsUrl("Chat with a model", documentationPageUrls.CLI.Chat),
|
|
29
29
|
builder(yargs) {
|
|
30
30
|
const isInDocumentationMode = getIsInDocumentationMode();
|
|
31
31
|
return yargs
|
|
32
|
-
.option("
|
|
33
|
-
alias: "m",
|
|
32
|
+
.option("modelPath", {
|
|
33
|
+
alias: ["m", "model", "path", "url"],
|
|
34
|
+
type: "string",
|
|
35
|
+
description: "Model file to use for the chat. Can be a path to a local file or a URL of a model file to download. Leave empty to choose from a list of recommended models"
|
|
36
|
+
})
|
|
37
|
+
.option("header", {
|
|
38
|
+
alias: ["H"],
|
|
34
39
|
type: "string",
|
|
35
|
-
|
|
36
|
-
description: "
|
|
37
|
-
|
|
40
|
+
array: true,
|
|
41
|
+
description: "Headers to use when downloading a model from a URL, in the format `key: value`. You can pass this option multiple times to add multiple headers."
|
|
42
|
+
})
|
|
43
|
+
.option("gpu", {
|
|
44
|
+
type: "string",
|
|
45
|
+
// yargs types don't support passing `false` as a choice, although it is supported by yargs
|
|
46
|
+
choices: nodeLlamaCppGpuOptions,
|
|
47
|
+
coerce: (value) => {
|
|
48
|
+
if (value == null || value == "")
|
|
49
|
+
return undefined;
|
|
50
|
+
return parseNodeLlamaCppGpuOption(value);
|
|
51
|
+
},
|
|
52
|
+
defaultDescription: "Uses the latest local build, and fallbacks to \"auto\"",
|
|
53
|
+
description: "Compute layer implementation type to use for llama.cpp. If omitted, uses the latest local build, and fallbacks to \"auto\""
|
|
38
54
|
})
|
|
39
55
|
.option("systemInfo", {
|
|
40
56
|
alias: "i",
|
|
41
57
|
type: "boolean",
|
|
42
58
|
default: false,
|
|
43
|
-
description: "Print llama.cpp system info"
|
|
44
|
-
group: "Optional:"
|
|
59
|
+
description: "Print llama.cpp system info"
|
|
45
60
|
})
|
|
46
61
|
.option("systemPrompt", {
|
|
47
62
|
alias: "s",
|
|
48
63
|
type: "string",
|
|
49
|
-
default: defaultChatSystemPrompt,
|
|
50
|
-
defaultDescription: " ",
|
|
51
64
|
description: "System prompt to use against the model" +
|
|
52
|
-
(isInDocumentationMode ? "" : (". [default value: " + defaultChatSystemPrompt.split("\n").join(" ") + "]"))
|
|
53
|
-
group: "Optional:"
|
|
65
|
+
(isInDocumentationMode ? "" : (". [the default value is determined by the chat wrapper, but is usually: " + defaultChatSystemPrompt.split("\n").join(" ") + "]"))
|
|
54
66
|
})
|
|
55
67
|
.option("systemPromptFile", {
|
|
56
68
|
type: "string",
|
|
57
|
-
description: "Path to a file to load text from and use as as the model system prompt"
|
|
58
|
-
group: "Optional:"
|
|
69
|
+
description: "Path to a file to load text from and use as as the model system prompt"
|
|
59
70
|
})
|
|
60
71
|
.option("prompt", {
|
|
61
72
|
type: "string",
|
|
62
|
-
description: "First prompt to automatically send to the model when starting the chat"
|
|
63
|
-
group: "Optional:"
|
|
73
|
+
description: "First prompt to automatically send to the model when starting the chat"
|
|
64
74
|
})
|
|
65
75
|
.option("promptFile", {
|
|
66
76
|
type: "string",
|
|
67
|
-
description: "Path to a file to load text from and use as a first prompt to automatically send to the model when starting the chat"
|
|
68
|
-
group: "Optional:"
|
|
77
|
+
description: "Path to a file to load text from and use as a first prompt to automatically send to the model when starting the chat"
|
|
69
78
|
})
|
|
70
79
|
.option("wrapper", {
|
|
71
80
|
alias: "w",
|
|
72
81
|
type: "string",
|
|
73
82
|
default: "auto",
|
|
74
|
-
choices:
|
|
75
|
-
description: "Chat wrapper to use. Use `auto` to automatically select a wrapper based on the model's BOS token"
|
|
76
|
-
|
|
83
|
+
choices: ["auto", ...specializedChatWrapperTypeNames],
|
|
84
|
+
description: "Chat wrapper to use. Use `auto` to automatically select a wrapper based on the model's BOS token"
|
|
85
|
+
})
|
|
86
|
+
.option("noJinja", {
|
|
87
|
+
type: "boolean",
|
|
88
|
+
default: false,
|
|
89
|
+
description: "Don't use a Jinja wrapper, even if it's the best option for the model"
|
|
77
90
|
})
|
|
78
91
|
.option("contextSize", {
|
|
79
92
|
alias: "c",
|
|
80
93
|
type: "number",
|
|
81
|
-
default: 1024 * 4,
|
|
82
94
|
description: "Context size to use for the model context",
|
|
83
|
-
|
|
95
|
+
default: -1,
|
|
96
|
+
defaultDescription: "Automatically determined based on the available VRAM"
|
|
84
97
|
})
|
|
85
98
|
.option("batchSize", {
|
|
86
99
|
alias: "b",
|
|
87
100
|
type: "number",
|
|
88
|
-
description: "Batch size to use for the model context. The default value is the context size"
|
|
89
|
-
|
|
101
|
+
description: "Batch size to use for the model context. The default value is the context size"
|
|
102
|
+
})
|
|
103
|
+
.option("flashAttention", {
|
|
104
|
+
alias: "fa",
|
|
105
|
+
type: "boolean",
|
|
106
|
+
default: false,
|
|
107
|
+
description: "Enable flash attention"
|
|
108
|
+
})
|
|
109
|
+
.option("noTrimWhitespace", {
|
|
110
|
+
type: "boolean",
|
|
111
|
+
alias: ["noTrim"],
|
|
112
|
+
default: false,
|
|
113
|
+
description: "Don't trim whitespaces from the model response"
|
|
90
114
|
})
|
|
91
115
|
.option("grammar", {
|
|
92
116
|
alias: "g",
|
|
93
117
|
type: "string",
|
|
94
118
|
default: "text",
|
|
95
119
|
choices: ["text", "json", "list", "arithmetic", "japanese", "chess"],
|
|
96
|
-
description: "Restrict the model response to a specific grammar, like JSON for example"
|
|
97
|
-
group: "Optional:"
|
|
120
|
+
description: "Restrict the model response to a specific grammar, like JSON for example"
|
|
98
121
|
})
|
|
99
122
|
.option("jsonSchemaGrammarFile", {
|
|
100
123
|
alias: ["jsgf"],
|
|
101
124
|
type: "string",
|
|
102
|
-
description: "File path to a JSON schema file, to restrict the model response to only generate output that conforms to the JSON schema"
|
|
103
|
-
group: "Optional:"
|
|
125
|
+
description: "File path to a JSON schema file, to restrict the model response to only generate output that conforms to the JSON schema"
|
|
104
126
|
})
|
|
105
127
|
.option("threads", {
|
|
106
128
|
type: "number",
|
|
107
|
-
|
|
108
|
-
description: "Number of threads to use for the evaluation of tokens"
|
|
109
|
-
group: "Optional:"
|
|
129
|
+
defaultDescription: "Number of cores that are useful for math on the current machine",
|
|
130
|
+
description: "Number of threads to use for the evaluation of tokens"
|
|
110
131
|
})
|
|
111
132
|
.option("temperature", {
|
|
112
133
|
alias: "t",
|
|
113
134
|
type: "number",
|
|
114
135
|
default: 0,
|
|
115
|
-
description: "Temperature is a hyperparameter that controls the randomness of the generated text. It affects the probability distribution of the model's output tokens. A higher temperature (e.g., 1.5) makes the output more random and creative, while a lower temperature (e.g., 0.5) makes the output more focused, deterministic, and conservative. The suggested temperature is 0.8, which provides a balance between randomness and determinism. At the extreme, a temperature of 0 will always pick the most likely next token, leading to identical outputs in each run. Set to `0` to disable."
|
|
116
|
-
|
|
136
|
+
description: "Temperature is a hyperparameter that controls the randomness of the generated text. It affects the probability distribution of the model's output tokens. A higher temperature (e.g., 1.5) makes the output more random and creative, while a lower temperature (e.g., 0.5) makes the output more focused, deterministic, and conservative. The suggested temperature is 0.8, which provides a balance between randomness and determinism. At the extreme, a temperature of 0 will always pick the most likely next token, leading to identical outputs in each run. Set to `0` to disable."
|
|
137
|
+
})
|
|
138
|
+
.option("minP", {
|
|
139
|
+
alias: "mp",
|
|
140
|
+
type: "number",
|
|
141
|
+
default: 0,
|
|
142
|
+
description: "From the next token candidates, discard the percentage of tokens with the lowest probability. For example, if set to `0.05`, 5% of the lowest probability tokens will be discarded. This is useful for generating more high-quality results when using a high temperature. Set to a value between `0` and `1` to enable. Only relevant when `temperature` is set to a value greater than `0`."
|
|
117
143
|
})
|
|
118
144
|
.option("topK", {
|
|
119
145
|
alias: "k",
|
|
120
146
|
type: "number",
|
|
121
147
|
default: 40,
|
|
122
|
-
description: "Limits the model to consider only the K most likely next tokens for sampling at each step of sequence generation. An integer number between `1` and the size of the vocabulary. Set to `0` to disable (which uses the full vocabulary). Only relevant when `temperature` is set to a value greater than 0."
|
|
123
|
-
group: "Optional:"
|
|
148
|
+
description: "Limits the model to consider only the K most likely next tokens for sampling at each step of sequence generation. An integer number between `1` and the size of the vocabulary. Set to `0` to disable (which uses the full vocabulary). Only relevant when `temperature` is set to a value greater than 0."
|
|
124
149
|
})
|
|
125
150
|
.option("topP", {
|
|
126
151
|
alias: "p",
|
|
127
152
|
type: "number",
|
|
128
153
|
default: 0.95,
|
|
129
|
-
description: "Dynamically selects the smallest set of tokens whose cumulative probability exceeds the threshold P, and samples the next token only from this set. A float number between `0` and `1`. Set to `1` to disable. Only relevant when `temperature` is set to a value greater than `0`."
|
|
130
|
-
|
|
154
|
+
description: "Dynamically selects the smallest set of tokens whose cumulative probability exceeds the threshold P, and samples the next token only from this set. A float number between `0` and `1`. Set to `1` to disable. Only relevant when `temperature` is set to a value greater than `0`."
|
|
155
|
+
})
|
|
156
|
+
.option("seed", {
|
|
157
|
+
type: "number",
|
|
158
|
+
description: "Used to control the randomness of the generated text. Only relevant when using `temperature`.",
|
|
159
|
+
defaultDescription: "The current epoch time"
|
|
131
160
|
})
|
|
132
161
|
.option("gpuLayers", {
|
|
133
162
|
alias: "gl",
|
|
134
163
|
type: "number",
|
|
135
164
|
description: "number of layers to store in VRAM",
|
|
136
|
-
|
|
165
|
+
default: -1,
|
|
166
|
+
defaultDescription: "Automatically determined based on the available VRAM"
|
|
137
167
|
})
|
|
138
168
|
.option("repeatPenalty", {
|
|
139
169
|
alias: "rp",
|
|
140
170
|
type: "number",
|
|
141
171
|
default: 1.1,
|
|
142
|
-
description: "Prevent the model from repeating the same token too much. Set to `1` to disable."
|
|
143
|
-
group: "Optional:"
|
|
172
|
+
description: "Prevent the model from repeating the same token too much. Set to `1` to disable."
|
|
144
173
|
})
|
|
145
174
|
.option("lastTokensRepeatPenalty", {
|
|
146
175
|
alias: "rpn",
|
|
147
176
|
type: "number",
|
|
148
177
|
default: 64,
|
|
149
|
-
description: "Number of recent tokens generated by the model to apply penalties to repetition of"
|
|
150
|
-
group: "Optional:"
|
|
178
|
+
description: "Number of recent tokens generated by the model to apply penalties to repetition of"
|
|
151
179
|
})
|
|
152
180
|
.option("penalizeRepeatingNewLine", {
|
|
153
181
|
alias: "rpnl",
|
|
154
182
|
type: "boolean",
|
|
155
183
|
default: true,
|
|
156
|
-
description: "Penalize new line tokens. set
|
|
157
|
-
group: "Optional:"
|
|
184
|
+
description: "Penalize new line tokens. set `--no-penalizeRepeatingNewLine` or `--no-rpnl` to disable"
|
|
158
185
|
})
|
|
159
186
|
.option("repeatFrequencyPenalty", {
|
|
160
187
|
alias: "rfp",
|
|
161
188
|
type: "number",
|
|
162
|
-
description: "For n time a token is in the `punishTokens` array, lower its probability by `n * repeatFrequencyPenalty`. Set to a value between `0` and `1` to enable."
|
|
163
|
-
group: "Optional:"
|
|
189
|
+
description: "For n time a token is in the `punishTokens` array, lower its probability by `n * repeatFrequencyPenalty`. Set to a value between `0` and `1` to enable."
|
|
164
190
|
})
|
|
165
191
|
.option("repeatPresencePenalty", {
|
|
166
192
|
alias: "rpp",
|
|
167
193
|
type: "number",
|
|
168
|
-
description: "Lower the probability of all the tokens in the `punishTokens` array by `repeatPresencePenalty`. Set to a value between `0` and `1` to enable."
|
|
169
|
-
group: "Optional:"
|
|
194
|
+
description: "Lower the probability of all the tokens in the `punishTokens` array by `repeatPresencePenalty`. Set to a value between `0` and `1` to enable."
|
|
170
195
|
})
|
|
171
196
|
.option("maxTokens", {
|
|
172
197
|
alias: "mt",
|
|
173
198
|
type: "number",
|
|
174
199
|
default: 0,
|
|
175
|
-
description: "Maximum number of tokens to generate in responses. Set to `0` to disable. Set to `-1` to set to the context size"
|
|
176
|
-
group: "Optional:"
|
|
200
|
+
description: "Maximum number of tokens to generate in responses. Set to `0` to disable. Set to `-1` to set to the context size"
|
|
177
201
|
})
|
|
178
202
|
.option("noHistory", {
|
|
179
203
|
alias: "nh",
|
|
180
204
|
type: "boolean",
|
|
181
205
|
default: false,
|
|
182
|
-
description: "Don't load or save chat history"
|
|
183
|
-
group: "Optional:"
|
|
206
|
+
description: "Don't load or save chat history"
|
|
184
207
|
})
|
|
185
208
|
.option("environmentFunctions", {
|
|
186
209
|
alias: "ef",
|
|
187
210
|
type: "boolean",
|
|
188
211
|
default: false,
|
|
189
|
-
description: "Provide access to environment functions like `getDate` and `getTime`"
|
|
190
|
-
|
|
212
|
+
description: "Provide access to environment functions like `getDate` and `getTime`"
|
|
213
|
+
})
|
|
214
|
+
.option("debug", {
|
|
215
|
+
alias: "d",
|
|
216
|
+
type: "boolean",
|
|
217
|
+
default: false,
|
|
218
|
+
description: "Print llama.cpp info and debug logs"
|
|
191
219
|
})
|
|
192
|
-
.option("
|
|
193
|
-
alias: "nl",
|
|
220
|
+
.option("meter", {
|
|
194
221
|
type: "boolean",
|
|
195
222
|
default: false,
|
|
196
|
-
description: "
|
|
197
|
-
group: "Optional:"
|
|
223
|
+
description: "Print how many tokens were used as input and output for each response"
|
|
198
224
|
})
|
|
199
225
|
.option("printTimings", {
|
|
200
226
|
alias: "pt",
|
|
201
227
|
type: "boolean",
|
|
202
228
|
default: false,
|
|
203
|
-
description: "Print llama.cpp timings after each response"
|
|
204
|
-
group: "Optional:"
|
|
229
|
+
description: "Print llama.cpp timings after each response"
|
|
205
230
|
});
|
|
206
231
|
},
|
|
207
|
-
async handler({
|
|
232
|
+
async handler({ modelPath, header, gpu, systemInfo, systemPrompt, systemPromptFile, prompt, promptFile, wrapper, noJinja, contextSize, batchSize, flashAttention, noTrimWhitespace, grammar, jsonSchemaGrammarFile, threads, temperature, minP, topK, topP, seed, gpuLayers, repeatPenalty, lastTokensRepeatPenalty, penalizeRepeatingNewLine, repeatFrequencyPenalty, repeatPresencePenalty, maxTokens, noHistory, environmentFunctions, debug, meter, printTimings }) {
|
|
208
233
|
try {
|
|
209
234
|
await RunChat({
|
|
210
|
-
|
|
211
|
-
grammar, jsonSchemaGrammarFile, threads, temperature, topK, topP,
|
|
212
|
-
repeatPenalty, penalizeRepeatingNewLine, repeatFrequencyPenalty, repeatPresencePenalty,
|
|
213
|
-
noHistory, environmentFunctions,
|
|
235
|
+
modelPath, header, gpu, systemInfo, systemPrompt, systemPromptFile, prompt, promptFile, wrapper, noJinja, contextSize,
|
|
236
|
+
batchSize, flashAttention, noTrimWhitespace, grammar, jsonSchemaGrammarFile, threads, temperature, minP, topK, topP, seed,
|
|
237
|
+
gpuLayers, lastTokensRepeatPenalty, repeatPenalty, penalizeRepeatingNewLine, repeatFrequencyPenalty, repeatPresencePenalty,
|
|
238
|
+
maxTokens, noHistory, environmentFunctions, debug, meter, printTimings
|
|
214
239
|
});
|
|
215
240
|
}
|
|
216
241
|
catch (err) {
|
|
242
|
+
await new Promise((accept) => setTimeout(accept, 0)); // wait for logs to finish printing
|
|
217
243
|
console.error(err);
|
|
218
244
|
process.exit(1);
|
|
219
245
|
}
|
|
220
246
|
}
|
|
221
247
|
};
|
|
222
|
-
async function RunChat({
|
|
223
|
-
if (
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
248
|
+
async function RunChat({ modelPath: modelArg, header: headerArg, gpu, systemInfo, systemPrompt, systemPromptFile, prompt, promptFile, wrapper, noJinja, contextSize, batchSize, flashAttention, noTrimWhitespace, grammar: grammarArg, jsonSchemaGrammarFile: jsonSchemaGrammarFilePath, threads, temperature, minP, topK, topP, seed, gpuLayers, lastTokensRepeatPenalty, repeatPenalty, penalizeRepeatingNewLine, repeatFrequencyPenalty, repeatPresencePenalty, maxTokens, noHistory, environmentFunctions, debug, meter, printTimings }) {
|
|
249
|
+
if (contextSize === -1)
|
|
250
|
+
contextSize = undefined;
|
|
251
|
+
if (gpuLayers === -1)
|
|
252
|
+
gpuLayers = undefined;
|
|
253
|
+
const headers = resolveHeaderFlag(headerArg);
|
|
254
|
+
const trimWhitespace = !noTrimWhitespace;
|
|
255
|
+
if (debug)
|
|
256
|
+
console.info(`${chalk.yellow("Log level:")} debug`);
|
|
257
|
+
const llamaLogLevel = debug
|
|
258
|
+
? LlamaLogLevel.debug
|
|
259
|
+
: LlamaLogLevel.warn;
|
|
260
|
+
const llama = gpu == null
|
|
261
|
+
? await getLlama("lastBuild", {
|
|
262
|
+
logLevel: llamaLogLevel
|
|
263
|
+
})
|
|
264
|
+
: await getLlama({
|
|
265
|
+
gpu,
|
|
266
|
+
logLevel: llamaLogLevel
|
|
267
|
+
});
|
|
230
268
|
const logBatchSize = batchSize != null;
|
|
269
|
+
const resolvedModelPath = await resolveCommandGgufPath(modelArg, llama, headers, {
|
|
270
|
+
flashAttention
|
|
271
|
+
});
|
|
231
272
|
if (systemInfo)
|
|
232
273
|
console.log(llama.systemInfo);
|
|
233
274
|
if (systemPromptFile != null && systemPromptFile !== "") {
|
|
@@ -240,75 +281,140 @@ async function RunChat({ model: modelArg, systemInfo, systemPrompt, systemPrompt
|
|
|
240
281
|
console.warn(chalk.yellow("Both `prompt` and `promptFile` were specified. `promptFile` will be used."));
|
|
241
282
|
prompt = await fs.readFile(path.resolve(process.cwd(), promptFile), "utf8");
|
|
242
283
|
}
|
|
243
|
-
if (batchSize
|
|
244
|
-
batchSize = contextSize;
|
|
245
|
-
else if (batchSize > contextSize) {
|
|
284
|
+
if (batchSize != null && contextSize != null && batchSize > contextSize) {
|
|
246
285
|
console.warn(chalk.yellow("Batch size is greater than the context size. Batch size will be set to the context size."));
|
|
247
286
|
batchSize = contextSize;
|
|
248
287
|
}
|
|
249
288
|
let initialPrompt = prompt ?? null;
|
|
250
|
-
const model = await
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
289
|
+
const model = await withProgressLog({
|
|
290
|
+
loadingText: chalk.blue.bold("Loading model"),
|
|
291
|
+
successText: chalk.blue("Model loaded"),
|
|
292
|
+
failText: chalk.blue("Failed to load model"),
|
|
293
|
+
liveUpdates: !debug,
|
|
294
|
+
noProgress: debug,
|
|
295
|
+
liveCtrlCSendsAbortSignal: true
|
|
296
|
+
}, async (progressUpdater) => {
|
|
297
|
+
try {
|
|
298
|
+
return await llama.loadModel({
|
|
299
|
+
modelPath: resolvedModelPath,
|
|
300
|
+
gpuLayers: gpuLayers != null
|
|
301
|
+
? gpuLayers
|
|
302
|
+
: contextSize != null
|
|
303
|
+
? { fitContext: { contextSize } }
|
|
304
|
+
: undefined,
|
|
305
|
+
defaultContextFlashAttention: flashAttention,
|
|
306
|
+
ignoreMemorySafetyChecks: gpuLayers != null,
|
|
307
|
+
onLoadProgress(loadProgress) {
|
|
308
|
+
progressUpdater.setProgress(loadProgress);
|
|
309
|
+
},
|
|
310
|
+
loadSignal: progressUpdater.abortSignal
|
|
311
|
+
});
|
|
312
|
+
}
|
|
313
|
+
catch (err) {
|
|
314
|
+
if (err === progressUpdater.abortSignal?.reason)
|
|
315
|
+
process.exit(0);
|
|
316
|
+
throw err;
|
|
317
|
+
}
|
|
318
|
+
finally {
|
|
319
|
+
if (llama.logLevel === LlamaLogLevel.debug) {
|
|
320
|
+
await new Promise((accept) => setTimeout(accept, 0)); // wait for logs to finish printing
|
|
321
|
+
console.info();
|
|
322
|
+
}
|
|
323
|
+
}
|
|
324
|
+
});
|
|
325
|
+
const context = await withOra({
|
|
260
326
|
loading: chalk.blue("Creating context"),
|
|
261
327
|
success: chalk.blue("Context created"),
|
|
262
|
-
fail: chalk.blue("Failed to create context")
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
328
|
+
fail: chalk.blue("Failed to create context"),
|
|
329
|
+
useStatusLogs: debug
|
|
330
|
+
}, async () => {
|
|
331
|
+
try {
|
|
332
|
+
return await model.createContext({
|
|
333
|
+
contextSize: contextSize != null ? contextSize : undefined,
|
|
334
|
+
batchSize: batchSize != null ? batchSize : undefined,
|
|
335
|
+
threads: threads === null ? undefined : threads,
|
|
336
|
+
ignoreMemorySafetyChecks: gpuLayers != null || contextSize != null,
|
|
337
|
+
performanceTracking: printTimings
|
|
338
|
+
});
|
|
339
|
+
}
|
|
340
|
+
finally {
|
|
341
|
+
if (llama.logLevel === LlamaLogLevel.debug) {
|
|
342
|
+
await new Promise((accept) => setTimeout(accept, 0)); // wait for logs to finish printing
|
|
343
|
+
console.info();
|
|
344
|
+
}
|
|
345
|
+
}
|
|
346
|
+
});
|
|
269
347
|
const grammar = jsonSchemaGrammarFilePath != null
|
|
270
348
|
? new LlamaJsonSchemaGrammar(llama, await fs.readJson(path.resolve(process.cwd(), jsonSchemaGrammarFilePath)))
|
|
271
349
|
: grammarArg !== "text"
|
|
272
350
|
? await LlamaGrammar.getFor(llama, grammarArg)
|
|
273
351
|
: undefined;
|
|
274
|
-
const
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
bosString: bos,
|
|
352
|
+
const chatWrapper = resolveChatWrapper({
|
|
353
|
+
type: wrapper,
|
|
354
|
+
bosString: model.tokens.bosString,
|
|
278
355
|
filename: model.filename,
|
|
279
|
-
|
|
280
|
-
|
|
356
|
+
fileInfo: model.fileInfo,
|
|
357
|
+
tokenizer: model.tokenizer,
|
|
358
|
+
noJinja
|
|
359
|
+
}) ?? new GeneralChatWrapper();
|
|
360
|
+
const contextSequence = context.getSequence();
|
|
281
361
|
const session = new LlamaChatSession({
|
|
282
|
-
contextSequence
|
|
362
|
+
contextSequence,
|
|
283
363
|
systemPrompt,
|
|
284
364
|
chatWrapper: chatWrapper
|
|
285
365
|
});
|
|
366
|
+
let lastTokenMeterState = contextSequence.tokenMeter.getState();
|
|
286
367
|
await new Promise((accept) => setTimeout(accept, 0)); // wait for logs to finish printing
|
|
287
368
|
if (grammarArg != "text" && jsonSchemaGrammarFilePath != null)
|
|
288
369
|
console.warn(chalk.yellow("Both `grammar` and `jsonSchemaGrammarFile` were specified. `jsonSchemaGrammarFile` will be used."));
|
|
289
|
-
console.info(`${chalk.yellow("Context size:")} ${context.contextSize}`);
|
|
290
|
-
if (logBatchSize)
|
|
291
|
-
console.info(`${chalk.yellow("Batch size:")} ${context.batchSize}`);
|
|
292
|
-
console.info(`${chalk.yellow("Train context size:")} ${model.trainContextSize}`);
|
|
293
|
-
console.info(`${chalk.yellow("Model type:")} ${model.typeDescription}`);
|
|
294
|
-
console.info(`${chalk.yellow("BOS:")} ${bos}`);
|
|
295
|
-
console.info(`${chalk.yellow("EOS:")} ${eos}`);
|
|
296
|
-
console.info(`${chalk.yellow("Chat wrapper:")} ${chatWrapper.wrapperName}`);
|
|
297
|
-
console.info(`${chalk.yellow("Repeat penalty:")} ${repeatPenalty} (apply to last ${lastTokensRepeatPenalty} tokens)`);
|
|
298
|
-
if (repeatFrequencyPenalty != null)
|
|
299
|
-
console.info(`${chalk.yellow("Repeat frequency penalty:")} ${repeatFrequencyPenalty}`);
|
|
300
|
-
if (repeatPresencePenalty != null)
|
|
301
|
-
console.info(`${chalk.yellow("Repeat presence penalty:")} ${repeatPresencePenalty}`);
|
|
302
|
-
if (!penalizeRepeatingNewLine)
|
|
303
|
-
console.info(`${chalk.yellow("Penalize repeating new line:")} disabled`);
|
|
304
|
-
if (jsonSchemaGrammarFilePath != null)
|
|
305
|
-
console.info(`${chalk.yellow("JSON schema grammar file:")} ${path.relative(process.cwd(), path.resolve(process.cwd(), jsonSchemaGrammarFilePath))}`);
|
|
306
|
-
else if (grammarArg !== "text")
|
|
307
|
-
console.info(`${chalk.yellow("Grammar:")} ${grammarArg}`);
|
|
308
370
|
if (environmentFunctions && grammar != null) {
|
|
309
371
|
console.warn(chalk.yellow("Environment functions are disabled since a grammar is already specified"));
|
|
310
372
|
environmentFunctions = false;
|
|
311
373
|
}
|
|
374
|
+
const padTitle = "Context".length + 1;
|
|
375
|
+
await printCommonInfoLines({
|
|
376
|
+
context,
|
|
377
|
+
minTitleLength: padTitle,
|
|
378
|
+
printBos: true,
|
|
379
|
+
printEos: true,
|
|
380
|
+
logBatchSize,
|
|
381
|
+
tokenMeterEnabled: meter
|
|
382
|
+
});
|
|
383
|
+
printInfoLine({
|
|
384
|
+
title: "Chat",
|
|
385
|
+
padTitle: padTitle,
|
|
386
|
+
info: [{
|
|
387
|
+
title: "Wrapper",
|
|
388
|
+
value: chatWrapper.wrapperName
|
|
389
|
+
}, {
|
|
390
|
+
title: "Repeat penalty",
|
|
391
|
+
value: `${repeatPenalty} (apply to last ${lastTokensRepeatPenalty} tokens)`
|
|
392
|
+
}, {
|
|
393
|
+
show: repeatFrequencyPenalty != null,
|
|
394
|
+
title: "Repeat frequency penalty",
|
|
395
|
+
value: String(repeatFrequencyPenalty)
|
|
396
|
+
}, {
|
|
397
|
+
show: repeatPresencePenalty != null,
|
|
398
|
+
title: "Repeat presence penalty",
|
|
399
|
+
value: String(repeatPresencePenalty)
|
|
400
|
+
}, {
|
|
401
|
+
show: !penalizeRepeatingNewLine,
|
|
402
|
+
title: "Penalize repeating new line",
|
|
403
|
+
value: "disabled"
|
|
404
|
+
}, {
|
|
405
|
+
show: jsonSchemaGrammarFilePath != null,
|
|
406
|
+
title: "JSON schema grammar file",
|
|
407
|
+
value: () => path.relative(process.cwd(), path.resolve(process.cwd(), jsonSchemaGrammarFilePath ?? ""))
|
|
408
|
+
}, {
|
|
409
|
+
show: jsonSchemaGrammarFilePath == null && grammarArg !== "text",
|
|
410
|
+
title: "Grammar",
|
|
411
|
+
value: grammarArg
|
|
412
|
+
}, {
|
|
413
|
+
show: environmentFunctions,
|
|
414
|
+
title: "Environment functions",
|
|
415
|
+
value: "enabled"
|
|
416
|
+
}]
|
|
417
|
+
});
|
|
312
418
|
// this is for ora to not interfere with readline
|
|
313
419
|
await new Promise(resolve => setTimeout(resolve, 1));
|
|
314
420
|
const replHistory = await ReplHistory.load(chatCommandHistoryFilePath, !noHistory);
|
|
@@ -322,8 +428,13 @@ async function RunChat({ model: modelArg, systemInfo, systemPrompt, systemPrompt
|
|
|
322
428
|
rl.close();
|
|
323
429
|
return res;
|
|
324
430
|
}
|
|
431
|
+
if (!printTimings && !meter)
|
|
432
|
+
void session.preloadPrompt("")
|
|
433
|
+
.catch(() => void 0); // don't throw an error if preloading fails because a real prompt is sent early
|
|
325
434
|
// eslint-disable-next-line no-constant-condition
|
|
326
435
|
while (true) {
|
|
436
|
+
let hadNoWhitespaceTextInThisIteration = false;
|
|
437
|
+
let nextPrintLeftovers = "";
|
|
327
438
|
const input = initialPrompt != null
|
|
328
439
|
? initialPrompt
|
|
329
440
|
: await getPrompt();
|
|
@@ -337,35 +448,84 @@ async function RunChat({ model: modelArg, systemInfo, systemPrompt, systemPrompt
|
|
|
337
448
|
break;
|
|
338
449
|
process.stdout.write(chalk.yellow("AI: "));
|
|
339
450
|
const [startColor, endColor] = chalk.blue("MIDDLE").split("MIDDLE");
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
topP,
|
|
346
|
-
repeatPenalty: {
|
|
347
|
-
penalty: repeatPenalty,
|
|
348
|
-
frequencyPenalty: repeatFrequencyPenalty != null ? repeatFrequencyPenalty : undefined,
|
|
349
|
-
presencePenalty: repeatPresencePenalty != null ? repeatPresencePenalty : undefined,
|
|
350
|
-
penalizeNewLine: penalizeRepeatingNewLine,
|
|
351
|
-
lastTokens: lastTokensRepeatPenalty
|
|
352
|
-
},
|
|
353
|
-
maxTokens: maxTokens === -1
|
|
354
|
-
? context.contextSize
|
|
355
|
-
: maxTokens <= 0
|
|
356
|
-
? undefined
|
|
357
|
-
: maxTokens,
|
|
358
|
-
onToken(chunk) {
|
|
359
|
-
process.stdout.write(model.detokenize(chunk));
|
|
360
|
-
},
|
|
361
|
-
functions: (grammar == null && environmentFunctions)
|
|
362
|
-
? defaultEnvironmentFunctions
|
|
363
|
-
: undefined
|
|
451
|
+
const abortController = new AbortController();
|
|
452
|
+
const consoleInteraction = new ConsoleInteraction();
|
|
453
|
+
consoleInteraction.onKey(ConsoleInteractionKey.ctrlC, async () => {
|
|
454
|
+
abortController.abort();
|
|
455
|
+
consoleInteraction.stop();
|
|
364
456
|
});
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
457
|
+
try {
|
|
458
|
+
process.stdout.write(startColor);
|
|
459
|
+
consoleInteraction.start();
|
|
460
|
+
await session.prompt(input, {
|
|
461
|
+
grammar: grammar, // this is a workaround to allow passing both `functions` and `grammar`
|
|
462
|
+
temperature,
|
|
463
|
+
minP,
|
|
464
|
+
topK,
|
|
465
|
+
topP,
|
|
466
|
+
seed: seed ?? undefined,
|
|
467
|
+
signal: abortController.signal,
|
|
468
|
+
stopOnAbortSignal: true,
|
|
469
|
+
repeatPenalty: {
|
|
470
|
+
penalty: repeatPenalty,
|
|
471
|
+
frequencyPenalty: repeatFrequencyPenalty != null ? repeatFrequencyPenalty : undefined,
|
|
472
|
+
presencePenalty: repeatPresencePenalty != null ? repeatPresencePenalty : undefined,
|
|
473
|
+
penalizeNewLine: penalizeRepeatingNewLine,
|
|
474
|
+
lastTokens: lastTokensRepeatPenalty
|
|
475
|
+
},
|
|
476
|
+
maxTokens: maxTokens === -1
|
|
477
|
+
? context.contextSize
|
|
478
|
+
: maxTokens <= 0
|
|
479
|
+
? undefined
|
|
480
|
+
: maxTokens,
|
|
481
|
+
onTextChunk(chunk) {
|
|
482
|
+
let text = nextPrintLeftovers + chunk;
|
|
483
|
+
nextPrintLeftovers = "";
|
|
484
|
+
if (trimWhitespace) {
|
|
485
|
+
if (!hadNoWhitespaceTextInThisIteration) {
|
|
486
|
+
text = text.trimStart();
|
|
487
|
+
if (text.length > 0)
|
|
488
|
+
hadNoWhitespaceTextInThisIteration = true;
|
|
489
|
+
}
|
|
490
|
+
const textWithTrimmedEnd = text.trimEnd();
|
|
491
|
+
if (textWithTrimmedEnd.length < text.length) {
|
|
492
|
+
nextPrintLeftovers = text.slice(textWithTrimmedEnd.length);
|
|
493
|
+
text = textWithTrimmedEnd;
|
|
494
|
+
}
|
|
495
|
+
}
|
|
496
|
+
process.stdout.write(text);
|
|
497
|
+
},
|
|
498
|
+
functions: (grammar == null && environmentFunctions)
|
|
499
|
+
? defaultEnvironmentFunctions
|
|
500
|
+
: undefined,
|
|
501
|
+
trimWhitespaceSuffix: trimWhitespace
|
|
502
|
+
});
|
|
503
|
+
}
|
|
504
|
+
catch (err) {
|
|
505
|
+
if (!(abortController.signal.aborted && err === abortController.signal.reason))
|
|
506
|
+
throw err;
|
|
507
|
+
}
|
|
508
|
+
finally {
|
|
509
|
+
consoleInteraction.stop();
|
|
510
|
+
if (abortController.signal.aborted)
|
|
511
|
+
process.stdout.write(endColor + chalk.yellow("[generation aborted by user]"));
|
|
512
|
+
else
|
|
513
|
+
process.stdout.write(endColor);
|
|
514
|
+
console.log();
|
|
515
|
+
}
|
|
516
|
+
if (printTimings) {
|
|
517
|
+
if (LlamaLogLevelGreaterThan(llama.logLevel, LlamaLogLevel.info))
|
|
518
|
+
llama.logLevel = LlamaLogLevel.info;
|
|
368
519
|
await context.printTimings();
|
|
520
|
+
await new Promise((accept) => setTimeout(accept, 0)); // wait for logs to finish printing
|
|
521
|
+
llama.logLevel = llamaLogLevel;
|
|
522
|
+
}
|
|
523
|
+
if (meter) {
|
|
524
|
+
const newTokenMeterState = contextSequence.tokenMeter.getState();
|
|
525
|
+
const tokenMeterDiff = TokenMeter.diff(newTokenMeterState, lastTokenMeterState);
|
|
526
|
+
lastTokenMeterState = newTokenMeterState;
|
|
527
|
+
console.info(`${chalk.dim("Input tokens:")} ${String(tokenMeterDiff.usedInputTokens).padEnd(5, " ")} ${chalk.dim("Output tokens:")} ${tokenMeterDiff.usedOutputTokens}`);
|
|
528
|
+
}
|
|
369
529
|
}
|
|
370
530
|
}
|
|
371
531
|
const defaultEnvironmentFunctions = {
|
|
@@ -382,33 +542,4 @@ const defaultEnvironmentFunctions = {
|
|
|
382
542
|
}
|
|
383
543
|
})
|
|
384
544
|
};
|
|
385
|
-
function getChatWrapper(wrapper, { bosString, filename, typeDescription }) {
|
|
386
|
-
switch (wrapper) {
|
|
387
|
-
case "general":
|
|
388
|
-
return new GeneralChatWrapper();
|
|
389
|
-
case "llamaChat":
|
|
390
|
-
return new LlamaChatWrapper();
|
|
391
|
-
case "alpacaChat":
|
|
392
|
-
return new AlpacaChatWrapper();
|
|
393
|
-
case "functionary":
|
|
394
|
-
return new FunctionaryChatWrapper();
|
|
395
|
-
case "chatML":
|
|
396
|
-
return new ChatMLChatWrapper();
|
|
397
|
-
case "falconChat":
|
|
398
|
-
return new FalconChatWrapper();
|
|
399
|
-
default:
|
|
400
|
-
}
|
|
401
|
-
if (wrapper === "auto") {
|
|
402
|
-
const chatWrapper = resolveChatWrapperBasedOnModel({
|
|
403
|
-
bosString,
|
|
404
|
-
filename,
|
|
405
|
-
typeDescription
|
|
406
|
-
});
|
|
407
|
-
if (chatWrapper != null)
|
|
408
|
-
return new chatWrapper();
|
|
409
|
-
return new GeneralChatWrapper();
|
|
410
|
-
}
|
|
411
|
-
void (wrapper);
|
|
412
|
-
throw new Error("Unknown wrapper: " + wrapper);
|
|
413
|
-
}
|
|
414
545
|
//# sourceMappingURL=ChatCommand.js.map
|