node-llama-cpp 3.0.0-beta.8 → 3.0.0
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. The information in this diff is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
- package/README.md +42 -27
- package/bins/_linux-arm64.moved.txt +1 -0
- package/bins/_linux-armv7l.moved.txt +1 -0
- package/bins/_linux-x64-cuda.moved.txt +1 -0
- package/bins/_linux-x64-vulkan.moved.txt +1 -0
- package/bins/_linux-x64.moved.txt +1 -0
- package/bins/_mac-arm64-metal.moved.txt +1 -0
- package/bins/_mac-x64.moved.txt +1 -0
- package/bins/_win-arm64.moved.txt +1 -0
- package/bins/_win-x64-cuda.moved.txt +1 -0
- package/bins/_win-x64-vulkan.moved.txt +1 -0
- package/bins/_win-x64.moved.txt +1 -0
- package/dist/ChatWrapper.d.ts +19 -39
- package/dist/ChatWrapper.js +129 -72
- package/dist/ChatWrapper.js.map +1 -1
- package/dist/apiDocsIndex.d.ts +1 -0
- package/dist/apiDocsIndex.js +7 -0
- package/dist/apiDocsIndex.js.map +1 -0
- package/dist/bindings/AddonTypes.d.ts +88 -20
- package/dist/bindings/Llama.d.ts +43 -6
- package/dist/bindings/Llama.js +214 -40
- package/dist/bindings/Llama.js.map +1 -1
- package/dist/bindings/consts.d.ts +2 -0
- package/dist/bindings/consts.js +13 -0
- package/dist/bindings/consts.js.map +1 -0
- package/dist/bindings/getLlama.d.ts +123 -18
- package/dist/bindings/getLlama.js +288 -90
- package/dist/bindings/getLlama.js.map +1 -1
- package/dist/bindings/types.d.ts +29 -5
- package/dist/bindings/types.js +51 -2
- package/dist/bindings/types.js.map +1 -1
- package/dist/bindings/utils/MemoryOrchestrator.d.ts +21 -0
- package/dist/bindings/utils/MemoryOrchestrator.js +49 -0
- package/dist/bindings/utils/MemoryOrchestrator.js.map +1 -0
- package/dist/bindings/utils/asyncEvery.d.ts +5 -0
- package/dist/bindings/utils/asyncEvery.js +15 -0
- package/dist/bindings/utils/asyncEvery.js.map +1 -0
- package/dist/bindings/utils/asyncSome.d.ts +5 -0
- package/dist/bindings/utils/asyncSome.js +27 -0
- package/dist/bindings/utils/asyncSome.js.map +1 -0
- package/dist/bindings/utils/clearAllLocalBuilds.js.map +1 -1
- package/dist/bindings/utils/cloneLlamaCppRepo.d.ts +1 -1
- package/dist/bindings/utils/cloneLlamaCppRepo.js +39 -28
- package/dist/bindings/utils/cloneLlamaCppRepo.js.map +1 -1
- package/dist/bindings/utils/compileLLamaCpp.d.ts +11 -3
- package/dist/bindings/utils/compileLLamaCpp.js +250 -81
- package/dist/bindings/utils/compileLLamaCpp.js.map +1 -1
- package/dist/bindings/utils/detectAvailableComputeLayers.d.ts +14 -0
- package/dist/bindings/utils/detectAvailableComputeLayers.js +305 -0
- package/dist/bindings/utils/detectAvailableComputeLayers.js.map +1 -0
- package/dist/bindings/utils/detectGlibc.d.ts +4 -0
- package/dist/bindings/utils/detectGlibc.js +46 -0
- package/dist/bindings/utils/detectGlibc.js.map +1 -0
- package/dist/bindings/utils/getBestComputeLayersAvailable.d.ts +9 -0
- package/dist/bindings/utils/getBestComputeLayersAvailable.js +29 -0
- package/dist/bindings/utils/getBestComputeLayersAvailable.js.map +1 -0
- package/dist/bindings/utils/getBuildFolderNameForBuildOptions.js +14 -6
- package/dist/bindings/utils/getBuildFolderNameForBuildOptions.js.map +1 -1
- package/dist/bindings/utils/getExampleUsageCodeOfGetLlama.js.map +1 -1
- package/dist/bindings/utils/getGpuTypesToUseForOption.d.ts +12 -0
- package/dist/bindings/utils/getGpuTypesToUseForOption.js +39 -0
- package/dist/bindings/utils/getGpuTypesToUseForOption.js.map +1 -0
- package/dist/bindings/utils/getLinuxDistroInfo.d.ts +9 -0
- package/dist/bindings/utils/getLinuxDistroInfo.js +46 -0
- package/dist/bindings/utils/getLinuxDistroInfo.js.map +1 -0
- package/dist/bindings/utils/getLlamaWithoutBackend.d.ts +5 -0
- package/dist/bindings/utils/getLlamaWithoutBackend.js +27 -0
- package/dist/bindings/utils/getLlamaWithoutBackend.js.map +1 -0
- package/dist/bindings/utils/getPlatform.js.map +1 -1
- package/dist/bindings/utils/getPlatformInfo.d.ts +5 -0
- package/dist/bindings/utils/getPlatformInfo.js +28 -0
- package/dist/bindings/utils/getPlatformInfo.js.map +1 -0
- package/dist/bindings/utils/hasBuildingFromSourceDependenciesInstalled.d.ts +3 -0
- package/dist/bindings/utils/hasBuildingFromSourceDependenciesInstalled.js +27 -0
- package/dist/bindings/utils/hasBuildingFromSourceDependenciesInstalled.js.map +1 -0
- package/dist/bindings/utils/hasFileInPath.d.ts +2 -0
- package/dist/bindings/utils/hasFileInPath.js +34 -0
- package/dist/bindings/utils/hasFileInPath.js.map +1 -0
- package/dist/bindings/utils/lastBuildInfo.js.map +1 -1
- package/dist/bindings/utils/logBinaryUsageExampleToConsole.d.ts +1 -1
- package/dist/bindings/utils/logBinaryUsageExampleToConsole.js +3 -9
- package/dist/bindings/utils/logBinaryUsageExampleToConsole.js.map +1 -1
- package/dist/bindings/utils/logDistroInstallInstruction.d.ts +14 -0
- package/dist/bindings/utils/logDistroInstallInstruction.js +48 -0
- package/dist/bindings/utils/logDistroInstallInstruction.js.map +1 -0
- package/dist/bindings/utils/resolveCustomCmakeOptions.js +26 -26
- package/dist/bindings/utils/resolveCustomCmakeOptions.js.map +1 -1
- package/dist/bindings/utils/testBindingBinary.d.ts +1 -0
- package/dist/bindings/utils/testBindingBinary.js +100 -0
- package/dist/bindings/utils/testBindingBinary.js.map +1 -0
- package/dist/bindings/utils/testCmakeBinary.d.ts +5 -0
- package/dist/bindings/utils/testCmakeBinary.js +32 -0
- package/dist/bindings/utils/testCmakeBinary.js.map +1 -0
- package/dist/chatWrappers/AlpacaChatWrapper.d.ts +2 -1
- package/dist/chatWrappers/AlpacaChatWrapper.js +10 -2
- package/dist/chatWrappers/AlpacaChatWrapper.js.map +1 -1
- package/dist/chatWrappers/ChatMLChatWrapper.d.ts +2 -14
- package/dist/chatWrappers/ChatMLChatWrapper.js +23 -21
- package/dist/chatWrappers/ChatMLChatWrapper.js.map +1 -1
- package/dist/chatWrappers/FalconChatWrapper.d.ts +4 -10
- package/dist/chatWrappers/FalconChatWrapper.js +39 -21
- package/dist/chatWrappers/FalconChatWrapper.js.map +1 -1
- package/dist/chatWrappers/FunctionaryChatWrapper.d.ts +8 -32
- package/dist/chatWrappers/FunctionaryChatWrapper.js +514 -118
- package/dist/chatWrappers/FunctionaryChatWrapper.js.map +1 -1
- package/dist/chatWrappers/GemmaChatWrapper.d.ts +7 -0
- package/dist/chatWrappers/GemmaChatWrapper.js +96 -0
- package/dist/chatWrappers/GemmaChatWrapper.js.map +1 -0
- package/dist/chatWrappers/GeneralChatWrapper.d.ts +4 -10
- package/dist/chatWrappers/GeneralChatWrapper.js +46 -22
- package/dist/chatWrappers/GeneralChatWrapper.js.map +1 -1
- package/dist/chatWrappers/Llama2ChatWrapper.d.ts +12 -0
- package/dist/chatWrappers/{LlamaChatWrapper.js → Llama2ChatWrapper.js} +37 -20
- package/dist/chatWrappers/Llama2ChatWrapper.js.map +1 -0
- package/dist/chatWrappers/Llama3ChatWrapper.d.ts +16 -0
- package/dist/chatWrappers/Llama3ChatWrapper.js +173 -0
- package/dist/chatWrappers/Llama3ChatWrapper.js.map +1 -0
- package/dist/chatWrappers/Llama3_1ChatWrapper.d.ts +35 -0
- package/dist/chatWrappers/Llama3_1ChatWrapper.js +277 -0
- package/dist/chatWrappers/Llama3_1ChatWrapper.js.map +1 -0
- package/dist/chatWrappers/MistralChatWrapper.d.ts +15 -0
- package/dist/chatWrappers/MistralChatWrapper.js +169 -0
- package/dist/chatWrappers/MistralChatWrapper.js.map +1 -0
- package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.d.ts +100 -0
- package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.js +409 -0
- package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.js.map +1 -0
- package/dist/chatWrappers/generic/TemplateChatWrapper.d.ts +60 -0
- package/dist/chatWrappers/generic/TemplateChatWrapper.js +204 -0
- package/dist/chatWrappers/generic/TemplateChatWrapper.js.map +1 -0
- package/dist/chatWrappers/generic/utils/chatHistoryFunctionCallMessageTemplate.d.ts +23 -0
- package/dist/chatWrappers/generic/utils/chatHistoryFunctionCallMessageTemplate.js +45 -0
- package/dist/chatWrappers/generic/utils/chatHistoryFunctionCallMessageTemplate.js.map +1 -0
- package/dist/chatWrappers/utils/ChatModelFunctionsDocumentationGenerator.d.ts +57 -0
- package/dist/chatWrappers/utils/ChatModelFunctionsDocumentationGenerator.js +119 -0
- package/dist/chatWrappers/utils/ChatModelFunctionsDocumentationGenerator.js.map +1 -0
- package/dist/chatWrappers/utils/chunkChatItems.d.ts +10 -0
- package/dist/chatWrappers/utils/chunkChatItems.js +44 -0
- package/dist/chatWrappers/utils/chunkChatItems.js.map +1 -0
- package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.d.ts +4 -0
- package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.js +221 -0
- package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.js.map +1 -0
- package/dist/chatWrappers/utils/jsonDumps.d.ts +7 -0
- package/dist/chatWrappers/utils/jsonDumps.js +18 -0
- package/dist/chatWrappers/utils/jsonDumps.js.map +1 -0
- package/dist/chatWrappers/utils/resolveChatWrapper.d.ts +95 -0
- package/dist/chatWrappers/utils/resolveChatWrapper.js +335 -0
- package/dist/chatWrappers/utils/resolveChatWrapper.js.map +1 -0
- package/dist/cli/cli.js +19 -11
- package/dist/cli/cli.js.map +1 -1
- package/dist/cli/commands/ChatCommand.d.ts +16 -7
- package/dist/cli/commands/ChatCommand.js +323 -191
- package/dist/cli/commands/ChatCommand.js.map +1 -1
- package/dist/cli/commands/CompleteCommand.d.ts +31 -0
- package/dist/cli/commands/CompleteCommand.js +402 -0
- package/dist/cli/commands/CompleteCommand.js.map +1 -0
- package/dist/cli/commands/DebugCommand.d.ts +7 -0
- package/dist/cli/commands/DebugCommand.js +54 -0
- package/dist/cli/commands/DebugCommand.js.map +1 -0
- package/dist/cli/commands/InfillCommand.d.ts +33 -0
- package/dist/cli/commands/InfillCommand.js +438 -0
- package/dist/cli/commands/InfillCommand.js.map +1 -0
- package/dist/cli/commands/InitCommand.d.ts +11 -0
- package/dist/cli/commands/InitCommand.js +195 -0
- package/dist/cli/commands/InitCommand.js.map +1 -0
- package/dist/cli/commands/OnPostInstallCommand.js +6 -2
- package/dist/cli/commands/OnPostInstallCommand.js.map +1 -1
- package/dist/cli/commands/PullCommand.d.ts +13 -0
- package/dist/cli/commands/PullCommand.js +158 -0
- package/dist/cli/commands/PullCommand.js.map +1 -0
- package/dist/cli/commands/inspect/InspectCommand.d.ts +4 -0
- package/dist/cli/commands/inspect/InspectCommand.js +21 -0
- package/dist/cli/commands/inspect/InspectCommand.js.map +1 -0
- package/dist/cli/commands/inspect/commands/InspectEstimateCommand.d.ts +12 -0
- package/dist/cli/commands/inspect/commands/InspectEstimateCommand.js +225 -0
- package/dist/cli/commands/inspect/commands/InspectEstimateCommand.js.map +1 -0
- package/dist/cli/commands/inspect/commands/InspectGgufCommand.d.ts +12 -0
- package/dist/cli/commands/inspect/commands/InspectGgufCommand.js +149 -0
- package/dist/cli/commands/inspect/commands/InspectGgufCommand.js.map +1 -0
- package/dist/cli/commands/inspect/commands/InspectGpuCommand.d.ts +4 -0
- package/dist/cli/commands/inspect/commands/InspectGpuCommand.js +202 -0
- package/dist/cli/commands/inspect/commands/InspectGpuCommand.js.map +1 -0
- package/dist/cli/commands/inspect/commands/InspectMeasureCommand.d.ts +18 -0
- package/dist/cli/commands/inspect/commands/InspectMeasureCommand.js +629 -0
- package/dist/cli/commands/inspect/commands/InspectMeasureCommand.js.map +1 -0
- package/dist/cli/commands/source/SourceCommand.d.ts +4 -0
- package/dist/cli/commands/source/SourceCommand.js +19 -0
- package/dist/cli/commands/source/SourceCommand.js.map +1 -0
- package/dist/cli/commands/source/commands/BuildCommand.d.ts +16 -0
- package/dist/cli/commands/source/commands/BuildCommand.js +148 -0
- package/dist/cli/commands/source/commands/BuildCommand.js.map +1 -0
- package/dist/cli/commands/{ClearCommand.d.ts → source/commands/ClearCommand.d.ts} +1 -1
- package/dist/cli/commands/{ClearCommand.js → source/commands/ClearCommand.js} +11 -10
- package/dist/cli/commands/source/commands/ClearCommand.js.map +1 -0
- package/dist/cli/commands/{DownloadCommand.d.ts → source/commands/DownloadCommand.d.ts} +5 -4
- package/dist/cli/commands/source/commands/DownloadCommand.js +217 -0
- package/dist/cli/commands/source/commands/DownloadCommand.js.map +1 -0
- package/dist/cli/projectTemplates.d.ts +7 -0
- package/dist/cli/projectTemplates.js +10 -0
- package/dist/cli/projectTemplates.js.map +1 -0
- package/dist/cli/recommendedModels.d.ts +2 -0
- package/dist/cli/recommendedModels.js +585 -0
- package/dist/cli/recommendedModels.js.map +1 -0
- package/dist/cli/startCreateCli.d.ts +2 -0
- package/dist/cli/startCreateCli.js +26 -0
- package/dist/cli/startCreateCli.js.map +1 -0
- package/dist/cli/utils/ConsoleInteraction.d.ts +22 -0
- package/dist/cli/utils/ConsoleInteraction.js +122 -0
- package/dist/cli/utils/ConsoleInteraction.js.map +1 -0
- package/dist/cli/utils/ConsoleTable.d.ts +23 -0
- package/dist/cli/utils/ConsoleTable.js +86 -0
- package/dist/cli/utils/ConsoleTable.js.map +1 -0
- package/dist/cli/utils/basicChooseFromListConsoleInteraction.d.ts +13 -0
- package/dist/cli/utils/basicChooseFromListConsoleInteraction.js +111 -0
- package/dist/cli/utils/basicChooseFromListConsoleInteraction.js.map +1 -0
- package/dist/cli/utils/consolePromptQuestion.d.ts +6 -0
- package/dist/cli/utils/consolePromptQuestion.js +82 -0
- package/dist/cli/utils/consolePromptQuestion.js.map +1 -0
- package/dist/cli/utils/getReadablePath.d.ts +1 -0
- package/dist/cli/utils/getReadablePath.js +14 -0
- package/dist/cli/utils/getReadablePath.js.map +1 -0
- package/dist/cli/utils/interactivelyAskForModel.d.ts +8 -0
- package/dist/cli/utils/interactivelyAskForModel.js +450 -0
- package/dist/cli/utils/interactivelyAskForModel.js.map +1 -0
- package/dist/cli/utils/logUsedGpuTypeOption.d.ts +2 -0
- package/dist/cli/utils/logUsedGpuTypeOption.js +9 -0
- package/dist/cli/utils/logUsedGpuTypeOption.js.map +1 -0
- package/dist/cli/utils/printCommonInfoLines.d.ts +9 -0
- package/dist/cli/utils/printCommonInfoLines.js +82 -0
- package/dist/cli/utils/printCommonInfoLines.js.map +1 -0
- package/dist/cli/utils/printInfoLine.d.ts +12 -0
- package/dist/cli/utils/printInfoLine.js +54 -0
- package/dist/cli/utils/printInfoLine.js.map +1 -0
- package/dist/cli/utils/projectTemplates.d.ts +19 -0
- package/dist/cli/utils/projectTemplates.js +47 -0
- package/dist/cli/utils/projectTemplates.js.map +1 -0
- package/dist/cli/utils/renderModelCompatibilityPercentageWithColors.d.ts +6 -0
- package/dist/cli/utils/renderModelCompatibilityPercentageWithColors.js +14 -0
- package/dist/cli/utils/renderModelCompatibilityPercentageWithColors.js.map +1 -0
- package/dist/cli/utils/resolveCommandGgufPath.d.ts +5 -0
- package/dist/cli/utils/resolveCommandGgufPath.js +72 -0
- package/dist/cli/utils/resolveCommandGgufPath.js.map +1 -0
- package/dist/cli/utils/resolveHeaderFlag.d.ts +1 -0
- package/dist/cli/utils/resolveHeaderFlag.js +21 -0
- package/dist/cli/utils/resolveHeaderFlag.js.map +1 -0
- package/dist/cli/utils/resolveModelRecommendationFileOptions.d.ts +19 -0
- package/dist/cli/utils/resolveModelRecommendationFileOptions.js +7 -0
- package/dist/cli/utils/resolveModelRecommendationFileOptions.js.map +1 -0
- package/dist/cli/utils/splitAnsiToLines.d.ts +1 -0
- package/dist/cli/utils/splitAnsiToLines.js +32 -0
- package/dist/cli/utils/splitAnsiToLines.js.map +1 -0
- package/dist/cli/utils/withCliCommandDescriptionDocsUrl.d.ts +2 -0
- package/dist/cli/utils/withCliCommandDescriptionDocsUrl.js +23 -0
- package/dist/cli/utils/withCliCommandDescriptionDocsUrl.js.map +1 -0
- package/dist/commands.d.ts +4 -3
- package/dist/commands.js +6 -3
- package/dist/commands.js.map +1 -1
- package/dist/config.d.ts +35 -4
- package/dist/config.js +58 -17
- package/dist/config.js.map +1 -1
- package/dist/consts.d.ts +4 -0
- package/dist/consts.js +11 -0
- package/dist/consts.js.map +1 -0
- package/dist/evaluator/LlamaChat/LlamaChat.d.ts +151 -41
- package/dist/evaluator/LlamaChat/LlamaChat.js +1289 -437
- package/dist/evaluator/LlamaChat/LlamaChat.js.map +1 -1
- package/dist/evaluator/LlamaChat/utils/FunctionCallNameGrammar.d.ts +11 -0
- package/dist/evaluator/LlamaChat/utils/FunctionCallNameGrammar.js +55 -0
- package/dist/evaluator/LlamaChat/utils/FunctionCallNameGrammar.js.map +1 -0
- package/dist/evaluator/LlamaChat/utils/FunctionCallParamsGrammar.d.ts +16 -0
- package/dist/evaluator/LlamaChat/utils/FunctionCallParamsGrammar.js +45 -0
- package/dist/evaluator/LlamaChat/utils/FunctionCallParamsGrammar.js.map +1 -0
- package/dist/evaluator/LlamaChat/utils/LlamaFunctionCallValidationError.d.ts +8 -0
- package/dist/evaluator/LlamaChat/utils/LlamaFunctionCallValidationError.js +12 -0
- package/dist/evaluator/LlamaChat/utils/LlamaFunctionCallValidationError.js.map +1 -0
- package/dist/evaluator/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.js +27 -17
- package/dist/evaluator/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.js.map +1 -1
- package/dist/evaluator/LlamaChatSession/LlamaChatSession.d.ts +187 -13
- package/dist/evaluator/LlamaChatSession/LlamaChatSession.js +280 -53
- package/dist/evaluator/LlamaChatSession/LlamaChatSession.js.map +1 -1
- package/dist/evaluator/LlamaChatSession/utils/LlamaChatSessionPromptCompletionEngine.d.ts +40 -0
- package/dist/evaluator/LlamaChatSession/utils/LlamaChatSessionPromptCompletionEngine.js +186 -0
- package/dist/evaluator/LlamaChatSession/utils/LlamaChatSessionPromptCompletionEngine.js.map +1 -0
- package/dist/evaluator/LlamaChatSession/utils/defineChatSessionFunction.d.ts +10 -2
- package/dist/evaluator/LlamaChatSession/utils/defineChatSessionFunction.js +8 -0
- package/dist/evaluator/LlamaChatSession/utils/defineChatSessionFunction.js.map +1 -1
- package/dist/evaluator/LlamaCompletion.d.ts +168 -0
- package/dist/evaluator/LlamaCompletion.js +470 -0
- package/dist/evaluator/LlamaCompletion.js.map +1 -0
- package/dist/evaluator/LlamaContext/LlamaContext.d.ts +63 -22
- package/dist/evaluator/LlamaContext/LlamaContext.js +503 -121
- package/dist/evaluator/LlamaContext/LlamaContext.js.map +1 -1
- package/dist/evaluator/LlamaContext/LlamaSampler.d.ts +1 -0
- package/dist/evaluator/LlamaContext/LlamaSampler.js +31 -0
- package/dist/evaluator/LlamaContext/LlamaSampler.js.map +1 -0
- package/dist/evaluator/LlamaContext/types.d.ts +177 -16
- package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies/firstInFirstOutStrategy.js.map +1 -0
- package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies/maximumParallelismStrategy.js.map +1 -0
- package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizationStrategy.d.ts +2 -0
- package/dist/evaluator/LlamaContext/utils/{resolveBatchItemsPrioritizingStrategy.js → resolveBatchItemsPrioritizationStrategy.js} +5 -5
- package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizationStrategy.js.map +1 -0
- package/dist/evaluator/LlamaEmbedding.d.ts +21 -0
- package/dist/evaluator/LlamaEmbedding.js +53 -0
- package/dist/evaluator/LlamaEmbedding.js.map +1 -0
- package/dist/evaluator/LlamaEmbeddingContext.d.ts +29 -19
- package/dist/evaluator/LlamaEmbeddingContext.js +36 -43
- package/dist/evaluator/LlamaEmbeddingContext.js.map +1 -1
- package/dist/evaluator/LlamaGrammar.d.ts +16 -13
- package/dist/evaluator/LlamaGrammar.js +17 -10
- package/dist/evaluator/LlamaGrammar.js.map +1 -1
- package/dist/evaluator/LlamaGrammarEvaluationState.d.ts +7 -3
- package/dist/evaluator/LlamaGrammarEvaluationState.js +8 -4
- package/dist/evaluator/LlamaGrammarEvaluationState.js.map +1 -1
- package/dist/evaluator/LlamaJsonSchemaGrammar.d.ts +3 -0
- package/dist/evaluator/LlamaJsonSchemaGrammar.js +5 -3
- package/dist/evaluator/LlamaJsonSchemaGrammar.js.map +1 -1
- package/dist/evaluator/LlamaModel/LlamaModel.d.ts +255 -0
- package/dist/evaluator/LlamaModel/LlamaModel.js +780 -0
- package/dist/evaluator/LlamaModel/LlamaModel.js.map +1 -0
- package/dist/evaluator/LlamaModel/utils/TokenAttributes.d.ts +29 -0
- package/dist/evaluator/LlamaModel/utils/TokenAttributes.js +65 -0
- package/dist/evaluator/LlamaModel/utils/TokenAttributes.js.map +1 -0
- package/dist/evaluator/TokenBias.d.ts +34 -0
- package/dist/evaluator/TokenBias.js +65 -0
- package/dist/evaluator/TokenBias.js.map +1 -0
- package/dist/evaluator/TokenMeter.d.ts +45 -0
- package/dist/evaluator/TokenMeter.js +74 -0
- package/dist/evaluator/TokenMeter.js.map +1 -0
- package/dist/gguf/consts.d.ts +4 -0
- package/dist/gguf/consts.js +12 -0
- package/dist/gguf/consts.js.map +1 -0
- package/dist/gguf/errors/InvalidGgufMagicError.d.ts +3 -0
- package/dist/gguf/errors/InvalidGgufMagicError.js +6 -0
- package/dist/gguf/errors/InvalidGgufMagicError.js.map +1 -0
- package/dist/gguf/errors/UnsupportedGgufValueTypeError.d.ts +4 -0
- package/dist/gguf/errors/UnsupportedGgufValueTypeError.js +9 -0
- package/dist/gguf/errors/UnsupportedGgufValueTypeError.js.map +1 -0
- package/dist/gguf/fileReaders/GgufFileReader.d.ts +36 -0
- package/dist/gguf/fileReaders/GgufFileReader.js +109 -0
- package/dist/gguf/fileReaders/GgufFileReader.js.map +1 -0
- package/dist/gguf/fileReaders/GgufFsFileReader.d.ts +16 -0
- package/dist/gguf/fileReaders/GgufFsFileReader.js +62 -0
- package/dist/gguf/fileReaders/GgufFsFileReader.js.map +1 -0
- package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.d.ts +25 -0
- package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.js +92 -0
- package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.js.map +1 -0
- package/dist/gguf/insights/GgufInsights.d.ts +50 -0
- package/dist/gguf/insights/GgufInsights.js +401 -0
- package/dist/gguf/insights/GgufInsights.js.map +1 -0
- package/dist/gguf/insights/GgufInsightsConfigurationResolver.d.ts +146 -0
- package/dist/gguf/insights/GgufInsightsConfigurationResolver.js +226 -0
- package/dist/gguf/insights/GgufInsightsConfigurationResolver.js.map +1 -0
- package/dist/gguf/insights/utils/resolveContextContextSizeOption.d.ts +19 -0
- package/dist/gguf/insights/utils/resolveContextContextSizeOption.js +78 -0
- package/dist/gguf/insights/utils/resolveContextContextSizeOption.js.map +1 -0
- package/dist/gguf/insights/utils/resolveModelGpuLayersOption.d.ts +15 -0
- package/dist/gguf/insights/utils/resolveModelGpuLayersOption.js +183 -0
- package/dist/gguf/insights/utils/resolveModelGpuLayersOption.js.map +1 -0
- package/dist/gguf/insights/utils/scoreLevels.d.ts +5 -0
- package/dist/gguf/insights/utils/scoreLevels.js +16 -0
- package/dist/gguf/insights/utils/scoreLevels.js.map +1 -0
- package/dist/gguf/parser/GgufV2Parser.d.ts +20 -0
- package/dist/gguf/parser/GgufV2Parser.js +156 -0
- package/dist/gguf/parser/GgufV2Parser.js.map +1 -0
- package/dist/gguf/parser/GgufV3Parser.d.ts +3 -0
- package/dist/gguf/parser/GgufV3Parser.js +4 -0
- package/dist/gguf/parser/GgufV3Parser.js.map +1 -0
- package/dist/gguf/parser/parseGguf.d.ts +8 -0
- package/dist/gguf/parser/parseGguf.js +61 -0
- package/dist/gguf/parser/parseGguf.js.map +1 -0
- package/dist/gguf/readGgufFileInfo.d.ts +45 -0
- package/dist/gguf/readGgufFileInfo.js +71 -0
- package/dist/gguf/readGgufFileInfo.js.map +1 -0
- package/dist/gguf/types/GgufFileInfoTypes.d.ts +84 -0
- package/dist/gguf/types/GgufFileInfoTypes.js +18 -0
- package/dist/gguf/types/GgufFileInfoTypes.js.map +1 -0
- package/dist/gguf/types/GgufMetadataTypes.d.ts +372 -0
- package/dist/gguf/types/GgufMetadataTypes.js +114 -0
- package/dist/gguf/types/GgufMetadataTypes.js.map +1 -0
- package/dist/gguf/types/GgufTensorInfoTypes.d.ts +37 -0
- package/dist/gguf/types/GgufTensorInfoTypes.js +33 -0
- package/dist/gguf/types/GgufTensorInfoTypes.js.map +1 -0
- package/dist/gguf/utils/GgufReadOffset.d.ts +6 -0
- package/dist/gguf/utils/GgufReadOffset.js +18 -0
- package/dist/gguf/utils/GgufReadOffset.js.map +1 -0
- package/dist/gguf/utils/convertMetadataKeyValueRecordToNestedObject.d.ts +6 -0
- package/dist/gguf/utils/convertMetadataKeyValueRecordToNestedObject.js +76 -0
- package/dist/gguf/utils/convertMetadataKeyValueRecordToNestedObject.js.map +1 -0
- package/dist/gguf/utils/getGgufFileTypeName.d.ts +4 -0
- package/dist/gguf/utils/getGgufFileTypeName.js +13 -0
- package/dist/gguf/utils/getGgufFileTypeName.js.map +1 -0
- package/dist/gguf/utils/getGgufMetadataArchitectureData.d.ts +3 -0
- package/dist/gguf/utils/getGgufMetadataArchitectureData.js +4 -0
- package/dist/gguf/utils/getGgufMetadataArchitectureData.js.map +1 -0
- package/dist/gguf/utils/normalizeGgufDownloadUrl.d.ts +1 -0
- package/dist/gguf/utils/normalizeGgufDownloadUrl.js +16 -0
- package/dist/gguf/utils/normalizeGgufDownloadUrl.js.map +1 -0
- package/dist/gguf/utils/resolveBinarySplitGgufPartUrls.d.ts +2 -0
- package/dist/gguf/utils/resolveBinarySplitGgufPartUrls.js +39 -0
- package/dist/gguf/utils/resolveBinarySplitGgufPartUrls.js.map +1 -0
- package/dist/gguf/utils/resolveSplitGgufParts.d.ts +7 -0
- package/dist/gguf/utils/resolveSplitGgufParts.js +55 -0
- package/dist/gguf/utils/resolveSplitGgufParts.js.map +1 -0
- package/dist/index.d.ts +39 -14
- package/dist/index.js +29 -8
- package/dist/index.js.map +1 -1
- package/dist/state.d.ts +2 -0
- package/dist/state.js +7 -0
- package/dist/state.js.map +1 -1
- package/dist/tsconfig.tsbuildinfo +1 -0
- package/dist/types.d.ts +131 -5
- package/dist/types.js.map +1 -1
- package/dist/utils/DisposeGuard.d.ts +13 -0
- package/dist/utils/DisposeGuard.js +120 -0
- package/dist/utils/DisposeGuard.js.map +1 -0
- package/dist/utils/InsufficientMemoryError.d.ts +3 -0
- package/dist/utils/InsufficientMemoryError.js +6 -0
- package/dist/utils/InsufficientMemoryError.js.map +1 -0
- package/dist/utils/LlamaText.d.ts +73 -26
- package/dist/utils/LlamaText.js +475 -157
- package/dist/utils/LlamaText.js.map +1 -1
- package/dist/utils/LruCache.d.ts +12 -0
- package/dist/utils/LruCache.js +44 -0
- package/dist/utils/LruCache.js.map +1 -0
- package/dist/utils/OverridesObject.d.ts +7 -0
- package/dist/utils/OverridesObject.js +2 -0
- package/dist/utils/OverridesObject.js.map +1 -0
- package/dist/utils/ReplHistory.js +5 -1
- package/dist/utils/ReplHistory.js.map +1 -1
- package/dist/utils/StopGenerationDetector.d.ts +27 -8
- package/dist/utils/StopGenerationDetector.js +108 -22
- package/dist/utils/StopGenerationDetector.js.map +1 -1
- package/dist/utils/ThreadsSplitter.d.ts +32 -0
- package/dist/utils/ThreadsSplitter.js +177 -0
- package/dist/utils/ThreadsSplitter.js.map +1 -0
- package/dist/utils/TokenStreamRegulator.d.ts +10 -4
- package/dist/utils/TokenStreamRegulator.js +102 -10
- package/dist/utils/TokenStreamRegulator.js.map +1 -1
- package/dist/utils/UnsupportedError.d.ts +2 -0
- package/dist/utils/UnsupportedError.js +7 -0
- package/dist/utils/UnsupportedError.js.map +1 -0
- package/dist/utils/appendUserMessageToChatHistory.d.ts +4 -0
- package/dist/utils/appendUserMessageToChatHistory.js +4 -0
- package/dist/utils/appendUserMessageToChatHistory.js.map +1 -1
- package/dist/utils/clearTempFolder.js.map +1 -1
- package/dist/utils/cmake.js +23 -10
- package/dist/utils/cmake.js.map +1 -1
- package/dist/utils/compareTokens.d.ts +1 -1
- package/dist/utils/compareTokens.js.map +1 -1
- package/dist/utils/createModelDownloader.d.ts +199 -0
- package/dist/utils/createModelDownloader.js +405 -0
- package/dist/utils/createModelDownloader.js.map +1 -0
- package/dist/utils/findBestOption.d.ts +4 -0
- package/dist/utils/findBestOption.js +15 -0
- package/dist/utils/findBestOption.js.map +1 -0
- package/dist/utils/findCharacterRemovalCountToFitChatHistoryInContext.d.ts +1 -0
- package/dist/utils/findCharacterRemovalCountToFitChatHistoryInContext.js +23 -12
- package/dist/utils/findCharacterRemovalCountToFitChatHistoryInContext.js.map +1 -1
- package/dist/utils/gbnfJson/GbnfGrammarGenerator.js.map +1 -1
- package/dist/utils/gbnfJson/getGbnfGrammarForGbnfJsonSchema.d.ts +5 -0
- package/dist/utils/gbnfJson/getGbnfGrammarForGbnfJsonSchema.js +11 -0
- package/dist/utils/gbnfJson/getGbnfGrammarForGbnfJsonSchema.js.map +1 -0
- package/dist/utils/gbnfJson/terminals/GbnfArray.d.ts +3 -1
- package/dist/utils/gbnfJson/terminals/GbnfArray.js +10 -5
- package/dist/utils/gbnfJson/terminals/GbnfArray.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfBoolean.d.ts +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfBoolean.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfBooleanValue.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfGrammar.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfNull.d.ts +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfNull.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfNumber.d.ts +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfNumber.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfNumberValue.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfObjectMap.d.ts +3 -1
- package/dist/utils/gbnfJson/terminals/GbnfObjectMap.js +9 -4
- package/dist/utils/gbnfJson/terminals/GbnfObjectMap.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfOr.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfRepetition.d.ts +9 -0
- package/dist/utils/gbnfJson/terminals/GbnfRepetition.js +37 -0
- package/dist/utils/gbnfJson/terminals/GbnfRepetition.js.map +1 -0
- package/dist/utils/gbnfJson/terminals/GbnfString.d.ts +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfString.js +23 -5
- package/dist/utils/gbnfJson/terminals/GbnfString.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfStringValue.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfVerbatimText.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfWhitespace.d.ts +7 -4
- package/dist/utils/gbnfJson/terminals/GbnfWhitespace.js +37 -9
- package/dist/utils/gbnfJson/terminals/GbnfWhitespace.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/gbnfConsts.d.ts +5 -4
- package/dist/utils/gbnfJson/terminals/gbnfConsts.js +14 -3
- package/dist/utils/gbnfJson/terminals/gbnfConsts.js.map +1 -1
- package/dist/utils/gbnfJson/types.d.ts +3 -0
- package/dist/utils/gbnfJson/types.js.map +1 -1
- package/dist/utils/gbnfJson/utils/GbnfJsonScopeState.d.ts +10 -0
- package/dist/utils/gbnfJson/utils/GbnfJsonScopeState.js +15 -0
- package/dist/utils/gbnfJson/utils/GbnfJsonScopeState.js.map +1 -0
- package/dist/utils/gbnfJson/utils/getGbnfJsonTerminalForGbnfJsonSchema.d.ts +2 -1
- package/dist/utils/gbnfJson/utils/getGbnfJsonTerminalForGbnfJsonSchema.js +6 -5
- package/dist/utils/gbnfJson/utils/getGbnfJsonTerminalForGbnfJsonSchema.js.map +1 -1
- package/dist/utils/gbnfJson/utils/validateObjectAgainstGbnfSchema.js +3 -3
- package/dist/utils/gbnfJson/utils/validateObjectAgainstGbnfSchema.js.map +1 -1
- package/dist/utils/getBuildDefaults.d.ts +1 -2
- package/dist/utils/getBuildDefaults.js +2 -3
- package/dist/utils/getBuildDefaults.js.map +1 -1
- package/dist/utils/getConsoleLogPrefix.d.ts +1 -1
- package/dist/utils/getConsoleLogPrefix.js +5 -4
- package/dist/utils/getConsoleLogPrefix.js.map +1 -1
- package/dist/utils/getGrammarsFolder.js +1 -1
- package/dist/utils/getGrammarsFolder.js.map +1 -1
- package/dist/utils/getQueuedTokensBeforeStopTrigger.d.ts +6 -0
- package/dist/utils/getQueuedTokensBeforeStopTrigger.js +22 -0
- package/dist/utils/getQueuedTokensBeforeStopTrigger.js.map +1 -0
- package/dist/utils/getReadableContextSize.d.ts +1 -0
- package/dist/utils/getReadableContextSize.js +7 -0
- package/dist/utils/getReadableContextSize.js.map +1 -0
- package/dist/utils/getTypeScriptTypeStringForGbnfJsonSchema.js +15 -11
- package/dist/utils/getTypeScriptTypeStringForGbnfJsonSchema.js.map +1 -1
- package/dist/utils/gitReleaseBundles.js +68 -1
- package/dist/utils/gitReleaseBundles.js.map +1 -1
- package/dist/utils/isToken.d.ts +2 -0
- package/dist/utils/isToken.js +4 -0
- package/dist/utils/isToken.js.map +1 -0
- package/dist/utils/isUrl.d.ts +1 -0
- package/dist/utils/isUrl.js +15 -0
- package/dist/utils/isUrl.js.map +1 -0
- package/dist/utils/mergeUnionTypes.d.ts +10 -0
- package/dist/utils/mergeUnionTypes.js +2 -0
- package/dist/utils/mergeUnionTypes.js.map +1 -0
- package/dist/utils/modelFileAccesTokens.d.ts +4 -0
- package/dist/utils/modelFileAccesTokens.js +40 -0
- package/dist/utils/modelFileAccesTokens.js.map +1 -0
- package/dist/utils/parseModelFileName.d.ts +1 -0
- package/dist/utils/parseModelFileName.js +6 -1
- package/dist/utils/parseModelFileName.js.map +1 -1
- package/dist/utils/parseTextTemplate.d.ts +66 -0
- package/dist/utils/parseTextTemplate.js +116 -0
- package/dist/utils/parseTextTemplate.js.map +1 -0
- package/dist/utils/prettyPrintObject.d.ts +10 -1
- package/dist/utils/prettyPrintObject.js +61 -15
- package/dist/utils/prettyPrintObject.js.map +1 -1
- package/dist/utils/pushAll.d.ts +6 -0
- package/dist/utils/pushAll.js +11 -0
- package/dist/utils/pushAll.js.map +1 -0
- package/dist/utils/removeNullFields.d.ts +2 -2
- package/dist/utils/removeNullFields.js.map +1 -1
- package/dist/utils/resolveGithubRelease.d.ts +2 -2
- package/dist/utils/resolveGithubRelease.js.map +1 -1
- package/dist/utils/resolveLastTokens.d.ts +2 -0
- package/dist/utils/resolveLastTokens.js +12 -0
- package/dist/utils/resolveLastTokens.js.map +1 -0
- package/dist/utils/runtime.d.ts +4 -0
- package/dist/utils/runtime.js +8 -0
- package/dist/utils/runtime.js.map +1 -0
- package/dist/utils/safeEventCallback.d.ts +6 -0
- package/dist/utils/safeEventCallback.js +29 -0
- package/dist/utils/safeEventCallback.js.map +1 -0
- package/dist/utils/spawnCommand.d.ts +11 -2
- package/dist/utils/spawnCommand.js +55 -7
- package/dist/utils/spawnCommand.js.map +1 -1
- package/dist/utils/tokenizeInput.d.ts +1 -1
- package/dist/utils/tokenizeInput.js +6 -3
- package/dist/utils/tokenizeInput.js.map +1 -1
- package/dist/utils/transformPromisable.d.ts +40 -0
- package/dist/utils/transformPromisable.js +64 -0
- package/dist/utils/transformPromisable.js.map +1 -0
- package/dist/utils/truncateTextAndRoundToWords.d.ts +2 -0
- package/dist/utils/truncateTextAndRoundToWords.js +32 -0
- package/dist/utils/truncateTextAndRoundToWords.js.map +1 -1
- package/dist/utils/utilTypes.d.ts +3 -0
- package/dist/utils/utilTypes.js +2 -0
- package/dist/utils/utilTypes.js.map +1 -0
- package/dist/utils/waitForLockfileRelease.js.map +1 -1
- package/dist/utils/withLockfile.js.map +1 -1
- package/dist/utils/withOra.d.ts +2 -0
- package/dist/utils/withOra.js +22 -6
- package/dist/utils/withOra.js.map +1 -1
- package/dist/utils/withProgressLog.d.ts +22 -0
- package/dist/utils/withProgressLog.js +211 -0
- package/dist/utils/withProgressLog.js.map +1 -0
- package/dist/utils/withStatusLogs.js +1 -1
- package/dist/utils/withStatusLogs.js.map +1 -1
- package/dist/utils/wrapAbortSignal.d.ts +1 -0
- package/dist/utils/wrapAbortSignal.js +9 -0
- package/dist/utils/wrapAbortSignal.js.map +1 -0
- package/llama/.clang-format +1 -2
- package/llama/CMakeLists.txt +134 -5
- package/llama/addon/AddonContext.cpp +629 -0
- package/llama/addon/AddonContext.h +52 -0
- package/llama/addon/AddonGrammar.cpp +39 -0
- package/llama/addon/AddonGrammar.h +19 -0
- package/llama/addon/AddonGrammarEvaluationState.cpp +25 -0
- package/llama/addon/AddonGrammarEvaluationState.h +17 -0
- package/llama/addon/AddonModel.cpp +672 -0
- package/llama/addon/AddonModel.h +61 -0
- package/llama/addon/AddonModelData.cpp +25 -0
- package/llama/addon/AddonModelData.h +15 -0
- package/llama/addon/AddonModelLora.cpp +105 -0
- package/llama/addon/AddonModelLora.h +28 -0
- package/llama/addon/AddonSampler.cpp +513 -0
- package/llama/addon/AddonSampler.h +65 -0
- package/llama/addon/RingBuffer.h +109 -0
- package/llama/addon/addon.cpp +223 -0
- package/llama/addon/addonGlobals.cpp +22 -0
- package/llama/addon/addonGlobals.h +12 -0
- package/llama/addon/globals/addonLog.cpp +136 -0
- package/llama/addon/globals/addonLog.h +21 -0
- package/llama/addon/globals/addonProgress.cpp +15 -0
- package/llama/addon/globals/addonProgress.h +15 -0
- package/llama/addon/globals/getGpuInfo.cpp +108 -0
- package/llama/addon/globals/getGpuInfo.h +6 -0
- package/llama/binariesGithubRelease.json +1 -1
- package/llama/gitRelease.bundle +0 -0
- package/llama/gpuInfo/cuda-gpu-info.cu +120 -0
- package/llama/gpuInfo/cuda-gpu-info.h +10 -0
- package/llama/gpuInfo/metal-gpu-info.h +8 -0
- package/llama/gpuInfo/metal-gpu-info.mm +30 -0
- package/llama/gpuInfo/vulkan-gpu-info.cpp +83 -0
- package/llama/gpuInfo/vulkan-gpu-info.h +9 -0
- package/llama/grammars/README.md +297 -6
- package/llama/grammars/json.gbnf +4 -4
- package/llama/grammars/json_arr.gbnf +4 -4
- package/llama/llama.cpp.info.json +1 -1
- package/llama/toolchains/win32.host-x64.target-arm64.cmake +41 -0
- package/package.json +109 -59
- package/templates/packed/electron-typescript-react.json +1 -0
- package/templates/packed/node-typescript.json +1 -0
- package/dist/AbortError.d.ts +0 -2
- package/dist/AbortError.js +0 -7
- package/dist/AbortError.js.map +0 -1
- package/dist/chatWrappers/LlamaChatWrapper.d.ts +0 -13
- package/dist/chatWrappers/LlamaChatWrapper.js.map +0 -1
- package/dist/chatWrappers/resolveChatWrapperBasedOnModel.d.ts +0 -13
- package/dist/chatWrappers/resolveChatWrapperBasedOnModel.js +0 -57
- package/dist/chatWrappers/resolveChatWrapperBasedOnModel.js.map +0 -1
- package/dist/cli/commands/BuildCommand.d.ts +0 -11
- package/dist/cli/commands/BuildCommand.js +0 -106
- package/dist/cli/commands/BuildCommand.js.map +0 -1
- package/dist/cli/commands/ClearCommand.js.map +0 -1
- package/dist/cli/commands/DownloadCommand.js +0 -169
- package/dist/cli/commands/DownloadCommand.js.map +0 -1
- package/dist/evaluator/LlamaChat/utils/FunctionCallGrammar.d.ts +0 -22
- package/dist/evaluator/LlamaChat/utils/FunctionCallGrammar.js +0 -121
- package/dist/evaluator/LlamaChat/utils/FunctionCallGrammar.js.map +0 -1
- package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/firstInFirstOutStrategy.js.map +0 -1
- package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/maximumParallelismStrategy.js.map +0 -1
- package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizingStrategy.d.ts +0 -2
- package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizingStrategy.js.map +0 -1
- package/dist/evaluator/LlamaModel.d.ts +0 -120
- package/dist/evaluator/LlamaModel.js +0 -320
- package/dist/evaluator/LlamaModel.js.map +0 -1
- package/dist/utils/getGbnfGrammarForGbnfJsonSchema.d.ts +0 -2
- package/dist/utils/getGbnfGrammarForGbnfJsonSchema.js +0 -9
- package/dist/utils/getGbnfGrammarForGbnfJsonSchema.js.map +0 -1
- package/dist/utils/parseModelTypeDescription.d.ts +0 -6
- package/dist/utils/parseModelTypeDescription.js +0 -9
- package/dist/utils/parseModelTypeDescription.js.map +0 -1
- package/dist/utils/resolveChatWrapper.d.ts +0 -4
- package/dist/utils/resolveChatWrapper.js +0 -16
- package/dist/utils/resolveChatWrapper.js.map +0 -1
- package/llama/addon.cpp +0 -916
- package/llamaBins/linux-arm64/.buildMetadata.json +0 -1
- package/llamaBins/linux-arm64/llama-addon.node +0 -0
- package/llamaBins/linux-armv7l/.buildMetadata.json +0 -1
- package/llamaBins/linux-armv7l/llama-addon.node +0 -0
- package/llamaBins/linux-x64/.buildMetadata.json +0 -1
- package/llamaBins/linux-x64/llama-addon.node +0 -0
- package/llamaBins/linux-x64-cuda/.buildMetadata.json +0 -1
- package/llamaBins/linux-x64-cuda/llama-addon.node +0 -0
- package/llamaBins/mac-arm64-metal/.buildMetadata.json +0 -1
- package/llamaBins/mac-arm64-metal/ggml-metal.metal +0 -6119
- package/llamaBins/mac-arm64-metal/llama-addon.node +0 -0
- package/llamaBins/mac-x64/.buildMetadata.json +0 -1
- package/llamaBins/mac-x64/llama-addon.node +0 -0
- package/llamaBins/win-x64/.buildMetadata.json +0 -1
- package/llamaBins/win-x64/llama-addon.exp +0 -0
- package/llamaBins/win-x64/llama-addon.lib +0 -0
- package/llamaBins/win-x64/llama-addon.node +0 -0
- package/llamaBins/win-x64-cuda/.buildMetadata.json +0 -1
- package/llamaBins/win-x64-cuda/llama-addon.exp +0 -0
- package/llamaBins/win-x64-cuda/llama-addon.lib +0 -0
- package/llamaBins/win-x64-cuda/llama-addon.node +0 -0
- /package/dist/evaluator/LlamaContext/utils/{batchItemsPrioritizingStrategies → batchItemsPrioritizationStrategies}/firstInFirstOutStrategy.d.ts +0 -0
- /package/dist/evaluator/LlamaContext/utils/{batchItemsPrioritizingStrategies → batchItemsPrioritizationStrategies}/firstInFirstOutStrategy.js +0 -0
- /package/dist/evaluator/LlamaContext/utils/{batchItemsPrioritizingStrategies → batchItemsPrioritizationStrategies}/maximumParallelismStrategy.d.ts +0 -0
- /package/dist/evaluator/LlamaContext/utils/{batchItemsPrioritizingStrategies → batchItemsPrioritizationStrategies}/maximumParallelismStrategy.js +0 -0
|
@@ -1,66 +1,113 @@
|
|
|
1
|
-
import { DisposeAggregator, EventRelay, withLock
|
|
1
|
+
import { AsyncDisposeAggregator, DisposeAggregator, DisposedError, EventRelay, withLock } from "lifecycle-utils";
|
|
2
2
|
import { removeNullFields } from "../../utils/removeNullFields.js";
|
|
3
3
|
import { compareTokens } from "../../utils/compareTokens.js";
|
|
4
|
-
import {
|
|
4
|
+
import { DisposeGuard } from "../../utils/DisposeGuard.js";
|
|
5
|
+
import { TokenMeter } from "../TokenMeter.js";
|
|
6
|
+
import { UnsupportedError } from "../../utils/UnsupportedError.js";
|
|
7
|
+
import { resolveBatchItemsPrioritizationStrategy } from "./utils/resolveBatchItemsPrioritizationStrategy.js";
|
|
8
|
+
import { LlamaSampler } from "./LlamaSampler.js";
|
|
9
|
+
const defaultLoraScale = 1;
|
|
10
|
+
const shrinkRetriesMinContextSize = 4096;
|
|
11
|
+
const defaultMaxPunishTokens = 64;
|
|
12
|
+
const defaultFailedCreationRemedy = {
|
|
13
|
+
retries: 6,
|
|
14
|
+
autoContextSizeShrink: 0.16
|
|
15
|
+
};
|
|
5
16
|
export class LlamaContext {
|
|
6
17
|
/** @internal */ _llama;
|
|
7
18
|
/** @internal */ _ctx;
|
|
8
19
|
/** @internal */ _onReclaimUnusedSequenceId = new EventRelay();
|
|
20
|
+
/** @internal */ _backendContextDisposeGuard;
|
|
9
21
|
/** @internal */ _model;
|
|
10
22
|
/** @internal */ _contextSize;
|
|
11
23
|
/** @internal */ _batchSize;
|
|
24
|
+
/** @internal */ _flashAttention;
|
|
25
|
+
/** @internal */ _idealThreads;
|
|
26
|
+
/** @internal */ _minThreads;
|
|
27
|
+
/** @internal */ _performanceTracking;
|
|
12
28
|
/** @internal */ _totalSequences;
|
|
13
29
|
/** @internal */ _unusedSequenceIds = [];
|
|
14
30
|
/** @internal */ _batchingOptions;
|
|
15
31
|
/** @internal */ _queuedDecodeSequenceIds = new Set();
|
|
16
32
|
/** @internal */ _queuedDecodes = [];
|
|
17
|
-
/** @internal */ _disposeAggregator = new
|
|
33
|
+
/** @internal */ _disposeAggregator = new AsyncDisposeAggregator();
|
|
34
|
+
/** @internal */ _modelPreventDisposalHandle;
|
|
35
|
+
/** @internal */ _loraAdapters = new Set();
|
|
36
|
+
/** @internal */ _gcRegistry;
|
|
18
37
|
/** @internal */ _nextGeneratedSequenceId = 0;
|
|
19
38
|
/** @internal */ _dispatchDecodeScheduled = false;
|
|
20
39
|
/** @internal */ _batchDispatchPending = false;
|
|
40
|
+
/** @internal */ _threadSplitterConsumer;
|
|
41
|
+
/** @internal */ _freeReservedThreadsTimeout;
|
|
21
42
|
/** @internal */ _currentDispatchBatchHandle = {};
|
|
22
43
|
/** @internal */ _allocatedContextSize;
|
|
23
44
|
/** @internal */ _disposed = false;
|
|
24
45
|
onDispose = new EventRelay();
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
*/
|
|
28
|
-
constructor({ model, sequences = 1, seed = null, contextSize = model.trainContextSize, batchSize = contextSize, threads = 6, batching: { dispatchSchedule: batchingDispatchSchedule = "nextTick", itemsPrioritizingStrategy: batchingItemsPrioritizingStrategy = "maximumParallelism" } = {}, _embedding, _noSeed }) {
|
|
29
|
-
if (model.disposed)
|
|
46
|
+
constructor({ _model }, { sequences, contextSize, batchSize, flashAttention = _model.defaultContextFlashAttention, threads, batching: { dispatchSchedule: batchingDispatchSchedule = "nextTick", itemPrioritizationStrategy: batchingItemsPrioritizationStrategy = "maximumParallelism" } = {}, performanceTracking = false, _embeddings }) {
|
|
47
|
+
if (_model.disposed)
|
|
30
48
|
throw new DisposedError();
|
|
31
|
-
this._llama =
|
|
32
|
-
this._model =
|
|
49
|
+
this._llama = _model._llama;
|
|
50
|
+
this._model = _model;
|
|
51
|
+
this._backendContextDisposeGuard = new DisposeGuard([this._model._backendModelDisposeGuard]);
|
|
52
|
+
this._modelPreventDisposalHandle = this._model._backendModelDisposeGuard.createPreventDisposalHandle();
|
|
33
53
|
this._totalSequences = Math.max(1, Math.floor(sequences));
|
|
34
54
|
this._contextSize = Math.max(2, contextSize);
|
|
35
55
|
this._batchSize = Math.max(batchSize, this._totalSequences);
|
|
56
|
+
this._flashAttention = flashAttention;
|
|
57
|
+
this._idealThreads = typeof threads === "number"
|
|
58
|
+
? this._llama._threadsSplitter.normalizeThreadsValue(threads)
|
|
59
|
+
: this._llama._threadsSplitter.normalizeThreadsValue(threads?.ideal ?? (this._llama.maxThreads === 0
|
|
60
|
+
? this._llama.cpuMathCores
|
|
61
|
+
: this._llama.maxThreads));
|
|
62
|
+
this._minThreads = Math.max(1, typeof threads === "number"
|
|
63
|
+
? 1
|
|
64
|
+
: this._llama._threadsSplitter.normalizeThreadsValue(threads?.min ?? 1));
|
|
65
|
+
this._performanceTracking = !!performanceTracking;
|
|
36
66
|
this._ctx = new this._llama._bindings.AddonContext(this._model._model, removeNullFields({
|
|
37
|
-
|
|
38
|
-
contextSize: contextSize * this._totalSequences,
|
|
67
|
+
contextSize: this._contextSize * this._totalSequences, // each sequence needs its own <contextSize> of cells
|
|
39
68
|
batchSize: this._batchSize,
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
69
|
+
sequences: this._totalSequences,
|
|
70
|
+
flashAttention: this._flashAttention,
|
|
71
|
+
threads: this._idealThreads,
|
|
72
|
+
embeddings: _embeddings,
|
|
73
|
+
performanceTracking: this._performanceTracking
|
|
43
74
|
}));
|
|
44
75
|
this._batchingOptions = {
|
|
45
76
|
dispatchSchedule: batchingDispatchSchedule,
|
|
46
|
-
|
|
77
|
+
itemPrioritizationStrategy: batchingItemsPrioritizationStrategy
|
|
47
78
|
};
|
|
79
|
+
this._gcRegistry = new FinalizationRegistry(this._model._removeLoraUsage);
|
|
80
|
+
this._gcRegistry.register(this, this._loraAdapters);
|
|
48
81
|
this._reclaimUnusedSequenceId = this._reclaimUnusedSequenceId.bind(this);
|
|
82
|
+
this._freeReservedThreads = this._freeReservedThreads.bind(this);
|
|
83
|
+
this._disposeAggregator.add(() => {
|
|
84
|
+
this._disposed = true;
|
|
85
|
+
});
|
|
86
|
+
this._disposeAggregator.add(() => void this._gcRegistry.unregister(this));
|
|
49
87
|
this._disposeAggregator.add(this._onReclaimUnusedSequenceId);
|
|
50
88
|
this._disposeAggregator.add(this.onDispose.dispatchEvent);
|
|
89
|
+
this._disposeAggregator.add(this.model.onDispose.createListener(disposeContextIfReferenced.bind(null, new WeakRef(this))));
|
|
51
90
|
this._disposeAggregator.add(() => {
|
|
52
|
-
this.
|
|
91
|
+
if (this._loraAdapters.size > 0) {
|
|
92
|
+
const loraAdapters = new Set(this._loraAdapters);
|
|
93
|
+
this._loraAdapters.clear();
|
|
94
|
+
return this._model._removeLoraUsage(loraAdapters);
|
|
95
|
+
}
|
|
96
|
+
});
|
|
97
|
+
this._disposeAggregator.add(async () => {
|
|
98
|
+
await this._backendContextDisposeGuard.acquireDisposeLock();
|
|
99
|
+
await this._ctx.dispose();
|
|
100
|
+
this._modelPreventDisposalHandle.dispose();
|
|
53
101
|
});
|
|
54
|
-
this._disposeAggregator.add(this.model.onDispose.createListener(disposeContextIfReferenced.bind(null, new WeakRef(this))));
|
|
55
102
|
}
|
|
56
|
-
dispose() {
|
|
103
|
+
async dispose() {
|
|
57
104
|
if (this._disposed)
|
|
58
105
|
return;
|
|
59
106
|
this._disposed = true;
|
|
60
|
-
this._disposeAggregator.dispose();
|
|
107
|
+
await this._disposeAggregator.dispose();
|
|
61
108
|
}
|
|
62
109
|
/** @hidden */
|
|
63
|
-
[Symbol.
|
|
110
|
+
[Symbol.asyncDispose]() {
|
|
64
111
|
return this.dispose();
|
|
65
112
|
}
|
|
66
113
|
get disposed() {
|
|
@@ -75,6 +122,30 @@ export class LlamaContext {
|
|
|
75
122
|
get batchSize() {
|
|
76
123
|
return this._batchSize;
|
|
77
124
|
}
|
|
125
|
+
get flashAttention() {
|
|
126
|
+
return this._flashAttention;
|
|
127
|
+
}
|
|
128
|
+
/**
|
|
129
|
+
* The actual size of the state in the memory in bytes.
|
|
130
|
+
* This value is provided by `llama.cpp` and doesn't include all the memory overhead of the context.
|
|
131
|
+
*/
|
|
132
|
+
get stateSize() {
|
|
133
|
+
this._ensureNotDisposed();
|
|
134
|
+
return this._ctx.getStateSize();
|
|
135
|
+
}
|
|
136
|
+
/** The number of threads currently used to evaluate tokens */
|
|
137
|
+
get currentThreads() {
|
|
138
|
+
this._ensureNotDisposed();
|
|
139
|
+
return this._ctx.getThreads();
|
|
140
|
+
}
|
|
141
|
+
/**
|
|
142
|
+
* The number of threads that are preferred to be used to evaluate tokens.
|
|
143
|
+
*
|
|
144
|
+
* The actual number of threads used may be lower when other evaluations are running in parallel.
|
|
145
|
+
*/
|
|
146
|
+
get idealThreads() {
|
|
147
|
+
return this._idealThreads;
|
|
148
|
+
}
|
|
78
149
|
getAllocatedContextSize() {
|
|
79
150
|
this._ensureNotDisposed();
|
|
80
151
|
if (this._allocatedContextSize == null)
|
|
@@ -90,9 +161,9 @@ export class LlamaContext {
|
|
|
90
161
|
/**
|
|
91
162
|
* Before calling this method, make sure to call `sequencesLeft` to check if there are any sequences left.
|
|
92
163
|
* When there are no sequences left, this method will throw an error.
|
|
93
|
-
* @param [options]
|
|
94
164
|
*/
|
|
95
|
-
getSequence(
|
|
165
|
+
getSequence(options = {}) {
|
|
166
|
+
const { contextShift: { size: contextShiftSize = Math.min(100, Math.ceil(this.contextSize / 2)), strategy: contextShiftStrategy = "eraseBeginning" } = {}, _tokenMeter } = options;
|
|
96
167
|
this._ensureNotDisposed();
|
|
97
168
|
const nextSequenceId = this._popSequenceId();
|
|
98
169
|
if (nextSequenceId == null)
|
|
@@ -100,6 +171,7 @@ export class LlamaContext {
|
|
|
100
171
|
return LlamaContextSequence._create({
|
|
101
172
|
sequenceId: nextSequenceId,
|
|
102
173
|
context: this,
|
|
174
|
+
tokenMeter: _tokenMeter,
|
|
103
175
|
contextShift: {
|
|
104
176
|
size: contextShiftSize,
|
|
105
177
|
strategy: contextShiftStrategy
|
|
@@ -116,17 +188,18 @@ export class LlamaContext {
|
|
|
116
188
|
this._currentDispatchBatchHandle = {};
|
|
117
189
|
this._dispatchDecodeScheduled = false;
|
|
118
190
|
this._batchDispatchPending = false;
|
|
119
|
-
let
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
191
|
+
let shouldHaveAnotherLoop = this._queuedDecodes.length > 0;
|
|
192
|
+
const resolvePrioritizationStrategy = () => {
|
|
193
|
+
try {
|
|
194
|
+
this._ensureNotDisposed();
|
|
195
|
+
return resolveBatchItemsPrioritizationStrategy(this._batchingOptions.itemPrioritizationStrategy);
|
|
196
|
+
}
|
|
197
|
+
catch (err) {
|
|
198
|
+
this._dispatchErrorForQueuedDecodesAndDequeue(new Set(this._queuedDecodes), err);
|
|
199
|
+
}
|
|
200
|
+
return null;
|
|
201
|
+
};
|
|
202
|
+
const getOrderedQueuedDecodes = (prioritizationStrategy) => {
|
|
130
203
|
const batchItemToQueuedDecodeMap = new Map();
|
|
131
204
|
const batchItemsList = [];
|
|
132
205
|
for (const queuedDecode of this._queuedDecodes) {
|
|
@@ -139,42 +212,65 @@ export class LlamaContext {
|
|
|
139
212
|
}
|
|
140
213
|
let prioritizedItems;
|
|
141
214
|
try {
|
|
142
|
-
prioritizedItems =
|
|
215
|
+
prioritizedItems = prioritizationStrategy({
|
|
143
216
|
items: batchItemsList,
|
|
144
217
|
size: this._batchSize
|
|
145
218
|
});
|
|
146
219
|
}
|
|
147
220
|
catch (err) {
|
|
148
221
|
this._dispatchErrorForQueuedDecodesAndDequeue(new Set(this._queuedDecodes), err);
|
|
149
|
-
return;
|
|
222
|
+
return null;
|
|
150
223
|
}
|
|
151
|
-
|
|
152
|
-
const afterDecodeActions = [];
|
|
153
|
-
const queuedDecodesToDelete = new Set();
|
|
154
|
-
const currentQueuedDecodeItems = new Set();
|
|
155
|
-
const currentBatchItems = [];
|
|
156
|
-
let currentBatchSize = 0;
|
|
157
|
-
for (const prioritizedItem of prioritizedItems) {
|
|
224
|
+
return prioritizedItems.map((prioritizedItem) => {
|
|
158
225
|
const queuedDecode = batchItemToQueuedDecodeMap.get(prioritizedItem.item);
|
|
159
226
|
if (queuedDecode == null)
|
|
160
227
|
throw new Error("Received invalid batch item. Make sure you keep the original object reference " +
|
|
161
228
|
"of the batch item on `item` on `PrioritizedBatchItem` in your custom prioritization strategy");
|
|
162
|
-
|
|
163
|
-
|
|
229
|
+
return {
|
|
230
|
+
queuedDecode,
|
|
231
|
+
processAmount: prioritizedItem.processAmount
|
|
232
|
+
};
|
|
233
|
+
});
|
|
234
|
+
};
|
|
235
|
+
const fitQueuedDecodesToABatch = (queuedDecodes, batchSize) => {
|
|
236
|
+
const currentBatchItems = [];
|
|
237
|
+
let currentBatchSize = 0;
|
|
238
|
+
let batchTokenSlotsLeft = batchSize;
|
|
239
|
+
for (const { queuedDecode, processAmount } of queuedDecodes) {
|
|
240
|
+
const resolvedProcessAmount = Math.min(processAmount <= 0 ? 1 : processAmount, queuedDecode.tokens.length, batchTokenSlotsLeft);
|
|
241
|
+
if (resolvedProcessAmount <= 0) {
|
|
242
|
+
if (batchTokenSlotsLeft === 0)
|
|
243
|
+
break;
|
|
164
244
|
continue;
|
|
165
|
-
|
|
245
|
+
}
|
|
246
|
+
batchTokenSlotsLeft -= resolvedProcessAmount;
|
|
247
|
+
currentBatchSize += resolvedProcessAmount;
|
|
166
248
|
currentBatchItems.push({
|
|
167
249
|
queuedDecode,
|
|
168
|
-
processAmount
|
|
250
|
+
processAmount: resolvedProcessAmount
|
|
169
251
|
});
|
|
170
|
-
currentBatchSize += processAmount;
|
|
171
252
|
}
|
|
253
|
+
return {
|
|
254
|
+
currentBatchItems,
|
|
255
|
+
currentBatchSize
|
|
256
|
+
};
|
|
257
|
+
};
|
|
258
|
+
const decodeTokenBatchItems = async (batchItems, currentBatchSize) => {
|
|
259
|
+
const afterDecodeActions = [];
|
|
260
|
+
const queuedDecodesToDelete = new Set();
|
|
261
|
+
const currentQueuedDecodeItems = new Set();
|
|
172
262
|
if (currentBatchSize !== 0)
|
|
173
263
|
this._ctx.initBatch(currentBatchSize);
|
|
174
|
-
for (const { queuedDecode, processAmount } of
|
|
264
|
+
for (const { queuedDecode, processAmount } of batchItems) {
|
|
175
265
|
let batchLogitIndex;
|
|
176
266
|
try {
|
|
177
|
-
|
|
267
|
+
const shouldGenerateLogitAtTheEnd = queuedDecode.generateLogitAtTheEnd &&
|
|
268
|
+
processAmount === queuedDecode.tokens.length;
|
|
269
|
+
const tokensToProcess = queuedDecode.tokens.slice(0, processAmount);
|
|
270
|
+
const numberOfOutputTokens = shouldGenerateLogitAtTheEnd ? 1 : 0;
|
|
271
|
+
TokenMeter.useTokens(queuedDecode.tokenMeter, Math.max(0, tokensToProcess.length - numberOfOutputTokens), "input");
|
|
272
|
+
TokenMeter.useTokens(queuedDecode.tokenMeter, numberOfOutputTokens, "output");
|
|
273
|
+
batchLogitIndex = this._ctx.addToBatch(queuedDecode.sequenceId, queuedDecode.firstTokenSequenceIndex, Uint32Array.from(tokensToProcess), shouldGenerateLogitAtTheEnd);
|
|
178
274
|
}
|
|
179
275
|
catch (err) {
|
|
180
276
|
this._dispatchErrorForQueuedDecodesAndDequeue(new Set([queuedDecode]), err);
|
|
@@ -193,8 +289,6 @@ export class LlamaContext {
|
|
|
193
289
|
queuedDecode.tokens = queuedDecode.tokens.slice(processAmount);
|
|
194
290
|
queuedDecode.firstTokenSequenceIndex += processAmount;
|
|
195
291
|
}
|
|
196
|
-
if (batchTokenSlotsLeft === 0)
|
|
197
|
-
break;
|
|
198
292
|
}
|
|
199
293
|
for (let i = 0; i < this._queuedDecodes.length; i++) {
|
|
200
294
|
const queuedDecode = this._queuedDecodes[i];
|
|
@@ -204,14 +298,22 @@ export class LlamaContext {
|
|
|
204
298
|
i--;
|
|
205
299
|
}
|
|
206
300
|
}
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
301
|
+
if (currentBatchSize !== 0) {
|
|
302
|
+
const allocationResult = this._threadSplitterConsumer?.getAllocationToConsume();
|
|
303
|
+
const [threadsToUse, consumerHandle] = allocationResult instanceof Promise
|
|
304
|
+
? await allocationResult ?? []
|
|
305
|
+
: allocationResult ?? [];
|
|
306
|
+
try {
|
|
307
|
+
if (threadsToUse != null)
|
|
308
|
+
this._ctx.setThreads(threadsToUse);
|
|
210
309
|
await this._ctx.decodeBatch();
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
310
|
+
consumerHandle?.dispose();
|
|
311
|
+
}
|
|
312
|
+
catch (err) {
|
|
313
|
+
consumerHandle?.dispose();
|
|
314
|
+
this._dispatchErrorForQueuedDecodesAndDequeue(currentQueuedDecodeItems, err);
|
|
315
|
+
return;
|
|
316
|
+
}
|
|
215
317
|
}
|
|
216
318
|
for (const action of afterDecodeActions) {
|
|
217
319
|
const [accept, reject] = action.response;
|
|
@@ -225,14 +327,56 @@ export class LlamaContext {
|
|
|
225
327
|
}
|
|
226
328
|
accept(undefined);
|
|
227
329
|
}
|
|
330
|
+
};
|
|
331
|
+
const prioritizationStrategy = resolvePrioritizationStrategy();
|
|
332
|
+
if (prioritizationStrategy == null)
|
|
333
|
+
return; // all queued items are rejected and dequeued when we get here
|
|
334
|
+
this._reserveThreads();
|
|
335
|
+
try {
|
|
336
|
+
while (shouldHaveAnotherLoop) {
|
|
337
|
+
const orderedQueuedDecodes = getOrderedQueuedDecodes(prioritizationStrategy);
|
|
338
|
+
if (orderedQueuedDecodes == null)
|
|
339
|
+
return; // all queued items are rejected and dequeued when we get here
|
|
340
|
+
const { currentBatchItems, currentBatchSize } = fitQueuedDecodesToABatch(orderedQueuedDecodes, this._batchSize);
|
|
341
|
+
let preventDisposalHandle;
|
|
342
|
+
try {
|
|
343
|
+
preventDisposalHandle = this._backendContextDisposeGuard.createPreventDisposalHandle();
|
|
344
|
+
}
|
|
345
|
+
catch (err) {
|
|
346
|
+
this._dispatchErrorForQueuedDecodesAndDequeue(new Set(this._queuedDecodes), err);
|
|
347
|
+
return;
|
|
348
|
+
}
|
|
349
|
+
try {
|
|
350
|
+
await decodeTokenBatchItems(currentBatchItems, currentBatchSize);
|
|
351
|
+
shouldHaveAnotherLoop = this._queuedDecodes.length > 0;
|
|
352
|
+
}
|
|
353
|
+
finally {
|
|
354
|
+
preventDisposalHandle.dispose();
|
|
355
|
+
}
|
|
356
|
+
}
|
|
357
|
+
}
|
|
358
|
+
finally {
|
|
359
|
+
this._scheduleToFreeReservedThreads();
|
|
228
360
|
}
|
|
229
361
|
});
|
|
230
362
|
}
|
|
231
|
-
|
|
363
|
+
/**
|
|
364
|
+
* Print the timings of token evaluation since that last print for this context.
|
|
365
|
+
*
|
|
366
|
+
* Requires the `performanceTracking` option to be enabled.
|
|
367
|
+
*
|
|
368
|
+
* > **Note:** it prints on the `LlamaLogLevel.info` level, so if you set the level of your `Llama` instance higher than that,
|
|
369
|
+
* it won't print anything.
|
|
370
|
+
*/
|
|
371
|
+
async printTimings() {
|
|
372
|
+
this._ensureNotDisposed();
|
|
373
|
+
if (!this._performanceTracking)
|
|
374
|
+
throw new UnsupportedError("Performance tracking is not enabled");
|
|
232
375
|
this._ctx.printTimings();
|
|
376
|
+
await new Promise((accept) => setTimeout(accept, 0)); // wait for the logs to finish printing
|
|
233
377
|
}
|
|
234
378
|
/** @internal */
|
|
235
|
-
async _decodeTokens({ sequenceId, firstTokenSequenceIndex, tokens, generateLogitAtTheEnd = false, evaluationPriority = 5 }, onDone) {
|
|
379
|
+
async _decodeTokens({ sequenceId, firstTokenSequenceIndex, tokens, generateLogitAtTheEnd = false, evaluationPriority = 5, tokenMeter }, onDone) {
|
|
236
380
|
return await new Promise((accept, reject) => {
|
|
237
381
|
this._queuedDecodes.push({
|
|
238
382
|
sequenceId,
|
|
@@ -240,6 +384,7 @@ export class LlamaContext {
|
|
|
240
384
|
firstTokenSequenceIndex,
|
|
241
385
|
generateLogitAtTheEnd,
|
|
242
386
|
evaluationPriority,
|
|
387
|
+
tokenMeter,
|
|
243
388
|
response: [accept, reject],
|
|
244
389
|
onDone
|
|
245
390
|
});
|
|
@@ -252,16 +397,14 @@ export class LlamaContext {
|
|
|
252
397
|
if (this._disposed)
|
|
253
398
|
return;
|
|
254
399
|
void withLock(this, "context", async () => {
|
|
400
|
+
if (this._disposed)
|
|
401
|
+
return;
|
|
255
402
|
this._ctx.disposeSequence(sequenceId);
|
|
256
403
|
this._unusedSequenceIds.push(sequenceId);
|
|
257
404
|
this._onReclaimUnusedSequenceId.dispatchEvent();
|
|
258
405
|
});
|
|
259
406
|
}
|
|
260
407
|
/** @internal */
|
|
261
|
-
_acceptTokenOnGrammarEvaluationState(grammarEvaluationState, token) {
|
|
262
|
-
this._ctx.acceptGrammarEvaluationStateToken(grammarEvaluationState._state, token);
|
|
263
|
-
}
|
|
264
|
-
/** @internal */
|
|
265
408
|
_popSequenceId() {
|
|
266
409
|
if (this._unusedSequenceIds.length > 0)
|
|
267
410
|
return this._unusedSequenceIds.shift();
|
|
@@ -311,20 +454,177 @@ export class LlamaContext {
|
|
|
311
454
|
if (this._disposed)
|
|
312
455
|
throw new DisposedError();
|
|
313
456
|
}
|
|
457
|
+
/** @internal */
|
|
458
|
+
async _setLora({ filePath, scale }) {
|
|
459
|
+
const lora = await this._model._getOrLoadLora(filePath);
|
|
460
|
+
this._ctx.setLora(lora, scale ?? defaultLoraScale);
|
|
461
|
+
if (!this._loraAdapters.has(lora)) {
|
|
462
|
+
this._loraAdapters.add(lora);
|
|
463
|
+
lora.usages++;
|
|
464
|
+
}
|
|
465
|
+
}
|
|
466
|
+
/** @internal */
|
|
467
|
+
_reserveThreads() {
|
|
468
|
+
clearTimeout(this._freeReservedThreadsTimeout);
|
|
469
|
+
delete this._freeReservedThreadsTimeout;
|
|
470
|
+
if (this._threadSplitterConsumer != null)
|
|
471
|
+
return;
|
|
472
|
+
this._threadSplitterConsumer = this._llama._threadsSplitter.createConsumer(this._idealThreads, this._minThreads);
|
|
473
|
+
}
|
|
474
|
+
/** @internal */
|
|
475
|
+
_freeReservedThreads() {
|
|
476
|
+
clearTimeout(this._freeReservedThreadsTimeout);
|
|
477
|
+
delete this._freeReservedThreadsTimeout;
|
|
478
|
+
if (this._threadSplitterConsumer == null)
|
|
479
|
+
return;
|
|
480
|
+
this._threadSplitterConsumer.dispose();
|
|
481
|
+
delete this._threadSplitterConsumer;
|
|
482
|
+
}
|
|
483
|
+
/** @internal */
|
|
484
|
+
_scheduleToFreeReservedThreads() {
|
|
485
|
+
if (this._threadSplitterConsumer == null)
|
|
486
|
+
return;
|
|
487
|
+
clearTimeout(this._freeReservedThreadsTimeout);
|
|
488
|
+
this._freeReservedThreadsTimeout = setTimeout(this._freeReservedThreads, 0);
|
|
489
|
+
}
|
|
490
|
+
/** @internal */
|
|
491
|
+
static async _create(options, { _model }) {
|
|
492
|
+
const sequences = options.sequences ?? getDefaultContextSequences();
|
|
493
|
+
const flashAttention = _model.flashAttentionSupported
|
|
494
|
+
? Boolean(options.flashAttention ?? _model.defaultContextFlashAttention)
|
|
495
|
+
: false;
|
|
496
|
+
const loraOptions = typeof options.lora === "string"
|
|
497
|
+
? { adapters: [{ filePath: options.lora }] }
|
|
498
|
+
: options.lora;
|
|
499
|
+
let failedCreationRetries = options.failedCreationRemedy === false
|
|
500
|
+
? 0
|
|
501
|
+
: Math.max(0, options.failedCreationRemedy?.retries ?? defaultFailedCreationRemedy.retries);
|
|
502
|
+
const failedCreationAutoContextSizeShrink = options.failedCreationRemedy === false
|
|
503
|
+
? 0
|
|
504
|
+
: options.failedCreationRemedy?.autoContextSizeShrink ?? defaultFailedCreationRemedy.autoContextSizeShrink;
|
|
505
|
+
let contextSize = await _model.fileInsights.configurationResolver.resolveContextContextSize(options.contextSize, {
|
|
506
|
+
batchSize: options.batchSize,
|
|
507
|
+
sequences: sequences,
|
|
508
|
+
modelGpuLayers: _model.gpuLayers,
|
|
509
|
+
modelTrainContextSize: _model.trainContextSize,
|
|
510
|
+
flashAttention,
|
|
511
|
+
getVramState: () => _model._llama._vramOrchestrator.getMemoryState(),
|
|
512
|
+
llamaGpu: _model._llama.gpu,
|
|
513
|
+
ignoreMemorySafetyChecks: options.ignoreMemorySafetyChecks,
|
|
514
|
+
isEmbeddingContext: options._embeddings
|
|
515
|
+
});
|
|
516
|
+
const minContextSize = options.contextSize === "auto"
|
|
517
|
+
? shrinkRetriesMinContextSize
|
|
518
|
+
: (typeof options.contextSize === "object" && typeof options.contextSize.min === "number")
|
|
519
|
+
? options.contextSize.min
|
|
520
|
+
: typeof options.contextSize === "number"
|
|
521
|
+
? options.contextSize
|
|
522
|
+
: shrinkRetriesMinContextSize;
|
|
523
|
+
const { createSignal } = options;
|
|
524
|
+
async function createContext(contextSize) {
|
|
525
|
+
const batchSize = options.batchSize ?? getDefaultContextBatchSize({ contextSize, sequences });
|
|
526
|
+
const vramRequiredEstimate = _model.fileInsights.estimateContextResourceRequirements({
|
|
527
|
+
contextSize,
|
|
528
|
+
sequences,
|
|
529
|
+
isEmbeddingContext: options._embeddings,
|
|
530
|
+
modelGpuLayers: _model.gpuLayers,
|
|
531
|
+
batchSize,
|
|
532
|
+
flashAttention
|
|
533
|
+
}).gpuVram;
|
|
534
|
+
const context = new LlamaContext({ _model }, { ...options, contextSize, batchSize, sequences, flashAttention });
|
|
535
|
+
const contextCreationMemoryReservation = options.ignoreMemorySafetyChecks
|
|
536
|
+
? null
|
|
537
|
+
: _model._llama._vramOrchestrator.reserveMemory(vramRequiredEstimate);
|
|
538
|
+
try {
|
|
539
|
+
if (createSignal?.aborted)
|
|
540
|
+
throw createSignal.reason;
|
|
541
|
+
const contextLoaded = await context._ctx.init();
|
|
542
|
+
if (createSignal?.aborted) {
|
|
543
|
+
if (contextLoaded)
|
|
544
|
+
await context._ctx.dispose();
|
|
545
|
+
throw createSignal.reason;
|
|
546
|
+
}
|
|
547
|
+
else if (!contextLoaded)
|
|
548
|
+
throw new Error("Failed to create context");
|
|
549
|
+
contextCreationMemoryReservation?.dispose?.();
|
|
550
|
+
if (loraOptions != null && loraOptions.adapters.length > 0) {
|
|
551
|
+
let loadedAdapters = 0;
|
|
552
|
+
for (const adapter of loraOptions.adapters) {
|
|
553
|
+
try {
|
|
554
|
+
await context._setLora({
|
|
555
|
+
filePath: adapter.filePath,
|
|
556
|
+
scale: adapter.scale
|
|
557
|
+
});
|
|
558
|
+
loadedAdapters++;
|
|
559
|
+
try {
|
|
560
|
+
loraOptions.onLoadProgress?.(loadedAdapters / loraOptions.adapters.length);
|
|
561
|
+
}
|
|
562
|
+
catch (err) {
|
|
563
|
+
console.error(err);
|
|
564
|
+
}
|
|
565
|
+
}
|
|
566
|
+
catch (err) {
|
|
567
|
+
await context.dispose();
|
|
568
|
+
throw err;
|
|
569
|
+
}
|
|
570
|
+
if (createSignal?.aborted) {
|
|
571
|
+
await context.dispose();
|
|
572
|
+
throw createSignal.reason;
|
|
573
|
+
}
|
|
574
|
+
}
|
|
575
|
+
}
|
|
576
|
+
else if (loraOptions?.onLoadProgress != null) {
|
|
577
|
+
try {
|
|
578
|
+
loraOptions.onLoadProgress(1);
|
|
579
|
+
}
|
|
580
|
+
catch (err) {
|
|
581
|
+
console.error(err);
|
|
582
|
+
}
|
|
583
|
+
}
|
|
584
|
+
return context;
|
|
585
|
+
}
|
|
586
|
+
finally {
|
|
587
|
+
contextCreationMemoryReservation?.dispose?.();
|
|
588
|
+
}
|
|
589
|
+
}
|
|
590
|
+
while (failedCreationRetries >= 0) {
|
|
591
|
+
try {
|
|
592
|
+
return await createContext(contextSize);
|
|
593
|
+
}
|
|
594
|
+
catch (err) {
|
|
595
|
+
if (failedCreationRetries === 0 || (createSignal?.aborted && err === createSignal.reason))
|
|
596
|
+
throw err;
|
|
597
|
+
failedCreationRetries--;
|
|
598
|
+
let newContextSize = typeof failedCreationAutoContextSizeShrink === "number"
|
|
599
|
+
? Math.floor(contextSize * (1 - failedCreationAutoContextSizeShrink))
|
|
600
|
+
: Math.floor(failedCreationAutoContextSizeShrink(contextSize));
|
|
601
|
+
if (!Number.isFinite(newContextSize))
|
|
602
|
+
throw err;
|
|
603
|
+
if (newContextSize < minContextSize)
|
|
604
|
+
newContextSize = minContextSize;
|
|
605
|
+
if (newContextSize >= contextSize)
|
|
606
|
+
throw err;
|
|
607
|
+
contextSize = newContextSize;
|
|
608
|
+
}
|
|
609
|
+
}
|
|
610
|
+
throw new Error("Failed to create context");
|
|
611
|
+
}
|
|
314
612
|
}
|
|
315
613
|
export class LlamaContextSequence {
|
|
316
614
|
/** @internal */ _sequenceId;
|
|
317
615
|
/** @internal */ _gcRegistry;
|
|
318
616
|
/** @internal */ _context;
|
|
319
617
|
/** @internal */ _contextShift;
|
|
618
|
+
/** @internal */ _tokenMeter;
|
|
320
619
|
/** @internal */ _disposeAggregator = new DisposeAggregator();
|
|
321
620
|
/** @internal */ _contextTokens = [];
|
|
322
621
|
/** @internal */ _nextTokenIndex = 0;
|
|
323
622
|
/** @internal */ _disposed = false;
|
|
324
623
|
onDispose = new EventRelay();
|
|
325
|
-
constructor({ sequenceId, context, contextShift }) {
|
|
624
|
+
constructor({ sequenceId, context, tokenMeter, contextShift }) {
|
|
326
625
|
this._sequenceId = sequenceId;
|
|
327
626
|
this._context = context;
|
|
627
|
+
this._tokenMeter = tokenMeter ?? new TokenMeter();
|
|
328
628
|
this._contextShift = contextShift;
|
|
329
629
|
this._gcRegistry = new FinalizationRegistry(this._context._reclaimUnusedSequenceId);
|
|
330
630
|
this._gcRegistry.register(this, sequenceId);
|
|
@@ -361,6 +661,9 @@ export class LlamaContextSequence {
|
|
|
361
661
|
get contextTokens() {
|
|
362
662
|
return this._contextTokens.slice();
|
|
363
663
|
}
|
|
664
|
+
get tokenMeter() {
|
|
665
|
+
return this._tokenMeter;
|
|
666
|
+
}
|
|
364
667
|
get isLoadedToMemory() {
|
|
365
668
|
return !this._disposed;
|
|
366
669
|
}
|
|
@@ -386,7 +689,7 @@ export class LlamaContextSequence {
|
|
|
386
689
|
}
|
|
387
690
|
/**
|
|
388
691
|
* Erase context tokens in the provided ranges to free up space for new tokens to be generated.
|
|
389
|
-
*
|
|
692
|
+
* The start of each range is inclusive, and the end of each range is exclusive.
|
|
390
693
|
* For example, the range `{start: 0, end: 1}` will remove the token at the `0` index only.
|
|
391
694
|
*/
|
|
392
695
|
async eraseContextTokenRanges(ranges) {
|
|
@@ -395,6 +698,8 @@ export class LlamaContextSequence {
|
|
|
395
698
|
this._ensureNotDisposed();
|
|
396
699
|
if (ranges.length === 0)
|
|
397
700
|
return;
|
|
701
|
+
// if the deletion fails, we'll have to dispose the sequence and fill it up again
|
|
702
|
+
let deletionSuccessful = true;
|
|
398
703
|
const resolvedRanges = ranges
|
|
399
704
|
.map(({ start, end }) => {
|
|
400
705
|
if (start === end)
|
|
@@ -424,34 +729,42 @@ export class LlamaContextSequence {
|
|
|
424
729
|
let lastDeleteRangeEndPos = null;
|
|
425
730
|
for (const range of resolvedRanges) {
|
|
426
731
|
this._contextTokens.splice(range.start - removedTokens, range.end - range.start);
|
|
427
|
-
|
|
428
|
-
|
|
732
|
+
if (deletionSuccessful)
|
|
733
|
+
deletionSuccessful &&= this._context._ctx.removeTokenCellsFromSequence(this._sequenceId, range.start, range.end);
|
|
734
|
+
if (deletionSuccessful && lastDeleteRangeEndPos != null && removedTokens > 0 && lastDeleteRangeEndPos !== range.start)
|
|
429
735
|
this._context._ctx.shiftSequenceTokenCells(this._sequenceId, lastDeleteRangeEndPos, range.start, -removedTokens);
|
|
430
736
|
removedTokens += range.end - range.start;
|
|
431
737
|
lastDeleteRangeEndPos = range.end;
|
|
432
738
|
}
|
|
433
|
-
if (lastDeleteRangeEndPos != null && removedTokens > 0 && lastDeleteRangeEndPos !== this._nextTokenIndex)
|
|
739
|
+
if (deletionSuccessful && lastDeleteRangeEndPos != null && removedTokens > 0 && lastDeleteRangeEndPos !== this._nextTokenIndex)
|
|
434
740
|
this._context._ctx.shiftSequenceTokenCells(this._sequenceId, lastDeleteRangeEndPos, this._nextTokenIndex, -removedTokens);
|
|
435
741
|
this._nextTokenIndex -= removedTokens;
|
|
742
|
+
if (deletionSuccessful)
|
|
743
|
+
return;
|
|
744
|
+
const newSequenceTokens = this._contextTokens.slice();
|
|
745
|
+
this._nextTokenIndex = 0;
|
|
746
|
+
this._context._ctx.disposeSequence(this._sequenceId);
|
|
747
|
+
await this.evaluateWithoutGeneratingNewTokens(newSequenceTokens);
|
|
436
748
|
});
|
|
437
749
|
}
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
* @param [options]
|
|
441
|
-
*/
|
|
442
|
-
evaluate(tokens, { temperature = 0, topK = 40, topP = 0.95, grammarEvaluationState, repeatPenalty, evaluationPriority = 5, contextShift: { size: contextShiftSize = this._contextShift.size, strategy: contextShiftStrategy = this._contextShift.strategy } = {}, yieldEosToken = false } = {}) {
|
|
750
|
+
evaluate(tokens, options = {}) {
|
|
751
|
+
const { temperature = 0, minP = 0, topK = 40, topP = 0.95, seed, grammarEvaluationState, repeatPenalty, tokenBias, evaluationPriority = 5, contextShift: { size: contextShiftSize = this._contextShift.size, strategy: contextShiftStrategy = this._contextShift.strategy } = {}, yieldEogToken = false, _noSampling = false } = options;
|
|
443
752
|
return this._evaluate(tokens, {
|
|
444
753
|
temperature,
|
|
754
|
+
minP,
|
|
445
755
|
topK,
|
|
446
756
|
topP,
|
|
757
|
+
seed,
|
|
447
758
|
grammarEvaluationState,
|
|
448
759
|
repeatPenalty,
|
|
760
|
+
tokenBias,
|
|
449
761
|
evaluationPriority,
|
|
450
762
|
contextShiftOptions: {
|
|
451
763
|
size: contextShiftSize,
|
|
452
764
|
strategy: contextShiftStrategy
|
|
453
765
|
},
|
|
454
|
-
|
|
766
|
+
yieldEogToken,
|
|
767
|
+
_noSampling
|
|
455
768
|
});
|
|
456
769
|
}
|
|
457
770
|
/**
|
|
@@ -474,59 +787,85 @@ export class LlamaContextSequence {
|
|
|
474
787
|
}
|
|
475
788
|
}
|
|
476
789
|
/** @internal */
|
|
477
|
-
async *_evaluate(tokens, { temperature = 0, topK = 40, topP = 0.95, grammarEvaluationState, repeatPenalty, evaluationPriority = 5, generateNewTokens = true, contextShiftOptions,
|
|
790
|
+
async *_evaluate(tokens, { temperature = 0, minP = 0, topK = 40, topP = 0.95, seed, grammarEvaluationState, repeatPenalty, tokenBias, evaluationPriority = 5, generateNewTokens = true, contextShiftOptions, yieldEogToken = false, _noSampling = false }) {
|
|
478
791
|
this._ensureNotDisposed();
|
|
479
792
|
let evalTokens = tokens;
|
|
480
793
|
if (evalTokens.length === 0)
|
|
481
794
|
return;
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
const
|
|
488
|
-
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
|
|
795
|
+
const sampler = new LlamaSampler(this.model);
|
|
796
|
+
try {
|
|
797
|
+
while (true) {
|
|
798
|
+
this._ensureNotDisposed();
|
|
799
|
+
// Evaluate to get the next token.
|
|
800
|
+
const nextToken = await this._decodeTokens(evalTokens, generateNewTokens, evaluationPriority, this._tokenMeter, contextShiftOptions, (batchLogitIndex) => {
|
|
801
|
+
if (_noSampling)
|
|
802
|
+
return null;
|
|
803
|
+
const repeatPenaltyTokens = repeatPenalty?.punishTokens instanceof Function
|
|
804
|
+
? repeatPenalty.punishTokens()
|
|
805
|
+
: repeatPenalty?.punishTokens;
|
|
806
|
+
const maxPunishTokens = Math.max(repeatPenalty?.maxPunishTokens ?? defaultMaxPunishTokens, repeatPenaltyTokens?.length ?? 0);
|
|
807
|
+
const resolvedGrammarEvaluationState = grammarEvaluationState instanceof Function
|
|
808
|
+
? grammarEvaluationState()
|
|
809
|
+
: grammarEvaluationState;
|
|
810
|
+
if (resolvedGrammarEvaluationState != null && resolvedGrammarEvaluationState._llama !== this.model._llama)
|
|
811
|
+
throw new Error("The LlamaGrammar used by passed to this function was created with a different Llama instance than the one used by this sequence's model. Make sure you use the same Llama instance for both the model and the grammar.");
|
|
812
|
+
const { tokenBiasKeys, tokenBiasValues } = getTokenBiasesForAddon(tokenBias, this.model);
|
|
813
|
+
sampler.applyConfig(removeNullFields({
|
|
814
|
+
temperature,
|
|
815
|
+
minP,
|
|
816
|
+
topK,
|
|
817
|
+
topP,
|
|
818
|
+
seed: Math.max(0, Number.isFinite(seed)
|
|
819
|
+
? Math.floor(seed ?? (Date.now() / 1000))
|
|
820
|
+
: Math.floor(Date.now() / 1000)),
|
|
821
|
+
repeatPenalty: repeatPenalty?.penalty,
|
|
822
|
+
repeatPenaltyMaxTokens: maxPunishTokens,
|
|
823
|
+
repeatPenaltyTokens: repeatPenaltyTokens != null
|
|
824
|
+
? Uint32Array.from(repeatPenaltyTokens)
|
|
825
|
+
: undefined,
|
|
826
|
+
repeatPenaltyPresencePenalty: repeatPenalty?.presencePenalty,
|
|
827
|
+
repeatPenaltyFrequencyPenalty: repeatPenalty?.frequencyPenalty,
|
|
828
|
+
tokenBiasKeys,
|
|
829
|
+
tokenBiasValues,
|
|
830
|
+
grammarEvaluationState: resolvedGrammarEvaluationState?._state
|
|
831
|
+
}));
|
|
832
|
+
return withLock(sampler, "sample", async () => {
|
|
833
|
+
if (sampler.disposed)
|
|
834
|
+
return null;
|
|
835
|
+
return this._context._ctx.sampleToken(batchLogitIndex, sampler._sampler);
|
|
836
|
+
});
|
|
837
|
+
});
|
|
838
|
+
if (nextToken === -1)
|
|
839
|
+
throw new Error("Failed to sample next token");
|
|
840
|
+
if (nextToken == null)
|
|
841
|
+
return;
|
|
842
|
+
// the model finished generating text
|
|
843
|
+
if (!yieldEogToken && this._context.model.isEogToken(nextToken))
|
|
844
|
+
break;
|
|
845
|
+
const replacementToken = (yield nextToken);
|
|
846
|
+
// set the tokens for the next evaluation
|
|
847
|
+
if (replacementToken != null)
|
|
848
|
+
evalTokens = [replacementToken];
|
|
849
|
+
else
|
|
850
|
+
evalTokens = [nextToken];
|
|
851
|
+
}
|
|
852
|
+
}
|
|
853
|
+
finally {
|
|
854
|
+
void withLock(sampler, "sample", sampler.asyncDispose);
|
|
516
855
|
}
|
|
517
856
|
}
|
|
518
857
|
/** @internal */
|
|
519
|
-
async _decodeTokens(tokens, generateLogit, evaluationPriority, contextShiftOptions, onDecodeDone) {
|
|
858
|
+
async _decodeTokens(tokens, generateLogit, evaluationPriority, tokenMeter, contextShiftOptions, onDecodeDone) {
|
|
520
859
|
this._ensureNotDisposed();
|
|
521
860
|
const tokensLeftToDecode = tokens.slice();
|
|
522
861
|
return await withLock(this, "evaluate", async () => {
|
|
523
862
|
while (tokensLeftToDecode.length > 0) {
|
|
524
863
|
this._ensureNotDisposed();
|
|
525
|
-
let freeSpace = this._context.contextSize - this._nextTokenIndex;
|
|
526
|
-
if (freeSpace <=
|
|
864
|
+
let freeSpace = this._context.contextSize - 1 - this._nextTokenIndex;
|
|
865
|
+
if (freeSpace <= 0) {
|
|
527
866
|
await this._freeUpSpaceForTokens(contextShiftOptions);
|
|
528
|
-
freeSpace = this._context.contextSize - this._nextTokenIndex;
|
|
529
|
-
if (freeSpace <=
|
|
867
|
+
freeSpace = this._context.contextSize - 1 - this._nextTokenIndex;
|
|
868
|
+
if (freeSpace <= 0)
|
|
530
869
|
throw new Error("Failed to free up space for new tokens");
|
|
531
870
|
}
|
|
532
871
|
const tokensToDecode = tokensLeftToDecode.splice(0, freeSpace);
|
|
@@ -536,7 +875,8 @@ export class LlamaContextSequence {
|
|
|
536
875
|
tokens: tokensToDecode,
|
|
537
876
|
firstTokenSequenceIndex: this._nextTokenIndex,
|
|
538
877
|
generateLogitAtTheEnd,
|
|
539
|
-
evaluationPriority
|
|
878
|
+
evaluationPriority,
|
|
879
|
+
tokenMeter
|
|
540
880
|
}, !generateLogitAtTheEnd
|
|
541
881
|
? undefined
|
|
542
882
|
: onDecodeDone);
|
|
@@ -556,7 +896,10 @@ export class LlamaContextSequence {
|
|
|
556
896
|
: contextShiftOptions.size));
|
|
557
897
|
this._ensureNotDisposed();
|
|
558
898
|
if (contextShiftOptions.strategy === "eraseBeginning") {
|
|
559
|
-
|
|
899
|
+
let eraseStartIndex = 0;
|
|
900
|
+
if (this.model.tokens.bos != null && this._contextTokens[0] === this.model.tokens.bos)
|
|
901
|
+
eraseStartIndex = 1;
|
|
902
|
+
await this.eraseContextTokenRanges([{ start: eraseStartIndex, end: size + eraseStartIndex }]);
|
|
560
903
|
}
|
|
561
904
|
else {
|
|
562
905
|
const ranges = await contextShiftOptions.strategy({
|
|
@@ -566,7 +909,7 @@ export class LlamaContextSequence {
|
|
|
566
909
|
if (ranges == null)
|
|
567
910
|
throw new Error("Invalid delete ranges");
|
|
568
911
|
await this.eraseContextTokenRanges(ranges);
|
|
569
|
-
if (this.nextTokenIndex >= this._context.contextSize)
|
|
912
|
+
if (this.nextTokenIndex >= this._context.contextSize - 1)
|
|
570
913
|
await this.eraseContextTokenRanges([{ start: 0, end: size }]);
|
|
571
914
|
}
|
|
572
915
|
}
|
|
@@ -579,10 +922,11 @@ export class LlamaContextSequence {
|
|
|
579
922
|
* We need this to make it impossible to manually create instances of this class outside the code of this library
|
|
580
923
|
* @internal
|
|
581
924
|
*/
|
|
582
|
-
static _create({ sequenceId, context, contextShift: { size: contextShiftSize = Math.min(100, Math.ceil(context.contextSize / 2)), strategy: contextShiftStrategy = "eraseBeginning" } = {} }) {
|
|
925
|
+
static _create({ sequenceId, context, tokenMeter, contextShift: { size: contextShiftSize = Math.min(100, Math.ceil(context.contextSize / 2)), strategy: contextShiftStrategy = "eraseBeginning" } = {} }) {
|
|
583
926
|
return new LlamaContextSequence({
|
|
584
927
|
sequenceId,
|
|
585
928
|
context,
|
|
929
|
+
tokenMeter,
|
|
586
930
|
contextShift: {
|
|
587
931
|
size: contextShiftSize,
|
|
588
932
|
strategy: contextShiftStrategy
|
|
@@ -590,14 +934,52 @@ export class LlamaContextSequence {
|
|
|
590
934
|
});
|
|
591
935
|
}
|
|
592
936
|
}
|
|
937
|
+
function getTokenBiasesForAddon(tokenBias, currentModel) {
|
|
938
|
+
if (tokenBias == null)
|
|
939
|
+
return {
|
|
940
|
+
tokenBiasKeys: undefined,
|
|
941
|
+
tokenBiasValues: undefined
|
|
942
|
+
};
|
|
943
|
+
if (tokenBias instanceof Function)
|
|
944
|
+
tokenBias = tokenBias();
|
|
945
|
+
if (tokenBias._tokenizer !== currentModel.tokenizer)
|
|
946
|
+
throw new Error("This TokenBias instance was created with a different model than the one used by this context. " +
|
|
947
|
+
"Make sure you use the model instance of the context sequence for the TokenBias you use it with.");
|
|
948
|
+
const tokenBiasKeys = [];
|
|
949
|
+
const tokenBiasValues = [];
|
|
950
|
+
for (const [token, bias] of tokenBias._biases) {
|
|
951
|
+
tokenBiasKeys.push(token);
|
|
952
|
+
tokenBiasValues.push(bias);
|
|
953
|
+
}
|
|
954
|
+
if (tokenBiasKeys.length === 0 || tokenBiasValues.length === 0) {
|
|
955
|
+
return {
|
|
956
|
+
tokenBiasKeys: undefined,
|
|
957
|
+
tokenBiasValues: undefined
|
|
958
|
+
};
|
|
959
|
+
}
|
|
960
|
+
return {
|
|
961
|
+
tokenBiasKeys: Uint32Array.from(tokenBiasKeys),
|
|
962
|
+
tokenBiasValues: Float32Array.from(tokenBiasValues)
|
|
963
|
+
};
|
|
964
|
+
}
|
|
593
965
|
function disposeContextIfReferenced(contextRef) {
|
|
594
966
|
const context = contextRef.deref();
|
|
595
967
|
if (context != null)
|
|
596
|
-
context.dispose();
|
|
968
|
+
void context.dispose();
|
|
597
969
|
}
|
|
598
970
|
function disposeContextSequenceIfReferenced(contextRef) {
|
|
599
971
|
const context = contextRef.deref();
|
|
600
972
|
if (context != null)
|
|
601
973
|
context.dispose();
|
|
602
974
|
}
|
|
975
|
+
export function getDefaultContextBatchSize({ contextSize, sequences }) {
|
|
976
|
+
return Math.min(contextSize * sequences, 512);
|
|
977
|
+
}
|
|
978
|
+
export function getDefaultContextSequences() {
|
|
979
|
+
return 1;
|
|
980
|
+
}
|
|
981
|
+
const defaultFallbackContextSize = 4096;
|
|
982
|
+
export function getDefaultModelContextSize({ trainContextSize }) {
|
|
983
|
+
return trainContextSize ?? defaultFallbackContextSize;
|
|
984
|
+
}
|
|
603
985
|
//# sourceMappingURL=LlamaContext.js.map
|