node-llama-cpp 3.0.0-beta.9 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +42 -27
- package/bins/_linux-arm64.moved.txt +1 -0
- package/bins/_linux-armv7l.moved.txt +1 -0
- package/bins/_linux-x64-cuda.moved.txt +1 -0
- package/bins/_linux-x64-vulkan.moved.txt +1 -0
- package/bins/_linux-x64.moved.txt +1 -0
- package/bins/_mac-arm64-metal.moved.txt +1 -0
- package/bins/_mac-x64.moved.txt +1 -0
- package/bins/_win-arm64.moved.txt +1 -0
- package/bins/_win-x64-cuda.moved.txt +1 -0
- package/bins/_win-x64-vulkan.moved.txt +1 -0
- package/bins/_win-x64.moved.txt +1 -0
- package/dist/ChatWrapper.d.ts +19 -39
- package/dist/ChatWrapper.js +129 -72
- package/dist/ChatWrapper.js.map +1 -1
- package/dist/apiDocsIndex.d.ts +1 -0
- package/dist/apiDocsIndex.js +7 -0
- package/dist/apiDocsIndex.js.map +1 -0
- package/dist/bindings/AddonTypes.d.ts +88 -20
- package/dist/bindings/Llama.d.ts +43 -3
- package/dist/bindings/Llama.js +193 -23
- package/dist/bindings/Llama.js.map +1 -1
- package/dist/bindings/consts.d.ts +2 -0
- package/dist/bindings/consts.js +13 -0
- package/dist/bindings/consts.js.map +1 -0
- package/dist/bindings/getLlama.d.ts +123 -18
- package/dist/bindings/getLlama.js +264 -75
- package/dist/bindings/getLlama.js.map +1 -1
- package/dist/bindings/types.d.ts +29 -5
- package/dist/bindings/types.js +51 -2
- package/dist/bindings/types.js.map +1 -1
- package/dist/bindings/utils/MemoryOrchestrator.d.ts +21 -0
- package/dist/bindings/utils/MemoryOrchestrator.js +49 -0
- package/dist/bindings/utils/MemoryOrchestrator.js.map +1 -0
- package/dist/bindings/utils/asyncEvery.d.ts +5 -0
- package/dist/bindings/utils/asyncEvery.js +15 -0
- package/dist/bindings/utils/asyncEvery.js.map +1 -0
- package/dist/bindings/utils/asyncSome.d.ts +5 -0
- package/dist/bindings/utils/asyncSome.js +27 -0
- package/dist/bindings/utils/asyncSome.js.map +1 -0
- package/dist/bindings/utils/clearAllLocalBuilds.js.map +1 -1
- package/dist/bindings/utils/cloneLlamaCppRepo.d.ts +1 -1
- package/dist/bindings/utils/cloneLlamaCppRepo.js +39 -28
- package/dist/bindings/utils/cloneLlamaCppRepo.js.map +1 -1
- package/dist/bindings/utils/compileLLamaCpp.d.ts +11 -3
- package/dist/bindings/utils/compileLLamaCpp.js +250 -81
- package/dist/bindings/utils/compileLLamaCpp.js.map +1 -1
- package/dist/bindings/utils/detectAvailableComputeLayers.d.ts +14 -0
- package/dist/bindings/utils/detectAvailableComputeLayers.js +305 -0
- package/dist/bindings/utils/detectAvailableComputeLayers.js.map +1 -0
- package/dist/bindings/utils/detectGlibc.d.ts +4 -0
- package/dist/bindings/utils/detectGlibc.js +46 -0
- package/dist/bindings/utils/detectGlibc.js.map +1 -0
- package/dist/bindings/utils/getBestComputeLayersAvailable.d.ts +9 -0
- package/dist/bindings/utils/getBestComputeLayersAvailable.js +29 -0
- package/dist/bindings/utils/getBestComputeLayersAvailable.js.map +1 -0
- package/dist/bindings/utils/getBuildFolderNameForBuildOptions.js +14 -6
- package/dist/bindings/utils/getBuildFolderNameForBuildOptions.js.map +1 -1
- package/dist/bindings/utils/getExampleUsageCodeOfGetLlama.js.map +1 -1
- package/dist/bindings/utils/getGpuTypesToUseForOption.d.ts +12 -0
- package/dist/bindings/utils/getGpuTypesToUseForOption.js +39 -0
- package/dist/bindings/utils/getGpuTypesToUseForOption.js.map +1 -0
- package/dist/bindings/utils/getLinuxDistroInfo.d.ts +9 -0
- package/dist/bindings/utils/getLinuxDistroInfo.js +46 -0
- package/dist/bindings/utils/getLinuxDistroInfo.js.map +1 -0
- package/dist/bindings/utils/getLlamaWithoutBackend.d.ts +5 -0
- package/dist/bindings/utils/getLlamaWithoutBackend.js +27 -0
- package/dist/bindings/utils/getLlamaWithoutBackend.js.map +1 -0
- package/dist/bindings/utils/getPlatform.js.map +1 -1
- package/dist/bindings/utils/getPlatformInfo.d.ts +5 -0
- package/dist/bindings/utils/getPlatformInfo.js +28 -0
- package/dist/bindings/utils/getPlatformInfo.js.map +1 -0
- package/dist/bindings/utils/hasBuildingFromSourceDependenciesInstalled.d.ts +3 -0
- package/dist/bindings/utils/hasBuildingFromSourceDependenciesInstalled.js +27 -0
- package/dist/bindings/utils/hasBuildingFromSourceDependenciesInstalled.js.map +1 -0
- package/dist/bindings/utils/hasFileInPath.d.ts +2 -0
- package/dist/bindings/utils/hasFileInPath.js +34 -0
- package/dist/bindings/utils/hasFileInPath.js.map +1 -0
- package/dist/bindings/utils/lastBuildInfo.js.map +1 -1
- package/dist/bindings/utils/logBinaryUsageExampleToConsole.d.ts +1 -1
- package/dist/bindings/utils/logBinaryUsageExampleToConsole.js +3 -9
- package/dist/bindings/utils/logBinaryUsageExampleToConsole.js.map +1 -1
- package/dist/bindings/utils/logDistroInstallInstruction.d.ts +14 -0
- package/dist/bindings/utils/logDistroInstallInstruction.js +48 -0
- package/dist/bindings/utils/logDistroInstallInstruction.js.map +1 -0
- package/dist/bindings/utils/resolveCustomCmakeOptions.js +26 -26
- package/dist/bindings/utils/resolveCustomCmakeOptions.js.map +1 -1
- package/dist/bindings/utils/testBindingBinary.d.ts +1 -0
- package/dist/bindings/utils/testBindingBinary.js +100 -0
- package/dist/bindings/utils/testBindingBinary.js.map +1 -0
- package/dist/bindings/utils/testCmakeBinary.d.ts +5 -0
- package/dist/bindings/utils/testCmakeBinary.js +32 -0
- package/dist/bindings/utils/testCmakeBinary.js.map +1 -0
- package/dist/chatWrappers/AlpacaChatWrapper.d.ts +2 -1
- package/dist/chatWrappers/AlpacaChatWrapper.js +10 -2
- package/dist/chatWrappers/AlpacaChatWrapper.js.map +1 -1
- package/dist/chatWrappers/ChatMLChatWrapper.d.ts +2 -14
- package/dist/chatWrappers/ChatMLChatWrapper.js +23 -21
- package/dist/chatWrappers/ChatMLChatWrapper.js.map +1 -1
- package/dist/chatWrappers/FalconChatWrapper.d.ts +4 -10
- package/dist/chatWrappers/FalconChatWrapper.js +39 -21
- package/dist/chatWrappers/FalconChatWrapper.js.map +1 -1
- package/dist/chatWrappers/FunctionaryChatWrapper.d.ts +8 -32
- package/dist/chatWrappers/FunctionaryChatWrapper.js +514 -118
- package/dist/chatWrappers/FunctionaryChatWrapper.js.map +1 -1
- package/dist/chatWrappers/GemmaChatWrapper.d.ts +7 -0
- package/dist/chatWrappers/GemmaChatWrapper.js +96 -0
- package/dist/chatWrappers/GemmaChatWrapper.js.map +1 -0
- package/dist/chatWrappers/GeneralChatWrapper.d.ts +4 -10
- package/dist/chatWrappers/GeneralChatWrapper.js +46 -22
- package/dist/chatWrappers/GeneralChatWrapper.js.map +1 -1
- package/dist/chatWrappers/Llama2ChatWrapper.d.ts +12 -0
- package/dist/chatWrappers/{LlamaChatWrapper.js → Llama2ChatWrapper.js} +37 -20
- package/dist/chatWrappers/Llama2ChatWrapper.js.map +1 -0
- package/dist/chatWrappers/Llama3ChatWrapper.d.ts +16 -0
- package/dist/chatWrappers/Llama3ChatWrapper.js +173 -0
- package/dist/chatWrappers/Llama3ChatWrapper.js.map +1 -0
- package/dist/chatWrappers/Llama3_1ChatWrapper.d.ts +35 -0
- package/dist/chatWrappers/Llama3_1ChatWrapper.js +277 -0
- package/dist/chatWrappers/Llama3_1ChatWrapper.js.map +1 -0
- package/dist/chatWrappers/MistralChatWrapper.d.ts +15 -0
- package/dist/chatWrappers/MistralChatWrapper.js +169 -0
- package/dist/chatWrappers/MistralChatWrapper.js.map +1 -0
- package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.d.ts +100 -0
- package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.js +409 -0
- package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.js.map +1 -0
- package/dist/chatWrappers/generic/TemplateChatWrapper.d.ts +60 -0
- package/dist/chatWrappers/generic/TemplateChatWrapper.js +204 -0
- package/dist/chatWrappers/generic/TemplateChatWrapper.js.map +1 -0
- package/dist/chatWrappers/generic/utils/chatHistoryFunctionCallMessageTemplate.d.ts +23 -0
- package/dist/chatWrappers/generic/utils/chatHistoryFunctionCallMessageTemplate.js +45 -0
- package/dist/chatWrappers/generic/utils/chatHistoryFunctionCallMessageTemplate.js.map +1 -0
- package/dist/chatWrappers/utils/ChatModelFunctionsDocumentationGenerator.d.ts +57 -0
- package/dist/chatWrappers/utils/ChatModelFunctionsDocumentationGenerator.js +119 -0
- package/dist/chatWrappers/utils/ChatModelFunctionsDocumentationGenerator.js.map +1 -0
- package/dist/chatWrappers/utils/chunkChatItems.d.ts +10 -0
- package/dist/chatWrappers/utils/chunkChatItems.js +44 -0
- package/dist/chatWrappers/utils/chunkChatItems.js.map +1 -0
- package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.d.ts +4 -0
- package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.js +221 -0
- package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.js.map +1 -0
- package/dist/chatWrappers/utils/jsonDumps.d.ts +7 -0
- package/dist/chatWrappers/utils/jsonDumps.js +18 -0
- package/dist/chatWrappers/utils/jsonDumps.js.map +1 -0
- package/dist/chatWrappers/utils/resolveChatWrapper.d.ts +95 -0
- package/dist/chatWrappers/utils/resolveChatWrapper.js +335 -0
- package/dist/chatWrappers/utils/resolveChatWrapper.js.map +1 -0
- package/dist/cli/cli.js +19 -11
- package/dist/cli/cli.js.map +1 -1
- package/dist/cli/commands/ChatCommand.d.ts +16 -7
- package/dist/cli/commands/ChatCommand.js +321 -190
- package/dist/cli/commands/ChatCommand.js.map +1 -1
- package/dist/cli/commands/CompleteCommand.d.ts +31 -0
- package/dist/cli/commands/CompleteCommand.js +402 -0
- package/dist/cli/commands/CompleteCommand.js.map +1 -0
- package/dist/cli/commands/DebugCommand.d.ts +7 -0
- package/dist/cli/commands/DebugCommand.js +54 -0
- package/dist/cli/commands/DebugCommand.js.map +1 -0
- package/dist/cli/commands/InfillCommand.d.ts +33 -0
- package/dist/cli/commands/InfillCommand.js +438 -0
- package/dist/cli/commands/InfillCommand.js.map +1 -0
- package/dist/cli/commands/InitCommand.d.ts +11 -0
- package/dist/cli/commands/InitCommand.js +195 -0
- package/dist/cli/commands/InitCommand.js.map +1 -0
- package/dist/cli/commands/OnPostInstallCommand.js +6 -2
- package/dist/cli/commands/OnPostInstallCommand.js.map +1 -1
- package/dist/cli/commands/PullCommand.d.ts +13 -0
- package/dist/cli/commands/PullCommand.js +158 -0
- package/dist/cli/commands/PullCommand.js.map +1 -0
- package/dist/cli/commands/inspect/InspectCommand.d.ts +4 -0
- package/dist/cli/commands/inspect/InspectCommand.js +21 -0
- package/dist/cli/commands/inspect/InspectCommand.js.map +1 -0
- package/dist/cli/commands/inspect/commands/InspectEstimateCommand.d.ts +12 -0
- package/dist/cli/commands/inspect/commands/InspectEstimateCommand.js +225 -0
- package/dist/cli/commands/inspect/commands/InspectEstimateCommand.js.map +1 -0
- package/dist/cli/commands/inspect/commands/InspectGgufCommand.d.ts +12 -0
- package/dist/cli/commands/inspect/commands/InspectGgufCommand.js +149 -0
- package/dist/cli/commands/inspect/commands/InspectGgufCommand.js.map +1 -0
- package/dist/cli/commands/inspect/commands/InspectGpuCommand.d.ts +4 -0
- package/dist/cli/commands/inspect/commands/InspectGpuCommand.js +202 -0
- package/dist/cli/commands/inspect/commands/InspectGpuCommand.js.map +1 -0
- package/dist/cli/commands/inspect/commands/InspectMeasureCommand.d.ts +18 -0
- package/dist/cli/commands/inspect/commands/InspectMeasureCommand.js +629 -0
- package/dist/cli/commands/inspect/commands/InspectMeasureCommand.js.map +1 -0
- package/dist/cli/commands/source/SourceCommand.d.ts +4 -0
- package/dist/cli/commands/source/SourceCommand.js +19 -0
- package/dist/cli/commands/source/SourceCommand.js.map +1 -0
- package/dist/cli/commands/source/commands/BuildCommand.d.ts +16 -0
- package/dist/cli/commands/source/commands/BuildCommand.js +148 -0
- package/dist/cli/commands/source/commands/BuildCommand.js.map +1 -0
- package/dist/cli/commands/{ClearCommand.d.ts → source/commands/ClearCommand.d.ts} +1 -1
- package/dist/cli/commands/{ClearCommand.js → source/commands/ClearCommand.js} +11 -10
- package/dist/cli/commands/source/commands/ClearCommand.js.map +1 -0
- package/dist/cli/commands/{DownloadCommand.d.ts → source/commands/DownloadCommand.d.ts} +5 -4
- package/dist/cli/commands/source/commands/DownloadCommand.js +217 -0
- package/dist/cli/commands/source/commands/DownloadCommand.js.map +1 -0
- package/dist/cli/projectTemplates.d.ts +7 -0
- package/dist/cli/projectTemplates.js +10 -0
- package/dist/cli/projectTemplates.js.map +1 -0
- package/dist/cli/recommendedModels.d.ts +2 -0
- package/dist/cli/recommendedModels.js +585 -0
- package/dist/cli/recommendedModels.js.map +1 -0
- package/dist/cli/startCreateCli.d.ts +2 -0
- package/dist/cli/startCreateCli.js +26 -0
- package/dist/cli/startCreateCli.js.map +1 -0
- package/dist/cli/utils/ConsoleInteraction.d.ts +22 -0
- package/dist/cli/utils/ConsoleInteraction.js +122 -0
- package/dist/cli/utils/ConsoleInteraction.js.map +1 -0
- package/dist/cli/utils/ConsoleTable.d.ts +23 -0
- package/dist/cli/utils/ConsoleTable.js +86 -0
- package/dist/cli/utils/ConsoleTable.js.map +1 -0
- package/dist/cli/utils/basicChooseFromListConsoleInteraction.d.ts +13 -0
- package/dist/cli/utils/basicChooseFromListConsoleInteraction.js +111 -0
- package/dist/cli/utils/basicChooseFromListConsoleInteraction.js.map +1 -0
- package/dist/cli/utils/consolePromptQuestion.d.ts +6 -0
- package/dist/cli/utils/consolePromptQuestion.js +82 -0
- package/dist/cli/utils/consolePromptQuestion.js.map +1 -0
- package/dist/cli/utils/getReadablePath.d.ts +1 -0
- package/dist/cli/utils/getReadablePath.js +14 -0
- package/dist/cli/utils/getReadablePath.js.map +1 -0
- package/dist/cli/utils/interactivelyAskForModel.d.ts +8 -0
- package/dist/cli/utils/interactivelyAskForModel.js +450 -0
- package/dist/cli/utils/interactivelyAskForModel.js.map +1 -0
- package/dist/cli/utils/logUsedGpuTypeOption.d.ts +2 -0
- package/dist/cli/utils/logUsedGpuTypeOption.js +9 -0
- package/dist/cli/utils/logUsedGpuTypeOption.js.map +1 -0
- package/dist/cli/utils/printCommonInfoLines.d.ts +9 -0
- package/dist/cli/utils/printCommonInfoLines.js +82 -0
- package/dist/cli/utils/printCommonInfoLines.js.map +1 -0
- package/dist/cli/utils/printInfoLine.d.ts +12 -0
- package/dist/cli/utils/printInfoLine.js +54 -0
- package/dist/cli/utils/printInfoLine.js.map +1 -0
- package/dist/cli/utils/projectTemplates.d.ts +19 -0
- package/dist/cli/utils/projectTemplates.js +47 -0
- package/dist/cli/utils/projectTemplates.js.map +1 -0
- package/dist/cli/utils/renderModelCompatibilityPercentageWithColors.d.ts +6 -0
- package/dist/cli/utils/renderModelCompatibilityPercentageWithColors.js +14 -0
- package/dist/cli/utils/renderModelCompatibilityPercentageWithColors.js.map +1 -0
- package/dist/cli/utils/resolveCommandGgufPath.d.ts +5 -0
- package/dist/cli/utils/resolveCommandGgufPath.js +72 -0
- package/dist/cli/utils/resolveCommandGgufPath.js.map +1 -0
- package/dist/cli/utils/resolveHeaderFlag.d.ts +1 -0
- package/dist/cli/utils/resolveHeaderFlag.js +21 -0
- package/dist/cli/utils/resolveHeaderFlag.js.map +1 -0
- package/dist/cli/utils/resolveModelRecommendationFileOptions.d.ts +19 -0
- package/dist/cli/utils/resolveModelRecommendationFileOptions.js +7 -0
- package/dist/cli/utils/resolveModelRecommendationFileOptions.js.map +1 -0
- package/dist/cli/utils/splitAnsiToLines.d.ts +1 -0
- package/dist/cli/utils/splitAnsiToLines.js +32 -0
- package/dist/cli/utils/splitAnsiToLines.js.map +1 -0
- package/dist/cli/utils/withCliCommandDescriptionDocsUrl.d.ts +2 -0
- package/dist/cli/utils/withCliCommandDescriptionDocsUrl.js +23 -0
- package/dist/cli/utils/withCliCommandDescriptionDocsUrl.js.map +1 -0
- package/dist/commands.d.ts +4 -3
- package/dist/commands.js +6 -3
- package/dist/commands.js.map +1 -1
- package/dist/config.d.ts +35 -4
- package/dist/config.js +58 -17
- package/dist/config.js.map +1 -1
- package/dist/consts.d.ts +4 -0
- package/dist/consts.js +11 -0
- package/dist/consts.js.map +1 -0
- package/dist/evaluator/LlamaChat/LlamaChat.d.ts +151 -41
- package/dist/evaluator/LlamaChat/LlamaChat.js +1289 -437
- package/dist/evaluator/LlamaChat/LlamaChat.js.map +1 -1
- package/dist/evaluator/LlamaChat/utils/FunctionCallNameGrammar.d.ts +11 -0
- package/dist/evaluator/LlamaChat/utils/FunctionCallNameGrammar.js +55 -0
- package/dist/evaluator/LlamaChat/utils/FunctionCallNameGrammar.js.map +1 -0
- package/dist/evaluator/LlamaChat/utils/FunctionCallParamsGrammar.d.ts +16 -0
- package/dist/evaluator/LlamaChat/utils/FunctionCallParamsGrammar.js +45 -0
- package/dist/evaluator/LlamaChat/utils/FunctionCallParamsGrammar.js.map +1 -0
- package/dist/evaluator/LlamaChat/utils/LlamaFunctionCallValidationError.d.ts +8 -0
- package/dist/evaluator/LlamaChat/utils/LlamaFunctionCallValidationError.js +12 -0
- package/dist/evaluator/LlamaChat/utils/LlamaFunctionCallValidationError.js.map +1 -0
- package/dist/evaluator/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.js +27 -17
- package/dist/evaluator/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.js.map +1 -1
- package/dist/evaluator/LlamaChatSession/LlamaChatSession.d.ts +187 -13
- package/dist/evaluator/LlamaChatSession/LlamaChatSession.js +280 -53
- package/dist/evaluator/LlamaChatSession/LlamaChatSession.js.map +1 -1
- package/dist/evaluator/LlamaChatSession/utils/LlamaChatSessionPromptCompletionEngine.d.ts +40 -0
- package/dist/evaluator/LlamaChatSession/utils/LlamaChatSessionPromptCompletionEngine.js +186 -0
- package/dist/evaluator/LlamaChatSession/utils/LlamaChatSessionPromptCompletionEngine.js.map +1 -0
- package/dist/evaluator/LlamaChatSession/utils/defineChatSessionFunction.d.ts +10 -2
- package/dist/evaluator/LlamaChatSession/utils/defineChatSessionFunction.js +8 -0
- package/dist/evaluator/LlamaChatSession/utils/defineChatSessionFunction.js.map +1 -1
- package/dist/evaluator/LlamaCompletion.d.ts +168 -0
- package/dist/evaluator/LlamaCompletion.js +470 -0
- package/dist/evaluator/LlamaCompletion.js.map +1 -0
- package/dist/evaluator/LlamaContext/LlamaContext.d.ts +62 -21
- package/dist/evaluator/LlamaContext/LlamaContext.js +501 -120
- package/dist/evaluator/LlamaContext/LlamaContext.js.map +1 -1
- package/dist/evaluator/LlamaContext/LlamaSampler.d.ts +1 -0
- package/dist/evaluator/LlamaContext/LlamaSampler.js +31 -0
- package/dist/evaluator/LlamaContext/LlamaSampler.js.map +1 -0
- package/dist/evaluator/LlamaContext/types.d.ts +177 -16
- package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies/firstInFirstOutStrategy.js.map +1 -0
- package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies/maximumParallelismStrategy.js.map +1 -0
- package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizationStrategy.d.ts +2 -0
- package/dist/evaluator/LlamaContext/utils/{resolveBatchItemsPrioritizingStrategy.js → resolveBatchItemsPrioritizationStrategy.js} +5 -5
- package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizationStrategy.js.map +1 -0
- package/dist/evaluator/LlamaEmbedding.d.ts +21 -0
- package/dist/evaluator/LlamaEmbedding.js +53 -0
- package/dist/evaluator/LlamaEmbedding.js.map +1 -0
- package/dist/evaluator/LlamaEmbeddingContext.d.ts +29 -19
- package/dist/evaluator/LlamaEmbeddingContext.js +36 -43
- package/dist/evaluator/LlamaEmbeddingContext.js.map +1 -1
- package/dist/evaluator/LlamaGrammar.d.ts +16 -13
- package/dist/evaluator/LlamaGrammar.js +17 -10
- package/dist/evaluator/LlamaGrammar.js.map +1 -1
- package/dist/evaluator/LlamaGrammarEvaluationState.d.ts +7 -3
- package/dist/evaluator/LlamaGrammarEvaluationState.js +8 -4
- package/dist/evaluator/LlamaGrammarEvaluationState.js.map +1 -1
- package/dist/evaluator/LlamaJsonSchemaGrammar.d.ts +3 -0
- package/dist/evaluator/LlamaJsonSchemaGrammar.js +5 -3
- package/dist/evaluator/LlamaJsonSchemaGrammar.js.map +1 -1
- package/dist/evaluator/LlamaModel/LlamaModel.d.ts +255 -0
- package/dist/evaluator/LlamaModel/LlamaModel.js +780 -0
- package/dist/evaluator/LlamaModel/LlamaModel.js.map +1 -0
- package/dist/evaluator/LlamaModel/utils/TokenAttributes.d.ts +29 -0
- package/dist/evaluator/LlamaModel/utils/TokenAttributes.js +65 -0
- package/dist/evaluator/LlamaModel/utils/TokenAttributes.js.map +1 -0
- package/dist/evaluator/TokenBias.d.ts +34 -0
- package/dist/evaluator/TokenBias.js +65 -0
- package/dist/evaluator/TokenBias.js.map +1 -0
- package/dist/evaluator/TokenMeter.d.ts +45 -0
- package/dist/evaluator/TokenMeter.js +74 -0
- package/dist/evaluator/TokenMeter.js.map +1 -0
- package/dist/gguf/consts.d.ts +4 -0
- package/dist/gguf/consts.js +12 -0
- package/dist/gguf/consts.js.map +1 -0
- package/dist/gguf/errors/InvalidGgufMagicError.d.ts +3 -0
- package/dist/gguf/errors/InvalidGgufMagicError.js +6 -0
- package/dist/gguf/errors/InvalidGgufMagicError.js.map +1 -0
- package/dist/gguf/errors/UnsupportedGgufValueTypeError.d.ts +4 -0
- package/dist/gguf/errors/UnsupportedGgufValueTypeError.js +9 -0
- package/dist/gguf/errors/UnsupportedGgufValueTypeError.js.map +1 -0
- package/dist/gguf/fileReaders/GgufFileReader.d.ts +36 -0
- package/dist/gguf/fileReaders/GgufFileReader.js +109 -0
- package/dist/gguf/fileReaders/GgufFileReader.js.map +1 -0
- package/dist/gguf/fileReaders/GgufFsFileReader.d.ts +16 -0
- package/dist/gguf/fileReaders/GgufFsFileReader.js +62 -0
- package/dist/gguf/fileReaders/GgufFsFileReader.js.map +1 -0
- package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.d.ts +25 -0
- package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.js +92 -0
- package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.js.map +1 -0
- package/dist/gguf/insights/GgufInsights.d.ts +50 -0
- package/dist/gguf/insights/GgufInsights.js +401 -0
- package/dist/gguf/insights/GgufInsights.js.map +1 -0
- package/dist/gguf/insights/GgufInsightsConfigurationResolver.d.ts +146 -0
- package/dist/gguf/insights/GgufInsightsConfigurationResolver.js +226 -0
- package/dist/gguf/insights/GgufInsightsConfigurationResolver.js.map +1 -0
- package/dist/gguf/insights/utils/resolveContextContextSizeOption.d.ts +19 -0
- package/dist/gguf/insights/utils/resolveContextContextSizeOption.js +78 -0
- package/dist/gguf/insights/utils/resolveContextContextSizeOption.js.map +1 -0
- package/dist/gguf/insights/utils/resolveModelGpuLayersOption.d.ts +15 -0
- package/dist/gguf/insights/utils/resolveModelGpuLayersOption.js +183 -0
- package/dist/gguf/insights/utils/resolveModelGpuLayersOption.js.map +1 -0
- package/dist/gguf/insights/utils/scoreLevels.d.ts +5 -0
- package/dist/gguf/insights/utils/scoreLevels.js +16 -0
- package/dist/gguf/insights/utils/scoreLevels.js.map +1 -0
- package/dist/gguf/parser/GgufV2Parser.d.ts +20 -0
- package/dist/gguf/parser/GgufV2Parser.js +156 -0
- package/dist/gguf/parser/GgufV2Parser.js.map +1 -0
- package/dist/gguf/parser/GgufV3Parser.d.ts +3 -0
- package/dist/gguf/parser/GgufV3Parser.js +4 -0
- package/dist/gguf/parser/GgufV3Parser.js.map +1 -0
- package/dist/gguf/parser/parseGguf.d.ts +8 -0
- package/dist/gguf/parser/parseGguf.js +61 -0
- package/dist/gguf/parser/parseGguf.js.map +1 -0
- package/dist/gguf/readGgufFileInfo.d.ts +45 -0
- package/dist/gguf/readGgufFileInfo.js +71 -0
- package/dist/gguf/readGgufFileInfo.js.map +1 -0
- package/dist/gguf/types/GgufFileInfoTypes.d.ts +84 -0
- package/dist/gguf/types/GgufFileInfoTypes.js +18 -0
- package/dist/gguf/types/GgufFileInfoTypes.js.map +1 -0
- package/dist/gguf/types/GgufMetadataTypes.d.ts +372 -0
- package/dist/gguf/types/GgufMetadataTypes.js +114 -0
- package/dist/gguf/types/GgufMetadataTypes.js.map +1 -0
- package/dist/gguf/types/GgufTensorInfoTypes.d.ts +37 -0
- package/dist/gguf/types/GgufTensorInfoTypes.js +33 -0
- package/dist/gguf/types/GgufTensorInfoTypes.js.map +1 -0
- package/dist/gguf/utils/GgufReadOffset.d.ts +6 -0
- package/dist/gguf/utils/GgufReadOffset.js +18 -0
- package/dist/gguf/utils/GgufReadOffset.js.map +1 -0
- package/dist/gguf/utils/convertMetadataKeyValueRecordToNestedObject.d.ts +6 -0
- package/dist/gguf/utils/convertMetadataKeyValueRecordToNestedObject.js +76 -0
- package/dist/gguf/utils/convertMetadataKeyValueRecordToNestedObject.js.map +1 -0
- package/dist/gguf/utils/getGgufFileTypeName.d.ts +4 -0
- package/dist/gguf/utils/getGgufFileTypeName.js +13 -0
- package/dist/gguf/utils/getGgufFileTypeName.js.map +1 -0
- package/dist/gguf/utils/getGgufMetadataArchitectureData.d.ts +3 -0
- package/dist/gguf/utils/getGgufMetadataArchitectureData.js +4 -0
- package/dist/gguf/utils/getGgufMetadataArchitectureData.js.map +1 -0
- package/dist/gguf/utils/normalizeGgufDownloadUrl.d.ts +1 -0
- package/dist/gguf/utils/normalizeGgufDownloadUrl.js +16 -0
- package/dist/gguf/utils/normalizeGgufDownloadUrl.js.map +1 -0
- package/dist/gguf/utils/resolveBinarySplitGgufPartUrls.d.ts +2 -0
- package/dist/gguf/utils/resolveBinarySplitGgufPartUrls.js +39 -0
- package/dist/gguf/utils/resolveBinarySplitGgufPartUrls.js.map +1 -0
- package/dist/gguf/utils/resolveSplitGgufParts.d.ts +7 -0
- package/dist/gguf/utils/resolveSplitGgufParts.js +55 -0
- package/dist/gguf/utils/resolveSplitGgufParts.js.map +1 -0
- package/dist/index.d.ts +39 -14
- package/dist/index.js +29 -8
- package/dist/index.js.map +1 -1
- package/dist/state.d.ts +2 -0
- package/dist/state.js +7 -0
- package/dist/state.js.map +1 -1
- package/dist/tsconfig.tsbuildinfo +1 -0
- package/dist/types.d.ts +131 -5
- package/dist/types.js.map +1 -1
- package/dist/utils/DisposeGuard.d.ts +13 -0
- package/dist/utils/DisposeGuard.js +120 -0
- package/dist/utils/DisposeGuard.js.map +1 -0
- package/dist/utils/InsufficientMemoryError.d.ts +3 -0
- package/dist/utils/InsufficientMemoryError.js +6 -0
- package/dist/utils/InsufficientMemoryError.js.map +1 -0
- package/dist/utils/LlamaText.d.ts +73 -26
- package/dist/utils/LlamaText.js +475 -157
- package/dist/utils/LlamaText.js.map +1 -1
- package/dist/utils/LruCache.d.ts +12 -0
- package/dist/utils/LruCache.js +44 -0
- package/dist/utils/LruCache.js.map +1 -0
- package/dist/utils/OverridesObject.d.ts +7 -0
- package/dist/utils/OverridesObject.js +2 -0
- package/dist/utils/OverridesObject.js.map +1 -0
- package/dist/utils/ReplHistory.js +5 -1
- package/dist/utils/ReplHistory.js.map +1 -1
- package/dist/utils/StopGenerationDetector.d.ts +27 -8
- package/dist/utils/StopGenerationDetector.js +108 -22
- package/dist/utils/StopGenerationDetector.js.map +1 -1
- package/dist/utils/ThreadsSplitter.d.ts +32 -0
- package/dist/utils/ThreadsSplitter.js +177 -0
- package/dist/utils/ThreadsSplitter.js.map +1 -0
- package/dist/utils/TokenStreamRegulator.d.ts +10 -4
- package/dist/utils/TokenStreamRegulator.js +102 -10
- package/dist/utils/TokenStreamRegulator.js.map +1 -1
- package/dist/utils/UnsupportedError.d.ts +2 -0
- package/dist/utils/UnsupportedError.js +7 -0
- package/dist/utils/UnsupportedError.js.map +1 -0
- package/dist/utils/appendUserMessageToChatHistory.d.ts +4 -0
- package/dist/utils/appendUserMessageToChatHistory.js +4 -0
- package/dist/utils/appendUserMessageToChatHistory.js.map +1 -1
- package/dist/utils/clearTempFolder.js.map +1 -1
- package/dist/utils/cmake.js +23 -10
- package/dist/utils/cmake.js.map +1 -1
- package/dist/utils/compareTokens.d.ts +1 -1
- package/dist/utils/compareTokens.js.map +1 -1
- package/dist/utils/createModelDownloader.d.ts +199 -0
- package/dist/utils/createModelDownloader.js +405 -0
- package/dist/utils/createModelDownloader.js.map +1 -0
- package/dist/utils/findBestOption.d.ts +4 -0
- package/dist/utils/findBestOption.js +15 -0
- package/dist/utils/findBestOption.js.map +1 -0
- package/dist/utils/findCharacterRemovalCountToFitChatHistoryInContext.d.ts +1 -0
- package/dist/utils/findCharacterRemovalCountToFitChatHistoryInContext.js +23 -12
- package/dist/utils/findCharacterRemovalCountToFitChatHistoryInContext.js.map +1 -1
- package/dist/utils/gbnfJson/GbnfGrammarGenerator.js.map +1 -1
- package/dist/utils/gbnfJson/getGbnfGrammarForGbnfJsonSchema.d.ts +5 -0
- package/dist/utils/gbnfJson/getGbnfGrammarForGbnfJsonSchema.js +11 -0
- package/dist/utils/gbnfJson/getGbnfGrammarForGbnfJsonSchema.js.map +1 -0
- package/dist/utils/gbnfJson/terminals/GbnfArray.d.ts +3 -1
- package/dist/utils/gbnfJson/terminals/GbnfArray.js +10 -5
- package/dist/utils/gbnfJson/terminals/GbnfArray.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfBoolean.d.ts +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfBoolean.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfBooleanValue.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfGrammar.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfNull.d.ts +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfNull.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfNumber.d.ts +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfNumber.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfNumberValue.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfObjectMap.d.ts +3 -1
- package/dist/utils/gbnfJson/terminals/GbnfObjectMap.js +9 -4
- package/dist/utils/gbnfJson/terminals/GbnfObjectMap.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfOr.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfRepetition.d.ts +9 -0
- package/dist/utils/gbnfJson/terminals/GbnfRepetition.js +37 -0
- package/dist/utils/gbnfJson/terminals/GbnfRepetition.js.map +1 -0
- package/dist/utils/gbnfJson/terminals/GbnfString.d.ts +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfString.js +23 -5
- package/dist/utils/gbnfJson/terminals/GbnfString.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfStringValue.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfVerbatimText.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfWhitespace.d.ts +7 -4
- package/dist/utils/gbnfJson/terminals/GbnfWhitespace.js +37 -9
- package/dist/utils/gbnfJson/terminals/GbnfWhitespace.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/gbnfConsts.d.ts +5 -4
- package/dist/utils/gbnfJson/terminals/gbnfConsts.js +14 -3
- package/dist/utils/gbnfJson/terminals/gbnfConsts.js.map +1 -1
- package/dist/utils/gbnfJson/types.d.ts +3 -0
- package/dist/utils/gbnfJson/types.js.map +1 -1
- package/dist/utils/gbnfJson/utils/GbnfJsonScopeState.d.ts +10 -0
- package/dist/utils/gbnfJson/utils/GbnfJsonScopeState.js +15 -0
- package/dist/utils/gbnfJson/utils/GbnfJsonScopeState.js.map +1 -0
- package/dist/utils/gbnfJson/utils/getGbnfJsonTerminalForGbnfJsonSchema.d.ts +2 -1
- package/dist/utils/gbnfJson/utils/getGbnfJsonTerminalForGbnfJsonSchema.js +6 -5
- package/dist/utils/gbnfJson/utils/getGbnfJsonTerminalForGbnfJsonSchema.js.map +1 -1
- package/dist/utils/gbnfJson/utils/validateObjectAgainstGbnfSchema.js +3 -3
- package/dist/utils/gbnfJson/utils/validateObjectAgainstGbnfSchema.js.map +1 -1
- package/dist/utils/getBuildDefaults.d.ts +1 -2
- package/dist/utils/getBuildDefaults.js +2 -3
- package/dist/utils/getBuildDefaults.js.map +1 -1
- package/dist/utils/getConsoleLogPrefix.d.ts +1 -1
- package/dist/utils/getConsoleLogPrefix.js +5 -4
- package/dist/utils/getConsoleLogPrefix.js.map +1 -1
- package/dist/utils/getGrammarsFolder.js +1 -1
- package/dist/utils/getGrammarsFolder.js.map +1 -1
- package/dist/utils/getQueuedTokensBeforeStopTrigger.d.ts +6 -0
- package/dist/utils/getQueuedTokensBeforeStopTrigger.js +22 -0
- package/dist/utils/getQueuedTokensBeforeStopTrigger.js.map +1 -0
- package/dist/utils/getReadableContextSize.d.ts +1 -0
- package/dist/utils/getReadableContextSize.js +7 -0
- package/dist/utils/getReadableContextSize.js.map +1 -0
- package/dist/utils/getTypeScriptTypeStringForGbnfJsonSchema.js +15 -11
- package/dist/utils/getTypeScriptTypeStringForGbnfJsonSchema.js.map +1 -1
- package/dist/utils/gitReleaseBundles.js +68 -1
- package/dist/utils/gitReleaseBundles.js.map +1 -1
- package/dist/utils/isToken.d.ts +2 -0
- package/dist/utils/isToken.js +4 -0
- package/dist/utils/isToken.js.map +1 -0
- package/dist/utils/isUrl.d.ts +1 -0
- package/dist/utils/isUrl.js +15 -0
- package/dist/utils/isUrl.js.map +1 -0
- package/dist/utils/mergeUnionTypes.d.ts +10 -0
- package/dist/utils/mergeUnionTypes.js +2 -0
- package/dist/utils/mergeUnionTypes.js.map +1 -0
- package/dist/utils/modelFileAccesTokens.d.ts +4 -0
- package/dist/utils/modelFileAccesTokens.js +40 -0
- package/dist/utils/modelFileAccesTokens.js.map +1 -0
- package/dist/utils/parseModelFileName.d.ts +1 -0
- package/dist/utils/parseModelFileName.js +6 -1
- package/dist/utils/parseModelFileName.js.map +1 -1
- package/dist/utils/parseTextTemplate.d.ts +66 -0
- package/dist/utils/parseTextTemplate.js +116 -0
- package/dist/utils/parseTextTemplate.js.map +1 -0
- package/dist/utils/prettyPrintObject.d.ts +10 -1
- package/dist/utils/prettyPrintObject.js +61 -15
- package/dist/utils/prettyPrintObject.js.map +1 -1
- package/dist/utils/pushAll.d.ts +6 -0
- package/dist/utils/pushAll.js +11 -0
- package/dist/utils/pushAll.js.map +1 -0
- package/dist/utils/removeNullFields.d.ts +2 -2
- package/dist/utils/removeNullFields.js.map +1 -1
- package/dist/utils/resolveGithubRelease.d.ts +2 -2
- package/dist/utils/resolveGithubRelease.js.map +1 -1
- package/dist/utils/resolveLastTokens.d.ts +2 -0
- package/dist/utils/resolveLastTokens.js +12 -0
- package/dist/utils/resolveLastTokens.js.map +1 -0
- package/dist/utils/runtime.d.ts +4 -0
- package/dist/utils/runtime.js +8 -0
- package/dist/utils/runtime.js.map +1 -0
- package/dist/utils/safeEventCallback.d.ts +6 -0
- package/dist/utils/safeEventCallback.js +29 -0
- package/dist/utils/safeEventCallback.js.map +1 -0
- package/dist/utils/spawnCommand.d.ts +11 -2
- package/dist/utils/spawnCommand.js +55 -7
- package/dist/utils/spawnCommand.js.map +1 -1
- package/dist/utils/tokenizeInput.d.ts +1 -1
- package/dist/utils/tokenizeInput.js +6 -3
- package/dist/utils/tokenizeInput.js.map +1 -1
- package/dist/utils/transformPromisable.d.ts +40 -0
- package/dist/utils/transformPromisable.js +64 -0
- package/dist/utils/transformPromisable.js.map +1 -0
- package/dist/utils/truncateTextAndRoundToWords.d.ts +2 -0
- package/dist/utils/truncateTextAndRoundToWords.js +32 -0
- package/dist/utils/truncateTextAndRoundToWords.js.map +1 -1
- package/dist/utils/utilTypes.d.ts +3 -0
- package/dist/utils/utilTypes.js +2 -0
- package/dist/utils/utilTypes.js.map +1 -0
- package/dist/utils/waitForLockfileRelease.js.map +1 -1
- package/dist/utils/withLockfile.js.map +1 -1
- package/dist/utils/withOra.d.ts +2 -0
- package/dist/utils/withOra.js +16 -6
- package/dist/utils/withOra.js.map +1 -1
- package/dist/utils/withProgressLog.d.ts +22 -0
- package/dist/utils/withProgressLog.js +211 -0
- package/dist/utils/withProgressLog.js.map +1 -0
- package/dist/utils/withStatusLogs.js +1 -1
- package/dist/utils/withStatusLogs.js.map +1 -1
- package/dist/utils/wrapAbortSignal.d.ts +1 -0
- package/dist/utils/wrapAbortSignal.js +9 -0
- package/dist/utils/wrapAbortSignal.js.map +1 -0
- package/llama/CMakeLists.txt +134 -5
- package/llama/addon/AddonContext.cpp +629 -0
- package/llama/addon/AddonContext.h +52 -0
- package/llama/addon/AddonGrammar.cpp +39 -0
- package/llama/addon/AddonGrammar.h +19 -0
- package/llama/addon/AddonGrammarEvaluationState.cpp +25 -0
- package/llama/addon/AddonGrammarEvaluationState.h +17 -0
- package/llama/addon/AddonModel.cpp +672 -0
- package/llama/addon/AddonModel.h +61 -0
- package/llama/addon/AddonModelData.cpp +25 -0
- package/llama/addon/AddonModelData.h +15 -0
- package/llama/addon/AddonModelLora.cpp +105 -0
- package/llama/addon/AddonModelLora.h +28 -0
- package/llama/addon/AddonSampler.cpp +513 -0
- package/llama/addon/AddonSampler.h +65 -0
- package/llama/addon/RingBuffer.h +109 -0
- package/llama/addon/addon.cpp +223 -0
- package/llama/addon/addonGlobals.cpp +22 -0
- package/llama/addon/addonGlobals.h +12 -0
- package/llama/addon/globals/addonLog.cpp +136 -0
- package/llama/addon/globals/addonLog.h +21 -0
- package/llama/addon/globals/addonProgress.cpp +15 -0
- package/llama/addon/globals/addonProgress.h +15 -0
- package/llama/addon/globals/getGpuInfo.cpp +108 -0
- package/llama/addon/globals/getGpuInfo.h +6 -0
- package/llama/binariesGithubRelease.json +1 -1
- package/llama/gitRelease.bundle +0 -0
- package/llama/gpuInfo/cuda-gpu-info.cu +120 -0
- package/llama/gpuInfo/cuda-gpu-info.h +10 -0
- package/llama/gpuInfo/metal-gpu-info.h +8 -0
- package/llama/gpuInfo/metal-gpu-info.mm +30 -0
- package/llama/gpuInfo/vulkan-gpu-info.cpp +83 -0
- package/llama/gpuInfo/vulkan-gpu-info.h +9 -0
- package/llama/grammars/README.md +297 -6
- package/llama/grammars/json.gbnf +4 -4
- package/llama/grammars/json_arr.gbnf +4 -4
- package/llama/llama.cpp.info.json +1 -1
- package/llama/toolchains/win32.host-x64.target-arm64.cmake +41 -0
- package/package.json +109 -59
- package/templates/packed/electron-typescript-react.json +1 -0
- package/templates/packed/node-typescript.json +1 -0
- package/dist/AbortError.d.ts +0 -2
- package/dist/AbortError.js +0 -7
- package/dist/AbortError.js.map +0 -1
- package/dist/chatWrappers/LlamaChatWrapper.d.ts +0 -13
- package/dist/chatWrappers/LlamaChatWrapper.js.map +0 -1
- package/dist/chatWrappers/resolveChatWrapperBasedOnModel.d.ts +0 -13
- package/dist/chatWrappers/resolveChatWrapperBasedOnModel.js +0 -57
- package/dist/chatWrappers/resolveChatWrapperBasedOnModel.js.map +0 -1
- package/dist/cli/commands/BuildCommand.d.ts +0 -11
- package/dist/cli/commands/BuildCommand.js +0 -106
- package/dist/cli/commands/BuildCommand.js.map +0 -1
- package/dist/cli/commands/ClearCommand.js.map +0 -1
- package/dist/cli/commands/DownloadCommand.js +0 -169
- package/dist/cli/commands/DownloadCommand.js.map +0 -1
- package/dist/evaluator/LlamaChat/utils/FunctionCallGrammar.d.ts +0 -22
- package/dist/evaluator/LlamaChat/utils/FunctionCallGrammar.js +0 -121
- package/dist/evaluator/LlamaChat/utils/FunctionCallGrammar.js.map +0 -1
- package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/firstInFirstOutStrategy.js.map +0 -1
- package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/maximumParallelismStrategy.js.map +0 -1
- package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizingStrategy.d.ts +0 -2
- package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizingStrategy.js.map +0 -1
- package/dist/evaluator/LlamaModel.d.ts +0 -120
- package/dist/evaluator/LlamaModel.js +0 -320
- package/dist/evaluator/LlamaModel.js.map +0 -1
- package/dist/utils/getGbnfGrammarForGbnfJsonSchema.d.ts +0 -2
- package/dist/utils/getGbnfGrammarForGbnfJsonSchema.js +0 -9
- package/dist/utils/getGbnfGrammarForGbnfJsonSchema.js.map +0 -1
- package/dist/utils/parseModelTypeDescription.d.ts +0 -6
- package/dist/utils/parseModelTypeDescription.js +0 -9
- package/dist/utils/parseModelTypeDescription.js.map +0 -1
- package/dist/utils/resolveChatWrapper.d.ts +0 -4
- package/dist/utils/resolveChatWrapper.js +0 -16
- package/dist/utils/resolveChatWrapper.js.map +0 -1
- package/llama/addon.cpp +0 -950
- package/llamaBins/linux-arm64/.buildMetadata.json +0 -1
- package/llamaBins/linux-arm64/llama-addon.node +0 -0
- package/llamaBins/linux-armv7l/.buildMetadata.json +0 -1
- package/llamaBins/linux-armv7l/llama-addon.node +0 -0
- package/llamaBins/linux-x64/.buildMetadata.json +0 -1
- package/llamaBins/linux-x64/llama-addon.node +0 -0
- package/llamaBins/linux-x64-cuda/.buildMetadata.json +0 -1
- package/llamaBins/linux-x64-cuda/llama-addon.node +0 -0
- package/llamaBins/mac-arm64-metal/.buildMetadata.json +0 -1
- package/llamaBins/mac-arm64-metal/ggml-metal.metal +0 -6119
- package/llamaBins/mac-arm64-metal/llama-addon.node +0 -0
- package/llamaBins/mac-x64/.buildMetadata.json +0 -1
- package/llamaBins/mac-x64/llama-addon.node +0 -0
- package/llamaBins/win-x64/.buildMetadata.json +0 -1
- package/llamaBins/win-x64/llama-addon.exp +0 -0
- package/llamaBins/win-x64/llama-addon.lib +0 -0
- package/llamaBins/win-x64/llama-addon.node +0 -0
- package/llamaBins/win-x64-cuda/.buildMetadata.json +0 -1
- package/llamaBins/win-x64-cuda/llama-addon.exp +0 -0
- package/llamaBins/win-x64-cuda/llama-addon.lib +0 -0
- package/llamaBins/win-x64-cuda/llama-addon.node +0 -0
- /package/dist/evaluator/LlamaContext/utils/{batchItemsPrioritizingStrategies → batchItemsPrioritizationStrategies}/firstInFirstOutStrategy.d.ts +0 -0
- /package/dist/evaluator/LlamaContext/utils/{batchItemsPrioritizingStrategies → batchItemsPrioritizationStrategies}/firstInFirstOutStrategy.js +0 -0
- /package/dist/evaluator/LlamaContext/utils/{batchItemsPrioritizingStrategies → batchItemsPrioritizationStrategies}/maximumParallelismStrategy.d.ts +0 -0
- /package/dist/evaluator/LlamaContext/utils/{batchItemsPrioritizingStrategies → batchItemsPrioritizationStrategies}/maximumParallelismStrategy.js +0 -0
|
@@ -1,66 +1,113 @@
|
|
|
1
|
-
import { DisposeAggregator, EventRelay, withLock
|
|
1
|
+
import { AsyncDisposeAggregator, DisposeAggregator, DisposedError, EventRelay, withLock } from "lifecycle-utils";
|
|
2
2
|
import { removeNullFields } from "../../utils/removeNullFields.js";
|
|
3
3
|
import { compareTokens } from "../../utils/compareTokens.js";
|
|
4
|
-
import {
|
|
4
|
+
import { DisposeGuard } from "../../utils/DisposeGuard.js";
|
|
5
|
+
import { TokenMeter } from "../TokenMeter.js";
|
|
6
|
+
import { UnsupportedError } from "../../utils/UnsupportedError.js";
|
|
7
|
+
import { resolveBatchItemsPrioritizationStrategy } from "./utils/resolveBatchItemsPrioritizationStrategy.js";
|
|
8
|
+
import { LlamaSampler } from "./LlamaSampler.js";
|
|
9
|
+
const defaultLoraScale = 1;
|
|
10
|
+
const shrinkRetriesMinContextSize = 4096;
|
|
11
|
+
const defaultMaxPunishTokens = 64;
|
|
12
|
+
const defaultFailedCreationRemedy = {
|
|
13
|
+
retries: 6,
|
|
14
|
+
autoContextSizeShrink: 0.16
|
|
15
|
+
};
|
|
5
16
|
export class LlamaContext {
|
|
6
17
|
/** @internal */ _llama;
|
|
7
18
|
/** @internal */ _ctx;
|
|
8
19
|
/** @internal */ _onReclaimUnusedSequenceId = new EventRelay();
|
|
20
|
+
/** @internal */ _backendContextDisposeGuard;
|
|
9
21
|
/** @internal */ _model;
|
|
10
22
|
/** @internal */ _contextSize;
|
|
11
23
|
/** @internal */ _batchSize;
|
|
24
|
+
/** @internal */ _flashAttention;
|
|
25
|
+
/** @internal */ _idealThreads;
|
|
26
|
+
/** @internal */ _minThreads;
|
|
27
|
+
/** @internal */ _performanceTracking;
|
|
12
28
|
/** @internal */ _totalSequences;
|
|
13
29
|
/** @internal */ _unusedSequenceIds = [];
|
|
14
30
|
/** @internal */ _batchingOptions;
|
|
15
31
|
/** @internal */ _queuedDecodeSequenceIds = new Set();
|
|
16
32
|
/** @internal */ _queuedDecodes = [];
|
|
17
|
-
/** @internal */ _disposeAggregator = new
|
|
33
|
+
/** @internal */ _disposeAggregator = new AsyncDisposeAggregator();
|
|
34
|
+
/** @internal */ _modelPreventDisposalHandle;
|
|
35
|
+
/** @internal */ _loraAdapters = new Set();
|
|
36
|
+
/** @internal */ _gcRegistry;
|
|
18
37
|
/** @internal */ _nextGeneratedSequenceId = 0;
|
|
19
38
|
/** @internal */ _dispatchDecodeScheduled = false;
|
|
20
39
|
/** @internal */ _batchDispatchPending = false;
|
|
40
|
+
/** @internal */ _threadSplitterConsumer;
|
|
41
|
+
/** @internal */ _freeReservedThreadsTimeout;
|
|
21
42
|
/** @internal */ _currentDispatchBatchHandle = {};
|
|
22
43
|
/** @internal */ _allocatedContextSize;
|
|
23
44
|
/** @internal */ _disposed = false;
|
|
24
45
|
onDispose = new EventRelay();
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
*/
|
|
28
|
-
constructor({ model, sequences = 1, seed = null, contextSize = model.trainContextSize, batchSize = contextSize, threads = 6, batching: { dispatchSchedule: batchingDispatchSchedule = "nextTick", itemsPrioritizingStrategy: batchingItemsPrioritizingStrategy = "maximumParallelism" } = {}, _embedding, _noSeed }) {
|
|
29
|
-
if (model.disposed)
|
|
46
|
+
constructor({ _model }, { sequences, contextSize, batchSize, flashAttention = _model.defaultContextFlashAttention, threads, batching: { dispatchSchedule: batchingDispatchSchedule = "nextTick", itemPrioritizationStrategy: batchingItemsPrioritizationStrategy = "maximumParallelism" } = {}, performanceTracking = false, _embeddings }) {
|
|
47
|
+
if (_model.disposed)
|
|
30
48
|
throw new DisposedError();
|
|
31
|
-
this._llama =
|
|
32
|
-
this._model =
|
|
49
|
+
this._llama = _model._llama;
|
|
50
|
+
this._model = _model;
|
|
51
|
+
this._backendContextDisposeGuard = new DisposeGuard([this._model._backendModelDisposeGuard]);
|
|
52
|
+
this._modelPreventDisposalHandle = this._model._backendModelDisposeGuard.createPreventDisposalHandle();
|
|
33
53
|
this._totalSequences = Math.max(1, Math.floor(sequences));
|
|
34
54
|
this._contextSize = Math.max(2, contextSize);
|
|
35
55
|
this._batchSize = Math.max(batchSize, this._totalSequences);
|
|
56
|
+
this._flashAttention = flashAttention;
|
|
57
|
+
this._idealThreads = typeof threads === "number"
|
|
58
|
+
? this._llama._threadsSplitter.normalizeThreadsValue(threads)
|
|
59
|
+
: this._llama._threadsSplitter.normalizeThreadsValue(threads?.ideal ?? (this._llama.maxThreads === 0
|
|
60
|
+
? this._llama.cpuMathCores
|
|
61
|
+
: this._llama.maxThreads));
|
|
62
|
+
this._minThreads = Math.max(1, typeof threads === "number"
|
|
63
|
+
? 1
|
|
64
|
+
: this._llama._threadsSplitter.normalizeThreadsValue(threads?.min ?? 1));
|
|
65
|
+
this._performanceTracking = !!performanceTracking;
|
|
36
66
|
this._ctx = new this._llama._bindings.AddonContext(this._model._model, removeNullFields({
|
|
37
|
-
|
|
38
|
-
contextSize: contextSize * this._totalSequences,
|
|
67
|
+
contextSize: this._contextSize * this._totalSequences, // each sequence needs its own <contextSize> of cells
|
|
39
68
|
batchSize: this._batchSize,
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
69
|
+
sequences: this._totalSequences,
|
|
70
|
+
flashAttention: this._flashAttention,
|
|
71
|
+
threads: this._idealThreads,
|
|
72
|
+
embeddings: _embeddings,
|
|
73
|
+
performanceTracking: this._performanceTracking
|
|
43
74
|
}));
|
|
44
75
|
this._batchingOptions = {
|
|
45
76
|
dispatchSchedule: batchingDispatchSchedule,
|
|
46
|
-
|
|
77
|
+
itemPrioritizationStrategy: batchingItemsPrioritizationStrategy
|
|
47
78
|
};
|
|
79
|
+
this._gcRegistry = new FinalizationRegistry(this._model._removeLoraUsage);
|
|
80
|
+
this._gcRegistry.register(this, this._loraAdapters);
|
|
48
81
|
this._reclaimUnusedSequenceId = this._reclaimUnusedSequenceId.bind(this);
|
|
82
|
+
this._freeReservedThreads = this._freeReservedThreads.bind(this);
|
|
83
|
+
this._disposeAggregator.add(() => {
|
|
84
|
+
this._disposed = true;
|
|
85
|
+
});
|
|
86
|
+
this._disposeAggregator.add(() => void this._gcRegistry.unregister(this));
|
|
49
87
|
this._disposeAggregator.add(this._onReclaimUnusedSequenceId);
|
|
50
88
|
this._disposeAggregator.add(this.onDispose.dispatchEvent);
|
|
89
|
+
this._disposeAggregator.add(this.model.onDispose.createListener(disposeContextIfReferenced.bind(null, new WeakRef(this))));
|
|
51
90
|
this._disposeAggregator.add(() => {
|
|
52
|
-
this.
|
|
91
|
+
if (this._loraAdapters.size > 0) {
|
|
92
|
+
const loraAdapters = new Set(this._loraAdapters);
|
|
93
|
+
this._loraAdapters.clear();
|
|
94
|
+
return this._model._removeLoraUsage(loraAdapters);
|
|
95
|
+
}
|
|
96
|
+
});
|
|
97
|
+
this._disposeAggregator.add(async () => {
|
|
98
|
+
await this._backendContextDisposeGuard.acquireDisposeLock();
|
|
99
|
+
await this._ctx.dispose();
|
|
100
|
+
this._modelPreventDisposalHandle.dispose();
|
|
53
101
|
});
|
|
54
|
-
this._disposeAggregator.add(this.model.onDispose.createListener(disposeContextIfReferenced.bind(null, new WeakRef(this))));
|
|
55
102
|
}
|
|
56
|
-
dispose() {
|
|
103
|
+
async dispose() {
|
|
57
104
|
if (this._disposed)
|
|
58
105
|
return;
|
|
59
106
|
this._disposed = true;
|
|
60
|
-
this._disposeAggregator.dispose();
|
|
107
|
+
await this._disposeAggregator.dispose();
|
|
61
108
|
}
|
|
62
109
|
/** @hidden */
|
|
63
|
-
[Symbol.
|
|
110
|
+
[Symbol.asyncDispose]() {
|
|
64
111
|
return this.dispose();
|
|
65
112
|
}
|
|
66
113
|
get disposed() {
|
|
@@ -75,6 +122,30 @@ export class LlamaContext {
|
|
|
75
122
|
get batchSize() {
|
|
76
123
|
return this._batchSize;
|
|
77
124
|
}
|
|
125
|
+
get flashAttention() {
|
|
126
|
+
return this._flashAttention;
|
|
127
|
+
}
|
|
128
|
+
/**
|
|
129
|
+
* The actual size of the state in the memory in bytes.
|
|
130
|
+
* This value is provided by `llama.cpp` and doesn't include all the memory overhead of the context.
|
|
131
|
+
*/
|
|
132
|
+
get stateSize() {
|
|
133
|
+
this._ensureNotDisposed();
|
|
134
|
+
return this._ctx.getStateSize();
|
|
135
|
+
}
|
|
136
|
+
/** The number of threads currently used to evaluate tokens */
|
|
137
|
+
get currentThreads() {
|
|
138
|
+
this._ensureNotDisposed();
|
|
139
|
+
return this._ctx.getThreads();
|
|
140
|
+
}
|
|
141
|
+
/**
|
|
142
|
+
* The number of threads that are preferred to be used to evaluate tokens.
|
|
143
|
+
*
|
|
144
|
+
* The actual number of threads used may be lower when other evaluations are running in parallel.
|
|
145
|
+
*/
|
|
146
|
+
get idealThreads() {
|
|
147
|
+
return this._idealThreads;
|
|
148
|
+
}
|
|
78
149
|
getAllocatedContextSize() {
|
|
79
150
|
this._ensureNotDisposed();
|
|
80
151
|
if (this._allocatedContextSize == null)
|
|
@@ -90,9 +161,9 @@ export class LlamaContext {
|
|
|
90
161
|
/**
|
|
91
162
|
* Before calling this method, make sure to call `sequencesLeft` to check if there are any sequences left.
|
|
92
163
|
* When there are no sequences left, this method will throw an error.
|
|
93
|
-
* @param [options]
|
|
94
164
|
*/
|
|
95
|
-
getSequence(
|
|
165
|
+
getSequence(options = {}) {
|
|
166
|
+
const { contextShift: { size: contextShiftSize = Math.min(100, Math.ceil(this.contextSize / 2)), strategy: contextShiftStrategy = "eraseBeginning" } = {}, _tokenMeter } = options;
|
|
96
167
|
this._ensureNotDisposed();
|
|
97
168
|
const nextSequenceId = this._popSequenceId();
|
|
98
169
|
if (nextSequenceId == null)
|
|
@@ -100,6 +171,7 @@ export class LlamaContext {
|
|
|
100
171
|
return LlamaContextSequence._create({
|
|
101
172
|
sequenceId: nextSequenceId,
|
|
102
173
|
context: this,
|
|
174
|
+
tokenMeter: _tokenMeter,
|
|
103
175
|
contextShift: {
|
|
104
176
|
size: contextShiftSize,
|
|
105
177
|
strategy: contextShiftStrategy
|
|
@@ -116,17 +188,18 @@ export class LlamaContext {
|
|
|
116
188
|
this._currentDispatchBatchHandle = {};
|
|
117
189
|
this._dispatchDecodeScheduled = false;
|
|
118
190
|
this._batchDispatchPending = false;
|
|
119
|
-
let
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
191
|
+
let shouldHaveAnotherLoop = this._queuedDecodes.length > 0;
|
|
192
|
+
const resolvePrioritizationStrategy = () => {
|
|
193
|
+
try {
|
|
194
|
+
this._ensureNotDisposed();
|
|
195
|
+
return resolveBatchItemsPrioritizationStrategy(this._batchingOptions.itemPrioritizationStrategy);
|
|
196
|
+
}
|
|
197
|
+
catch (err) {
|
|
198
|
+
this._dispatchErrorForQueuedDecodesAndDequeue(new Set(this._queuedDecodes), err);
|
|
199
|
+
}
|
|
200
|
+
return null;
|
|
201
|
+
};
|
|
202
|
+
const getOrderedQueuedDecodes = (prioritizationStrategy) => {
|
|
130
203
|
const batchItemToQueuedDecodeMap = new Map();
|
|
131
204
|
const batchItemsList = [];
|
|
132
205
|
for (const queuedDecode of this._queuedDecodes) {
|
|
@@ -139,42 +212,65 @@ export class LlamaContext {
|
|
|
139
212
|
}
|
|
140
213
|
let prioritizedItems;
|
|
141
214
|
try {
|
|
142
|
-
prioritizedItems =
|
|
215
|
+
prioritizedItems = prioritizationStrategy({
|
|
143
216
|
items: batchItemsList,
|
|
144
217
|
size: this._batchSize
|
|
145
218
|
});
|
|
146
219
|
}
|
|
147
220
|
catch (err) {
|
|
148
221
|
this._dispatchErrorForQueuedDecodesAndDequeue(new Set(this._queuedDecodes), err);
|
|
149
|
-
return;
|
|
222
|
+
return null;
|
|
150
223
|
}
|
|
151
|
-
|
|
152
|
-
const afterDecodeActions = [];
|
|
153
|
-
const queuedDecodesToDelete = new Set();
|
|
154
|
-
const currentQueuedDecodeItems = new Set();
|
|
155
|
-
const currentBatchItems = [];
|
|
156
|
-
let currentBatchSize = 0;
|
|
157
|
-
for (const prioritizedItem of prioritizedItems) {
|
|
224
|
+
return prioritizedItems.map((prioritizedItem) => {
|
|
158
225
|
const queuedDecode = batchItemToQueuedDecodeMap.get(prioritizedItem.item);
|
|
159
226
|
if (queuedDecode == null)
|
|
160
227
|
throw new Error("Received invalid batch item. Make sure you keep the original object reference " +
|
|
161
228
|
"of the batch item on `item` on `PrioritizedBatchItem` in your custom prioritization strategy");
|
|
162
|
-
|
|
163
|
-
|
|
229
|
+
return {
|
|
230
|
+
queuedDecode,
|
|
231
|
+
processAmount: prioritizedItem.processAmount
|
|
232
|
+
};
|
|
233
|
+
});
|
|
234
|
+
};
|
|
235
|
+
const fitQueuedDecodesToABatch = (queuedDecodes, batchSize) => {
|
|
236
|
+
const currentBatchItems = [];
|
|
237
|
+
let currentBatchSize = 0;
|
|
238
|
+
let batchTokenSlotsLeft = batchSize;
|
|
239
|
+
for (const { queuedDecode, processAmount } of queuedDecodes) {
|
|
240
|
+
const resolvedProcessAmount = Math.min(processAmount <= 0 ? 1 : processAmount, queuedDecode.tokens.length, batchTokenSlotsLeft);
|
|
241
|
+
if (resolvedProcessAmount <= 0) {
|
|
242
|
+
if (batchTokenSlotsLeft === 0)
|
|
243
|
+
break;
|
|
164
244
|
continue;
|
|
165
|
-
|
|
245
|
+
}
|
|
246
|
+
batchTokenSlotsLeft -= resolvedProcessAmount;
|
|
247
|
+
currentBatchSize += resolvedProcessAmount;
|
|
166
248
|
currentBatchItems.push({
|
|
167
249
|
queuedDecode,
|
|
168
|
-
processAmount
|
|
250
|
+
processAmount: resolvedProcessAmount
|
|
169
251
|
});
|
|
170
|
-
currentBatchSize += processAmount;
|
|
171
252
|
}
|
|
253
|
+
return {
|
|
254
|
+
currentBatchItems,
|
|
255
|
+
currentBatchSize
|
|
256
|
+
};
|
|
257
|
+
};
|
|
258
|
+
const decodeTokenBatchItems = async (batchItems, currentBatchSize) => {
|
|
259
|
+
const afterDecodeActions = [];
|
|
260
|
+
const queuedDecodesToDelete = new Set();
|
|
261
|
+
const currentQueuedDecodeItems = new Set();
|
|
172
262
|
if (currentBatchSize !== 0)
|
|
173
263
|
this._ctx.initBatch(currentBatchSize);
|
|
174
|
-
for (const { queuedDecode, processAmount } of
|
|
264
|
+
for (const { queuedDecode, processAmount } of batchItems) {
|
|
175
265
|
let batchLogitIndex;
|
|
176
266
|
try {
|
|
177
|
-
|
|
267
|
+
const shouldGenerateLogitAtTheEnd = queuedDecode.generateLogitAtTheEnd &&
|
|
268
|
+
processAmount === queuedDecode.tokens.length;
|
|
269
|
+
const tokensToProcess = queuedDecode.tokens.slice(0, processAmount);
|
|
270
|
+
const numberOfOutputTokens = shouldGenerateLogitAtTheEnd ? 1 : 0;
|
|
271
|
+
TokenMeter.useTokens(queuedDecode.tokenMeter, Math.max(0, tokensToProcess.length - numberOfOutputTokens), "input");
|
|
272
|
+
TokenMeter.useTokens(queuedDecode.tokenMeter, numberOfOutputTokens, "output");
|
|
273
|
+
batchLogitIndex = this._ctx.addToBatch(queuedDecode.sequenceId, queuedDecode.firstTokenSequenceIndex, Uint32Array.from(tokensToProcess), shouldGenerateLogitAtTheEnd);
|
|
178
274
|
}
|
|
179
275
|
catch (err) {
|
|
180
276
|
this._dispatchErrorForQueuedDecodesAndDequeue(new Set([queuedDecode]), err);
|
|
@@ -193,8 +289,6 @@ export class LlamaContext {
|
|
|
193
289
|
queuedDecode.tokens = queuedDecode.tokens.slice(processAmount);
|
|
194
290
|
queuedDecode.firstTokenSequenceIndex += processAmount;
|
|
195
291
|
}
|
|
196
|
-
if (batchTokenSlotsLeft === 0)
|
|
197
|
-
break;
|
|
198
292
|
}
|
|
199
293
|
for (let i = 0; i < this._queuedDecodes.length; i++) {
|
|
200
294
|
const queuedDecode = this._queuedDecodes[i];
|
|
@@ -204,14 +298,22 @@ export class LlamaContext {
|
|
|
204
298
|
i--;
|
|
205
299
|
}
|
|
206
300
|
}
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
301
|
+
if (currentBatchSize !== 0) {
|
|
302
|
+
const allocationResult = this._threadSplitterConsumer?.getAllocationToConsume();
|
|
303
|
+
const [threadsToUse, consumerHandle] = allocationResult instanceof Promise
|
|
304
|
+
? await allocationResult ?? []
|
|
305
|
+
: allocationResult ?? [];
|
|
306
|
+
try {
|
|
307
|
+
if (threadsToUse != null)
|
|
308
|
+
this._ctx.setThreads(threadsToUse);
|
|
210
309
|
await this._ctx.decodeBatch();
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
310
|
+
consumerHandle?.dispose();
|
|
311
|
+
}
|
|
312
|
+
catch (err) {
|
|
313
|
+
consumerHandle?.dispose();
|
|
314
|
+
this._dispatchErrorForQueuedDecodesAndDequeue(currentQueuedDecodeItems, err);
|
|
315
|
+
return;
|
|
316
|
+
}
|
|
215
317
|
}
|
|
216
318
|
for (const action of afterDecodeActions) {
|
|
217
319
|
const [accept, reject] = action.response;
|
|
@@ -225,15 +327,56 @@ export class LlamaContext {
|
|
|
225
327
|
}
|
|
226
328
|
accept(undefined);
|
|
227
329
|
}
|
|
330
|
+
};
|
|
331
|
+
const prioritizationStrategy = resolvePrioritizationStrategy();
|
|
332
|
+
if (prioritizationStrategy == null)
|
|
333
|
+
return; // all queued items are rejected and dequeued when we get here
|
|
334
|
+
this._reserveThreads();
|
|
335
|
+
try {
|
|
336
|
+
while (shouldHaveAnotherLoop) {
|
|
337
|
+
const orderedQueuedDecodes = getOrderedQueuedDecodes(prioritizationStrategy);
|
|
338
|
+
if (orderedQueuedDecodes == null)
|
|
339
|
+
return; // all queued items are rejected and dequeued when we get here
|
|
340
|
+
const { currentBatchItems, currentBatchSize } = fitQueuedDecodesToABatch(orderedQueuedDecodes, this._batchSize);
|
|
341
|
+
let preventDisposalHandle;
|
|
342
|
+
try {
|
|
343
|
+
preventDisposalHandle = this._backendContextDisposeGuard.createPreventDisposalHandle();
|
|
344
|
+
}
|
|
345
|
+
catch (err) {
|
|
346
|
+
this._dispatchErrorForQueuedDecodesAndDequeue(new Set(this._queuedDecodes), err);
|
|
347
|
+
return;
|
|
348
|
+
}
|
|
349
|
+
try {
|
|
350
|
+
await decodeTokenBatchItems(currentBatchItems, currentBatchSize);
|
|
351
|
+
shouldHaveAnotherLoop = this._queuedDecodes.length > 0;
|
|
352
|
+
}
|
|
353
|
+
finally {
|
|
354
|
+
preventDisposalHandle.dispose();
|
|
355
|
+
}
|
|
356
|
+
}
|
|
357
|
+
}
|
|
358
|
+
finally {
|
|
359
|
+
this._scheduleToFreeReservedThreads();
|
|
228
360
|
}
|
|
229
361
|
});
|
|
230
362
|
}
|
|
363
|
+
/**
|
|
364
|
+
* Print the timings of token evaluation since that last print for this context.
|
|
365
|
+
*
|
|
366
|
+
* Requires the `performanceTracking` option to be enabled.
|
|
367
|
+
*
|
|
368
|
+
* > **Note:** it prints on the `LlamaLogLevel.info` level, so if you set the level of your `Llama` instance higher than that,
|
|
369
|
+
* it won't print anything.
|
|
370
|
+
*/
|
|
231
371
|
async printTimings() {
|
|
372
|
+
this._ensureNotDisposed();
|
|
373
|
+
if (!this._performanceTracking)
|
|
374
|
+
throw new UnsupportedError("Performance tracking is not enabled");
|
|
232
375
|
this._ctx.printTimings();
|
|
233
376
|
await new Promise((accept) => setTimeout(accept, 0)); // wait for the logs to finish printing
|
|
234
377
|
}
|
|
235
378
|
/** @internal */
|
|
236
|
-
async _decodeTokens({ sequenceId, firstTokenSequenceIndex, tokens, generateLogitAtTheEnd = false, evaluationPriority = 5 }, onDone) {
|
|
379
|
+
async _decodeTokens({ sequenceId, firstTokenSequenceIndex, tokens, generateLogitAtTheEnd = false, evaluationPriority = 5, tokenMeter }, onDone) {
|
|
237
380
|
return await new Promise((accept, reject) => {
|
|
238
381
|
this._queuedDecodes.push({
|
|
239
382
|
sequenceId,
|
|
@@ -241,6 +384,7 @@ export class LlamaContext {
|
|
|
241
384
|
firstTokenSequenceIndex,
|
|
242
385
|
generateLogitAtTheEnd,
|
|
243
386
|
evaluationPriority,
|
|
387
|
+
tokenMeter,
|
|
244
388
|
response: [accept, reject],
|
|
245
389
|
onDone
|
|
246
390
|
});
|
|
@@ -253,16 +397,14 @@ export class LlamaContext {
|
|
|
253
397
|
if (this._disposed)
|
|
254
398
|
return;
|
|
255
399
|
void withLock(this, "context", async () => {
|
|
400
|
+
if (this._disposed)
|
|
401
|
+
return;
|
|
256
402
|
this._ctx.disposeSequence(sequenceId);
|
|
257
403
|
this._unusedSequenceIds.push(sequenceId);
|
|
258
404
|
this._onReclaimUnusedSequenceId.dispatchEvent();
|
|
259
405
|
});
|
|
260
406
|
}
|
|
261
407
|
/** @internal */
|
|
262
|
-
_acceptTokenOnGrammarEvaluationState(grammarEvaluationState, token) {
|
|
263
|
-
this._ctx.acceptGrammarEvaluationStateToken(grammarEvaluationState._state, token);
|
|
264
|
-
}
|
|
265
|
-
/** @internal */
|
|
266
408
|
_popSequenceId() {
|
|
267
409
|
if (this._unusedSequenceIds.length > 0)
|
|
268
410
|
return this._unusedSequenceIds.shift();
|
|
@@ -312,20 +454,177 @@ export class LlamaContext {
|
|
|
312
454
|
if (this._disposed)
|
|
313
455
|
throw new DisposedError();
|
|
314
456
|
}
|
|
457
|
+
/** @internal */
|
|
458
|
+
async _setLora({ filePath, scale }) {
|
|
459
|
+
const lora = await this._model._getOrLoadLora(filePath);
|
|
460
|
+
this._ctx.setLora(lora, scale ?? defaultLoraScale);
|
|
461
|
+
if (!this._loraAdapters.has(lora)) {
|
|
462
|
+
this._loraAdapters.add(lora);
|
|
463
|
+
lora.usages++;
|
|
464
|
+
}
|
|
465
|
+
}
|
|
466
|
+
/** @internal */
|
|
467
|
+
_reserveThreads() {
|
|
468
|
+
clearTimeout(this._freeReservedThreadsTimeout);
|
|
469
|
+
delete this._freeReservedThreadsTimeout;
|
|
470
|
+
if (this._threadSplitterConsumer != null)
|
|
471
|
+
return;
|
|
472
|
+
this._threadSplitterConsumer = this._llama._threadsSplitter.createConsumer(this._idealThreads, this._minThreads);
|
|
473
|
+
}
|
|
474
|
+
/** @internal */
|
|
475
|
+
_freeReservedThreads() {
|
|
476
|
+
clearTimeout(this._freeReservedThreadsTimeout);
|
|
477
|
+
delete this._freeReservedThreadsTimeout;
|
|
478
|
+
if (this._threadSplitterConsumer == null)
|
|
479
|
+
return;
|
|
480
|
+
this._threadSplitterConsumer.dispose();
|
|
481
|
+
delete this._threadSplitterConsumer;
|
|
482
|
+
}
|
|
483
|
+
/** @internal */
|
|
484
|
+
_scheduleToFreeReservedThreads() {
|
|
485
|
+
if (this._threadSplitterConsumer == null)
|
|
486
|
+
return;
|
|
487
|
+
clearTimeout(this._freeReservedThreadsTimeout);
|
|
488
|
+
this._freeReservedThreadsTimeout = setTimeout(this._freeReservedThreads, 0);
|
|
489
|
+
}
|
|
490
|
+
/** @internal */
|
|
491
|
+
static async _create(options, { _model }) {
|
|
492
|
+
const sequences = options.sequences ?? getDefaultContextSequences();
|
|
493
|
+
const flashAttention = _model.flashAttentionSupported
|
|
494
|
+
? Boolean(options.flashAttention ?? _model.defaultContextFlashAttention)
|
|
495
|
+
: false;
|
|
496
|
+
const loraOptions = typeof options.lora === "string"
|
|
497
|
+
? { adapters: [{ filePath: options.lora }] }
|
|
498
|
+
: options.lora;
|
|
499
|
+
let failedCreationRetries = options.failedCreationRemedy === false
|
|
500
|
+
? 0
|
|
501
|
+
: Math.max(0, options.failedCreationRemedy?.retries ?? defaultFailedCreationRemedy.retries);
|
|
502
|
+
const failedCreationAutoContextSizeShrink = options.failedCreationRemedy === false
|
|
503
|
+
? 0
|
|
504
|
+
: options.failedCreationRemedy?.autoContextSizeShrink ?? defaultFailedCreationRemedy.autoContextSizeShrink;
|
|
505
|
+
let contextSize = await _model.fileInsights.configurationResolver.resolveContextContextSize(options.contextSize, {
|
|
506
|
+
batchSize: options.batchSize,
|
|
507
|
+
sequences: sequences,
|
|
508
|
+
modelGpuLayers: _model.gpuLayers,
|
|
509
|
+
modelTrainContextSize: _model.trainContextSize,
|
|
510
|
+
flashAttention,
|
|
511
|
+
getVramState: () => _model._llama._vramOrchestrator.getMemoryState(),
|
|
512
|
+
llamaGpu: _model._llama.gpu,
|
|
513
|
+
ignoreMemorySafetyChecks: options.ignoreMemorySafetyChecks,
|
|
514
|
+
isEmbeddingContext: options._embeddings
|
|
515
|
+
});
|
|
516
|
+
const minContextSize = options.contextSize === "auto"
|
|
517
|
+
? shrinkRetriesMinContextSize
|
|
518
|
+
: (typeof options.contextSize === "object" && typeof options.contextSize.min === "number")
|
|
519
|
+
? options.contextSize.min
|
|
520
|
+
: typeof options.contextSize === "number"
|
|
521
|
+
? options.contextSize
|
|
522
|
+
: shrinkRetriesMinContextSize;
|
|
523
|
+
const { createSignal } = options;
|
|
524
|
+
async function createContext(contextSize) {
|
|
525
|
+
const batchSize = options.batchSize ?? getDefaultContextBatchSize({ contextSize, sequences });
|
|
526
|
+
const vramRequiredEstimate = _model.fileInsights.estimateContextResourceRequirements({
|
|
527
|
+
contextSize,
|
|
528
|
+
sequences,
|
|
529
|
+
isEmbeddingContext: options._embeddings,
|
|
530
|
+
modelGpuLayers: _model.gpuLayers,
|
|
531
|
+
batchSize,
|
|
532
|
+
flashAttention
|
|
533
|
+
}).gpuVram;
|
|
534
|
+
const context = new LlamaContext({ _model }, { ...options, contextSize, batchSize, sequences, flashAttention });
|
|
535
|
+
const contextCreationMemoryReservation = options.ignoreMemorySafetyChecks
|
|
536
|
+
? null
|
|
537
|
+
: _model._llama._vramOrchestrator.reserveMemory(vramRequiredEstimate);
|
|
538
|
+
try {
|
|
539
|
+
if (createSignal?.aborted)
|
|
540
|
+
throw createSignal.reason;
|
|
541
|
+
const contextLoaded = await context._ctx.init();
|
|
542
|
+
if (createSignal?.aborted) {
|
|
543
|
+
if (contextLoaded)
|
|
544
|
+
await context._ctx.dispose();
|
|
545
|
+
throw createSignal.reason;
|
|
546
|
+
}
|
|
547
|
+
else if (!contextLoaded)
|
|
548
|
+
throw new Error("Failed to create context");
|
|
549
|
+
contextCreationMemoryReservation?.dispose?.();
|
|
550
|
+
if (loraOptions != null && loraOptions.adapters.length > 0) {
|
|
551
|
+
let loadedAdapters = 0;
|
|
552
|
+
for (const adapter of loraOptions.adapters) {
|
|
553
|
+
try {
|
|
554
|
+
await context._setLora({
|
|
555
|
+
filePath: adapter.filePath,
|
|
556
|
+
scale: adapter.scale
|
|
557
|
+
});
|
|
558
|
+
loadedAdapters++;
|
|
559
|
+
try {
|
|
560
|
+
loraOptions.onLoadProgress?.(loadedAdapters / loraOptions.adapters.length);
|
|
561
|
+
}
|
|
562
|
+
catch (err) {
|
|
563
|
+
console.error(err);
|
|
564
|
+
}
|
|
565
|
+
}
|
|
566
|
+
catch (err) {
|
|
567
|
+
await context.dispose();
|
|
568
|
+
throw err;
|
|
569
|
+
}
|
|
570
|
+
if (createSignal?.aborted) {
|
|
571
|
+
await context.dispose();
|
|
572
|
+
throw createSignal.reason;
|
|
573
|
+
}
|
|
574
|
+
}
|
|
575
|
+
}
|
|
576
|
+
else if (loraOptions?.onLoadProgress != null) {
|
|
577
|
+
try {
|
|
578
|
+
loraOptions.onLoadProgress(1);
|
|
579
|
+
}
|
|
580
|
+
catch (err) {
|
|
581
|
+
console.error(err);
|
|
582
|
+
}
|
|
583
|
+
}
|
|
584
|
+
return context;
|
|
585
|
+
}
|
|
586
|
+
finally {
|
|
587
|
+
contextCreationMemoryReservation?.dispose?.();
|
|
588
|
+
}
|
|
589
|
+
}
|
|
590
|
+
while (failedCreationRetries >= 0) {
|
|
591
|
+
try {
|
|
592
|
+
return await createContext(contextSize);
|
|
593
|
+
}
|
|
594
|
+
catch (err) {
|
|
595
|
+
if (failedCreationRetries === 0 || (createSignal?.aborted && err === createSignal.reason))
|
|
596
|
+
throw err;
|
|
597
|
+
failedCreationRetries--;
|
|
598
|
+
let newContextSize = typeof failedCreationAutoContextSizeShrink === "number"
|
|
599
|
+
? Math.floor(contextSize * (1 - failedCreationAutoContextSizeShrink))
|
|
600
|
+
: Math.floor(failedCreationAutoContextSizeShrink(contextSize));
|
|
601
|
+
if (!Number.isFinite(newContextSize))
|
|
602
|
+
throw err;
|
|
603
|
+
if (newContextSize < minContextSize)
|
|
604
|
+
newContextSize = minContextSize;
|
|
605
|
+
if (newContextSize >= contextSize)
|
|
606
|
+
throw err;
|
|
607
|
+
contextSize = newContextSize;
|
|
608
|
+
}
|
|
609
|
+
}
|
|
610
|
+
throw new Error("Failed to create context");
|
|
611
|
+
}
|
|
315
612
|
}
|
|
316
613
|
export class LlamaContextSequence {
|
|
317
614
|
/** @internal */ _sequenceId;
|
|
318
615
|
/** @internal */ _gcRegistry;
|
|
319
616
|
/** @internal */ _context;
|
|
320
617
|
/** @internal */ _contextShift;
|
|
618
|
+
/** @internal */ _tokenMeter;
|
|
321
619
|
/** @internal */ _disposeAggregator = new DisposeAggregator();
|
|
322
620
|
/** @internal */ _contextTokens = [];
|
|
323
621
|
/** @internal */ _nextTokenIndex = 0;
|
|
324
622
|
/** @internal */ _disposed = false;
|
|
325
623
|
onDispose = new EventRelay();
|
|
326
|
-
constructor({ sequenceId, context, contextShift }) {
|
|
624
|
+
constructor({ sequenceId, context, tokenMeter, contextShift }) {
|
|
327
625
|
this._sequenceId = sequenceId;
|
|
328
626
|
this._context = context;
|
|
627
|
+
this._tokenMeter = tokenMeter ?? new TokenMeter();
|
|
329
628
|
this._contextShift = contextShift;
|
|
330
629
|
this._gcRegistry = new FinalizationRegistry(this._context._reclaimUnusedSequenceId);
|
|
331
630
|
this._gcRegistry.register(this, sequenceId);
|
|
@@ -362,6 +661,9 @@ export class LlamaContextSequence {
|
|
|
362
661
|
get contextTokens() {
|
|
363
662
|
return this._contextTokens.slice();
|
|
364
663
|
}
|
|
664
|
+
get tokenMeter() {
|
|
665
|
+
return this._tokenMeter;
|
|
666
|
+
}
|
|
365
667
|
get isLoadedToMemory() {
|
|
366
668
|
return !this._disposed;
|
|
367
669
|
}
|
|
@@ -387,7 +689,7 @@ export class LlamaContextSequence {
|
|
|
387
689
|
}
|
|
388
690
|
/**
|
|
389
691
|
* Erase context tokens in the provided ranges to free up space for new tokens to be generated.
|
|
390
|
-
*
|
|
692
|
+
* The start of each range is inclusive, and the end of each range is exclusive.
|
|
391
693
|
* For example, the range `{start: 0, end: 1}` will remove the token at the `0` index only.
|
|
392
694
|
*/
|
|
393
695
|
async eraseContextTokenRanges(ranges) {
|
|
@@ -396,6 +698,8 @@ export class LlamaContextSequence {
|
|
|
396
698
|
this._ensureNotDisposed();
|
|
397
699
|
if (ranges.length === 0)
|
|
398
700
|
return;
|
|
701
|
+
// if the deletion fails, we'll have to dispose the sequence and fill it up again
|
|
702
|
+
let deletionSuccessful = true;
|
|
399
703
|
const resolvedRanges = ranges
|
|
400
704
|
.map(({ start, end }) => {
|
|
401
705
|
if (start === end)
|
|
@@ -425,34 +729,42 @@ export class LlamaContextSequence {
|
|
|
425
729
|
let lastDeleteRangeEndPos = null;
|
|
426
730
|
for (const range of resolvedRanges) {
|
|
427
731
|
this._contextTokens.splice(range.start - removedTokens, range.end - range.start);
|
|
428
|
-
|
|
429
|
-
|
|
732
|
+
if (deletionSuccessful)
|
|
733
|
+
deletionSuccessful &&= this._context._ctx.removeTokenCellsFromSequence(this._sequenceId, range.start, range.end);
|
|
734
|
+
if (deletionSuccessful && lastDeleteRangeEndPos != null && removedTokens > 0 && lastDeleteRangeEndPos !== range.start)
|
|
430
735
|
this._context._ctx.shiftSequenceTokenCells(this._sequenceId, lastDeleteRangeEndPos, range.start, -removedTokens);
|
|
431
736
|
removedTokens += range.end - range.start;
|
|
432
737
|
lastDeleteRangeEndPos = range.end;
|
|
433
738
|
}
|
|
434
|
-
if (lastDeleteRangeEndPos != null && removedTokens > 0 && lastDeleteRangeEndPos !== this._nextTokenIndex)
|
|
739
|
+
if (deletionSuccessful && lastDeleteRangeEndPos != null && removedTokens > 0 && lastDeleteRangeEndPos !== this._nextTokenIndex)
|
|
435
740
|
this._context._ctx.shiftSequenceTokenCells(this._sequenceId, lastDeleteRangeEndPos, this._nextTokenIndex, -removedTokens);
|
|
436
741
|
this._nextTokenIndex -= removedTokens;
|
|
742
|
+
if (deletionSuccessful)
|
|
743
|
+
return;
|
|
744
|
+
const newSequenceTokens = this._contextTokens.slice();
|
|
745
|
+
this._nextTokenIndex = 0;
|
|
746
|
+
this._context._ctx.disposeSequence(this._sequenceId);
|
|
747
|
+
await this.evaluateWithoutGeneratingNewTokens(newSequenceTokens);
|
|
437
748
|
});
|
|
438
749
|
}
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
* @param [options]
|
|
442
|
-
*/
|
|
443
|
-
evaluate(tokens, { temperature = 0, topK = 40, topP = 0.95, grammarEvaluationState, repeatPenalty, evaluationPriority = 5, contextShift: { size: contextShiftSize = this._contextShift.size, strategy: contextShiftStrategy = this._contextShift.strategy } = {}, yieldEosToken = false } = {}) {
|
|
750
|
+
evaluate(tokens, options = {}) {
|
|
751
|
+
const { temperature = 0, minP = 0, topK = 40, topP = 0.95, seed, grammarEvaluationState, repeatPenalty, tokenBias, evaluationPriority = 5, contextShift: { size: contextShiftSize = this._contextShift.size, strategy: contextShiftStrategy = this._contextShift.strategy } = {}, yieldEogToken = false, _noSampling = false } = options;
|
|
444
752
|
return this._evaluate(tokens, {
|
|
445
753
|
temperature,
|
|
754
|
+
minP,
|
|
446
755
|
topK,
|
|
447
756
|
topP,
|
|
757
|
+
seed,
|
|
448
758
|
grammarEvaluationState,
|
|
449
759
|
repeatPenalty,
|
|
760
|
+
tokenBias,
|
|
450
761
|
evaluationPriority,
|
|
451
762
|
contextShiftOptions: {
|
|
452
763
|
size: contextShiftSize,
|
|
453
764
|
strategy: contextShiftStrategy
|
|
454
765
|
},
|
|
455
|
-
|
|
766
|
+
yieldEogToken,
|
|
767
|
+
_noSampling
|
|
456
768
|
});
|
|
457
769
|
}
|
|
458
770
|
/**
|
|
@@ -475,59 +787,85 @@ export class LlamaContextSequence {
|
|
|
475
787
|
}
|
|
476
788
|
}
|
|
477
789
|
/** @internal */
|
|
478
|
-
async *_evaluate(tokens, { temperature = 0, topK = 40, topP = 0.95, grammarEvaluationState, repeatPenalty, evaluationPriority = 5, generateNewTokens = true, contextShiftOptions,
|
|
790
|
+
async *_evaluate(tokens, { temperature = 0, minP = 0, topK = 40, topP = 0.95, seed, grammarEvaluationState, repeatPenalty, tokenBias, evaluationPriority = 5, generateNewTokens = true, contextShiftOptions, yieldEogToken = false, _noSampling = false }) {
|
|
479
791
|
this._ensureNotDisposed();
|
|
480
792
|
let evalTokens = tokens;
|
|
481
793
|
if (evalTokens.length === 0)
|
|
482
794
|
return;
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
|
|
488
|
-
const
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
|
|
516
|
-
|
|
795
|
+
const sampler = new LlamaSampler(this.model);
|
|
796
|
+
try {
|
|
797
|
+
while (true) {
|
|
798
|
+
this._ensureNotDisposed();
|
|
799
|
+
// Evaluate to get the next token.
|
|
800
|
+
const nextToken = await this._decodeTokens(evalTokens, generateNewTokens, evaluationPriority, this._tokenMeter, contextShiftOptions, (batchLogitIndex) => {
|
|
801
|
+
if (_noSampling)
|
|
802
|
+
return null;
|
|
803
|
+
const repeatPenaltyTokens = repeatPenalty?.punishTokens instanceof Function
|
|
804
|
+
? repeatPenalty.punishTokens()
|
|
805
|
+
: repeatPenalty?.punishTokens;
|
|
806
|
+
const maxPunishTokens = Math.max(repeatPenalty?.maxPunishTokens ?? defaultMaxPunishTokens, repeatPenaltyTokens?.length ?? 0);
|
|
807
|
+
const resolvedGrammarEvaluationState = grammarEvaluationState instanceof Function
|
|
808
|
+
? grammarEvaluationState()
|
|
809
|
+
: grammarEvaluationState;
|
|
810
|
+
if (resolvedGrammarEvaluationState != null && resolvedGrammarEvaluationState._llama !== this.model._llama)
|
|
811
|
+
throw new Error("The LlamaGrammar used by passed to this function was created with a different Llama instance than the one used by this sequence's model. Make sure you use the same Llama instance for both the model and the grammar.");
|
|
812
|
+
const { tokenBiasKeys, tokenBiasValues } = getTokenBiasesForAddon(tokenBias, this.model);
|
|
813
|
+
sampler.applyConfig(removeNullFields({
|
|
814
|
+
temperature,
|
|
815
|
+
minP,
|
|
816
|
+
topK,
|
|
817
|
+
topP,
|
|
818
|
+
seed: Math.max(0, Number.isFinite(seed)
|
|
819
|
+
? Math.floor(seed ?? (Date.now() / 1000))
|
|
820
|
+
: Math.floor(Date.now() / 1000)),
|
|
821
|
+
repeatPenalty: repeatPenalty?.penalty,
|
|
822
|
+
repeatPenaltyMaxTokens: maxPunishTokens,
|
|
823
|
+
repeatPenaltyTokens: repeatPenaltyTokens != null
|
|
824
|
+
? Uint32Array.from(repeatPenaltyTokens)
|
|
825
|
+
: undefined,
|
|
826
|
+
repeatPenaltyPresencePenalty: repeatPenalty?.presencePenalty,
|
|
827
|
+
repeatPenaltyFrequencyPenalty: repeatPenalty?.frequencyPenalty,
|
|
828
|
+
tokenBiasKeys,
|
|
829
|
+
tokenBiasValues,
|
|
830
|
+
grammarEvaluationState: resolvedGrammarEvaluationState?._state
|
|
831
|
+
}));
|
|
832
|
+
return withLock(sampler, "sample", async () => {
|
|
833
|
+
if (sampler.disposed)
|
|
834
|
+
return null;
|
|
835
|
+
return this._context._ctx.sampleToken(batchLogitIndex, sampler._sampler);
|
|
836
|
+
});
|
|
837
|
+
});
|
|
838
|
+
if (nextToken === -1)
|
|
839
|
+
throw new Error("Failed to sample next token");
|
|
840
|
+
if (nextToken == null)
|
|
841
|
+
return;
|
|
842
|
+
// the model finished generating text
|
|
843
|
+
if (!yieldEogToken && this._context.model.isEogToken(nextToken))
|
|
844
|
+
break;
|
|
845
|
+
const replacementToken = (yield nextToken);
|
|
846
|
+
// set the tokens for the next evaluation
|
|
847
|
+
if (replacementToken != null)
|
|
848
|
+
evalTokens = [replacementToken];
|
|
849
|
+
else
|
|
850
|
+
evalTokens = [nextToken];
|
|
851
|
+
}
|
|
852
|
+
}
|
|
853
|
+
finally {
|
|
854
|
+
void withLock(sampler, "sample", sampler.asyncDispose);
|
|
517
855
|
}
|
|
518
856
|
}
|
|
519
857
|
/** @internal */
|
|
520
|
-
async _decodeTokens(tokens, generateLogit, evaluationPriority, contextShiftOptions, onDecodeDone) {
|
|
858
|
+
async _decodeTokens(tokens, generateLogit, evaluationPriority, tokenMeter, contextShiftOptions, onDecodeDone) {
|
|
521
859
|
this._ensureNotDisposed();
|
|
522
860
|
const tokensLeftToDecode = tokens.slice();
|
|
523
861
|
return await withLock(this, "evaluate", async () => {
|
|
524
862
|
while (tokensLeftToDecode.length > 0) {
|
|
525
863
|
this._ensureNotDisposed();
|
|
526
|
-
let freeSpace = this._context.contextSize - this._nextTokenIndex;
|
|
527
|
-
if (freeSpace <=
|
|
864
|
+
let freeSpace = this._context.contextSize - 1 - this._nextTokenIndex;
|
|
865
|
+
if (freeSpace <= 0) {
|
|
528
866
|
await this._freeUpSpaceForTokens(contextShiftOptions);
|
|
529
|
-
freeSpace = this._context.contextSize - this._nextTokenIndex;
|
|
530
|
-
if (freeSpace <=
|
|
867
|
+
freeSpace = this._context.contextSize - 1 - this._nextTokenIndex;
|
|
868
|
+
if (freeSpace <= 0)
|
|
531
869
|
throw new Error("Failed to free up space for new tokens");
|
|
532
870
|
}
|
|
533
871
|
const tokensToDecode = tokensLeftToDecode.splice(0, freeSpace);
|
|
@@ -537,7 +875,8 @@ export class LlamaContextSequence {
|
|
|
537
875
|
tokens: tokensToDecode,
|
|
538
876
|
firstTokenSequenceIndex: this._nextTokenIndex,
|
|
539
877
|
generateLogitAtTheEnd,
|
|
540
|
-
evaluationPriority
|
|
878
|
+
evaluationPriority,
|
|
879
|
+
tokenMeter
|
|
541
880
|
}, !generateLogitAtTheEnd
|
|
542
881
|
? undefined
|
|
543
882
|
: onDecodeDone);
|
|
@@ -557,7 +896,10 @@ export class LlamaContextSequence {
|
|
|
557
896
|
: contextShiftOptions.size));
|
|
558
897
|
this._ensureNotDisposed();
|
|
559
898
|
if (contextShiftOptions.strategy === "eraseBeginning") {
|
|
560
|
-
|
|
899
|
+
let eraseStartIndex = 0;
|
|
900
|
+
if (this.model.tokens.bos != null && this._contextTokens[0] === this.model.tokens.bos)
|
|
901
|
+
eraseStartIndex = 1;
|
|
902
|
+
await this.eraseContextTokenRanges([{ start: eraseStartIndex, end: size + eraseStartIndex }]);
|
|
561
903
|
}
|
|
562
904
|
else {
|
|
563
905
|
const ranges = await contextShiftOptions.strategy({
|
|
@@ -567,7 +909,7 @@ export class LlamaContextSequence {
|
|
|
567
909
|
if (ranges == null)
|
|
568
910
|
throw new Error("Invalid delete ranges");
|
|
569
911
|
await this.eraseContextTokenRanges(ranges);
|
|
570
|
-
if (this.nextTokenIndex >= this._context.contextSize)
|
|
912
|
+
if (this.nextTokenIndex >= this._context.contextSize - 1)
|
|
571
913
|
await this.eraseContextTokenRanges([{ start: 0, end: size }]);
|
|
572
914
|
}
|
|
573
915
|
}
|
|
@@ -580,10 +922,11 @@ export class LlamaContextSequence {
|
|
|
580
922
|
* We need this to make it impossible to manually create instances of this class outside the code of this library
|
|
581
923
|
* @internal
|
|
582
924
|
*/
|
|
583
|
-
static _create({ sequenceId, context, contextShift: { size: contextShiftSize = Math.min(100, Math.ceil(context.contextSize / 2)), strategy: contextShiftStrategy = "eraseBeginning" } = {} }) {
|
|
925
|
+
static _create({ sequenceId, context, tokenMeter, contextShift: { size: contextShiftSize = Math.min(100, Math.ceil(context.contextSize / 2)), strategy: contextShiftStrategy = "eraseBeginning" } = {} }) {
|
|
584
926
|
return new LlamaContextSequence({
|
|
585
927
|
sequenceId,
|
|
586
928
|
context,
|
|
929
|
+
tokenMeter,
|
|
587
930
|
contextShift: {
|
|
588
931
|
size: contextShiftSize,
|
|
589
932
|
strategy: contextShiftStrategy
|
|
@@ -591,14 +934,52 @@ export class LlamaContextSequence {
|
|
|
591
934
|
});
|
|
592
935
|
}
|
|
593
936
|
}
|
|
937
|
+
/**
 * Convert a token bias into the two parallel typed arrays that are handed to the sampler config.
 *
 * @param tokenBias - A token bias object (its `_tokenizer` and `_biases` fields are read),
 * a function returning one, or `null`/`undefined` for no bias. A function is resolved
 * before anything else, so a function that returns `null`/`undefined` also means "no bias".
 * @param currentModel - The model being sampled from; its `tokenizer` must be the very
 * tokenizer the token bias was created with.
 * @returns `tokenBiasKeys` (`Uint32Array` of tokens) and `tokenBiasValues` (`Float32Array`
 * of biases, same order); both are `undefined` when there is nothing to apply.
 * @throws {Error} When the token bias was created with a different tokenizer.
 */
function getTokenBiasesForAddon(tokenBias, currentModel) {
    const noTokenBiases = {
        tokenBiasKeys: undefined,
        tokenBiasValues: undefined
    };

    // resolve the function form first, so that its result gets the same null check
    const resolvedTokenBias = tokenBias instanceof Function
        ? tokenBias()
        : tokenBias;

    if (resolvedTokenBias == null)
        return noTokenBiases;

    if (resolvedTokenBias._tokenizer !== currentModel.tokenizer)
        throw new Error("This TokenBias instance was created with a different model than the one used by this context. " +
            "Make sure you use the model instance of the context sequence for the TokenBias you use it with.");

    // `_biases` is iterated as [token, bias] pairs
    const biasEntries = Array.from(resolvedTokenBias._biases);
    if (biasEntries.length === 0)
        return noTokenBiases;

    return {
        tokenBiasKeys: Uint32Array.from(biasEntries, ([token]) => token),
        tokenBiasValues: Float32Array.from(biasEntries, ([, bias]) => bias)
    };
}
|
|
594
965
|
/**
 * Dispose the context behind a weak reference, if it is still around.
 * The value returned by `dispose()` is deliberately not awaited.
 *
 * @param contextRef - An object whose `deref()` returns the context, or nothing when it is gone.
 */
function disposeContextIfReferenced(contextRef) {
    void contextRef.deref()?.dispose();
}
|
|
599
970
|
/**
 * Dispose the object behind a weak reference, if it is still around.
 *
 * @param contextRef - An object whose `deref()` returns the target, or nothing when it is gone.
 */
function disposeContextSequenceIfReferenced(contextRef) {
    const referenced = contextRef.deref();

    // nothing left to dispose
    if (referenced == null)
        return;

    referenced.dispose();
}
|
|
975
|
+
/**
 * Get the batch size to use when none is configured:
 * `contextSize * sequences`, capped at 512.
 *
 * @param param0 - The context size and the number of sequences of the context.
 * @returns The default batch size.
 */
export function getDefaultContextBatchSize({ contextSize, sequences }) {
    const batchSizeCap = 512;
    const combinedSize = contextSize * sequences;

    return Math.min(combinedSize, batchSizeCap);
}
|
|
978
|
+
/**
 * Get the number of sequences a context has when none is configured.
 *
 * @returns Always `1`.
 */
export function getDefaultContextSequences() {
    const defaultSequences = 1;
    return defaultSequences;
}
|
|
981
|
+
// Used when the train context size is not available
const defaultFallbackContextSize = 4096;

/**
 * Get the context size to use for a model by default: its train context size,
 * or 4096 when that is `null`/`undefined`.
 *
 * @param param0 - `trainContextSize` is the context size the model was trained with, if known.
 * @returns The default context size for the model.
 */
export function getDefaultModelContextSize({ trainContextSize }) {
    if (trainContextSize != null)
        return trainContextSize;

    return defaultFallbackContextSize;
}
|
|
604
985
|
//# sourceMappingURL=LlamaContext.js.map
|