node-llama-cpp 3.0.0-beta.2 → 3.0.0-beta.21
This diff lists the changes between two publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
- package/README.md +14 -11
- package/dist/ChatWrapper.d.ts +2 -15
- package/dist/ChatWrapper.js +28 -33
- package/dist/ChatWrapper.js.map +1 -1
- package/dist/apiDocsOverrides.d.ts +1 -0
- package/dist/apiDocsOverrides.js +5 -0
- package/dist/apiDocsOverrides.js.map +1 -0
- package/dist/{utils/getBin.d.ts → bindings/AddonTypes.d.ts} +54 -7
- package/dist/bindings/AddonTypes.js +2 -0
- package/dist/bindings/AddonTypes.js.map +1 -0
- package/dist/bindings/Llama.d.ts +47 -0
- package/dist/bindings/Llama.js +343 -0
- package/dist/bindings/Llama.js.map +1 -0
- package/dist/bindings/consts.d.ts +2 -0
- package/dist/bindings/consts.js +11 -0
- package/dist/bindings/consts.js.map +1 -0
- package/dist/bindings/getLlama.d.ts +145 -0
- package/dist/bindings/getLlama.js +389 -0
- package/dist/bindings/getLlama.js.map +1 -0
- package/dist/bindings/types.d.ts +55 -0
- package/dist/bindings/types.js +77 -0
- package/dist/bindings/types.js.map +1 -0
- package/dist/bindings/utils/MemoryOrchestrator.d.ts +21 -0
- package/dist/bindings/utils/MemoryOrchestrator.js +49 -0
- package/dist/bindings/utils/MemoryOrchestrator.js.map +1 -0
- package/dist/bindings/utils/NoBinaryFoundError.d.ts +2 -0
- package/dist/bindings/utils/NoBinaryFoundError.js +7 -0
- package/dist/bindings/utils/NoBinaryFoundError.js.map +1 -0
- package/dist/bindings/utils/asyncEvery.d.ts +5 -0
- package/dist/bindings/utils/asyncEvery.js +15 -0
- package/dist/bindings/utils/asyncEvery.js.map +1 -0
- package/dist/bindings/utils/asyncSome.d.ts +5 -0
- package/dist/bindings/utils/asyncSome.js +27 -0
- package/dist/bindings/utils/asyncSome.js.map +1 -0
- package/dist/{utils → bindings/utils}/binariesGithubRelease.js +1 -1
- package/dist/bindings/utils/binariesGithubRelease.js.map +1 -0
- package/dist/bindings/utils/clearAllLocalBuilds.d.ts +1 -0
- package/dist/bindings/utils/clearAllLocalBuilds.js +47 -0
- package/dist/bindings/utils/clearAllLocalBuilds.js.map +1 -0
- package/dist/bindings/utils/cloneLlamaCppRepo.d.ts +11 -0
- package/dist/bindings/utils/cloneLlamaCppRepo.js +166 -0
- package/dist/bindings/utils/cloneLlamaCppRepo.js.map +1 -0
- package/dist/bindings/utils/compileLLamaCpp.d.ts +15 -0
- package/dist/bindings/utils/compileLLamaCpp.js +221 -0
- package/dist/bindings/utils/compileLLamaCpp.js.map +1 -0
- package/dist/bindings/utils/detectAvailableComputeLayers.d.ts +14 -0
- package/dist/bindings/utils/detectAvailableComputeLayers.js +304 -0
- package/dist/bindings/utils/detectAvailableComputeLayers.js.map +1 -0
- package/dist/bindings/utils/detectGlibc.d.ts +4 -0
- package/dist/bindings/utils/detectGlibc.js +46 -0
- package/dist/bindings/utils/detectGlibc.js.map +1 -0
- package/dist/bindings/utils/getBestComputeLayersAvailable.d.ts +9 -0
- package/dist/bindings/utils/getBestComputeLayersAvailable.js +29 -0
- package/dist/bindings/utils/getBestComputeLayersAvailable.js.map +1 -0
- package/dist/bindings/utils/getBuildFolderNameForBuildOptions.d.ts +5 -0
- package/dist/bindings/utils/getBuildFolderNameForBuildOptions.js +93 -0
- package/dist/bindings/utils/getBuildFolderNameForBuildOptions.js.map +1 -0
- package/dist/bindings/utils/getCanUsePrebuiltBinaries.d.ts +1 -0
- package/dist/bindings/utils/getCanUsePrebuiltBinaries.js +8 -0
- package/dist/bindings/utils/getCanUsePrebuiltBinaries.js.map +1 -0
- package/dist/bindings/utils/getExampleUsageCodeOfGetLlama.d.ts +2 -0
- package/dist/bindings/utils/getExampleUsageCodeOfGetLlama.js +21 -0
- package/dist/bindings/utils/getExampleUsageCodeOfGetLlama.js.map +1 -0
- package/dist/bindings/utils/getGpuTypesToUseForOption.d.ts +11 -0
- package/dist/bindings/utils/getGpuTypesToUseForOption.js +30 -0
- package/dist/bindings/utils/getGpuTypesToUseForOption.js.map +1 -0
- package/dist/bindings/utils/getLinuxDistroInfo.d.ts +9 -0
- package/dist/bindings/utils/getLinuxDistroInfo.js +46 -0
- package/dist/bindings/utils/getLinuxDistroInfo.js.map +1 -0
- package/dist/bindings/utils/getLlamaWithoutBackend.d.ts +5 -0
- package/dist/bindings/utils/getLlamaWithoutBackend.js +27 -0
- package/dist/bindings/utils/getLlamaWithoutBackend.js.map +1 -0
- package/dist/bindings/utils/getPlatform.d.ts +2 -0
- package/dist/bindings/utils/getPlatform.js +15 -0
- package/dist/bindings/utils/getPlatform.js.map +1 -0
- package/dist/bindings/utils/getPlatformInfo.d.ts +5 -0
- package/dist/bindings/utils/getPlatformInfo.js +28 -0
- package/dist/bindings/utils/getPlatformInfo.js.map +1 -0
- package/dist/bindings/utils/hasBuildingFromSourceDependenciesInstalled.d.ts +3 -0
- package/dist/bindings/utils/hasBuildingFromSourceDependenciesInstalled.js +27 -0
- package/dist/bindings/utils/hasBuildingFromSourceDependenciesInstalled.js.map +1 -0
- package/dist/bindings/utils/hasFileInPath.d.ts +2 -0
- package/dist/bindings/utils/hasFileInPath.js +34 -0
- package/dist/bindings/utils/hasFileInPath.js.map +1 -0
- package/dist/bindings/utils/lastBuildInfo.d.ts +6 -0
- package/dist/bindings/utils/lastBuildInfo.js +17 -0
- package/dist/bindings/utils/lastBuildInfo.js.map +1 -0
- package/dist/bindings/utils/logBinaryUsageExampleToConsole.d.ts +2 -0
- package/dist/bindings/utils/logBinaryUsageExampleToConsole.js +22 -0
- package/dist/bindings/utils/logBinaryUsageExampleToConsole.js.map +1 -0
- package/dist/bindings/utils/logDistroInstallInstruction.d.ts +14 -0
- package/dist/bindings/utils/logDistroInstallInstruction.js +48 -0
- package/dist/bindings/utils/logDistroInstallInstruction.js.map +1 -0
- package/dist/bindings/utils/resolveCustomCmakeOptions.d.ts +1 -0
- package/dist/bindings/utils/resolveCustomCmakeOptions.js +45 -0
- package/dist/bindings/utils/resolveCustomCmakeOptions.js.map +1 -0
- package/dist/bindings/utils/testBindingBinary.d.ts +1 -0
- package/dist/bindings/utils/testBindingBinary.js +98 -0
- package/dist/bindings/utils/testBindingBinary.js.map +1 -0
- package/dist/bindings/utils/testCmakeBinary.d.ts +5 -0
- package/dist/bindings/utils/testCmakeBinary.js +32 -0
- package/dist/bindings/utils/testCmakeBinary.js.map +1 -0
- package/dist/chatWrappers/AlpacaChatWrapper.d.ts +2 -1
- package/dist/chatWrappers/AlpacaChatWrapper.js +9 -2
- package/dist/chatWrappers/AlpacaChatWrapper.js.map +1 -1
- package/dist/chatWrappers/ChatMLChatWrapper.d.ts +5 -0
- package/dist/chatWrappers/ChatMLChatWrapper.js +13 -11
- package/dist/chatWrappers/ChatMLChatWrapper.js.map +1 -1
- package/dist/chatWrappers/FalconChatWrapper.d.ts +2 -1
- package/dist/chatWrappers/FalconChatWrapper.js +28 -11
- package/dist/chatWrappers/FalconChatWrapper.js.map +1 -1
- package/dist/chatWrappers/FunctionaryChatWrapper.js +86 -73
- package/dist/chatWrappers/FunctionaryChatWrapper.js.map +1 -1
- package/dist/chatWrappers/{LlamaChatWrapper.d.ts → GemmaChatWrapper.d.ts} +6 -1
- package/dist/chatWrappers/GemmaChatWrapper.js +88 -0
- package/dist/chatWrappers/GemmaChatWrapper.js.map +1 -0
- package/dist/chatWrappers/GeneralChatWrapper.d.ts +2 -1
- package/dist/chatWrappers/GeneralChatWrapper.js +35 -12
- package/dist/chatWrappers/GeneralChatWrapper.js.map +1 -1
- package/dist/chatWrappers/Llama2ChatWrapper.d.ts +20 -0
- package/dist/chatWrappers/{LlamaChatWrapper.js → Llama2ChatWrapper.js} +29 -11
- package/dist/chatWrappers/Llama2ChatWrapper.js.map +1 -0
- package/dist/chatWrappers/Llama3ChatWrapper.d.ts +31 -0
- package/dist/chatWrappers/Llama3ChatWrapper.js +129 -0
- package/dist/chatWrappers/Llama3ChatWrapper.js.map +1 -0
- package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.d.ts +73 -0
- package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.js +359 -0
- package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.js.map +1 -0
- package/dist/chatWrappers/generic/TemplateChatWrapper.d.ts +64 -0
- package/dist/chatWrappers/generic/TemplateChatWrapper.js +200 -0
- package/dist/chatWrappers/generic/TemplateChatWrapper.js.map +1 -0
- package/dist/chatWrappers/generic/utils/chatHistoryFunctionCallMessageTemplate.d.ts +33 -0
- package/dist/chatWrappers/generic/utils/chatHistoryFunctionCallMessageTemplate.js +45 -0
- package/dist/chatWrappers/generic/utils/chatHistoryFunctionCallMessageTemplate.js.map +1 -0
- package/dist/chatWrappers/utils/ChatModelFunctionsDocumentationGenerator.d.ts +42 -0
- package/dist/chatWrappers/utils/ChatModelFunctionsDocumentationGenerator.js +82 -0
- package/dist/chatWrappers/utils/ChatModelFunctionsDocumentationGenerator.js.map +1 -0
- package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.d.ts +4 -0
- package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.js +206 -0
- package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.js.map +1 -0
- package/dist/chatWrappers/utils/resolveChatWrapper.d.ts +69 -0
- package/dist/chatWrappers/utils/resolveChatWrapper.js +214 -0
- package/dist/chatWrappers/utils/resolveChatWrapper.js.map +1 -0
- package/dist/cli/cli.js +21 -7
- package/dist/cli/cli.js.map +1 -1
- package/dist/cli/commands/BuildCommand.d.ts +6 -4
- package/dist/cli/commands/BuildCommand.js +103 -41
- package/dist/cli/commands/BuildCommand.js.map +1 -1
- package/dist/cli/commands/ChatCommand.d.ts +18 -6
- package/dist/cli/commands/ChatCommand.js +298 -142
- package/dist/cli/commands/ChatCommand.js.map +1 -1
- package/dist/cli/commands/ClearCommand.d.ts +1 -1
- package/dist/cli/commands/ClearCommand.js +11 -12
- package/dist/cli/commands/ClearCommand.js.map +1 -1
- package/dist/cli/commands/CompleteCommand.d.ts +29 -0
- package/dist/cli/commands/CompleteCommand.js +365 -0
- package/dist/cli/commands/CompleteCommand.js.map +1 -0
- package/dist/cli/commands/DebugCommand.d.ts +7 -0
- package/dist/cli/commands/DebugCommand.js +54 -0
- package/dist/cli/commands/DebugCommand.js.map +1 -0
- package/dist/cli/commands/DownloadCommand.d.ts +6 -4
- package/dist/cli/commands/DownloadCommand.js +120 -69
- package/dist/cli/commands/DownloadCommand.js.map +1 -1
- package/dist/cli/commands/InfillCommand.d.ts +31 -0
- package/dist/cli/commands/InfillCommand.js +401 -0
- package/dist/cli/commands/InfillCommand.js.map +1 -0
- package/dist/cli/commands/InitCommand.d.ts +11 -0
- package/dist/cli/commands/InitCommand.js +195 -0
- package/dist/cli/commands/InitCommand.js.map +1 -0
- package/dist/cli/commands/OnPostInstallCommand.js +9 -10
- package/dist/cli/commands/OnPostInstallCommand.js.map +1 -1
- package/dist/cli/commands/PullCommand.d.ts +12 -0
- package/dist/cli/commands/PullCommand.js +117 -0
- package/dist/cli/commands/PullCommand.js.map +1 -0
- package/dist/cli/commands/inspect/InspectCommand.d.ts +4 -0
- package/dist/cli/commands/inspect/InspectCommand.js +19 -0
- package/dist/cli/commands/inspect/InspectCommand.js.map +1 -0
- package/dist/cli/commands/inspect/commands/InspectGgufCommand.d.ts +12 -0
- package/dist/cli/commands/inspect/commands/InspectGgufCommand.js +136 -0
- package/dist/cli/commands/inspect/commands/InspectGgufCommand.js.map +1 -0
- package/dist/cli/commands/inspect/commands/InspectGpuCommand.d.ts +4 -0
- package/dist/cli/commands/inspect/commands/InspectGpuCommand.js +138 -0
- package/dist/cli/commands/inspect/commands/InspectGpuCommand.js.map +1 -0
- package/dist/cli/commands/inspect/commands/InspectMeasureCommand.d.ts +17 -0
- package/dist/cli/commands/inspect/commands/InspectMeasureCommand.js +613 -0
- package/dist/cli/commands/inspect/commands/InspectMeasureCommand.js.map +1 -0
- package/dist/cli/projectTemplates.d.ts +7 -0
- package/dist/cli/projectTemplates.js +10 -0
- package/dist/cli/projectTemplates.js.map +1 -0
- package/dist/cli/recommendedModels.d.ts +2 -0
- package/dist/cli/recommendedModels.js +342 -0
- package/dist/cli/recommendedModels.js.map +1 -0
- package/dist/cli/startCreateCli.d.ts +2 -0
- package/dist/cli/startCreateCli.js +26 -0
- package/dist/cli/startCreateCli.js.map +1 -0
- package/dist/cli/utils/ConsoleInteraction.d.ts +23 -0
- package/dist/cli/utils/ConsoleInteraction.js +122 -0
- package/dist/cli/utils/ConsoleInteraction.js.map +1 -0
- package/dist/cli/utils/ConsoleTable.d.ts +23 -0
- package/dist/cli/utils/ConsoleTable.js +86 -0
- package/dist/cli/utils/ConsoleTable.js.map +1 -0
- package/dist/cli/utils/basicChooseFromListConsoleInteraction.d.ts +13 -0
- package/dist/cli/utils/basicChooseFromListConsoleInteraction.js +111 -0
- package/dist/cli/utils/basicChooseFromListConsoleInteraction.js.map +1 -0
- package/dist/cli/utils/consolePromptQuestion.d.ts +6 -0
- package/dist/cli/utils/consolePromptQuestion.js +82 -0
- package/dist/cli/utils/consolePromptQuestion.js.map +1 -0
- package/dist/cli/utils/getReadablePath.d.ts +1 -0
- package/dist/cli/utils/getReadablePath.js +14 -0
- package/dist/cli/utils/getReadablePath.js.map +1 -0
- package/dist/cli/utils/interactivelyAskForModel.d.ts +7 -0
- package/dist/cli/utils/interactivelyAskForModel.js +451 -0
- package/dist/cli/utils/interactivelyAskForModel.js.map +1 -0
- package/dist/cli/utils/logUsedGpuTypeOption.d.ts +2 -0
- package/dist/cli/utils/logUsedGpuTypeOption.js +9 -0
- package/dist/cli/utils/logUsedGpuTypeOption.js.map +1 -0
- package/dist/cli/utils/printCommonInfoLines.d.ts +9 -0
- package/dist/cli/utils/printCommonInfoLines.js +71 -0
- package/dist/cli/utils/printCommonInfoLines.js.map +1 -0
- package/dist/cli/utils/printInfoLine.d.ts +12 -0
- package/dist/cli/utils/printInfoLine.js +54 -0
- package/dist/cli/utils/printInfoLine.js.map +1 -0
- package/dist/cli/utils/projectTemplates.d.ts +19 -0
- package/dist/cli/utils/projectTemplates.js +47 -0
- package/dist/cli/utils/projectTemplates.js.map +1 -0
- package/dist/cli/utils/resolveCommandGgufPath.d.ts +4 -0
- package/dist/cli/utils/resolveCommandGgufPath.js +71 -0
- package/dist/cli/utils/resolveCommandGgufPath.js.map +1 -0
- package/dist/cli/utils/resolveHeaderFlag.d.ts +1 -0
- package/dist/cli/utils/resolveHeaderFlag.js +21 -0
- package/dist/cli/utils/resolveHeaderFlag.js.map +1 -0
- package/dist/cli/utils/resolveModelRecommendationFileOptions.d.ts +19 -0
- package/dist/cli/utils/resolveModelRecommendationFileOptions.js +7 -0
- package/dist/cli/utils/resolveModelRecommendationFileOptions.js.map +1 -0
- package/dist/cli/utils/splitAnsiToLines.d.ts +1 -0
- package/dist/cli/utils/splitAnsiToLines.js +32 -0
- package/dist/cli/utils/splitAnsiToLines.js.map +1 -0
- package/dist/cli/utils/withCliCommandDescriptionDocsUrl.d.ts +2 -0
- package/dist/cli/utils/withCliCommandDescriptionDocsUrl.js +23 -0
- package/dist/cli/utils/withCliCommandDescriptionDocsUrl.js.map +1 -0
- package/dist/commands.d.ts +1 -0
- package/dist/commands.js +3 -0
- package/dist/commands.js.map +1 -1
- package/dist/config.d.ts +38 -5
- package/dist/config.js +61 -16
- package/dist/config.js.map +1 -1
- package/dist/consts.d.ts +3 -0
- package/dist/consts.js +10 -0
- package/dist/consts.js.map +1 -0
- package/dist/{llamaEvaluator → evaluator}/LlamaChat/LlamaChat.d.ts +37 -35
- package/dist/{llamaEvaluator → evaluator}/LlamaChat/LlamaChat.js +298 -221
- package/dist/evaluator/LlamaChat/LlamaChat.js.map +1 -0
- package/dist/{llamaEvaluator → evaluator}/LlamaChat/utils/FunctionCallGrammar.d.ts +2 -1
- package/dist/{llamaEvaluator → evaluator}/LlamaChat/utils/FunctionCallGrammar.js +5 -3
- package/dist/evaluator/LlamaChat/utils/FunctionCallGrammar.js.map +1 -0
- package/dist/{llamaEvaluator → evaluator}/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.js +18 -0
- package/dist/evaluator/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.js.map +1 -0
- package/dist/{llamaEvaluator → evaluator}/LlamaChatSession/LlamaChatSession.d.ts +40 -3
- package/dist/{llamaEvaluator → evaluator}/LlamaChatSession/LlamaChatSession.js +28 -7
- package/dist/evaluator/LlamaChatSession/LlamaChatSession.js.map +1 -0
- package/dist/{llamaEvaluator → evaluator}/LlamaChatSession/utils/defineChatSessionFunction.d.ts +3 -0
- package/dist/{llamaEvaluator → evaluator}/LlamaChatSession/utils/defineChatSessionFunction.js +3 -0
- package/dist/evaluator/LlamaChatSession/utils/defineChatSessionFunction.js.map +1 -0
- package/dist/evaluator/LlamaCompletion.d.ts +155 -0
- package/dist/evaluator/LlamaCompletion.js +405 -0
- package/dist/evaluator/LlamaCompletion.js.map +1 -0
- package/dist/{llamaEvaluator → evaluator}/LlamaContext/LlamaContext.d.ts +41 -20
- package/dist/{llamaEvaluator → evaluator}/LlamaContext/LlamaContext.js +271 -81
- package/dist/evaluator/LlamaContext/LlamaContext.js.map +1 -0
- package/dist/evaluator/LlamaContext/types.d.ts +140 -0
- package/dist/evaluator/LlamaContext/types.js.map +1 -0
- package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies/firstInFirstOutStrategy.js.map +1 -0
- package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies/maximumParallelismStrategy.js.map +1 -0
- package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizationStrategy.d.ts +2 -0
- package/dist/{llamaEvaluator/LlamaContext/utils/resolveBatchItemsPrioritizingStrategy.js → evaluator/LlamaContext/utils/resolveBatchItemsPrioritizationStrategy.js} +4 -4
- package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizationStrategy.js.map +1 -0
- package/dist/evaluator/LlamaEmbeddingContext.d.ts +51 -0
- package/dist/evaluator/LlamaEmbeddingContext.js +73 -0
- package/dist/evaluator/LlamaEmbeddingContext.js.map +1 -0
- package/dist/{llamaEvaluator → evaluator}/LlamaGrammar.d.ts +8 -5
- package/dist/{llamaEvaluator → evaluator}/LlamaGrammar.js +13 -10
- package/dist/evaluator/LlamaGrammar.js.map +1 -0
- package/dist/{llamaEvaluator → evaluator}/LlamaGrammarEvaluationState.js +4 -4
- package/dist/evaluator/LlamaGrammarEvaluationState.js.map +1 -0
- package/dist/{llamaEvaluator → evaluator}/LlamaJsonSchemaGrammar.d.ts +2 -1
- package/dist/{llamaEvaluator → evaluator}/LlamaJsonSchemaGrammar.js +3 -3
- package/dist/evaluator/LlamaJsonSchemaGrammar.js.map +1 -0
- package/dist/evaluator/LlamaModel.d.ts +230 -0
- package/dist/evaluator/LlamaModel.js +597 -0
- package/dist/evaluator/LlamaModel.js.map +1 -0
- package/dist/evaluator/TokenBias.d.ts +22 -0
- package/dist/evaluator/TokenBias.js +33 -0
- package/dist/evaluator/TokenBias.js.map +1 -0
- package/dist/evaluator/TokenMeter.d.ts +54 -0
- package/dist/evaluator/TokenMeter.js +86 -0
- package/dist/evaluator/TokenMeter.js.map +1 -0
- package/dist/gguf/consts.d.ts +3 -0
- package/dist/gguf/consts.js +8 -0
- package/dist/gguf/consts.js.map +1 -0
- package/dist/gguf/errors/InvalidGgufMagicError.d.ts +3 -0
- package/dist/gguf/errors/InvalidGgufMagicError.js +6 -0
- package/dist/gguf/errors/InvalidGgufMagicError.js.map +1 -0
- package/dist/gguf/errors/UnsupportedGgufValueTypeError.d.ts +4 -0
- package/dist/gguf/errors/UnsupportedGgufValueTypeError.js +9 -0
- package/dist/gguf/errors/UnsupportedGgufValueTypeError.js.map +1 -0
- package/dist/gguf/fileReaders/GgufFileReader.d.ts +33 -0
- package/dist/gguf/fileReaders/GgufFileReader.js +76 -0
- package/dist/gguf/fileReaders/GgufFileReader.js.map +1 -0
- package/dist/gguf/fileReaders/GgufFsFileReader.d.ts +17 -0
- package/dist/gguf/fileReaders/GgufFsFileReader.js +45 -0
- package/dist/gguf/fileReaders/GgufFsFileReader.js.map +1 -0
- package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.d.ts +22 -0
- package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.js +63 -0
- package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.js.map +1 -0
- package/dist/gguf/insights/GgufInsights.d.ts +42 -0
- package/dist/gguf/insights/GgufInsights.js +361 -0
- package/dist/gguf/insights/GgufInsights.js.map +1 -0
- package/dist/gguf/insights/GgufInsightsConfigurationResolver.d.ts +87 -0
- package/dist/gguf/insights/GgufInsightsConfigurationResolver.js +136 -0
- package/dist/gguf/insights/GgufInsightsConfigurationResolver.js.map +1 -0
- package/dist/gguf/insights/utils/resolveContextContextSizeOption.d.ts +18 -0
- package/dist/gguf/insights/utils/resolveContextContextSizeOption.js +76 -0
- package/dist/gguf/insights/utils/resolveContextContextSizeOption.js.map +1 -0
- package/dist/gguf/insights/utils/resolveModelGpuLayersOption.d.ts +14 -0
- package/dist/gguf/insights/utils/resolveModelGpuLayersOption.js +177 -0
- package/dist/gguf/insights/utils/resolveModelGpuLayersOption.js.map +1 -0
- package/dist/gguf/insights/utils/scoreLevels.d.ts +5 -0
- package/dist/gguf/insights/utils/scoreLevels.js +16 -0
- package/dist/gguf/insights/utils/scoreLevels.js.map +1 -0
- package/dist/gguf/parser/GgufV2Parser.d.ts +19 -0
- package/dist/gguf/parser/GgufV2Parser.js +115 -0
- package/dist/gguf/parser/GgufV2Parser.js.map +1 -0
- package/dist/gguf/parser/GgufV3Parser.d.ts +3 -0
- package/dist/gguf/parser/GgufV3Parser.js +4 -0
- package/dist/gguf/parser/GgufV3Parser.js.map +1 -0
- package/dist/gguf/parser/parseGguf.d.ts +8 -0
- package/dist/gguf/parser/parseGguf.js +63 -0
- package/dist/gguf/parser/parseGguf.js.map +1 -0
- package/dist/gguf/readGgufFileInfo.d.ts +33 -0
- package/dist/gguf/readGgufFileInfo.js +66 -0
- package/dist/gguf/readGgufFileInfo.js.map +1 -0
- package/dist/gguf/types/GgufFileInfoTypes.d.ts +84 -0
- package/dist/gguf/types/GgufFileInfoTypes.js +18 -0
- package/dist/gguf/types/GgufFileInfoTypes.js.map +1 -0
- package/dist/gguf/types/GgufMetadataTypes.d.ts +334 -0
- package/dist/gguf/types/GgufMetadataTypes.js +86 -0
- package/dist/gguf/types/GgufMetadataTypes.js.map +1 -0
- package/dist/gguf/types/GgufTensorInfoTypes.d.ts +37 -0
- package/dist/gguf/types/GgufTensorInfoTypes.js +33 -0
- package/dist/gguf/types/GgufTensorInfoTypes.js.map +1 -0
- package/dist/gguf/utils/GgufReadOffset.d.ts +6 -0
- package/dist/gguf/utils/GgufReadOffset.js +18 -0
- package/dist/gguf/utils/GgufReadOffset.js.map +1 -0
- package/dist/gguf/utils/convertMetadataKeyValueRecordToNestedObject.d.ts +5 -0
- package/dist/gguf/utils/convertMetadataKeyValueRecordToNestedObject.js +38 -0
- package/dist/gguf/utils/convertMetadataKeyValueRecordToNestedObject.js.map +1 -0
- package/dist/gguf/utils/getGgufFileTypeName.d.ts +4 -0
- package/dist/gguf/utils/getGgufFileTypeName.js +13 -0
- package/dist/gguf/utils/getGgufFileTypeName.js.map +1 -0
- package/dist/gguf/utils/getGgufMetadataArchitectureData.d.ts +3 -0
- package/dist/gguf/utils/getGgufMetadataArchitectureData.js +4 -0
- package/dist/gguf/utils/getGgufMetadataArchitectureData.js.map +1 -0
- package/dist/gguf/utils/normalizeGgufDownloadUrl.d.ts +1 -0
- package/dist/gguf/utils/normalizeGgufDownloadUrl.js +16 -0
- package/dist/gguf/utils/normalizeGgufDownloadUrl.js.map +1 -0
- package/dist/gguf/utils/resolveBinarySplitGgufPartUrls.d.ts +2 -0
- package/dist/gguf/utils/resolveBinarySplitGgufPartUrls.js +39 -0
- package/dist/gguf/utils/resolveBinarySplitGgufPartUrls.js.map +1 -0
- package/dist/gguf/utils/resolveSplitGgufParts.d.ts +7 -0
- package/dist/gguf/utils/resolveSplitGgufParts.js +55 -0
- package/dist/gguf/utils/resolveSplitGgufParts.js.map +1 -0
- package/dist/index.d.ts +37 -17
- package/dist/index.js +33 -14
- package/dist/index.js.map +1 -1
- package/dist/state.d.ts +4 -0
- package/dist/state.js +14 -0
- package/dist/state.js.map +1 -1
- package/dist/types.d.ts +53 -2
- package/dist/types.js.map +1 -1
- package/dist/utils/DisposeGuard.d.ts +13 -0
- package/dist/utils/DisposeGuard.js +120 -0
- package/dist/utils/DisposeGuard.js.map +1 -0
- package/dist/utils/InsufficientMemoryError.d.ts +3 -0
- package/dist/utils/InsufficientMemoryError.js +6 -0
- package/dist/utils/InsufficientMemoryError.js.map +1 -0
- package/dist/utils/LlamaText.d.ts +50 -25
- package/dist/utils/LlamaText.js +367 -155
- package/dist/utils/LlamaText.js.map +1 -1
- package/dist/utils/StopGenerationDetector.d.ts +1 -1
- package/dist/utils/StopGenerationDetector.js +23 -18
- package/dist/utils/StopGenerationDetector.js.map +1 -1
- package/dist/utils/TokenStreamRegulator.d.ts +8 -4
- package/dist/utils/TokenStreamRegulator.js +78 -8
- package/dist/utils/TokenStreamRegulator.js.map +1 -1
- package/dist/utils/UnsupportedError.d.ts +2 -0
- package/dist/utils/UnsupportedError.js +7 -0
- package/dist/utils/UnsupportedError.js.map +1 -0
- package/dist/utils/cmake.js +38 -20
- package/dist/utils/cmake.js.map +1 -1
- package/dist/utils/createModelDownloader.d.ts +102 -0
- package/dist/utils/createModelDownloader.js +226 -0
- package/dist/utils/createModelDownloader.js.map +1 -0
- package/dist/utils/findBestOption.d.ts +4 -0
- package/dist/utils/findBestOption.js +15 -0
- package/dist/utils/findBestOption.js.map +1 -0
- package/dist/utils/findCharacterRemovalCountToFitChatHistoryInContext.js +18 -8
- package/dist/utils/findCharacterRemovalCountToFitChatHistoryInContext.js.map +1 -1
- package/dist/utils/gbnfJson/getGbnfGrammarForGbnfJsonSchema.d.ts +5 -0
- package/dist/utils/gbnfJson/getGbnfGrammarForGbnfJsonSchema.js +11 -0
- package/dist/utils/gbnfJson/getGbnfGrammarForGbnfJsonSchema.js.map +1 -0
- package/dist/utils/gbnfJson/terminals/GbnfArray.d.ts +3 -1
- package/dist/utils/gbnfJson/terminals/GbnfArray.js +10 -5
- package/dist/utils/gbnfJson/terminals/GbnfArray.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfBoolean.d.ts +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfBoolean.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfBooleanValue.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfGrammar.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfNull.d.ts +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfNull.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfNumber.d.ts +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfNumber.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfNumberValue.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfObjectMap.d.ts +3 -1
- package/dist/utils/gbnfJson/terminals/GbnfObjectMap.js +9 -4
- package/dist/utils/gbnfJson/terminals/GbnfObjectMap.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfOr.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfRepetition.d.ts +9 -0
- package/dist/utils/gbnfJson/terminals/GbnfRepetition.js +37 -0
- package/dist/utils/gbnfJson/terminals/GbnfRepetition.js.map +1 -0
- package/dist/utils/gbnfJson/terminals/GbnfString.d.ts +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfString.js +23 -5
- package/dist/utils/gbnfJson/terminals/GbnfString.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfStringValue.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfVerbatimText.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfWhitespace.d.ts +7 -4
- package/dist/utils/gbnfJson/terminals/GbnfWhitespace.js +37 -9
- package/dist/utils/gbnfJson/terminals/GbnfWhitespace.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/gbnfConsts.d.ts +5 -4
- package/dist/utils/gbnfJson/terminals/gbnfConsts.js +14 -3
- package/dist/utils/gbnfJson/terminals/gbnfConsts.js.map +1 -1
- package/dist/utils/gbnfJson/utils/GbnfJsonScopeState.d.ts +10 -0
- package/dist/utils/gbnfJson/utils/GbnfJsonScopeState.js +15 -0
- package/dist/utils/gbnfJson/utils/GbnfJsonScopeState.js.map +1 -0
- package/dist/utils/gbnfJson/utils/getGbnfJsonTerminalForGbnfJsonSchema.d.ts +2 -1
- package/dist/utils/gbnfJson/utils/getGbnfJsonTerminalForGbnfJsonSchema.js +6 -5
- package/dist/utils/gbnfJson/utils/getGbnfJsonTerminalForGbnfJsonSchema.js.map +1 -1
- package/dist/utils/getBuildDefaults.d.ts +1 -2
- package/dist/utils/getBuildDefaults.js +2 -3
- package/dist/utils/getBuildDefaults.js.map +1 -1
- package/dist/utils/getConsoleLogPrefix.d.ts +1 -0
- package/dist/utils/getConsoleLogPrefix.js +10 -0
- package/dist/utils/getConsoleLogPrefix.js.map +1 -0
- package/dist/utils/getGrammarsFolder.d.ts +2 -1
- package/dist/utils/getGrammarsFolder.js +8 -7
- package/dist/utils/getGrammarsFolder.js.map +1 -1
- package/dist/utils/getModuleVersion.d.ts +1 -0
- package/dist/utils/getModuleVersion.js +13 -0
- package/dist/utils/getModuleVersion.js.map +1 -0
- package/dist/utils/getQueuedTokensBeforeStopTrigger.d.ts +6 -0
- package/dist/utils/getQueuedTokensBeforeStopTrigger.js +22 -0
- package/dist/utils/getQueuedTokensBeforeStopTrigger.js.map +1 -0
- package/dist/utils/getReadableContextSize.d.ts +1 -0
- package/dist/utils/getReadableContextSize.js +7 -0
- package/dist/utils/getReadableContextSize.js.map +1 -0
- package/dist/utils/getTypeScriptTypeStringForGbnfJsonSchema.js +15 -11
- package/dist/utils/getTypeScriptTypeStringForGbnfJsonSchema.js.map +1 -1
- package/dist/utils/gitReleaseBundles.js +73 -5
- package/dist/utils/gitReleaseBundles.js.map +1 -1
- package/dist/utils/hashString.d.ts +1 -0
- package/dist/utils/hashString.js +8 -0
- package/dist/utils/hashString.js.map +1 -0
- package/dist/utils/isLockfileActive.d.ts +4 -0
- package/dist/utils/isLockfileActive.js +12 -0
- package/dist/utils/isLockfileActive.js.map +1 -0
- package/dist/utils/isToken.d.ts +2 -0
- package/dist/utils/isToken.js +4 -0
- package/dist/utils/isToken.js.map +1 -0
- package/dist/utils/isUrl.d.ts +1 -0
- package/dist/utils/isUrl.js +15 -0
- package/dist/utils/isUrl.js.map +1 -0
- package/dist/utils/mergeUnionTypes.d.ts +10 -0
- package/dist/utils/mergeUnionTypes.js +2 -0
- package/dist/utils/mergeUnionTypes.js.map +1 -0
- package/dist/utils/parseModelFileName.d.ts +1 -0
- package/dist/utils/parseModelFileName.js +6 -1
- package/dist/utils/parseModelFileName.js.map +1 -1
- package/dist/utils/parseTextTemplate.d.ts +66 -0
- package/dist/utils/parseTextTemplate.js +116 -0
- package/dist/utils/parseTextTemplate.js.map +1 -0
- package/dist/utils/prettyPrintObject.d.ts +10 -0
- package/dist/utils/prettyPrintObject.js +84 -0
- package/dist/utils/prettyPrintObject.js.map +1 -0
- package/dist/utils/removeNullFields.d.ts +2 -1
- package/dist/utils/removeNullFields.js +8 -0
- package/dist/utils/removeNullFields.js.map +1 -1
- package/dist/utils/resolveGithubRelease.d.ts +2 -0
- package/dist/utils/resolveGithubRelease.js +36 -0
- package/dist/utils/resolveGithubRelease.js.map +1 -0
- package/dist/utils/runtime.d.ts +4 -0
- package/dist/utils/runtime.js +8 -0
- package/dist/utils/runtime.js.map +1 -0
- package/dist/utils/spawnCommand.d.ts +11 -1
- package/dist/utils/spawnCommand.js +56 -6
- package/dist/utils/spawnCommand.js.map +1 -1
- package/dist/utils/tokenizeInput.d.ts +3 -0
- package/dist/utils/tokenizeInput.js +12 -0
- package/dist/utils/tokenizeInput.js.map +1 -0
- package/dist/utils/utilTypes.d.ts +3 -0
- package/dist/utils/utilTypes.js +2 -0
- package/dist/utils/utilTypes.js.map +1 -0
- package/dist/utils/waitForLockfileRelease.d.ts +5 -0
- package/dist/utils/waitForLockfileRelease.js +20 -0
- package/dist/utils/waitForLockfileRelease.js.map +1 -0
- package/dist/utils/withLockfile.d.ts +7 -0
- package/dist/utils/withLockfile.js +44 -0
- package/dist/utils/withLockfile.js.map +1 -0
- package/dist/utils/withOra.d.ts +2 -0
- package/dist/utils/withOra.js +22 -6
- package/dist/utils/withOra.js.map +1 -1
- package/dist/utils/withProgressLog.d.ts +23 -0
- package/dist/utils/withProgressLog.js +211 -0
- package/dist/utils/withProgressLog.js.map +1 -0
- package/dist/utils/withStatusLogs.d.ts +2 -1
- package/dist/utils/withStatusLogs.js +12 -9
- package/dist/utils/withStatusLogs.js.map +1 -1
- package/llama/.clang-format +1 -2
- package/llama/CMakeLists.txt +115 -4
- package/llama/addon.cpp +1318 -99
- package/llama/binariesGithubRelease.json +1 -1
- package/llama/gitRelease.bundle +0 -0
- package/llama/gpuInfo/cuda-gpu-info.cu +120 -0
- package/llama/gpuInfo/cuda-gpu-info.h +10 -0
- package/llama/gpuInfo/metal-gpu-info.h +8 -0
- package/llama/gpuInfo/metal-gpu-info.mm +30 -0
- package/llama/gpuInfo/vulkan-gpu-info.cpp +83 -0
- package/llama/gpuInfo/vulkan-gpu-info.h +9 -0
- package/llama/grammars/README.md +11 -1
- package/llama/grammars/json.gbnf +1 -1
- package/llama/grammars/json_arr.gbnf +1 -1
- package/llama/llama.cpp.info.json +4 -0
- package/llama/toolchains/win32.host-x64.target-arm64.cmake +41 -0
- package/llamaBins/linux-arm64/_nlcBuildMetadata.json +1 -0
- package/llamaBins/linux-arm64/llama-addon.node +0 -0
- package/llamaBins/linux-armv7l/_nlcBuildMetadata.json +1 -0
- package/llamaBins/linux-armv7l/llama-addon.node +0 -0
- package/llamaBins/linux-x64/_nlcBuildMetadata.json +1 -0
- package/llamaBins/linux-x64/llama-addon.node +0 -0
- package/llamaBins/linux-x64-cuda/_nlcBuildMetadata.json +1 -0
- package/llamaBins/linux-x64-cuda/llama-addon.node +0 -0
- package/llamaBins/linux-x64-vulkan/_nlcBuildMetadata.json +1 -0
- package/llamaBins/linux-x64-vulkan/llama-addon.node +0 -0
- package/llamaBins/mac-arm64-metal/_nlcBuildMetadata.json +1 -0
- package/llamaBins/mac-arm64-metal/default.metallib +0 -0
- package/llamaBins/mac-arm64-metal/llama-addon.node +0 -0
- package/llamaBins/mac-x64/_nlcBuildMetadata.json +1 -0
- package/llamaBins/mac-x64/llama-addon.node +0 -0
- package/llamaBins/win-arm64/_nlcBuildMetadata.json +1 -0
- package/llamaBins/win-arm64/llama-addon.exp +0 -0
- package/llamaBins/win-arm64/llama-addon.lib +0 -0
- package/llamaBins/win-arm64/llama-addon.node +0 -0
- package/llamaBins/win-x64/_nlcBuildMetadata.json +1 -0
- package/llamaBins/win-x64/llama-addon.exp +0 -0
- package/llamaBins/win-x64/llama-addon.lib +0 -0
- package/llamaBins/win-x64/llama-addon.node +0 -0
- package/llamaBins/win-x64-cuda/_nlcBuildMetadata.json +1 -0
- package/llamaBins/win-x64-cuda/llama-addon.exp +0 -0
- package/llamaBins/win-x64-cuda/llama-addon.lib +0 -0
- package/llamaBins/win-x64-cuda/llama-addon.node +0 -0
- package/llamaBins/win-x64-vulkan/_nlcBuildMetadata.json +1 -0
- package/llamaBins/win-x64-vulkan/llama-addon.exp +0 -0
- package/llamaBins/win-x64-vulkan/llama-addon.lib +0 -0
- package/llamaBins/win-x64-vulkan/llama-addon.node +0 -0
- package/package.json +61 -34
- package/templates/packed/electron-typescript-react.json +1 -0
- package/templates/packed/node-typescript.json +1 -0
- package/templates/packed/node_modules.json +1 -0
- package/dist/AbortError.d.ts +0 -2
- package/dist/AbortError.js +0 -7
- package/dist/AbortError.js.map +0 -1
- package/dist/chatWrappers/LlamaChatWrapper.js.map +0 -1
- package/dist/chatWrappers/resolveChatWrapperBasedOnModel.d.ts +0 -13
- package/dist/chatWrappers/resolveChatWrapperBasedOnModel.js +0 -55
- package/dist/chatWrappers/resolveChatWrapperBasedOnModel.js.map +0 -1
- package/dist/llamaEvaluator/LlamaBins.d.ts +0 -18
- package/dist/llamaEvaluator/LlamaBins.js +0 -5
- package/dist/llamaEvaluator/LlamaBins.js.map +0 -1
- package/dist/llamaEvaluator/LlamaChat/LlamaChat.js.map +0 -1
- package/dist/llamaEvaluator/LlamaChat/utils/FunctionCallGrammar.js.map +0 -1
- package/dist/llamaEvaluator/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.js.map +0 -1
- package/dist/llamaEvaluator/LlamaChatSession/LlamaChatSession.js.map +0 -1
- package/dist/llamaEvaluator/LlamaChatSession/utils/defineChatSessionFunction.js.map +0 -1
- package/dist/llamaEvaluator/LlamaContext/LlamaContext.js.map +0 -1
- package/dist/llamaEvaluator/LlamaContext/types.d.ts +0 -86
- package/dist/llamaEvaluator/LlamaContext/types.js.map +0 -1
- package/dist/llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/firstInFirstOutStrategy.js.map +0 -1
- package/dist/llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/maximumParallelismStrategy.js.map +0 -1
- package/dist/llamaEvaluator/LlamaContext/utils/resolveBatchItemsPrioritizingStrategy.d.ts +0 -2
- package/dist/llamaEvaluator/LlamaContext/utils/resolveBatchItemsPrioritizingStrategy.js.map +0 -1
- package/dist/llamaEvaluator/LlamaGrammar.js.map +0 -1
- package/dist/llamaEvaluator/LlamaGrammarEvaluationState.js.map +0 -1
- package/dist/llamaEvaluator/LlamaJsonSchemaGrammar.js.map +0 -1
- package/dist/llamaEvaluator/LlamaModel.d.ts +0 -119
- package/dist/llamaEvaluator/LlamaModel.js +0 -322
- package/dist/llamaEvaluator/LlamaModel.js.map +0 -1
- package/dist/utils/binariesGithubRelease.js.map +0 -1
- package/dist/utils/clearLlamaBuild.d.ts +0 -1
- package/dist/utils/clearLlamaBuild.js +0 -12
- package/dist/utils/clearLlamaBuild.js.map +0 -1
- package/dist/utils/cloneLlamaCppRepo.d.ts +0 -2
- package/dist/utils/cloneLlamaCppRepo.js +0 -102
- package/dist/utils/cloneLlamaCppRepo.js.map +0 -1
- package/dist/utils/compileLLamaCpp.d.ts +0 -8
- package/dist/utils/compileLLamaCpp.js +0 -132
- package/dist/utils/compileLLamaCpp.js.map +0 -1
- package/dist/utils/getBin.js +0 -78
- package/dist/utils/getBin.js.map +0 -1
- package/dist/utils/getGbnfGrammarForGbnfJsonSchema.d.ts +0 -2
- package/dist/utils/getGbnfGrammarForGbnfJsonSchema.js +0 -9
- package/dist/utils/getGbnfGrammarForGbnfJsonSchema.js.map +0 -1
- package/dist/utils/getReleaseInfo.d.ts +0 -7
- package/dist/utils/getReleaseInfo.js +0 -30
- package/dist/utils/getReleaseInfo.js.map +0 -1
- package/dist/utils/parseModelTypeDescription.d.ts +0 -6
- package/dist/utils/parseModelTypeDescription.js +0 -9
- package/dist/utils/parseModelTypeDescription.js.map +0 -1
- package/dist/utils/resolveChatWrapper.d.ts +0 -4
- package/dist/utils/resolveChatWrapper.js +0 -16
- package/dist/utils/resolveChatWrapper.js.map +0 -1
- package/dist/utils/usedBinFlag.d.ts +0 -6
- package/dist/utils/usedBinFlag.js +0 -15
- package/dist/utils/usedBinFlag.js.map +0 -1
- package/llama/usedBin.json +0 -3
- package/llamaBins/mac-arm64/llama-addon.node +0 -0
- /package/dist/{utils → bindings/utils}/binariesGithubRelease.d.ts +0 -0
- /package/dist/{llamaEvaluator → evaluator}/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.d.ts +0 -0
- /package/dist/{llamaEvaluator → evaluator}/LlamaContext/types.js +0 -0
- /package/dist/{llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies → evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies}/firstInFirstOutStrategy.d.ts +0 -0
- /package/dist/{llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies → evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies}/firstInFirstOutStrategy.js +0 -0
- /package/dist/{llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies → evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies}/maximumParallelismStrategy.d.ts +0 -0
- /package/dist/{llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies → evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies}/maximumParallelismStrategy.js +0 -0
- /package/dist/{llamaEvaluator → evaluator}/LlamaGrammarEvaluationState.d.ts +0 -0
|
@@ -1,11 +1,14 @@
|
|
|
1
|
-
import { DisposeAggregator, EventRelay, withLock, DisposedError } from "lifecycle-utils";
|
|
1
|
+
import { DisposeAggregator, EventRelay, withLock, DisposedError, AsyncDisposeAggregator } from "lifecycle-utils";
|
|
2
2
|
import { removeNullFields } from "../../utils/removeNullFields.js";
|
|
3
|
-
import { AddonContext } from "../LlamaBins.js";
|
|
4
3
|
import { compareTokens } from "../../utils/compareTokens.js";
|
|
5
|
-
import {
|
|
4
|
+
import { DisposeGuard } from "../../utils/DisposeGuard.js";
|
|
5
|
+
import { TokenMeter } from "../TokenMeter.js";
|
|
6
|
+
import { resolveBatchItemsPrioritizationStrategy } from "./utils/resolveBatchItemsPrioritizationStrategy.js";
|
|
6
7
|
export class LlamaContext {
|
|
8
|
+
/** @internal */ _llama;
|
|
7
9
|
/** @internal */ _ctx;
|
|
8
10
|
/** @internal */ _onReclaimUnusedSequenceId = new EventRelay();
|
|
11
|
+
/** @internal */ _backendContextDisposeGuard;
|
|
9
12
|
/** @internal */ _model;
|
|
10
13
|
/** @internal */ _contextSize;
|
|
11
14
|
/** @internal */ _batchSize;
|
|
@@ -14,7 +17,8 @@ export class LlamaContext {
|
|
|
14
17
|
/** @internal */ _batchingOptions;
|
|
15
18
|
/** @internal */ _queuedDecodeSequenceIds = new Set();
|
|
16
19
|
/** @internal */ _queuedDecodes = [];
|
|
17
|
-
/** @internal */ _disposeAggregator = new
|
|
20
|
+
/** @internal */ _disposeAggregator = new AsyncDisposeAggregator();
|
|
21
|
+
/** @internal */ _modelPreventDisposalHandle;
|
|
18
22
|
/** @internal */ _nextGeneratedSequenceId = 0;
|
|
19
23
|
/** @internal */ _dispatchDecodeScheduled = false;
|
|
20
24
|
/** @internal */ _batchDispatchPending = false;
|
|
@@ -22,44 +26,50 @@ export class LlamaContext {
|
|
|
22
26
|
/** @internal */ _allocatedContextSize;
|
|
23
27
|
/** @internal */ _disposed = false;
|
|
24
28
|
onDispose = new EventRelay();
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
*/
|
|
28
|
-
constructor({ model, sequences = 1, seed = null, contextSize = model.trainContextSize, batchSize = contextSize, logitsAll, embedding, threads = 6, batching: { dispatchSchedule: batchingDispatchSchedule = "nextTick", itemsPrioritizingStrategy: batchingItemsPrioritizingStrategy = "maximumParallelism" } = {} }) {
|
|
29
|
-
if (model.disposed)
|
|
29
|
+
constructor({ _model }, { sequences, seed = null, contextSize, batchSize, threads = 6, batching: { dispatchSchedule: batchingDispatchSchedule = "nextTick", itemPrioritizationStrategy: batchingItemsPrioritizationStrategy = "maximumParallelism" } = {}, _embeddings, _noSeed }) {
|
|
30
|
+
if (_model.disposed)
|
|
30
31
|
throw new DisposedError();
|
|
31
|
-
this.
|
|
32
|
+
this._llama = _model._llama;
|
|
33
|
+
this._model = _model;
|
|
34
|
+
this._backendContextDisposeGuard = new DisposeGuard([this._model._backendModelDisposeGuard]);
|
|
35
|
+
this._modelPreventDisposalHandle = this._model._backendModelDisposeGuard.createPreventDisposalHandle();
|
|
32
36
|
this._totalSequences = Math.max(1, Math.floor(sequences));
|
|
33
37
|
this._contextSize = Math.max(2, contextSize);
|
|
34
38
|
this._batchSize = Math.max(batchSize, this._totalSequences);
|
|
35
|
-
this._ctx = new AddonContext(this._model._model, removeNullFields({
|
|
39
|
+
this._ctx = new this._llama._bindings.AddonContext(this._model._model, removeNullFields({
|
|
36
40
|
seed: seed != null ? Math.max(-1, Math.floor(seed)) : undefined,
|
|
37
|
-
contextSize:
|
|
41
|
+
contextSize: this._contextSize * this._totalSequences,
|
|
38
42
|
batchSize: this._batchSize,
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
43
|
+
sequences: this._totalSequences,
|
|
44
|
+
threads: Math.max(0, Math.floor(threads)),
|
|
45
|
+
embeddings: _embeddings,
|
|
46
|
+
noSeed: _noSeed
|
|
42
47
|
}));
|
|
43
48
|
this._batchingOptions = {
|
|
44
49
|
dispatchSchedule: batchingDispatchSchedule,
|
|
45
|
-
|
|
50
|
+
itemPrioritizationStrategy: batchingItemsPrioritizationStrategy
|
|
46
51
|
};
|
|
47
52
|
this._reclaimUnusedSequenceId = this._reclaimUnusedSequenceId.bind(this);
|
|
48
|
-
this._disposeAggregator.add(this._onReclaimUnusedSequenceId);
|
|
49
|
-
this._disposeAggregator.add(this.onDispose.dispatchEvent);
|
|
50
53
|
this._disposeAggregator.add(() => {
|
|
51
|
-
this.
|
|
54
|
+
this._disposed = true;
|
|
52
55
|
});
|
|
56
|
+
this._disposeAggregator.add(this._onReclaimUnusedSequenceId);
|
|
57
|
+
this._disposeAggregator.add(this.onDispose.dispatchEvent);
|
|
53
58
|
this._disposeAggregator.add(this.model.onDispose.createListener(disposeContextIfReferenced.bind(null, new WeakRef(this))));
|
|
59
|
+
this._disposeAggregator.add(async () => {
|
|
60
|
+
await this._backendContextDisposeGuard.acquireDisposeLock();
|
|
61
|
+
await this._ctx.dispose();
|
|
62
|
+
this._modelPreventDisposalHandle.dispose();
|
|
63
|
+
});
|
|
54
64
|
}
|
|
55
|
-
dispose() {
|
|
65
|
+
async dispose() {
|
|
56
66
|
if (this._disposed)
|
|
57
67
|
return;
|
|
58
68
|
this._disposed = true;
|
|
59
|
-
this._disposeAggregator.dispose();
|
|
69
|
+
await this._disposeAggregator.dispose();
|
|
60
70
|
}
|
|
61
71
|
/** @hidden */
|
|
62
|
-
[Symbol.
|
|
72
|
+
[Symbol.asyncDispose]() {
|
|
63
73
|
return this.dispose();
|
|
64
74
|
}
|
|
65
75
|
get disposed() {
|
|
@@ -74,6 +84,14 @@ export class LlamaContext {
|
|
|
74
84
|
get batchSize() {
|
|
75
85
|
return this._batchSize;
|
|
76
86
|
}
|
|
87
|
+
/**
|
|
88
|
+
* The actual size of the state in the memory in bytes.
|
|
89
|
+
* This value is provided by `llama.cpp` and doesn't include all the memory overhead of the context.
|
|
90
|
+
*/
|
|
91
|
+
get stateSize() {
|
|
92
|
+
this._ensureNotDisposed();
|
|
93
|
+
return this._ctx.getStateSize();
|
|
94
|
+
}
|
|
77
95
|
getAllocatedContextSize() {
|
|
78
96
|
this._ensureNotDisposed();
|
|
79
97
|
if (this._allocatedContextSize == null)
|
|
@@ -89,9 +107,9 @@ export class LlamaContext {
|
|
|
89
107
|
/**
|
|
90
108
|
* Before calling this method, make sure to call `sequencesLeft` to check if there are any sequences left.
|
|
91
109
|
* When there are no sequences left, this method will throw an error.
|
|
92
|
-
* @param [options]
|
|
93
110
|
*/
|
|
94
|
-
getSequence(
|
|
111
|
+
getSequence(options = {}) {
|
|
112
|
+
const { contextShift: { size: contextShiftSize = Math.min(100, Math.ceil(this.contextSize / 2)), strategy: contextShiftStrategy = "eraseBeginning" } = {}, _tokenMeter } = options;
|
|
95
113
|
this._ensureNotDisposed();
|
|
96
114
|
const nextSequenceId = this._popSequenceId();
|
|
97
115
|
if (nextSequenceId == null)
|
|
@@ -99,6 +117,7 @@ export class LlamaContext {
|
|
|
99
117
|
return LlamaContextSequence._create({
|
|
100
118
|
sequenceId: nextSequenceId,
|
|
101
119
|
context: this,
|
|
120
|
+
tokenMeter: _tokenMeter,
|
|
102
121
|
contextShift: {
|
|
103
122
|
size: contextShiftSize,
|
|
104
123
|
strategy: contextShiftStrategy
|
|
@@ -115,17 +134,18 @@ export class LlamaContext {
|
|
|
115
134
|
this._currentDispatchBatchHandle = {};
|
|
116
135
|
this._dispatchDecodeScheduled = false;
|
|
117
136
|
this._batchDispatchPending = false;
|
|
118
|
-
let
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
137
|
+
let shouldHaveAnotherLoop = this._queuedDecodes.length > 0;
|
|
138
|
+
const resolvePrioritizationStrategy = () => {
|
|
139
|
+
try {
|
|
140
|
+
this._ensureNotDisposed();
|
|
141
|
+
return resolveBatchItemsPrioritizationStrategy(this._batchingOptions.itemPrioritizationStrategy);
|
|
142
|
+
}
|
|
143
|
+
catch (err) {
|
|
144
|
+
this._dispatchErrorForQueuedDecodesAndDequeue(new Set(this._queuedDecodes), err);
|
|
145
|
+
}
|
|
146
|
+
return null;
|
|
147
|
+
};
|
|
148
|
+
const getOrderedQueuedDecodes = (prioritizationStrategy) => {
|
|
129
149
|
const batchItemToQueuedDecodeMap = new Map();
|
|
130
150
|
const batchItemsList = [];
|
|
131
151
|
for (const queuedDecode of this._queuedDecodes) {
|
|
@@ -138,42 +158,65 @@ export class LlamaContext {
|
|
|
138
158
|
}
|
|
139
159
|
let prioritizedItems;
|
|
140
160
|
try {
|
|
141
|
-
prioritizedItems =
|
|
161
|
+
prioritizedItems = prioritizationStrategy({
|
|
142
162
|
items: batchItemsList,
|
|
143
163
|
size: this._batchSize
|
|
144
164
|
});
|
|
145
165
|
}
|
|
146
166
|
catch (err) {
|
|
147
167
|
this._dispatchErrorForQueuedDecodesAndDequeue(new Set(this._queuedDecodes), err);
|
|
148
|
-
return;
|
|
168
|
+
return null;
|
|
149
169
|
}
|
|
150
|
-
|
|
151
|
-
const afterDecodeActions = [];
|
|
152
|
-
const queuedDecodesToDelete = new Set();
|
|
153
|
-
const currentQueuedDecodeItems = new Set();
|
|
154
|
-
const currentBatchItems = [];
|
|
155
|
-
let currentBatchSize = 0;
|
|
156
|
-
for (const prioritizedItem of prioritizedItems) {
|
|
170
|
+
return prioritizedItems.map((prioritizedItem) => {
|
|
157
171
|
const queuedDecode = batchItemToQueuedDecodeMap.get(prioritizedItem.item);
|
|
158
172
|
if (queuedDecode == null)
|
|
159
173
|
throw new Error("Received invalid batch item. Make sure you keep the original object reference " +
|
|
160
174
|
"of the batch item on `item` on `PrioritizedBatchItem` in your custom prioritization strategy");
|
|
161
|
-
|
|
162
|
-
|
|
175
|
+
return {
|
|
176
|
+
queuedDecode,
|
|
177
|
+
processAmount: prioritizedItem.processAmount
|
|
178
|
+
};
|
|
179
|
+
});
|
|
180
|
+
};
|
|
181
|
+
const fitQueuedDecodesToABatch = (queuedDecodes, batchSize) => {
|
|
182
|
+
const currentBatchItems = [];
|
|
183
|
+
let currentBatchSize = 0;
|
|
184
|
+
let batchTokenSlotsLeft = batchSize;
|
|
185
|
+
for (const { queuedDecode, processAmount } of queuedDecodes) {
|
|
186
|
+
const resolvedProcessAmount = Math.min(processAmount <= 0 ? 1 : processAmount, queuedDecode.tokens.length, batchTokenSlotsLeft);
|
|
187
|
+
if (resolvedProcessAmount <= 0) {
|
|
188
|
+
if (batchTokenSlotsLeft === 0)
|
|
189
|
+
break;
|
|
163
190
|
continue;
|
|
164
|
-
|
|
191
|
+
}
|
|
192
|
+
batchTokenSlotsLeft -= resolvedProcessAmount;
|
|
193
|
+
currentBatchSize += resolvedProcessAmount;
|
|
165
194
|
currentBatchItems.push({
|
|
166
195
|
queuedDecode,
|
|
167
|
-
processAmount
|
|
196
|
+
processAmount: resolvedProcessAmount
|
|
168
197
|
});
|
|
169
|
-
currentBatchSize += processAmount;
|
|
170
198
|
}
|
|
199
|
+
return {
|
|
200
|
+
currentBatchItems,
|
|
201
|
+
currentBatchSize
|
|
202
|
+
};
|
|
203
|
+
};
|
|
204
|
+
const decodeTokenBatchItems = async (batchItems, currentBatchSize) => {
|
|
205
|
+
const afterDecodeActions = [];
|
|
206
|
+
const queuedDecodesToDelete = new Set();
|
|
207
|
+
const currentQueuedDecodeItems = new Set();
|
|
171
208
|
if (currentBatchSize !== 0)
|
|
172
209
|
this._ctx.initBatch(currentBatchSize);
|
|
173
|
-
for (const { queuedDecode, processAmount } of
|
|
210
|
+
for (const { queuedDecode, processAmount } of batchItems) {
|
|
174
211
|
let batchLogitIndex;
|
|
175
212
|
try {
|
|
176
|
-
|
|
213
|
+
const shouldGenerateLogitAtTheEnd = queuedDecode.generateLogitAtTheEnd &&
|
|
214
|
+
processAmount === queuedDecode.tokens.length;
|
|
215
|
+
const tokensToProcess = queuedDecode.tokens.slice(0, processAmount);
|
|
216
|
+
const numberOfOutputTokens = shouldGenerateLogitAtTheEnd ? 1 : 0;
|
|
217
|
+
TokenMeter.useTokens(queuedDecode.tokenMeter, Math.max(0, tokensToProcess.length - numberOfOutputTokens), "input");
|
|
218
|
+
TokenMeter.useTokens(queuedDecode.tokenMeter, numberOfOutputTokens, "output");
|
|
219
|
+
batchLogitIndex = this._ctx.addToBatch(queuedDecode.sequenceId, queuedDecode.firstTokenSequenceIndex, Uint32Array.from(tokensToProcess), shouldGenerateLogitAtTheEnd);
|
|
177
220
|
}
|
|
178
221
|
catch (err) {
|
|
179
222
|
this._dispatchErrorForQueuedDecodesAndDequeue(new Set([queuedDecode]), err);
|
|
@@ -192,8 +235,6 @@ export class LlamaContext {
|
|
|
192
235
|
queuedDecode.tokens = queuedDecode.tokens.slice(processAmount);
|
|
193
236
|
queuedDecode.firstTokenSequenceIndex += processAmount;
|
|
194
237
|
}
|
|
195
|
-
if (batchTokenSlotsLeft === 0)
|
|
196
|
-
break;
|
|
197
238
|
}
|
|
198
239
|
for (let i = 0; i < this._queuedDecodes.length; i++) {
|
|
199
240
|
const queuedDecode = this._queuedDecodes[i];
|
|
@@ -203,7 +244,6 @@ export class LlamaContext {
|
|
|
203
244
|
i--;
|
|
204
245
|
}
|
|
205
246
|
}
|
|
206
|
-
shouldHaveAnotherBatch = this._queuedDecodes.length > 0;
|
|
207
247
|
try {
|
|
208
248
|
if (currentBatchSize !== 0)
|
|
209
249
|
await this._ctx.decodeBatch();
|
|
@@ -224,11 +264,45 @@ export class LlamaContext {
|
|
|
224
264
|
}
|
|
225
265
|
accept(undefined);
|
|
226
266
|
}
|
|
267
|
+
};
|
|
268
|
+
const prioritizationStrategy = resolvePrioritizationStrategy();
|
|
269
|
+
if (prioritizationStrategy == null)
|
|
270
|
+
return; // all queued items are rejected and dequeued when we get here
|
|
271
|
+
while (shouldHaveAnotherLoop) {
|
|
272
|
+
const orderedQueuedDecodes = getOrderedQueuedDecodes(prioritizationStrategy);
|
|
273
|
+
if (orderedQueuedDecodes == null)
|
|
274
|
+
return; // all queued items are rejected and dequeued when we get here
|
|
275
|
+
const { currentBatchItems, currentBatchSize } = fitQueuedDecodesToABatch(orderedQueuedDecodes, this._batchSize);
|
|
276
|
+
let preventDisposalHandle;
|
|
277
|
+
try {
|
|
278
|
+
preventDisposalHandle = this._backendContextDisposeGuard.createPreventDisposalHandle();
|
|
279
|
+
}
|
|
280
|
+
catch (err) {
|
|
281
|
+
this._dispatchErrorForQueuedDecodesAndDequeue(new Set(this._queuedDecodes), err);
|
|
282
|
+
return;
|
|
283
|
+
}
|
|
284
|
+
try {
|
|
285
|
+
await decodeTokenBatchItems(currentBatchItems, currentBatchSize);
|
|
286
|
+
shouldHaveAnotherLoop = this._queuedDecodes.length > 0;
|
|
287
|
+
}
|
|
288
|
+
finally {
|
|
289
|
+
preventDisposalHandle.dispose();
|
|
290
|
+
}
|
|
227
291
|
}
|
|
228
292
|
});
|
|
229
293
|
}
|
|
294
|
+
/**
|
|
295
|
+
* Print the timings of token evaluation since that last print for this context.
|
|
296
|
+
* > **Note:** it prints on the `LlamaLogLevel.info` level, so if you set the level of your `Llama` instance higher than that,
|
|
297
|
+
* it won't print anything.
|
|
298
|
+
*/
|
|
299
|
+
async printTimings() {
|
|
300
|
+
this._ensureNotDisposed();
|
|
301
|
+
this._ctx.printTimings();
|
|
302
|
+
await new Promise((accept) => setTimeout(accept, 0)); // wait for the logs to finish printing
|
|
303
|
+
}
|
|
230
304
|
/** @internal */
|
|
231
|
-
async _decodeTokens({ sequenceId, firstTokenSequenceIndex, tokens, generateLogitAtTheEnd = false, evaluationPriority = 5 }, onDone) {
|
|
305
|
+
async _decodeTokens({ sequenceId, firstTokenSequenceIndex, tokens, generateLogitAtTheEnd = false, evaluationPriority = 5, tokenMeter }, onDone) {
|
|
232
306
|
return await new Promise((accept, reject) => {
|
|
233
307
|
this._queuedDecodes.push({
|
|
234
308
|
sequenceId,
|
|
@@ -236,6 +310,7 @@ export class LlamaContext {
|
|
|
236
310
|
firstTokenSequenceIndex,
|
|
237
311
|
generateLogitAtTheEnd,
|
|
238
312
|
evaluationPriority,
|
|
313
|
+
tokenMeter,
|
|
239
314
|
response: [accept, reject],
|
|
240
315
|
onDone
|
|
241
316
|
});
|
|
@@ -248,6 +323,8 @@ export class LlamaContext {
|
|
|
248
323
|
if (this._disposed)
|
|
249
324
|
return;
|
|
250
325
|
void withLock(this, "context", async () => {
|
|
326
|
+
if (this._disposed)
|
|
327
|
+
return;
|
|
251
328
|
this._ctx.disposeSequence(sequenceId);
|
|
252
329
|
this._unusedSequenceIds.push(sequenceId);
|
|
253
330
|
this._onReclaimUnusedSequenceId.dispatchEvent();
|
|
@@ -258,6 +335,10 @@ export class LlamaContext {
|
|
|
258
335
|
this._ctx.acceptGrammarEvaluationStateToken(grammarEvaluationState._state, token);
|
|
259
336
|
}
|
|
260
337
|
/** @internal */
|
|
338
|
+
_canBeNextTokenForGrammarEvaluationState(grammarEvaluationState, token) {
|
|
339
|
+
return this._ctx.canBeNextTokenForGrammarEvaluationState(grammarEvaluationState._state, token);
|
|
340
|
+
}
|
|
341
|
+
/** @internal */
|
|
261
342
|
_popSequenceId() {
|
|
262
343
|
if (this._unusedSequenceIds.length > 0)
|
|
263
344
|
return this._unusedSequenceIds.shift();
|
|
@@ -307,20 +388,63 @@ export class LlamaContext {
|
|
|
307
388
|
if (this._disposed)
|
|
308
389
|
throw new DisposedError();
|
|
309
390
|
}
|
|
391
|
+
/** @internal */
|
|
392
|
+
static async _create(options, { _model }) {
|
|
393
|
+
const sequences = options.sequences ?? getDefaultContextSequences();
|
|
394
|
+
const contextSize = _model.fileInsights.configurationResolver.resolveContextContextSize(options.contextSize, {
|
|
395
|
+
batchSize: options.batchSize,
|
|
396
|
+
sequences: sequences,
|
|
397
|
+
modelGpuLayers: _model.gpuLayers,
|
|
398
|
+
modelTrainContextSize: _model.trainContextSize,
|
|
399
|
+
getVramState: () => _model._llama._vramOrchestrator.getMemoryState(),
|
|
400
|
+
llamaGpu: _model._llama.gpu,
|
|
401
|
+
ignoreMemorySafetyChecks: options.ignoreMemorySafetyChecks,
|
|
402
|
+
isEmbeddingContext: options._embeddings
|
|
403
|
+
});
|
|
404
|
+
const batchSize = options.batchSize ?? getDefaultContextBatchSize({ contextSize, sequences });
|
|
405
|
+
const vramRequiredEstimate = _model.fileInsights.estimateContextResourceRequirements({
|
|
406
|
+
contextSize,
|
|
407
|
+
sequences,
|
|
408
|
+
isEmbeddingContext: options._embeddings,
|
|
409
|
+
modelGpuLayers: _model.gpuLayers,
|
|
410
|
+
batchSize
|
|
411
|
+
}).gpuVram;
|
|
412
|
+
const context = new LlamaContext({ _model }, { ...options, contextSize, batchSize, sequences });
|
|
413
|
+
const { createSignal } = options;
|
|
414
|
+
const contextCreationMemoryReservation = options.ignoreMemorySafetyChecks
|
|
415
|
+
? null
|
|
416
|
+
: _model._llama._vramOrchestrator.reserveMemory(vramRequiredEstimate);
|
|
417
|
+
try {
|
|
418
|
+
const contextLoaded = await context._ctx.init();
|
|
419
|
+
if (createSignal?.aborted) {
|
|
420
|
+
if (contextLoaded)
|
|
421
|
+
await context._ctx.dispose();
|
|
422
|
+
throw createSignal.reason;
|
|
423
|
+
}
|
|
424
|
+
else if (!contextLoaded)
|
|
425
|
+
throw new Error("Failed to create context");
|
|
426
|
+
return context;
|
|
427
|
+
}
|
|
428
|
+
finally {
|
|
429
|
+
contextCreationMemoryReservation?.dispose?.();
|
|
430
|
+
}
|
|
431
|
+
}
|
|
310
432
|
}
|
|
311
433
|
export class LlamaContextSequence {
|
|
312
434
|
/** @internal */ _sequenceId;
|
|
313
435
|
/** @internal */ _gcRegistry;
|
|
314
436
|
/** @internal */ _context;
|
|
315
437
|
/** @internal */ _contextShift;
|
|
438
|
+
/** @internal */ _tokenMeter;
|
|
316
439
|
/** @internal */ _disposeAggregator = new DisposeAggregator();
|
|
317
440
|
/** @internal */ _contextTokens = [];
|
|
318
441
|
/** @internal */ _nextTokenIndex = 0;
|
|
319
442
|
/** @internal */ _disposed = false;
|
|
320
443
|
onDispose = new EventRelay();
|
|
321
|
-
constructor({ sequenceId, context, contextShift }) {
|
|
444
|
+
constructor({ sequenceId, context, tokenMeter, contextShift }) {
|
|
322
445
|
this._sequenceId = sequenceId;
|
|
323
446
|
this._context = context;
|
|
447
|
+
this._tokenMeter = tokenMeter ?? new TokenMeter();
|
|
324
448
|
this._contextShift = contextShift;
|
|
325
449
|
this._gcRegistry = new FinalizationRegistry(this._context._reclaimUnusedSequenceId);
|
|
326
450
|
this._gcRegistry.register(this, sequenceId);
|
|
@@ -357,6 +481,9 @@ export class LlamaContextSequence {
|
|
|
357
481
|
get contextTokens() {
|
|
358
482
|
return this._contextTokens.slice();
|
|
359
483
|
}
|
|
484
|
+
get tokenMeter() {
|
|
485
|
+
return this._tokenMeter;
|
|
486
|
+
}
|
|
360
487
|
get isLoadedToMemory() {
|
|
361
488
|
return !this._disposed;
|
|
362
489
|
}
|
|
@@ -382,7 +509,7 @@ export class LlamaContextSequence {
|
|
|
382
509
|
}
|
|
383
510
|
/**
|
|
384
511
|
* Erase context tokens in the provided ranges to free up space for new tokens to be generated.
|
|
385
|
-
*
|
|
512
|
+
* The start of each range is inclusive, and the end of each range is exclusive.
|
|
386
513
|
* For example, the range `{start: 0, end: 1}` will remove the token at the `0` index only.
|
|
387
514
|
*/
|
|
388
515
|
async eraseContextTokenRanges(ranges) {
|
|
@@ -391,6 +518,8 @@ export class LlamaContextSequence {
|
|
|
391
518
|
this._ensureNotDisposed();
|
|
392
519
|
if (ranges.length === 0)
|
|
393
520
|
return;
|
|
521
|
+
// if the deletion fails, we'll have to dispose the sequence and fill it up again
|
|
522
|
+
let deletionSuccessful = true;
|
|
394
523
|
const resolvedRanges = ranges
|
|
395
524
|
.map(({ start, end }) => {
|
|
396
525
|
if (start === end)
|
|
@@ -420,34 +549,41 @@ export class LlamaContextSequence {
|
|
|
420
549
|
let lastDeleteRangeEndPos = null;
|
|
421
550
|
for (const range of resolvedRanges) {
|
|
422
551
|
this._contextTokens.splice(range.start - removedTokens, range.end - range.start);
|
|
423
|
-
|
|
424
|
-
|
|
552
|
+
if (deletionSuccessful)
|
|
553
|
+
deletionSuccessful &&= this._context._ctx.removeTokenCellsFromSequence(this._sequenceId, range.start, range.end);
|
|
554
|
+
if (deletionSuccessful && lastDeleteRangeEndPos != null && removedTokens > 0 && lastDeleteRangeEndPos !== range.start)
|
|
425
555
|
this._context._ctx.shiftSequenceTokenCells(this._sequenceId, lastDeleteRangeEndPos, range.start, -removedTokens);
|
|
426
556
|
removedTokens += range.end - range.start;
|
|
427
557
|
lastDeleteRangeEndPos = range.end;
|
|
428
558
|
}
|
|
429
|
-
if (lastDeleteRangeEndPos != null && removedTokens > 0 && lastDeleteRangeEndPos !== this._nextTokenIndex)
|
|
559
|
+
if (deletionSuccessful && lastDeleteRangeEndPos != null && removedTokens > 0 && lastDeleteRangeEndPos !== this._nextTokenIndex)
|
|
430
560
|
this._context._ctx.shiftSequenceTokenCells(this._sequenceId, lastDeleteRangeEndPos, this._nextTokenIndex, -removedTokens);
|
|
431
561
|
this._nextTokenIndex -= removedTokens;
|
|
562
|
+
if (deletionSuccessful)
|
|
563
|
+
return;
|
|
564
|
+
const newSequenceTokens = this._contextTokens.slice();
|
|
565
|
+
this._nextTokenIndex = 0;
|
|
566
|
+
this._context._ctx.disposeSequence(this._sequenceId);
|
|
567
|
+
await this.evaluateWithoutGeneratingNewTokens(newSequenceTokens);
|
|
432
568
|
});
|
|
433
569
|
}
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
* @param [options]
|
|
437
|
-
*/
|
|
438
|
-
evaluate(tokens, { temperature = 0, topK = 40, topP = 0.95, grammarEvaluationState, repeatPenalty, evaluationPriority = 5, contextShift: { size: contextShiftSize = this._contextShift.size, strategy: contextShiftStrategy = this._contextShift.strategy } = {}, yieldEosToken = false } = {}) {
|
|
570
|
+
evaluate(tokens, options = {}) {
|
|
571
|
+
const { temperature = 0, minP = 0, topK = 40, topP = 0.95, grammarEvaluationState, repeatPenalty, tokenBias, evaluationPriority = 5, contextShift: { size: contextShiftSize = this._contextShift.size, strategy: contextShiftStrategy = this._contextShift.strategy } = {}, yieldEogToken = false, _noSampling = false } = options;
|
|
439
572
|
return this._evaluate(tokens, {
|
|
440
573
|
temperature,
|
|
574
|
+
minP,
|
|
441
575
|
topK,
|
|
442
576
|
topP,
|
|
443
577
|
grammarEvaluationState,
|
|
444
578
|
repeatPenalty,
|
|
579
|
+
tokenBias,
|
|
445
580
|
evaluationPriority,
|
|
446
581
|
contextShiftOptions: {
|
|
447
582
|
size: contextShiftSize,
|
|
448
583
|
strategy: contextShiftStrategy
|
|
449
584
|
},
|
|
450
|
-
|
|
585
|
+
yieldEogToken,
|
|
586
|
+
_noSampling
|
|
451
587
|
});
|
|
452
588
|
}
|
|
453
589
|
/**
|
|
@@ -470,24 +606,29 @@ export class LlamaContextSequence {
|
|
|
470
606
|
}
|
|
471
607
|
}
|
|
472
608
|
/** @internal */
|
|
473
|
-
async *_evaluate(tokens, { temperature = 0, topK = 40, topP = 0.95, grammarEvaluationState, repeatPenalty, evaluationPriority = 5, generateNewTokens = true, contextShiftOptions,
|
|
609
|
+
async *_evaluate(tokens, { temperature = 0, minP = 0, topK = 40, topP = 0.95, grammarEvaluationState, repeatPenalty, tokenBias, evaluationPriority = 5, generateNewTokens = true, contextShiftOptions, yieldEogToken = false, _noSampling = false }) {
|
|
474
610
|
this._ensureNotDisposed();
|
|
475
611
|
let evalTokens = tokens;
|
|
476
612
|
if (evalTokens.length === 0)
|
|
477
613
|
return;
|
|
478
|
-
// eslint-disable-next-line no-constant-condition
|
|
479
614
|
while (true) {
|
|
480
615
|
this._ensureNotDisposed();
|
|
481
616
|
// Evaluate to get the next token.
|
|
482
|
-
const nextToken = await this._decodeTokens(evalTokens, generateNewTokens, evaluationPriority, contextShiftOptions, (batchLogitIndex) => {
|
|
617
|
+
const nextToken = await this._decodeTokens(evalTokens, generateNewTokens, evaluationPriority, this._tokenMeter, contextShiftOptions, (batchLogitIndex) => {
|
|
618
|
+
if (_noSampling)
|
|
619
|
+
return null;
|
|
483
620
|
const repeatPenaltyTokens = repeatPenalty?.punishTokens instanceof Function
|
|
484
621
|
? repeatPenalty.punishTokens()
|
|
485
622
|
: repeatPenalty?.punishTokens;
|
|
486
623
|
const resolvedGrammarEvaluationState = grammarEvaluationState instanceof Function
|
|
487
624
|
? grammarEvaluationState()
|
|
488
625
|
: grammarEvaluationState;
|
|
626
|
+
if (resolvedGrammarEvaluationState != null && resolvedGrammarEvaluationState._llama !== this.model._llama)
|
|
627
|
+
throw new Error("The LlamaGrammar used by passed to this function was created with a different Llama instance than the one used by this sequence's model. Make sure you use the same Llama instance for both the model and the grammar.");
|
|
628
|
+
const { tokenBiasKeys, tokenBiasValues } = getTokenBiasesForAddon(tokenBias, this.model);
|
|
489
629
|
return this._context._ctx.sampleToken(batchLogitIndex, removeNullFields({
|
|
490
630
|
temperature,
|
|
631
|
+
minP,
|
|
491
632
|
topK,
|
|
492
633
|
topP,
|
|
493
634
|
repeatPenalty: repeatPenalty?.penalty,
|
|
@@ -496,30 +637,35 @@ export class LlamaContextSequence {
|
|
|
496
637
|
: undefined,
|
|
497
638
|
repeatPenaltyPresencePenalty: repeatPenalty?.presencePenalty,
|
|
498
639
|
repeatPenaltyFrequencyPenalty: repeatPenalty?.frequencyPenalty,
|
|
640
|
+
tokenBiasKeys,
|
|
641
|
+
tokenBiasValues,
|
|
499
642
|
grammarEvaluationState: resolvedGrammarEvaluationState?._state
|
|
500
643
|
}));
|
|
501
644
|
});
|
|
502
645
|
if (nextToken == null)
|
|
503
646
|
return;
|
|
504
647
|
// the model finished generating text
|
|
505
|
-
if (!
|
|
648
|
+
if (!yieldEogToken && this._context.model.isEogToken(nextToken))
|
|
506
649
|
break;
|
|
507
|
-
yield nextToken;
|
|
508
|
-
//
|
|
509
|
-
|
|
650
|
+
const replacementToken = (yield nextToken);
|
|
651
|
+
// set the tokens for the next evaluation
|
|
652
|
+
if (replacementToken != null)
|
|
653
|
+
evalTokens = [replacementToken];
|
|
654
|
+
else
|
|
655
|
+
evalTokens = [nextToken];
|
|
510
656
|
}
|
|
511
657
|
}
|
|
512
658
|
/** @internal */
|
|
513
|
-
async _decodeTokens(tokens, generateLogit, evaluationPriority, contextShiftOptions, onDecodeDone) {
|
|
659
|
+
async _decodeTokens(tokens, generateLogit, evaluationPriority, tokenMeter, contextShiftOptions, onDecodeDone) {
|
|
514
660
|
this._ensureNotDisposed();
|
|
515
661
|
const tokensLeftToDecode = tokens.slice();
|
|
516
662
|
return await withLock(this, "evaluate", async () => {
|
|
517
663
|
while (tokensLeftToDecode.length > 0) {
|
|
518
664
|
this._ensureNotDisposed();
|
|
519
|
-
let freeSpace = this._context.contextSize - this._nextTokenIndex;
|
|
665
|
+
let freeSpace = this._context.contextSize - 1 - this._nextTokenIndex;
|
|
520
666
|
if (freeSpace <= 1) {
|
|
521
667
|
await this._freeUpSpaceForTokens(contextShiftOptions);
|
|
522
|
-
freeSpace = this._context.contextSize - this._nextTokenIndex;
|
|
668
|
+
freeSpace = this._context.contextSize - 1 - this._nextTokenIndex;
|
|
523
669
|
if (freeSpace <= 1)
|
|
524
670
|
throw new Error("Failed to free up space for new tokens");
|
|
525
671
|
}
|
|
@@ -530,7 +676,8 @@ export class LlamaContextSequence {
|
|
|
530
676
|
tokens: tokensToDecode,
|
|
531
677
|
firstTokenSequenceIndex: this._nextTokenIndex,
|
|
532
678
|
generateLogitAtTheEnd,
|
|
533
|
-
evaluationPriority
|
|
679
|
+
evaluationPriority,
|
|
680
|
+
tokenMeter
|
|
534
681
|
}, !generateLogitAtTheEnd
|
|
535
682
|
? undefined
|
|
536
683
|
: onDecodeDone);
|
|
@@ -550,7 +697,11 @@ export class LlamaContextSequence {
|
|
|
550
697
|
: contextShiftOptions.size));
|
|
551
698
|
this._ensureNotDisposed();
|
|
552
699
|
if (contextShiftOptions.strategy === "eraseBeginning") {
|
|
553
|
-
|
|
700
|
+
let eraseStartIndex = 0;
|
|
701
|
+
if (this.model.tokens.shouldPrependBosToken && this.model.tokens.bos != null &&
|
|
702
|
+
this._contextTokens[0] === this.model.tokens.bos)
|
|
703
|
+
eraseStartIndex = 1;
|
|
704
|
+
await this.eraseContextTokenRanges([{ start: eraseStartIndex, end: size }]);
|
|
554
705
|
}
|
|
555
706
|
else {
|
|
556
707
|
const ranges = await contextShiftOptions.strategy({
|
|
@@ -560,7 +711,7 @@ export class LlamaContextSequence {
|
|
|
560
711
|
if (ranges == null)
|
|
561
712
|
throw new Error("Invalid delete ranges");
|
|
562
713
|
await this.eraseContextTokenRanges(ranges);
|
|
563
|
-
if (this.nextTokenIndex >= this._context.contextSize)
|
|
714
|
+
if (this.nextTokenIndex >= this._context.contextSize - 1)
|
|
564
715
|
await this.eraseContextTokenRanges([{ start: 0, end: size }]);
|
|
565
716
|
}
|
|
566
717
|
}
|
|
@@ -573,10 +724,11 @@ export class LlamaContextSequence {
|
|
|
573
724
|
* We need this to make it impossible to manually create instances of this class outside the code of this library
|
|
574
725
|
* @internal
|
|
575
726
|
*/
|
|
576
|
-
static _create({ sequenceId, context, contextShift: { size: contextShiftSize = Math.min(100, Math.ceil(context.contextSize / 2)), strategy: contextShiftStrategy = "eraseBeginning" } = {} }) {
|
|
727
|
+
static _create({ sequenceId, context, tokenMeter, contextShift: { size: contextShiftSize = Math.min(100, Math.ceil(context.contextSize / 2)), strategy: contextShiftStrategy = "eraseBeginning" } = {} }) {
|
|
577
728
|
return new LlamaContextSequence({
|
|
578
729
|
sequenceId,
|
|
579
730
|
context,
|
|
731
|
+
tokenMeter,
|
|
580
732
|
contextShift: {
|
|
581
733
|
size: contextShiftSize,
|
|
582
734
|
strategy: contextShiftStrategy
|
|
@@ -584,14 +736,52 @@ export class LlamaContextSequence {
|
|
|
584
736
|
});
|
|
585
737
|
}
|
|
586
738
|
}
|
|
739
|
+
function getTokenBiasesForAddon(tokenBias, currentModel) {
|
|
740
|
+
if (tokenBias == null)
|
|
741
|
+
return {
|
|
742
|
+
tokenBiasKeys: undefined,
|
|
743
|
+
tokenBiasValues: undefined
|
|
744
|
+
};
|
|
745
|
+
if (tokenBias instanceof Function)
|
|
746
|
+
tokenBias = tokenBias();
|
|
747
|
+
if (tokenBias._model !== currentModel)
|
|
748
|
+
throw new Error("This TokenBias instance was created with a different model than the one used by this context. " +
|
|
749
|
+
"Make sure you use the model instance of the context sequence for the TokenBias you use it with.");
|
|
750
|
+
const tokenBiasKeys = [];
|
|
751
|
+
const tokenBiasValues = [];
|
|
752
|
+
for (const [token, bias] of tokenBias._biases) {
|
|
753
|
+
tokenBiasKeys.push(token);
|
|
754
|
+
tokenBiasValues.push(bias);
|
|
755
|
+
}
|
|
756
|
+
if (tokenBiasKeys.length === 0 || tokenBiasValues.length === 0) {
|
|
757
|
+
return {
|
|
758
|
+
tokenBiasKeys: undefined,
|
|
759
|
+
tokenBiasValues: undefined
|
|
760
|
+
};
|
|
761
|
+
}
|
|
762
|
+
return {
|
|
763
|
+
tokenBiasKeys: Uint32Array.from(tokenBiasKeys),
|
|
764
|
+
tokenBiasValues: Float32Array.from(tokenBiasValues)
|
|
765
|
+
};
|
|
766
|
+
}
|
|
587
767
|
function disposeContextIfReferenced(contextRef) {
|
|
588
768
|
const context = contextRef.deref();
|
|
589
769
|
if (context != null)
|
|
590
|
-
context.dispose();
|
|
770
|
+
void context.dispose();
|
|
591
771
|
}
|
|
592
772
|
function disposeContextSequenceIfReferenced(contextRef) {
|
|
593
773
|
const context = contextRef.deref();
|
|
594
774
|
if (context != null)
|
|
595
775
|
context.dispose();
|
|
596
776
|
}
|
|
777
|
+
export function getDefaultContextBatchSize({ contextSize, sequences }) {
|
|
778
|
+
return Math.min(contextSize * sequences, 512);
|
|
779
|
+
}
|
|
780
|
+
export function getDefaultContextSequences() {
|
|
781
|
+
return 1;
|
|
782
|
+
}
|
|
783
|
+
const defaultFallbackContextSize = 4096;
|
|
784
|
+
export function getDefaultModelContextSize({ trainContextSize }) {
|
|
785
|
+
return trainContextSize ?? defaultFallbackContextSize;
|
|
786
|
+
}
|
|
597
787
|
//# sourceMappingURL=LlamaContext.js.map
|