node-llama-cpp 3.0.0-beta.4 → 3.0.0-beta.40
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +14 -11
- package/bins/_linux-x64-cuda.moved.txt +1 -0
- package/bins/_win-x64-cuda.moved.txt +1 -0
- package/bins/linux-arm64/_nlcBuildMetadata.json +1 -0
- package/bins/linux-arm64/libggml.so +0 -0
- package/bins/linux-arm64/libllama.so +0 -0
- package/bins/linux-arm64/llama-addon.node +0 -0
- package/bins/linux-armv7l/_nlcBuildMetadata.json +1 -0
- package/bins/linux-armv7l/libggml.so +0 -0
- package/bins/linux-armv7l/libllama.so +0 -0
- package/bins/linux-armv7l/llama-addon.node +0 -0
- package/bins/linux-x64/_nlcBuildMetadata.json +1 -0
- package/bins/linux-x64/libggml.so +0 -0
- package/bins/linux-x64/libllama.so +0 -0
- package/bins/linux-x64/llama-addon.node +0 -0
- package/bins/linux-x64-vulkan/_nlcBuildMetadata.json +1 -0
- package/bins/linux-x64-vulkan/libggml.so +0 -0
- package/bins/linux-x64-vulkan/libllama.so +0 -0
- package/bins/linux-x64-vulkan/llama-addon.node +0 -0
- package/bins/linux-x64-vulkan/vulkan-shaders-gen +0 -0
- package/bins/mac-arm64-metal/_nlcBuildMetadata.json +1 -0
- package/bins/mac-arm64-metal/ggml-common.h +1833 -0
- package/bins/mac-arm64-metal/ggml-metal.metal +6168 -0
- package/bins/mac-arm64-metal/libggml.dylib +0 -0
- package/bins/mac-arm64-metal/libllama.dylib +0 -0
- package/bins/mac-arm64-metal/llama-addon.node +0 -0
- package/bins/mac-x64/_nlcBuildMetadata.json +1 -0
- package/bins/mac-x64/libggml.dylib +0 -0
- package/bins/mac-x64/libllama.dylib +0 -0
- package/bins/mac-x64/llama-addon.node +0 -0
- package/bins/win-arm64/_nlcBuildMetadata.json +1 -0
- package/bins/win-arm64/ggml.dll +0 -0
- package/bins/win-arm64/llama-addon.exp +0 -0
- package/bins/win-arm64/llama-addon.lib +0 -0
- package/bins/win-arm64/llama-addon.node +0 -0
- package/bins/win-arm64/llama.dll +0 -0
- package/bins/win-x64/_nlcBuildMetadata.json +1 -0
- package/bins/win-x64/ggml.dll +0 -0
- package/bins/win-x64/llama-addon.exp +0 -0
- package/bins/win-x64/llama-addon.lib +0 -0
- package/bins/win-x64/llama-addon.node +0 -0
- package/bins/win-x64/llama.dll +0 -0
- package/bins/win-x64-vulkan/_nlcBuildMetadata.json +1 -0
- package/bins/win-x64-vulkan/ggml.dll +0 -0
- package/bins/win-x64-vulkan/llama-addon.exp +0 -0
- package/bins/win-x64-vulkan/llama-addon.lib +0 -0
- package/bins/win-x64-vulkan/llama-addon.node +0 -0
- package/bins/win-x64-vulkan/llama.dll +0 -0
- package/bins/win-x64-vulkan/vulkan-shaders-gen.exe +0 -0
- package/dist/ChatWrapper.d.ts +9 -39
- package/dist/ChatWrapper.js +129 -72
- package/dist/ChatWrapper.js.map +1 -1
- package/dist/apiDocsIndex.d.ts +1 -0
- package/dist/apiDocsIndex.js +7 -0
- package/dist/apiDocsIndex.js.map +1 -0
- package/dist/{utils/getBin.d.ts → bindings/AddonTypes.d.ts} +66 -9
- package/dist/bindings/AddonTypes.js +2 -0
- package/dist/bindings/AddonTypes.js.map +1 -0
- package/dist/bindings/Llama.d.ts +47 -0
- package/dist/bindings/Llama.js +356 -0
- package/dist/bindings/Llama.js.map +1 -0
- package/dist/bindings/consts.d.ts +2 -0
- package/dist/bindings/consts.js +11 -0
- package/dist/bindings/consts.js.map +1 -0
- package/dist/bindings/getLlama.d.ts +152 -0
- package/dist/bindings/getLlama.js +403 -0
- package/dist/bindings/getLlama.js.map +1 -0
- package/dist/bindings/types.d.ts +57 -0
- package/dist/bindings/types.js +77 -0
- package/dist/bindings/types.js.map +1 -0
- package/dist/bindings/utils/MemoryOrchestrator.d.ts +21 -0
- package/dist/bindings/utils/MemoryOrchestrator.js +49 -0
- package/dist/bindings/utils/MemoryOrchestrator.js.map +1 -0
- package/dist/bindings/utils/NoBinaryFoundError.d.ts +2 -0
- package/dist/bindings/utils/NoBinaryFoundError.js +7 -0
- package/dist/bindings/utils/NoBinaryFoundError.js.map +1 -0
- package/dist/bindings/utils/asyncEvery.d.ts +5 -0
- package/dist/bindings/utils/asyncEvery.js +15 -0
- package/dist/bindings/utils/asyncEvery.js.map +1 -0
- package/dist/bindings/utils/asyncSome.d.ts +5 -0
- package/dist/bindings/utils/asyncSome.js +27 -0
- package/dist/bindings/utils/asyncSome.js.map +1 -0
- package/dist/{utils → bindings/utils}/binariesGithubRelease.js +1 -1
- package/dist/bindings/utils/binariesGithubRelease.js.map +1 -0
- package/dist/bindings/utils/clearAllLocalBuilds.d.ts +1 -0
- package/dist/bindings/utils/clearAllLocalBuilds.js +47 -0
- package/dist/bindings/utils/clearAllLocalBuilds.js.map +1 -0
- package/dist/bindings/utils/cloneLlamaCppRepo.d.ts +11 -0
- package/dist/bindings/utils/cloneLlamaCppRepo.js +166 -0
- package/dist/bindings/utils/cloneLlamaCppRepo.js.map +1 -0
- package/dist/bindings/utils/compileLLamaCpp.d.ts +21 -0
- package/dist/bindings/utils/compileLLamaCpp.js +288 -0
- package/dist/bindings/utils/compileLLamaCpp.js.map +1 -0
- package/dist/bindings/utils/detectAvailableComputeLayers.d.ts +14 -0
- package/dist/bindings/utils/detectAvailableComputeLayers.js +305 -0
- package/dist/bindings/utils/detectAvailableComputeLayers.js.map +1 -0
- package/dist/bindings/utils/detectGlibc.d.ts +4 -0
- package/dist/bindings/utils/detectGlibc.js +46 -0
- package/dist/bindings/utils/detectGlibc.js.map +1 -0
- package/dist/bindings/utils/getBestComputeLayersAvailable.d.ts +10 -0
- package/dist/bindings/utils/getBestComputeLayersAvailable.js +29 -0
- package/dist/bindings/utils/getBestComputeLayersAvailable.js.map +1 -0
- package/dist/bindings/utils/getBuildFolderNameForBuildOptions.d.ts +5 -0
- package/dist/bindings/utils/getBuildFolderNameForBuildOptions.js +93 -0
- package/dist/bindings/utils/getBuildFolderNameForBuildOptions.js.map +1 -0
- package/dist/bindings/utils/getCanUsePrebuiltBinaries.d.ts +1 -0
- package/dist/bindings/utils/getCanUsePrebuiltBinaries.js +8 -0
- package/dist/bindings/utils/getCanUsePrebuiltBinaries.js.map +1 -0
- package/dist/bindings/utils/getExampleUsageCodeOfGetLlama.d.ts +2 -0
- package/dist/bindings/utils/getExampleUsageCodeOfGetLlama.js +21 -0
- package/dist/bindings/utils/getExampleUsageCodeOfGetLlama.js.map +1 -0
- package/dist/bindings/utils/getGpuTypesToUseForOption.d.ts +13 -0
- package/dist/bindings/utils/getGpuTypesToUseForOption.js +39 -0
- package/dist/bindings/utils/getGpuTypesToUseForOption.js.map +1 -0
- package/dist/bindings/utils/getLinuxDistroInfo.d.ts +9 -0
- package/dist/bindings/utils/getLinuxDistroInfo.js +46 -0
- package/dist/bindings/utils/getLinuxDistroInfo.js.map +1 -0
- package/dist/bindings/utils/getLlamaWithoutBackend.d.ts +5 -0
- package/dist/bindings/utils/getLlamaWithoutBackend.js +27 -0
- package/dist/bindings/utils/getLlamaWithoutBackend.js.map +1 -0
- package/dist/bindings/utils/getPlatform.d.ts +2 -0
- package/dist/bindings/utils/getPlatform.js +15 -0
- package/dist/bindings/utils/getPlatform.js.map +1 -0
- package/dist/bindings/utils/getPlatformInfo.d.ts +5 -0
- package/dist/bindings/utils/getPlatformInfo.js +28 -0
- package/dist/bindings/utils/getPlatformInfo.js.map +1 -0
- package/dist/bindings/utils/hasBuildingFromSourceDependenciesInstalled.d.ts +3 -0
- package/dist/bindings/utils/hasBuildingFromSourceDependenciesInstalled.js +27 -0
- package/dist/bindings/utils/hasBuildingFromSourceDependenciesInstalled.js.map +1 -0
- package/dist/bindings/utils/hasFileInPath.d.ts +2 -0
- package/dist/bindings/utils/hasFileInPath.js +34 -0
- package/dist/bindings/utils/hasFileInPath.js.map +1 -0
- package/dist/bindings/utils/lastBuildInfo.d.ts +6 -0
- package/dist/bindings/utils/lastBuildInfo.js +17 -0
- package/dist/bindings/utils/lastBuildInfo.js.map +1 -0
- package/dist/bindings/utils/logBinaryUsageExampleToConsole.d.ts +2 -0
- package/dist/bindings/utils/logBinaryUsageExampleToConsole.js +22 -0
- package/dist/bindings/utils/logBinaryUsageExampleToConsole.js.map +1 -0
- package/dist/bindings/utils/logDistroInstallInstruction.d.ts +14 -0
- package/dist/bindings/utils/logDistroInstallInstruction.js +48 -0
- package/dist/bindings/utils/logDistroInstallInstruction.js.map +1 -0
- package/dist/bindings/utils/resolveCustomCmakeOptions.d.ts +1 -0
- package/dist/bindings/utils/resolveCustomCmakeOptions.js +43 -0
- package/dist/bindings/utils/resolveCustomCmakeOptions.js.map +1 -0
- package/dist/bindings/utils/testBindingBinary.d.ts +1 -0
- package/dist/bindings/utils/testBindingBinary.js +100 -0
- package/dist/bindings/utils/testBindingBinary.js.map +1 -0
- package/dist/bindings/utils/testCmakeBinary.d.ts +6 -0
- package/dist/bindings/utils/testCmakeBinary.js +32 -0
- package/dist/bindings/utils/testCmakeBinary.js.map +1 -0
- package/dist/chatWrappers/AlpacaChatWrapper.d.ts +2 -1
- package/dist/chatWrappers/AlpacaChatWrapper.js +9 -2
- package/dist/chatWrappers/AlpacaChatWrapper.js.map +1 -1
- package/dist/chatWrappers/ChatMLChatWrapper.d.ts +2 -9
- package/dist/chatWrappers/ChatMLChatWrapper.js +23 -21
- package/dist/chatWrappers/ChatMLChatWrapper.js.map +1 -1
- package/dist/chatWrappers/FalconChatWrapper.d.ts +4 -10
- package/dist/chatWrappers/FalconChatWrapper.js +38 -21
- package/dist/chatWrappers/FalconChatWrapper.js.map +1 -1
- package/dist/chatWrappers/FunctionaryChatWrapper.d.ts +8 -32
- package/dist/chatWrappers/FunctionaryChatWrapper.js +326 -118
- package/dist/chatWrappers/FunctionaryChatWrapper.js.map +1 -1
- package/dist/chatWrappers/GemmaChatWrapper.d.ts +7 -0
- package/dist/chatWrappers/GemmaChatWrapper.js +96 -0
- package/dist/chatWrappers/GemmaChatWrapper.js.map +1 -0
- package/dist/chatWrappers/GeneralChatWrapper.d.ts +4 -10
- package/dist/chatWrappers/GeneralChatWrapper.js +45 -22
- package/dist/chatWrappers/GeneralChatWrapper.js.map +1 -1
- package/dist/chatWrappers/Llama2ChatWrapper.d.ts +12 -0
- package/dist/chatWrappers/{LlamaChatWrapper.js → Llama2ChatWrapper.js} +38 -20
- package/dist/chatWrappers/Llama2ChatWrapper.js.map +1 -0
- package/dist/chatWrappers/Llama3ChatWrapper.d.ts +16 -0
- package/dist/chatWrappers/Llama3ChatWrapper.js +173 -0
- package/dist/chatWrappers/Llama3ChatWrapper.js.map +1 -0
- package/dist/chatWrappers/Llama3_1ChatWrapper.d.ts +31 -0
- package/dist/chatWrappers/Llama3_1ChatWrapper.js +223 -0
- package/dist/chatWrappers/Llama3_1ChatWrapper.js.map +1 -0
- package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.d.ts +76 -0
- package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.js +371 -0
- package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.js.map +1 -0
- package/dist/chatWrappers/generic/TemplateChatWrapper.d.ts +54 -0
- package/dist/chatWrappers/generic/TemplateChatWrapper.js +200 -0
- package/dist/chatWrappers/generic/TemplateChatWrapper.js.map +1 -0
- package/dist/chatWrappers/generic/utils/chatHistoryFunctionCallMessageTemplate.d.ts +23 -0
- package/dist/chatWrappers/generic/utils/chatHistoryFunctionCallMessageTemplate.js +45 -0
- package/dist/chatWrappers/generic/utils/chatHistoryFunctionCallMessageTemplate.js.map +1 -0
- package/dist/chatWrappers/utils/ChatModelFunctionsDocumentationGenerator.d.ts +57 -0
- package/dist/chatWrappers/utils/ChatModelFunctionsDocumentationGenerator.js +119 -0
- package/dist/chatWrappers/utils/ChatModelFunctionsDocumentationGenerator.js.map +1 -0
- package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.d.ts +4 -0
- package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.js +210 -0
- package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.js.map +1 -0
- package/dist/chatWrappers/utils/jsonDumps.d.ts +7 -0
- package/dist/chatWrappers/utils/jsonDumps.js +18 -0
- package/dist/chatWrappers/utils/jsonDumps.js.map +1 -0
- package/dist/chatWrappers/utils/resolveChatWrapper.d.ts +71 -0
- package/dist/chatWrappers/utils/resolveChatWrapper.js +289 -0
- package/dist/chatWrappers/utils/resolveChatWrapper.js.map +1 -0
- package/dist/cli/cli.js +21 -7
- package/dist/cli/cli.js.map +1 -1
- package/dist/cli/commands/BuildCommand.d.ts +11 -4
- package/dist/cli/commands/BuildCommand.js +114 -41
- package/dist/cli/commands/BuildCommand.js.map +1 -1
- package/dist/cli/commands/ChatCommand.d.ts +19 -7
- package/dist/cli/commands/ChatCommand.js +306 -150
- package/dist/cli/commands/ChatCommand.js.map +1 -1
- package/dist/cli/commands/ClearCommand.d.ts +1 -1
- package/dist/cli/commands/ClearCommand.js +11 -12
- package/dist/cli/commands/ClearCommand.js.map +1 -1
- package/dist/cli/commands/CompleteCommand.d.ts +30 -0
- package/dist/cli/commands/CompleteCommand.js +374 -0
- package/dist/cli/commands/CompleteCommand.js.map +1 -0
- package/dist/cli/commands/DebugCommand.d.ts +7 -0
- package/dist/cli/commands/DebugCommand.js +54 -0
- package/dist/cli/commands/DebugCommand.js.map +1 -0
- package/dist/cli/commands/DownloadCommand.d.ts +7 -4
- package/dist/cli/commands/DownloadCommand.js +121 -70
- package/dist/cli/commands/DownloadCommand.js.map +1 -1
- package/dist/cli/commands/InfillCommand.d.ts +32 -0
- package/dist/cli/commands/InfillCommand.js +410 -0
- package/dist/cli/commands/InfillCommand.js.map +1 -0
- package/dist/cli/commands/InitCommand.d.ts +11 -0
- package/dist/cli/commands/InitCommand.js +195 -0
- package/dist/cli/commands/InitCommand.js.map +1 -0
- package/dist/cli/commands/OnPostInstallCommand.js +9 -10
- package/dist/cli/commands/OnPostInstallCommand.js.map +1 -1
- package/dist/cli/commands/PullCommand.d.ts +12 -0
- package/dist/cli/commands/PullCommand.js +117 -0
- package/dist/cli/commands/PullCommand.js.map +1 -0
- package/dist/cli/commands/inspect/InspectCommand.d.ts +4 -0
- package/dist/cli/commands/inspect/InspectCommand.js +19 -0
- package/dist/cli/commands/inspect/InspectCommand.js.map +1 -0
- package/dist/cli/commands/inspect/commands/InspectGgufCommand.d.ts +12 -0
- package/dist/cli/commands/inspect/commands/InspectGgufCommand.js +136 -0
- package/dist/cli/commands/inspect/commands/InspectGgufCommand.js.map +1 -0
- package/dist/cli/commands/inspect/commands/InspectGpuCommand.d.ts +4 -0
- package/dist/cli/commands/inspect/commands/InspectGpuCommand.js +180 -0
- package/dist/cli/commands/inspect/commands/InspectGpuCommand.js.map +1 -0
- package/dist/cli/commands/inspect/commands/InspectMeasureCommand.d.ts +18 -0
- package/dist/cli/commands/inspect/commands/InspectMeasureCommand.js +626 -0
- package/dist/cli/commands/inspect/commands/InspectMeasureCommand.js.map +1 -0
- package/dist/cli/projectTemplates.d.ts +7 -0
- package/dist/cli/projectTemplates.js +10 -0
- package/dist/cli/projectTemplates.js.map +1 -0
- package/dist/cli/recommendedModels.d.ts +2 -0
- package/dist/cli/recommendedModels.js +376 -0
- package/dist/cli/recommendedModels.js.map +1 -0
- package/dist/cli/startCreateCli.d.ts +2 -0
- package/dist/cli/startCreateCli.js +26 -0
- package/dist/cli/startCreateCli.js.map +1 -0
- package/dist/cli/utils/ConsoleInteraction.d.ts +23 -0
- package/dist/cli/utils/ConsoleInteraction.js +122 -0
- package/dist/cli/utils/ConsoleInteraction.js.map +1 -0
- package/dist/cli/utils/ConsoleTable.d.ts +23 -0
- package/dist/cli/utils/ConsoleTable.js +86 -0
- package/dist/cli/utils/ConsoleTable.js.map +1 -0
- package/dist/cli/utils/basicChooseFromListConsoleInteraction.d.ts +13 -0
- package/dist/cli/utils/basicChooseFromListConsoleInteraction.js +111 -0
- package/dist/cli/utils/basicChooseFromListConsoleInteraction.js.map +1 -0
- package/dist/cli/utils/consolePromptQuestion.d.ts +6 -0
- package/dist/cli/utils/consolePromptQuestion.js +82 -0
- package/dist/cli/utils/consolePromptQuestion.js.map +1 -0
- package/dist/cli/utils/getReadablePath.d.ts +1 -0
- package/dist/cli/utils/getReadablePath.js +14 -0
- package/dist/cli/utils/getReadablePath.js.map +1 -0
- package/dist/cli/utils/interactivelyAskForModel.d.ts +8 -0
- package/dist/cli/utils/interactivelyAskForModel.js +461 -0
- package/dist/cli/utils/interactivelyAskForModel.js.map +1 -0
- package/dist/cli/utils/logUsedGpuTypeOption.d.ts +2 -0
- package/dist/cli/utils/logUsedGpuTypeOption.js +9 -0
- package/dist/cli/utils/logUsedGpuTypeOption.js.map +1 -0
- package/dist/cli/utils/printCommonInfoLines.d.ts +9 -0
- package/dist/cli/utils/printCommonInfoLines.js +79 -0
- package/dist/cli/utils/printCommonInfoLines.js.map +1 -0
- package/dist/cli/utils/printInfoLine.d.ts +12 -0
- package/dist/cli/utils/printInfoLine.js +54 -0
- package/dist/cli/utils/printInfoLine.js.map +1 -0
- package/dist/cli/utils/projectTemplates.d.ts +19 -0
- package/dist/cli/utils/projectTemplates.js +47 -0
- package/dist/cli/utils/projectTemplates.js.map +1 -0
- package/dist/cli/utils/resolveCommandGgufPath.d.ts +5 -0
- package/dist/cli/utils/resolveCommandGgufPath.js +72 -0
- package/dist/cli/utils/resolveCommandGgufPath.js.map +1 -0
- package/dist/cli/utils/resolveHeaderFlag.d.ts +1 -0
- package/dist/cli/utils/resolveHeaderFlag.js +21 -0
- package/dist/cli/utils/resolveHeaderFlag.js.map +1 -0
- package/dist/cli/utils/resolveModelRecommendationFileOptions.d.ts +19 -0
- package/dist/cli/utils/resolveModelRecommendationFileOptions.js +7 -0
- package/dist/cli/utils/resolveModelRecommendationFileOptions.js.map +1 -0
- package/dist/cli/utils/splitAnsiToLines.d.ts +1 -0
- package/dist/cli/utils/splitAnsiToLines.js +32 -0
- package/dist/cli/utils/splitAnsiToLines.js.map +1 -0
- package/dist/cli/utils/withCliCommandDescriptionDocsUrl.d.ts +2 -0
- package/dist/cli/utils/withCliCommandDescriptionDocsUrl.js +23 -0
- package/dist/cli/utils/withCliCommandDescriptionDocsUrl.js.map +1 -0
- package/dist/commands.d.ts +1 -0
- package/dist/commands.js +3 -0
- package/dist/commands.js.map +1 -1
- package/dist/config.d.ts +38 -5
- package/dist/config.js +61 -16
- package/dist/config.js.map +1 -1
- package/dist/consts.d.ts +4 -0
- package/dist/consts.js +11 -0
- package/dist/consts.js.map +1 -0
- package/dist/evaluator/LlamaChat/LlamaChat.d.ts +270 -0
- package/dist/evaluator/LlamaChat/LlamaChat.js +1544 -0
- package/dist/evaluator/LlamaChat/LlamaChat.js.map +1 -0
- package/dist/evaluator/LlamaChat/utils/FunctionCallNameGrammar.d.ts +11 -0
- package/dist/evaluator/LlamaChat/utils/FunctionCallNameGrammar.js +55 -0
- package/dist/evaluator/LlamaChat/utils/FunctionCallNameGrammar.js.map +1 -0
- package/dist/evaluator/LlamaChat/utils/FunctionCallParamsGrammar.d.ts +16 -0
- package/dist/evaluator/LlamaChat/utils/FunctionCallParamsGrammar.js +45 -0
- package/dist/evaluator/LlamaChat/utils/FunctionCallParamsGrammar.js.map +1 -0
- package/dist/evaluator/LlamaChat/utils/LlamaFunctionCallValidationError.d.ts +8 -0
- package/dist/evaluator/LlamaChat/utils/LlamaFunctionCallValidationError.js +12 -0
- package/dist/evaluator/LlamaChat/utils/LlamaFunctionCallValidationError.js.map +1 -0
- package/dist/{llamaEvaluator → evaluator}/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.js +42 -16
- package/dist/evaluator/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.js.map +1 -0
- package/dist/evaluator/LlamaChatSession/LlamaChatSession.d.ts +310 -0
- package/dist/evaluator/LlamaChatSession/LlamaChatSession.js +425 -0
- package/dist/evaluator/LlamaChatSession/LlamaChatSession.js.map +1 -0
- package/dist/evaluator/LlamaChatSession/utils/LlamaChatSessionPromptCompletionEngine.d.ts +39 -0
- package/dist/evaluator/LlamaChatSession/utils/LlamaChatSessionPromptCompletionEngine.js +186 -0
- package/dist/evaluator/LlamaChatSession/utils/LlamaChatSessionPromptCompletionEngine.js.map +1 -0
- package/dist/{llamaEvaluator → evaluator}/LlamaChatSession/utils/defineChatSessionFunction.d.ts +3 -0
- package/dist/{llamaEvaluator → evaluator}/LlamaChatSession/utils/defineChatSessionFunction.js +3 -0
- package/dist/evaluator/LlamaChatSession/utils/defineChatSessionFunction.js.map +1 -0
- package/dist/evaluator/LlamaCompletion.d.ts +154 -0
- package/dist/evaluator/LlamaCompletion.js +424 -0
- package/dist/evaluator/LlamaCompletion.js.map +1 -0
- package/dist/{llamaEvaluator → evaluator}/LlamaContext/LlamaContext.d.ts +42 -22
- package/dist/{llamaEvaluator → evaluator}/LlamaContext/LlamaContext.js +338 -81
- package/dist/evaluator/LlamaContext/LlamaContext.js.map +1 -0
- package/dist/evaluator/LlamaContext/types.d.ts +175 -0
- package/dist/evaluator/LlamaContext/types.js.map +1 -0
- package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies/firstInFirstOutStrategy.js.map +1 -0
- package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies/maximumParallelismStrategy.js.map +1 -0
- package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizationStrategy.d.ts +2 -0
- package/dist/{llamaEvaluator/LlamaContext/utils/resolveBatchItemsPrioritizingStrategy.js → evaluator/LlamaContext/utils/resolveBatchItemsPrioritizationStrategy.js} +4 -4
- package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizationStrategy.js.map +1 -0
- package/dist/evaluator/LlamaEmbeddingContext.d.ts +51 -0
- package/dist/evaluator/LlamaEmbeddingContext.js +73 -0
- package/dist/evaluator/LlamaEmbeddingContext.js.map +1 -0
- package/dist/evaluator/LlamaGrammar.d.ts +34 -0
- package/dist/{llamaEvaluator → evaluator}/LlamaGrammar.js +15 -12
- package/dist/evaluator/LlamaGrammar.js.map +1 -0
- package/dist/{llamaEvaluator → evaluator}/LlamaGrammarEvaluationState.js +4 -4
- package/dist/evaluator/LlamaGrammarEvaluationState.js.map +1 -0
- package/dist/{llamaEvaluator → evaluator}/LlamaJsonSchemaGrammar.d.ts +2 -1
- package/dist/{llamaEvaluator → evaluator}/LlamaJsonSchemaGrammar.js +3 -3
- package/dist/evaluator/LlamaJsonSchemaGrammar.js.map +1 -0
- package/dist/evaluator/LlamaModel/LlamaModel.d.ts +242 -0
- package/dist/evaluator/LlamaModel/LlamaModel.js +765 -0
- package/dist/evaluator/LlamaModel/LlamaModel.js.map +1 -0
- package/dist/evaluator/LlamaModel/utils/TokenAttributes.d.ts +29 -0
- package/dist/evaluator/LlamaModel/utils/TokenAttributes.js +65 -0
- package/dist/evaluator/LlamaModel/utils/TokenAttributes.js.map +1 -0
- package/dist/evaluator/TokenBias.d.ts +22 -0
- package/dist/evaluator/TokenBias.js +33 -0
- package/dist/evaluator/TokenBias.js.map +1 -0
- package/dist/evaluator/TokenMeter.d.ts +54 -0
- package/dist/evaluator/TokenMeter.js +86 -0
- package/dist/evaluator/TokenMeter.js.map +1 -0
- package/dist/gguf/consts.d.ts +4 -0
- package/dist/gguf/consts.js +12 -0
- package/dist/gguf/consts.js.map +1 -0
- package/dist/gguf/errors/InvalidGgufMagicError.d.ts +3 -0
- package/dist/gguf/errors/InvalidGgufMagicError.js +6 -0
- package/dist/gguf/errors/InvalidGgufMagicError.js.map +1 -0
- package/dist/gguf/errors/UnsupportedGgufValueTypeError.d.ts +4 -0
- package/dist/gguf/errors/UnsupportedGgufValueTypeError.js +9 -0
- package/dist/gguf/errors/UnsupportedGgufValueTypeError.js.map +1 -0
- package/dist/gguf/fileReaders/GgufFileReader.d.ts +37 -0
- package/dist/gguf/fileReaders/GgufFileReader.js +109 -0
- package/dist/gguf/fileReaders/GgufFileReader.js.map +1 -0
- package/dist/gguf/fileReaders/GgufFsFileReader.d.ts +18 -0
- package/dist/gguf/fileReaders/GgufFsFileReader.js +62 -0
- package/dist/gguf/fileReaders/GgufFsFileReader.js.map +1 -0
- package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.d.ts +23 -0
- package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.js +79 -0
- package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.js.map +1 -0
- package/dist/gguf/insights/GgufInsights.d.ts +50 -0
- package/dist/gguf/insights/GgufInsights.js +401 -0
- package/dist/gguf/insights/GgufInsights.js.map +1 -0
- package/dist/gguf/insights/GgufInsightsConfigurationResolver.d.ts +90 -0
- package/dist/gguf/insights/GgufInsightsConfigurationResolver.js +144 -0
- package/dist/gguf/insights/GgufInsightsConfigurationResolver.js.map +1 -0
- package/dist/gguf/insights/utils/resolveContextContextSizeOption.d.ts +19 -0
- package/dist/gguf/insights/utils/resolveContextContextSizeOption.js +78 -0
- package/dist/gguf/insights/utils/resolveContextContextSizeOption.js.map +1 -0
- package/dist/gguf/insights/utils/resolveModelGpuLayersOption.d.ts +15 -0
- package/dist/gguf/insights/utils/resolveModelGpuLayersOption.js +183 -0
- package/dist/gguf/insights/utils/resolveModelGpuLayersOption.js.map +1 -0
- package/dist/gguf/insights/utils/scoreLevels.d.ts +5 -0
- package/dist/gguf/insights/utils/scoreLevels.js +16 -0
- package/dist/gguf/insights/utils/scoreLevels.js.map +1 -0
- package/dist/gguf/parser/GgufV2Parser.d.ts +20 -0
- package/dist/gguf/parser/GgufV2Parser.js +156 -0
- package/dist/gguf/parser/GgufV2Parser.js.map +1 -0
- package/dist/gguf/parser/GgufV3Parser.d.ts +3 -0
- package/dist/gguf/parser/GgufV3Parser.js +4 -0
- package/dist/gguf/parser/GgufV3Parser.js.map +1 -0
- package/dist/gguf/parser/parseGguf.d.ts +8 -0
- package/dist/gguf/parser/parseGguf.js +61 -0
- package/dist/gguf/parser/parseGguf.js.map +1 -0
- package/dist/gguf/readGgufFileInfo.d.ts +33 -0
- package/dist/gguf/readGgufFileInfo.js +66 -0
- package/dist/gguf/readGgufFileInfo.js.map +1 -0
- package/dist/gguf/types/GgufFileInfoTypes.d.ts +84 -0
- package/dist/gguf/types/GgufFileInfoTypes.js +18 -0
- package/dist/gguf/types/GgufFileInfoTypes.js.map +1 -0
- package/dist/gguf/types/GgufMetadataTypes.d.ts +356 -0
- package/dist/gguf/types/GgufMetadataTypes.js +99 -0
- package/dist/gguf/types/GgufMetadataTypes.js.map +1 -0
- package/dist/gguf/types/GgufTensorInfoTypes.d.ts +37 -0
- package/dist/gguf/types/GgufTensorInfoTypes.js +33 -0
- package/dist/gguf/types/GgufTensorInfoTypes.js.map +1 -0
- package/dist/gguf/utils/GgufReadOffset.d.ts +6 -0
- package/dist/gguf/utils/GgufReadOffset.js +18 -0
- package/dist/gguf/utils/GgufReadOffset.js.map +1 -0
- package/dist/gguf/utils/convertMetadataKeyValueRecordToNestedObject.d.ts +6 -0
- package/dist/gguf/utils/convertMetadataKeyValueRecordToNestedObject.js +74 -0
- package/dist/gguf/utils/convertMetadataKeyValueRecordToNestedObject.js.map +1 -0
- package/dist/gguf/utils/getGgufFileTypeName.d.ts +4 -0
- package/dist/gguf/utils/getGgufFileTypeName.js +13 -0
- package/dist/gguf/utils/getGgufFileTypeName.js.map +1 -0
- package/dist/gguf/utils/getGgufMetadataArchitectureData.d.ts +3 -0
- package/dist/gguf/utils/getGgufMetadataArchitectureData.js +4 -0
- package/dist/gguf/utils/getGgufMetadataArchitectureData.js.map +1 -0
- package/dist/gguf/utils/normalizeGgufDownloadUrl.d.ts +1 -0
- package/dist/gguf/utils/normalizeGgufDownloadUrl.js +16 -0
- package/dist/gguf/utils/normalizeGgufDownloadUrl.js.map +1 -0
- package/dist/gguf/utils/resolveBinarySplitGgufPartUrls.d.ts +2 -0
- package/dist/gguf/utils/resolveBinarySplitGgufPartUrls.js +39 -0
- package/dist/gguf/utils/resolveBinarySplitGgufPartUrls.js.map +1 -0
- package/dist/gguf/utils/resolveSplitGgufParts.d.ts +7 -0
- package/dist/gguf/utils/resolveSplitGgufParts.js +55 -0
- package/dist/gguf/utils/resolveSplitGgufParts.js.map +1 -0
- package/dist/index.d.ts +43 -18
- package/dist/index.js +38 -15
- package/dist/index.js.map +1 -1
- package/dist/state.d.ts +4 -0
- package/dist/state.js +14 -0
- package/dist/state.js.map +1 -1
- package/dist/types.d.ts +130 -5
- package/dist/types.js.map +1 -1
- package/dist/utils/DeepPartialObject.d.ts +3 -0
- package/dist/utils/DeepPartialObject.js +2 -0
- package/dist/utils/DeepPartialObject.js.map +1 -0
- package/dist/utils/DisposeGuard.d.ts +13 -0
- package/dist/utils/DisposeGuard.js +120 -0
- package/dist/utils/DisposeGuard.js.map +1 -0
- package/dist/utils/InsufficientMemoryError.d.ts +3 -0
- package/dist/utils/InsufficientMemoryError.js +6 -0
- package/dist/utils/InsufficientMemoryError.js.map +1 -0
- package/dist/utils/LlamaText.d.ts +70 -26
- package/dist/utils/LlamaText.js +472 -157
- package/dist/utils/LlamaText.js.map +1 -1
- package/dist/utils/LruCache.d.ts +12 -0
- package/dist/utils/LruCache.js +44 -0
- package/dist/utils/LruCache.js.map +1 -0
- package/dist/utils/ReplHistory.js +5 -1
- package/dist/utils/ReplHistory.js.map +1 -1
- package/dist/utils/StopGenerationDetector.d.ts +27 -8
- package/dist/utils/StopGenerationDetector.js +108 -22
- package/dist/utils/StopGenerationDetector.js.map +1 -1
- package/dist/utils/TokenStreamRegulator.d.ts +10 -4
- package/dist/utils/TokenStreamRegulator.js +102 -10
- package/dist/utils/TokenStreamRegulator.js.map +1 -1
- package/dist/utils/UnsupportedError.d.ts +2 -0
- package/dist/utils/UnsupportedError.js +7 -0
- package/dist/utils/UnsupportedError.js.map +1 -0
- package/dist/utils/appendUserMessageToChatHistory.js.map +1 -1
- package/dist/utils/clearTempFolder.js.map +1 -1
- package/dist/utils/cmake.js +38 -20
- package/dist/utils/cmake.js.map +1 -1
- package/dist/utils/createModelDownloader.d.ts +111 -0
- package/dist/utils/createModelDownloader.js +273 -0
- package/dist/utils/createModelDownloader.js.map +1 -0
- package/dist/utils/findBestOption.d.ts +4 -0
- package/dist/utils/findBestOption.js +15 -0
- package/dist/utils/findBestOption.js.map +1 -0
- package/dist/utils/findCharacterRemovalCountToFitChatHistoryInContext.d.ts +1 -0
- package/dist/utils/findCharacterRemovalCountToFitChatHistoryInContext.js +23 -12
- package/dist/utils/findCharacterRemovalCountToFitChatHistoryInContext.js.map +1 -1
- package/dist/utils/gbnfJson/GbnfGrammarGenerator.js.map +1 -1
- package/dist/utils/gbnfJson/getGbnfGrammarForGbnfJsonSchema.d.ts +5 -0
- package/dist/utils/gbnfJson/getGbnfGrammarForGbnfJsonSchema.js +11 -0
- package/dist/utils/gbnfJson/getGbnfGrammarForGbnfJsonSchema.js.map +1 -0
- package/dist/utils/gbnfJson/terminals/GbnfArray.d.ts +3 -1
- package/dist/utils/gbnfJson/terminals/GbnfArray.js +10 -5
- package/dist/utils/gbnfJson/terminals/GbnfArray.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfBoolean.d.ts +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfBoolean.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfBooleanValue.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfGrammar.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfNull.d.ts +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfNull.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfNumber.d.ts +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfNumber.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfNumberValue.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfObjectMap.d.ts +3 -1
- package/dist/utils/gbnfJson/terminals/GbnfObjectMap.js +9 -4
- package/dist/utils/gbnfJson/terminals/GbnfObjectMap.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfOr.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfRepetition.d.ts +9 -0
- package/dist/utils/gbnfJson/terminals/GbnfRepetition.js +37 -0
- package/dist/utils/gbnfJson/terminals/GbnfRepetition.js.map +1 -0
- package/dist/utils/gbnfJson/terminals/GbnfString.d.ts +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfString.js +23 -5
- package/dist/utils/gbnfJson/terminals/GbnfString.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfStringValue.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfVerbatimText.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfWhitespace.d.ts +7 -4
- package/dist/utils/gbnfJson/terminals/GbnfWhitespace.js +37 -9
- package/dist/utils/gbnfJson/terminals/GbnfWhitespace.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/gbnfConsts.d.ts +5 -4
- package/dist/utils/gbnfJson/terminals/gbnfConsts.js +14 -3
- package/dist/utils/gbnfJson/terminals/gbnfConsts.js.map +1 -1
- package/dist/utils/gbnfJson/types.d.ts +3 -0
- package/dist/utils/gbnfJson/types.js.map +1 -1
- package/dist/utils/gbnfJson/utils/GbnfJsonScopeState.d.ts +10 -0
- package/dist/utils/gbnfJson/utils/GbnfJsonScopeState.js +15 -0
- package/dist/utils/gbnfJson/utils/GbnfJsonScopeState.js.map +1 -0
- package/dist/utils/gbnfJson/utils/getGbnfJsonTerminalForGbnfJsonSchema.d.ts +2 -1
- package/dist/utils/gbnfJson/utils/getGbnfJsonTerminalForGbnfJsonSchema.js +6 -5
- package/dist/utils/gbnfJson/utils/getGbnfJsonTerminalForGbnfJsonSchema.js.map +1 -1
- package/dist/utils/gbnfJson/utils/validateObjectAgainstGbnfSchema.js +2 -2
- package/dist/utils/gbnfJson/utils/validateObjectAgainstGbnfSchema.js.map +1 -1
- package/dist/utils/getBuildDefaults.d.ts +1 -2
- package/dist/utils/getBuildDefaults.js +2 -3
- package/dist/utils/getBuildDefaults.js.map +1 -1
- package/dist/utils/getConsoleLogPrefix.d.ts +1 -0
- package/dist/utils/getConsoleLogPrefix.js +10 -0
- package/dist/utils/getConsoleLogPrefix.js.map +1 -0
- package/dist/utils/getGrammarsFolder.d.ts +2 -1
- package/dist/utils/getGrammarsFolder.js +8 -7
- package/dist/utils/getGrammarsFolder.js.map +1 -1
- package/dist/utils/getModuleVersion.d.ts +1 -0
- package/dist/utils/getModuleVersion.js +13 -0
- package/dist/utils/getModuleVersion.js.map +1 -0
- package/dist/utils/getQueuedTokensBeforeStopTrigger.d.ts +6 -0
- package/dist/utils/getQueuedTokensBeforeStopTrigger.js +22 -0
- package/dist/utils/getQueuedTokensBeforeStopTrigger.js.map +1 -0
- package/dist/utils/getReadableContextSize.d.ts +1 -0
- package/dist/utils/getReadableContextSize.js +7 -0
- package/dist/utils/getReadableContextSize.js.map +1 -0
- package/dist/utils/getTypeScriptTypeStringForGbnfJsonSchema.js +15 -11
- package/dist/utils/getTypeScriptTypeStringForGbnfJsonSchema.js.map +1 -1
- package/dist/utils/gitReleaseBundles.js +73 -5
- package/dist/utils/gitReleaseBundles.js.map +1 -1
- package/dist/utils/hashString.d.ts +1 -0
- package/dist/utils/hashString.js +8 -0
- package/dist/utils/hashString.js.map +1 -0
- package/dist/utils/isLockfileActive.d.ts +4 -0
- package/dist/utils/isLockfileActive.js +12 -0
- package/dist/utils/isLockfileActive.js.map +1 -0
- package/dist/utils/isToken.d.ts +2 -0
- package/dist/utils/isToken.js +4 -0
- package/dist/utils/isToken.js.map +1 -0
- package/dist/utils/isUrl.d.ts +1 -0
- package/dist/utils/isUrl.js +15 -0
- package/dist/utils/isUrl.js.map +1 -0
- package/dist/utils/mergeUnionTypes.d.ts +10 -0
- package/dist/utils/mergeUnionTypes.js +2 -0
- package/dist/utils/mergeUnionTypes.js.map +1 -0
- package/dist/utils/parseModelFileName.d.ts +1 -0
- package/dist/utils/parseModelFileName.js +6 -1
- package/dist/utils/parseModelFileName.js.map +1 -1
- package/dist/utils/parseTextTemplate.d.ts +66 -0
- package/dist/utils/parseTextTemplate.js +116 -0
- package/dist/utils/parseTextTemplate.js.map +1 -0
- package/dist/utils/prettyPrintObject.d.ts +10 -0
- package/dist/utils/prettyPrintObject.js +84 -0
- package/dist/utils/prettyPrintObject.js.map +1 -0
- package/dist/utils/pushAll.d.ts +6 -0
- package/dist/utils/pushAll.js +11 -0
- package/dist/utils/pushAll.js.map +1 -0
- package/dist/utils/removeNullFields.d.ts +2 -1
- package/dist/utils/removeNullFields.js +8 -0
- package/dist/utils/removeNullFields.js.map +1 -1
- package/dist/utils/resolveGithubRelease.d.ts +2 -0
- package/dist/utils/resolveGithubRelease.js +36 -0
- package/dist/utils/resolveGithubRelease.js.map +1 -0
- package/dist/utils/resolveLastTokens.d.ts +2 -0
- package/dist/utils/resolveLastTokens.js +12 -0
- package/dist/utils/resolveLastTokens.js.map +1 -0
- package/dist/utils/runtime.d.ts +4 -0
- package/dist/utils/runtime.js +8 -0
- package/dist/utils/runtime.js.map +1 -0
- package/dist/utils/safeEventCallback.d.ts +6 -0
- package/dist/utils/safeEventCallback.js +29 -0
- package/dist/utils/safeEventCallback.js.map +1 -0
- package/dist/utils/spawnCommand.d.ts +11 -1
- package/dist/utils/spawnCommand.js +56 -6
- package/dist/utils/spawnCommand.js.map +1 -1
- package/dist/utils/tokenizeInput.d.ts +3 -0
- package/dist/utils/tokenizeInput.js +12 -0
- package/dist/utils/tokenizeInput.js.map +1 -0
- package/dist/utils/transformPromisable.d.ts +40 -0
- package/dist/utils/transformPromisable.js +64 -0
- package/dist/utils/transformPromisable.js.map +1 -0
- package/dist/utils/truncateTextAndRoundToWords.d.ts +2 -0
- package/dist/utils/truncateTextAndRoundToWords.js +30 -0
- package/dist/utils/truncateTextAndRoundToWords.js.map +1 -1
- package/dist/utils/utilTypes.d.ts +3 -0
- package/dist/utils/utilTypes.js +2 -0
- package/dist/utils/utilTypes.js.map +1 -0
- package/dist/utils/waitForLockfileRelease.d.ts +5 -0
- package/dist/utils/waitForLockfileRelease.js +20 -0
- package/dist/utils/waitForLockfileRelease.js.map +1 -0
- package/dist/utils/withLockfile.d.ts +7 -0
- package/dist/utils/withLockfile.js +44 -0
- package/dist/utils/withLockfile.js.map +1 -0
- package/dist/utils/withOra.d.ts +2 -0
- package/dist/utils/withOra.js +22 -6
- package/dist/utils/withOra.js.map +1 -1
- package/dist/utils/withProgressLog.d.ts +23 -0
- package/dist/utils/withProgressLog.js +211 -0
- package/dist/utils/withProgressLog.js.map +1 -0
- package/dist/utils/withStatusLogs.d.ts +2 -1
- package/dist/utils/withStatusLogs.js +12 -9
- package/dist/utils/withStatusLogs.js.map +1 -1
- package/dist/utils/wrapAbortSignal.d.ts +2 -0
- package/dist/utils/wrapAbortSignal.js +9 -0
- package/dist/utils/wrapAbortSignal.js.map +1 -0
- package/llama/.clang-format +1 -2
- package/llama/CMakeLists.txt +126 -5
- package/llama/addon/AddonContext.cpp +772 -0
- package/llama/addon/AddonContext.h +53 -0
- package/llama/addon/AddonGrammar.cpp +44 -0
- package/llama/addon/AddonGrammar.h +18 -0
- package/llama/addon/AddonGrammarEvaluationState.cpp +28 -0
- package/llama/addon/AddonGrammarEvaluationState.h +15 -0
- package/llama/addon/AddonModel.cpp +681 -0
- package/llama/addon/AddonModel.h +61 -0
- package/llama/addon/AddonModelData.cpp +25 -0
- package/llama/addon/AddonModelData.h +15 -0
- package/llama/addon/AddonModelLora.cpp +107 -0
- package/llama/addon/AddonModelLora.h +28 -0
- package/llama/addon/addon.cpp +216 -0
- package/llama/addon/addonGlobals.cpp +22 -0
- package/llama/addon/addonGlobals.h +12 -0
- package/llama/addon/globals/addonLog.cpp +135 -0
- package/llama/addon/globals/addonLog.h +21 -0
- package/llama/addon/globals/addonProgress.cpp +15 -0
- package/llama/addon/globals/addonProgress.h +15 -0
- package/llama/addon/globals/getGpuInfo.cpp +108 -0
- package/llama/addon/globals/getGpuInfo.h +6 -0
- package/llama/binariesGithubRelease.json +1 -1
- package/llama/gitRelease.bundle +0 -0
- package/llama/gpuInfo/cuda-gpu-info.cu +120 -0
- package/llama/gpuInfo/cuda-gpu-info.h +10 -0
- package/llama/gpuInfo/metal-gpu-info.h +8 -0
- package/llama/gpuInfo/metal-gpu-info.mm +30 -0
- package/llama/gpuInfo/vulkan-gpu-info.cpp +83 -0
- package/llama/gpuInfo/vulkan-gpu-info.h +9 -0
- package/llama/grammars/README.md +297 -6
- package/llama/grammars/json.gbnf +4 -4
- package/llama/grammars/json_arr.gbnf +4 -4
- package/llama/llama.cpp.info.json +4 -0
- package/llama/toolchains/win32.host-x64.target-arm64.cmake +41 -0
- package/package.json +85 -54
- package/templates/packed/electron-typescript-react.json +1 -0
- package/templates/packed/node-typescript.json +1 -0
- package/dist/AbortError.d.ts +0 -2
- package/dist/AbortError.js +0 -7
- package/dist/AbortError.js.map +0 -1
- package/dist/chatWrappers/LlamaChatWrapper.d.ts +0 -13
- package/dist/chatWrappers/LlamaChatWrapper.js.map +0 -1
- package/dist/chatWrappers/resolveChatWrapperBasedOnModel.d.ts +0 -13
- package/dist/chatWrappers/resolveChatWrapperBasedOnModel.js +0 -57
- package/dist/chatWrappers/resolveChatWrapperBasedOnModel.js.map +0 -1
- package/dist/llamaEvaluator/LlamaBins.d.ts +0 -18
- package/dist/llamaEvaluator/LlamaBins.js +0 -5
- package/dist/llamaEvaluator/LlamaBins.js.map +0 -1
- package/dist/llamaEvaluator/LlamaChat/LlamaChat.d.ts +0 -175
- package/dist/llamaEvaluator/LlamaChat/LlamaChat.js +0 -704
- package/dist/llamaEvaluator/LlamaChat/LlamaChat.js.map +0 -1
- package/dist/llamaEvaluator/LlamaChat/utils/FunctionCallGrammar.d.ts +0 -21
- package/dist/llamaEvaluator/LlamaChat/utils/FunctionCallGrammar.js +0 -120
- package/dist/llamaEvaluator/LlamaChat/utils/FunctionCallGrammar.js.map +0 -1
- package/dist/llamaEvaluator/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.js.map +0 -1
- package/dist/llamaEvaluator/LlamaChatSession/LlamaChatSession.d.ts +0 -146
- package/dist/llamaEvaluator/LlamaChatSession/LlamaChatSession.js +0 -211
- package/dist/llamaEvaluator/LlamaChatSession/LlamaChatSession.js.map +0 -1
- package/dist/llamaEvaluator/LlamaChatSession/utils/defineChatSessionFunction.js.map +0 -1
- package/dist/llamaEvaluator/LlamaContext/LlamaContext.js.map +0 -1
- package/dist/llamaEvaluator/LlamaContext/types.d.ts +0 -82
- package/dist/llamaEvaluator/LlamaContext/types.js.map +0 -1
- package/dist/llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/firstInFirstOutStrategy.js.map +0 -1
- package/dist/llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/maximumParallelismStrategy.js.map +0 -1
- package/dist/llamaEvaluator/LlamaContext/utils/resolveBatchItemsPrioritizingStrategy.d.ts +0 -2
- package/dist/llamaEvaluator/LlamaContext/utils/resolveBatchItemsPrioritizingStrategy.js.map +0 -1
- package/dist/llamaEvaluator/LlamaEmbeddingContext.d.ts +0 -35
- package/dist/llamaEvaluator/LlamaEmbeddingContext.js +0 -73
- package/dist/llamaEvaluator/LlamaEmbeddingContext.js.map +0 -1
- package/dist/llamaEvaluator/LlamaGrammar.d.ts +0 -28
- package/dist/llamaEvaluator/LlamaGrammar.js.map +0 -1
- package/dist/llamaEvaluator/LlamaGrammarEvaluationState.js.map +0 -1
- package/dist/llamaEvaluator/LlamaJsonSchemaGrammar.js.map +0 -1
- package/dist/llamaEvaluator/LlamaModel.d.ts +0 -119
- package/dist/llamaEvaluator/LlamaModel.js +0 -322
- package/dist/llamaEvaluator/LlamaModel.js.map +0 -1
- package/dist/utils/binariesGithubRelease.js.map +0 -1
- package/dist/utils/clearLlamaBuild.d.ts +0 -1
- package/dist/utils/clearLlamaBuild.js +0 -12
- package/dist/utils/clearLlamaBuild.js.map +0 -1
- package/dist/utils/cloneLlamaCppRepo.d.ts +0 -2
- package/dist/utils/cloneLlamaCppRepo.js +0 -102
- package/dist/utils/cloneLlamaCppRepo.js.map +0 -1
- package/dist/utils/compileLLamaCpp.d.ts +0 -8
- package/dist/utils/compileLLamaCpp.js +0 -132
- package/dist/utils/compileLLamaCpp.js.map +0 -1
- package/dist/utils/getBin.js +0 -78
- package/dist/utils/getBin.js.map +0 -1
- package/dist/utils/getGbnfGrammarForGbnfJsonSchema.d.ts +0 -2
- package/dist/utils/getGbnfGrammarForGbnfJsonSchema.js +0 -9
- package/dist/utils/getGbnfGrammarForGbnfJsonSchema.js.map +0 -1
- package/dist/utils/getReleaseInfo.d.ts +0 -7
- package/dist/utils/getReleaseInfo.js +0 -30
- package/dist/utils/getReleaseInfo.js.map +0 -1
- package/dist/utils/parseModelTypeDescription.d.ts +0 -6
- package/dist/utils/parseModelTypeDescription.js +0 -9
- package/dist/utils/parseModelTypeDescription.js.map +0 -1
- package/dist/utils/resolveChatWrapper.d.ts +0 -4
- package/dist/utils/resolveChatWrapper.js +0 -16
- package/dist/utils/resolveChatWrapper.js.map +0 -1
- package/dist/utils/usedBinFlag.d.ts +0 -6
- package/dist/utils/usedBinFlag.js +0 -15
- package/dist/utils/usedBinFlag.js.map +0 -1
- package/llama/addon.cpp +0 -814
- package/llama/usedBin.json +0 -3
- package/llamaBins/linux-arm64/llama-addon.node +0 -0
- package/llamaBins/linux-armv7l/llama-addon.node +0 -0
- package/llamaBins/linux-x64/llama-addon.node +0 -0
- package/llamaBins/mac-arm64/llama-addon.node +0 -0
- package/llamaBins/mac-x64/llama-addon.node +0 -0
- package/llamaBins/win-x64/llama-addon.exp +0 -0
- package/llamaBins/win-x64/llama-addon.lib +0 -0
- package/llamaBins/win-x64/llama-addon.node +0 -0
- /package/dist/{utils → bindings/utils}/binariesGithubRelease.d.ts +0 -0
- /package/dist/{llamaEvaluator → evaluator}/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.d.ts +0 -0
- /package/dist/{llamaEvaluator → evaluator}/LlamaContext/types.js +0 -0
- /package/dist/{llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies → evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies}/firstInFirstOutStrategy.d.ts +0 -0
- /package/dist/{llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies → evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies}/firstInFirstOutStrategy.js +0 -0
- /package/dist/{llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies → evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies}/maximumParallelismStrategy.d.ts +0 -0
- /package/dist/{llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies → evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies}/maximumParallelismStrategy.js +0 -0
- /package/dist/{llamaEvaluator → evaluator}/LlamaGrammarEvaluationState.d.ts +0 -0
|
@@ -1,20 +1,28 @@
|
|
|
1
|
-
import { DisposeAggregator, EventRelay, withLock
|
|
1
|
+
import { AsyncDisposeAggregator, DisposeAggregator, DisposedError, EventRelay, withLock } from "lifecycle-utils";
|
|
2
2
|
import { removeNullFields } from "../../utils/removeNullFields.js";
|
|
3
|
-
import { AddonContext } from "../LlamaBins.js";
|
|
4
3
|
import { compareTokens } from "../../utils/compareTokens.js";
|
|
5
|
-
import {
|
|
4
|
+
import { DisposeGuard } from "../../utils/DisposeGuard.js";
|
|
5
|
+
import { TokenMeter } from "../TokenMeter.js";
|
|
6
|
+
import { resolveBatchItemsPrioritizationStrategy } from "./utils/resolveBatchItemsPrioritizationStrategy.js";
|
|
7
|
+
const defaultLoraScale = 1;
|
|
6
8
|
export class LlamaContext {
|
|
9
|
+
/** @internal */ _llama;
|
|
7
10
|
/** @internal */ _ctx;
|
|
8
11
|
/** @internal */ _onReclaimUnusedSequenceId = new EventRelay();
|
|
12
|
+
/** @internal */ _backendContextDisposeGuard;
|
|
9
13
|
/** @internal */ _model;
|
|
10
14
|
/** @internal */ _contextSize;
|
|
11
15
|
/** @internal */ _batchSize;
|
|
16
|
+
/** @internal */ _flashAttention;
|
|
12
17
|
/** @internal */ _totalSequences;
|
|
13
18
|
/** @internal */ _unusedSequenceIds = [];
|
|
14
19
|
/** @internal */ _batchingOptions;
|
|
15
20
|
/** @internal */ _queuedDecodeSequenceIds = new Set();
|
|
16
21
|
/** @internal */ _queuedDecodes = [];
|
|
17
|
-
/** @internal */ _disposeAggregator = new
|
|
22
|
+
/** @internal */ _disposeAggregator = new AsyncDisposeAggregator();
|
|
23
|
+
/** @internal */ _modelPreventDisposalHandle;
|
|
24
|
+
/** @internal */ _loraAdapters = new Set();
|
|
25
|
+
/** @internal */ _gcRegistry;
|
|
18
26
|
/** @internal */ _nextGeneratedSequenceId = 0;
|
|
19
27
|
/** @internal */ _dispatchDecodeScheduled = false;
|
|
20
28
|
/** @internal */ _batchDispatchPending = false;
|
|
@@ -22,44 +30,62 @@ export class LlamaContext {
|
|
|
22
30
|
/** @internal */ _allocatedContextSize;
|
|
23
31
|
/** @internal */ _disposed = false;
|
|
24
32
|
onDispose = new EventRelay();
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
*/
|
|
28
|
-
constructor({ model, sequences = 1, seed = null, contextSize = model.trainContextSize, batchSize = contextSize, threads = 6, batching: { dispatchSchedule: batchingDispatchSchedule = "nextTick", itemsPrioritizingStrategy: batchingItemsPrioritizingStrategy = "maximumParallelism" } = {}, _embedding, _noSeed }) {
|
|
29
|
-
if (model.disposed)
|
|
33
|
+
constructor({ _model }, { sequences, seed = null, contextSize, batchSize, flashAttention = _model.defaultContextFlashAttention, threads = 6, batching: { dispatchSchedule: batchingDispatchSchedule = "nextTick", itemPrioritizationStrategy: batchingItemsPrioritizationStrategy = "maximumParallelism" } = {}, _embeddings, _noSeed }) {
|
|
34
|
+
if (_model.disposed)
|
|
30
35
|
throw new DisposedError();
|
|
31
|
-
this.
|
|
36
|
+
this._llama = _model._llama;
|
|
37
|
+
this._model = _model;
|
|
38
|
+
this._backendContextDisposeGuard = new DisposeGuard([this._model._backendModelDisposeGuard]);
|
|
39
|
+
this._modelPreventDisposalHandle = this._model._backendModelDisposeGuard.createPreventDisposalHandle();
|
|
32
40
|
this._totalSequences = Math.max(1, Math.floor(sequences));
|
|
33
41
|
this._contextSize = Math.max(2, contextSize);
|
|
34
42
|
this._batchSize = Math.max(batchSize, this._totalSequences);
|
|
35
|
-
this.
|
|
43
|
+
this._flashAttention = flashAttention;
|
|
44
|
+
this._ctx = new this._llama._bindings.AddonContext(this._model._model, removeNullFields({
|
|
36
45
|
seed: seed != null ? Math.max(-1, Math.floor(seed)) : undefined,
|
|
37
|
-
contextSize:
|
|
46
|
+
contextSize: this._contextSize * this._totalSequences, // each sequence needs its own <contextSize> of cells
|
|
38
47
|
batchSize: this._batchSize,
|
|
48
|
+
sequences: this._totalSequences,
|
|
49
|
+
flashAttention: this._flashAttention,
|
|
39
50
|
threads: Math.max(0, Math.floor(threads)),
|
|
40
|
-
|
|
51
|
+
embeddings: _embeddings,
|
|
41
52
|
noSeed: _noSeed
|
|
42
53
|
}));
|
|
43
54
|
this._batchingOptions = {
|
|
44
55
|
dispatchSchedule: batchingDispatchSchedule,
|
|
45
|
-
|
|
56
|
+
itemPrioritizationStrategy: batchingItemsPrioritizationStrategy
|
|
46
57
|
};
|
|
58
|
+
this._gcRegistry = new FinalizationRegistry(this._model._removeLoraUsage);
|
|
59
|
+
this._gcRegistry.register(this, this._loraAdapters);
|
|
47
60
|
this._reclaimUnusedSequenceId = this._reclaimUnusedSequenceId.bind(this);
|
|
61
|
+
this._disposeAggregator.add(() => {
|
|
62
|
+
this._disposed = true;
|
|
63
|
+
});
|
|
64
|
+
this._disposeAggregator.add(() => this._gcRegistry.unregister(this));
|
|
48
65
|
this._disposeAggregator.add(this._onReclaimUnusedSequenceId);
|
|
49
66
|
this._disposeAggregator.add(this.onDispose.dispatchEvent);
|
|
67
|
+
this._disposeAggregator.add(this.model.onDispose.createListener(disposeContextIfReferenced.bind(null, new WeakRef(this))));
|
|
50
68
|
this._disposeAggregator.add(() => {
|
|
51
|
-
this.
|
|
69
|
+
if (this._loraAdapters.size > 0) {
|
|
70
|
+
const loraAdapters = new Set(this._loraAdapters);
|
|
71
|
+
this._loraAdapters.clear();
|
|
72
|
+
return this._model._removeLoraUsage(loraAdapters);
|
|
73
|
+
}
|
|
74
|
+
});
|
|
75
|
+
this._disposeAggregator.add(async () => {
|
|
76
|
+
await this._backendContextDisposeGuard.acquireDisposeLock();
|
|
77
|
+
await this._ctx.dispose();
|
|
78
|
+
this._modelPreventDisposalHandle.dispose();
|
|
52
79
|
});
|
|
53
|
-
this._disposeAggregator.add(this.model.onDispose.createListener(disposeContextIfReferenced.bind(null, new WeakRef(this))));
|
|
54
80
|
}
|
|
55
|
-
dispose() {
|
|
81
|
+
async dispose() {
|
|
56
82
|
if (this._disposed)
|
|
57
83
|
return;
|
|
58
84
|
this._disposed = true;
|
|
59
|
-
this._disposeAggregator.dispose();
|
|
85
|
+
await this._disposeAggregator.dispose();
|
|
60
86
|
}
|
|
61
87
|
/** @hidden */
|
|
62
|
-
[Symbol.
|
|
88
|
+
[Symbol.asyncDispose]() {
|
|
63
89
|
return this.dispose();
|
|
64
90
|
}
|
|
65
91
|
get disposed() {
|
|
@@ -74,6 +100,17 @@ export class LlamaContext {
|
|
|
74
100
|
get batchSize() {
|
|
75
101
|
return this._batchSize;
|
|
76
102
|
}
|
|
103
|
+
get flashAttention() {
|
|
104
|
+
return this._flashAttention;
|
|
105
|
+
}
|
|
106
|
+
/**
|
|
107
|
+
* The actual size of the state in the memory in bytes.
|
|
108
|
+
* This value is provided by `llama.cpp` and doesn't include all the memory overhead of the context.
|
|
109
|
+
*/
|
|
110
|
+
get stateSize() {
|
|
111
|
+
this._ensureNotDisposed();
|
|
112
|
+
return this._ctx.getStateSize();
|
|
113
|
+
}
|
|
77
114
|
getAllocatedContextSize() {
|
|
78
115
|
this._ensureNotDisposed();
|
|
79
116
|
if (this._allocatedContextSize == null)
|
|
@@ -89,9 +126,9 @@ export class LlamaContext {
|
|
|
89
126
|
/**
|
|
90
127
|
* Before calling this method, make sure to call `sequencesLeft` to check if there are any sequences left.
|
|
91
128
|
* When there are no sequences left, this method will throw an error.
|
|
92
|
-
* @param [options]
|
|
93
129
|
*/
|
|
94
|
-
getSequence(
|
|
130
|
+
getSequence(options = {}) {
|
|
131
|
+
const { contextShift: { size: contextShiftSize = Math.min(100, Math.ceil(this.contextSize / 2)), strategy: contextShiftStrategy = "eraseBeginning" } = {}, _tokenMeter } = options;
|
|
95
132
|
this._ensureNotDisposed();
|
|
96
133
|
const nextSequenceId = this._popSequenceId();
|
|
97
134
|
if (nextSequenceId == null)
|
|
@@ -99,6 +136,7 @@ export class LlamaContext {
|
|
|
99
136
|
return LlamaContextSequence._create({
|
|
100
137
|
sequenceId: nextSequenceId,
|
|
101
138
|
context: this,
|
|
139
|
+
tokenMeter: _tokenMeter,
|
|
102
140
|
contextShift: {
|
|
103
141
|
size: contextShiftSize,
|
|
104
142
|
strategy: contextShiftStrategy
|
|
@@ -115,17 +153,18 @@ export class LlamaContext {
|
|
|
115
153
|
this._currentDispatchBatchHandle = {};
|
|
116
154
|
this._dispatchDecodeScheduled = false;
|
|
117
155
|
this._batchDispatchPending = false;
|
|
118
|
-
let
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
156
|
+
let shouldHaveAnotherLoop = this._queuedDecodes.length > 0;
|
|
157
|
+
const resolvePrioritizationStrategy = () => {
|
|
158
|
+
try {
|
|
159
|
+
this._ensureNotDisposed();
|
|
160
|
+
return resolveBatchItemsPrioritizationStrategy(this._batchingOptions.itemPrioritizationStrategy);
|
|
161
|
+
}
|
|
162
|
+
catch (err) {
|
|
163
|
+
this._dispatchErrorForQueuedDecodesAndDequeue(new Set(this._queuedDecodes), err);
|
|
164
|
+
}
|
|
165
|
+
return null;
|
|
166
|
+
};
|
|
167
|
+
const getOrderedQueuedDecodes = (prioritizationStrategy) => {
|
|
129
168
|
const batchItemToQueuedDecodeMap = new Map();
|
|
130
169
|
const batchItemsList = [];
|
|
131
170
|
for (const queuedDecode of this._queuedDecodes) {
|
|
@@ -138,42 +177,65 @@ export class LlamaContext {
|
|
|
138
177
|
}
|
|
139
178
|
let prioritizedItems;
|
|
140
179
|
try {
|
|
141
|
-
prioritizedItems =
|
|
180
|
+
prioritizedItems = prioritizationStrategy({
|
|
142
181
|
items: batchItemsList,
|
|
143
182
|
size: this._batchSize
|
|
144
183
|
});
|
|
145
184
|
}
|
|
146
185
|
catch (err) {
|
|
147
186
|
this._dispatchErrorForQueuedDecodesAndDequeue(new Set(this._queuedDecodes), err);
|
|
148
|
-
return;
|
|
187
|
+
return null;
|
|
149
188
|
}
|
|
150
|
-
|
|
151
|
-
const afterDecodeActions = [];
|
|
152
|
-
const queuedDecodesToDelete = new Set();
|
|
153
|
-
const currentQueuedDecodeItems = new Set();
|
|
154
|
-
const currentBatchItems = [];
|
|
155
|
-
let currentBatchSize = 0;
|
|
156
|
-
for (const prioritizedItem of prioritizedItems) {
|
|
189
|
+
return prioritizedItems.map((prioritizedItem) => {
|
|
157
190
|
const queuedDecode = batchItemToQueuedDecodeMap.get(prioritizedItem.item);
|
|
158
191
|
if (queuedDecode == null)
|
|
159
192
|
throw new Error("Received invalid batch item. Make sure you keep the original object reference " +
|
|
160
193
|
"of the batch item on `item` on `PrioritizedBatchItem` in your custom prioritization strategy");
|
|
161
|
-
|
|
162
|
-
|
|
194
|
+
return {
|
|
195
|
+
queuedDecode,
|
|
196
|
+
processAmount: prioritizedItem.processAmount
|
|
197
|
+
};
|
|
198
|
+
});
|
|
199
|
+
};
|
|
200
|
+
const fitQueuedDecodesToABatch = (queuedDecodes, batchSize) => {
|
|
201
|
+
const currentBatchItems = [];
|
|
202
|
+
let currentBatchSize = 0;
|
|
203
|
+
let batchTokenSlotsLeft = batchSize;
|
|
204
|
+
for (const { queuedDecode, processAmount } of queuedDecodes) {
|
|
205
|
+
const resolvedProcessAmount = Math.min(processAmount <= 0 ? 1 : processAmount, queuedDecode.tokens.length, batchTokenSlotsLeft);
|
|
206
|
+
if (resolvedProcessAmount <= 0) {
|
|
207
|
+
if (batchTokenSlotsLeft === 0)
|
|
208
|
+
break;
|
|
163
209
|
continue;
|
|
164
|
-
|
|
210
|
+
}
|
|
211
|
+
batchTokenSlotsLeft -= resolvedProcessAmount;
|
|
212
|
+
currentBatchSize += resolvedProcessAmount;
|
|
165
213
|
currentBatchItems.push({
|
|
166
214
|
queuedDecode,
|
|
167
|
-
processAmount
|
|
215
|
+
processAmount: resolvedProcessAmount
|
|
168
216
|
});
|
|
169
|
-
currentBatchSize += processAmount;
|
|
170
217
|
}
|
|
218
|
+
return {
|
|
219
|
+
currentBatchItems,
|
|
220
|
+
currentBatchSize
|
|
221
|
+
};
|
|
222
|
+
};
|
|
223
|
+
const decodeTokenBatchItems = async (batchItems, currentBatchSize) => {
|
|
224
|
+
const afterDecodeActions = [];
|
|
225
|
+
const queuedDecodesToDelete = new Set();
|
|
226
|
+
const currentQueuedDecodeItems = new Set();
|
|
171
227
|
if (currentBatchSize !== 0)
|
|
172
228
|
this._ctx.initBatch(currentBatchSize);
|
|
173
|
-
for (const { queuedDecode, processAmount } of
|
|
229
|
+
for (const { queuedDecode, processAmount } of batchItems) {
|
|
174
230
|
let batchLogitIndex;
|
|
175
231
|
try {
|
|
176
|
-
|
|
232
|
+
const shouldGenerateLogitAtTheEnd = queuedDecode.generateLogitAtTheEnd &&
|
|
233
|
+
processAmount === queuedDecode.tokens.length;
|
|
234
|
+
const tokensToProcess = queuedDecode.tokens.slice(0, processAmount);
|
|
235
|
+
const numberOfOutputTokens = shouldGenerateLogitAtTheEnd ? 1 : 0;
|
|
236
|
+
TokenMeter.useTokens(queuedDecode.tokenMeter, Math.max(0, tokensToProcess.length - numberOfOutputTokens), "input");
|
|
237
|
+
TokenMeter.useTokens(queuedDecode.tokenMeter, numberOfOutputTokens, "output");
|
|
238
|
+
batchLogitIndex = this._ctx.addToBatch(queuedDecode.sequenceId, queuedDecode.firstTokenSequenceIndex, Uint32Array.from(tokensToProcess), shouldGenerateLogitAtTheEnd);
|
|
177
239
|
}
|
|
178
240
|
catch (err) {
|
|
179
241
|
this._dispatchErrorForQueuedDecodesAndDequeue(new Set([queuedDecode]), err);
|
|
@@ -192,8 +254,6 @@ export class LlamaContext {
|
|
|
192
254
|
queuedDecode.tokens = queuedDecode.tokens.slice(processAmount);
|
|
193
255
|
queuedDecode.firstTokenSequenceIndex += processAmount;
|
|
194
256
|
}
|
|
195
|
-
if (batchTokenSlotsLeft === 0)
|
|
196
|
-
break;
|
|
197
257
|
}
|
|
198
258
|
for (let i = 0; i < this._queuedDecodes.length; i++) {
|
|
199
259
|
const queuedDecode = this._queuedDecodes[i];
|
|
@@ -203,7 +263,6 @@ export class LlamaContext {
|
|
|
203
263
|
i--;
|
|
204
264
|
}
|
|
205
265
|
}
|
|
206
|
-
shouldHaveAnotherBatch = this._queuedDecodes.length > 0;
|
|
207
266
|
try {
|
|
208
267
|
if (currentBatchSize !== 0)
|
|
209
268
|
await this._ctx.decodeBatch();
|
|
@@ -224,14 +283,45 @@ export class LlamaContext {
|
|
|
224
283
|
}
|
|
225
284
|
accept(undefined);
|
|
226
285
|
}
|
|
286
|
+
};
|
|
287
|
+
const prioritizationStrategy = resolvePrioritizationStrategy();
|
|
288
|
+
if (prioritizationStrategy == null)
|
|
289
|
+
return; // all queued items are rejected and dequeued when we get here
|
|
290
|
+
while (shouldHaveAnotherLoop) {
|
|
291
|
+
const orderedQueuedDecodes = getOrderedQueuedDecodes(prioritizationStrategy);
|
|
292
|
+
if (orderedQueuedDecodes == null)
|
|
293
|
+
return; // all queued items are rejected and dequeued when we get here
|
|
294
|
+
const { currentBatchItems, currentBatchSize } = fitQueuedDecodesToABatch(orderedQueuedDecodes, this._batchSize);
|
|
295
|
+
let preventDisposalHandle;
|
|
296
|
+
try {
|
|
297
|
+
preventDisposalHandle = this._backendContextDisposeGuard.createPreventDisposalHandle();
|
|
298
|
+
}
|
|
299
|
+
catch (err) {
|
|
300
|
+
this._dispatchErrorForQueuedDecodesAndDequeue(new Set(this._queuedDecodes), err);
|
|
301
|
+
return;
|
|
302
|
+
}
|
|
303
|
+
try {
|
|
304
|
+
await decodeTokenBatchItems(currentBatchItems, currentBatchSize);
|
|
305
|
+
shouldHaveAnotherLoop = this._queuedDecodes.length > 0;
|
|
306
|
+
}
|
|
307
|
+
finally {
|
|
308
|
+
preventDisposalHandle.dispose();
|
|
309
|
+
}
|
|
227
310
|
}
|
|
228
311
|
});
|
|
229
312
|
}
|
|
230
|
-
|
|
313
|
+
/**
|
|
314
|
+
* Print the timings of token evaluation since that last print for this context.
|
|
315
|
+
* > **Note:** it prints on the `LlamaLogLevel.info` level, so if you set the level of your `Llama` instance higher than that,
|
|
316
|
+
* it won't print anything.
|
|
317
|
+
*/
|
|
318
|
+
async printTimings() {
|
|
319
|
+
this._ensureNotDisposed();
|
|
231
320
|
this._ctx.printTimings();
|
|
321
|
+
await new Promise((accept) => setTimeout(accept, 0)); // wait for the logs to finish printing
|
|
232
322
|
}
|
|
233
323
|
/** @internal */
|
|
234
|
-
async _decodeTokens({ sequenceId, firstTokenSequenceIndex, tokens, generateLogitAtTheEnd = false, evaluationPriority = 5 }, onDone) {
|
|
324
|
+
async _decodeTokens({ sequenceId, firstTokenSequenceIndex, tokens, generateLogitAtTheEnd = false, evaluationPriority = 5, tokenMeter }, onDone) {
|
|
235
325
|
return await new Promise((accept, reject) => {
|
|
236
326
|
this._queuedDecodes.push({
|
|
237
327
|
sequenceId,
|
|
@@ -239,6 +329,7 @@ export class LlamaContext {
|
|
|
239
329
|
firstTokenSequenceIndex,
|
|
240
330
|
generateLogitAtTheEnd,
|
|
241
331
|
evaluationPriority,
|
|
332
|
+
tokenMeter,
|
|
242
333
|
response: [accept, reject],
|
|
243
334
|
onDone
|
|
244
335
|
});
|
|
@@ -251,6 +342,8 @@ export class LlamaContext {
|
|
|
251
342
|
if (this._disposed)
|
|
252
343
|
return;
|
|
253
344
|
void withLock(this, "context", async () => {
|
|
345
|
+
if (this._disposed)
|
|
346
|
+
return;
|
|
254
347
|
this._ctx.disposeSequence(sequenceId);
|
|
255
348
|
this._unusedSequenceIds.push(sequenceId);
|
|
256
349
|
this._onReclaimUnusedSequenceId.dispatchEvent();
|
|
@@ -261,6 +354,10 @@ export class LlamaContext {
|
|
|
261
354
|
this._ctx.acceptGrammarEvaluationStateToken(grammarEvaluationState._state, token);
|
|
262
355
|
}
|
|
263
356
|
/** @internal */
|
|
357
|
+
_canBeNextTokenForGrammarEvaluationState(grammarEvaluationState, token) {
|
|
358
|
+
return this._ctx.canBeNextTokenForGrammarEvaluationState(grammarEvaluationState._state, token);
|
|
359
|
+
}
|
|
360
|
+
/** @internal */
|
|
264
361
|
_popSequenceId() {
|
|
265
362
|
if (this._unusedSequenceIds.length > 0)
|
|
266
363
|
return this._unusedSequenceIds.shift();
|
|
@@ -310,20 +407,115 @@ export class LlamaContext {
|
|
|
310
407
|
if (this._disposed)
|
|
311
408
|
throw new DisposedError();
|
|
312
409
|
}
|
|
410
|
+
/** @internal */
|
|
411
|
+
async _setLora({ filePath, scale }) {
|
|
412
|
+
const lora = await this._model._getOrLoadLora(filePath);
|
|
413
|
+
this._ctx.setLora(lora, scale ?? defaultLoraScale);
|
|
414
|
+
if (!this._loraAdapters.has(lora)) {
|
|
415
|
+
this._loraAdapters.add(lora);
|
|
416
|
+
lora.usages++;
|
|
417
|
+
}
|
|
418
|
+
}
|
|
419
|
+
/** @internal */
|
|
420
|
+
static async _create(options, { _model }) {
|
|
421
|
+
const sequences = options.sequences ?? getDefaultContextSequences();
|
|
422
|
+
const flashAttention = _model.flashAttentionSupported
|
|
423
|
+
? Boolean(options.flashAttention ?? _model.defaultContextFlashAttention)
|
|
424
|
+
: false;
|
|
425
|
+
const loraOptions = typeof options.lora === "string"
|
|
426
|
+
? { adapters: [{ filePath: options.lora }] }
|
|
427
|
+
: options.lora;
|
|
428
|
+
const contextSize = await _model.fileInsights.configurationResolver.resolveContextContextSize(options.contextSize, {
|
|
429
|
+
batchSize: options.batchSize,
|
|
430
|
+
sequences: sequences,
|
|
431
|
+
modelGpuLayers: _model.gpuLayers,
|
|
432
|
+
modelTrainContextSize: _model.trainContextSize,
|
|
433
|
+
flashAttention,
|
|
434
|
+
getVramState: () => _model._llama._vramOrchestrator.getMemoryState(),
|
|
435
|
+
llamaGpu: _model._llama.gpu,
|
|
436
|
+
ignoreMemorySafetyChecks: options.ignoreMemorySafetyChecks,
|
|
437
|
+
isEmbeddingContext: options._embeddings
|
|
438
|
+
});
|
|
439
|
+
const batchSize = options.batchSize ?? getDefaultContextBatchSize({ contextSize, sequences });
|
|
440
|
+
const vramRequiredEstimate = _model.fileInsights.estimateContextResourceRequirements({
|
|
441
|
+
contextSize,
|
|
442
|
+
sequences,
|
|
443
|
+
isEmbeddingContext: options._embeddings,
|
|
444
|
+
modelGpuLayers: _model.gpuLayers,
|
|
445
|
+
batchSize,
|
|
446
|
+
flashAttention
|
|
447
|
+
}).gpuVram;
|
|
448
|
+
const context = new LlamaContext({ _model }, { ...options, contextSize, batchSize, sequences, flashAttention });
|
|
449
|
+
const { createSignal } = options;
|
|
450
|
+
const contextCreationMemoryReservation = options.ignoreMemorySafetyChecks
|
|
451
|
+
? null
|
|
452
|
+
: _model._llama._vramOrchestrator.reserveMemory(vramRequiredEstimate);
|
|
453
|
+
try {
|
|
454
|
+
const contextLoaded = await context._ctx.init();
|
|
455
|
+
if (createSignal?.aborted) {
|
|
456
|
+
if (contextLoaded)
|
|
457
|
+
await context._ctx.dispose();
|
|
458
|
+
throw createSignal.reason;
|
|
459
|
+
}
|
|
460
|
+
else if (!contextLoaded)
|
|
461
|
+
throw new Error("Failed to create context");
|
|
462
|
+
contextCreationMemoryReservation?.dispose?.();
|
|
463
|
+
if (loraOptions != null && loraOptions.adapters.length > 0) {
|
|
464
|
+
let loadedAdapters = 0;
|
|
465
|
+
for (const adapter of loraOptions.adapters) {
|
|
466
|
+
try {
|
|
467
|
+
await context._setLora({
|
|
468
|
+
filePath: adapter.filePath,
|
|
469
|
+
scale: adapter.scale
|
|
470
|
+
});
|
|
471
|
+
loadedAdapters++;
|
|
472
|
+
try {
|
|
473
|
+
loraOptions.onLoadProgress?.(loadedAdapters / loraOptions.adapters.length);
|
|
474
|
+
}
|
|
475
|
+
catch (err) {
|
|
476
|
+
console.error(err);
|
|
477
|
+
}
|
|
478
|
+
}
|
|
479
|
+
catch (err) {
|
|
480
|
+
await context.dispose();
|
|
481
|
+
throw err;
|
|
482
|
+
}
|
|
483
|
+
if (createSignal?.aborted) {
|
|
484
|
+
await context.dispose();
|
|
485
|
+
throw createSignal.reason;
|
|
486
|
+
}
|
|
487
|
+
}
|
|
488
|
+
}
|
|
489
|
+
else if (loraOptions?.onLoadProgress != null) {
|
|
490
|
+
try {
|
|
491
|
+
loraOptions.onLoadProgress(1);
|
|
492
|
+
}
|
|
493
|
+
catch (err) {
|
|
494
|
+
console.error(err);
|
|
495
|
+
}
|
|
496
|
+
}
|
|
497
|
+
return context;
|
|
498
|
+
}
|
|
499
|
+
finally {
|
|
500
|
+
contextCreationMemoryReservation?.dispose?.();
|
|
501
|
+
}
|
|
502
|
+
}
|
|
313
503
|
}
|
|
314
504
|
export class LlamaContextSequence {
|
|
315
505
|
/** @internal */ _sequenceId;
|
|
316
506
|
/** @internal */ _gcRegistry;
|
|
317
507
|
/** @internal */ _context;
|
|
318
508
|
/** @internal */ _contextShift;
|
|
509
|
+
/** @internal */ _tokenMeter;
|
|
319
510
|
/** @internal */ _disposeAggregator = new DisposeAggregator();
|
|
320
511
|
/** @internal */ _contextTokens = [];
|
|
321
512
|
/** @internal */ _nextTokenIndex = 0;
|
|
322
513
|
/** @internal */ _disposed = false;
|
|
323
514
|
onDispose = new EventRelay();
|
|
324
|
-
constructor({ sequenceId, context, contextShift }) {
|
|
515
|
+
constructor({ sequenceId, context, tokenMeter, contextShift }) {
|
|
325
516
|
this._sequenceId = sequenceId;
|
|
326
517
|
this._context = context;
|
|
518
|
+
this._tokenMeter = tokenMeter ?? new TokenMeter();
|
|
327
519
|
this._contextShift = contextShift;
|
|
328
520
|
this._gcRegistry = new FinalizationRegistry(this._context._reclaimUnusedSequenceId);
|
|
329
521
|
this._gcRegistry.register(this, sequenceId);
|
|
@@ -360,6 +552,9 @@ export class LlamaContextSequence {
|
|
|
360
552
|
get contextTokens() {
|
|
361
553
|
return this._contextTokens.slice();
|
|
362
554
|
}
|
|
555
|
+
get tokenMeter() {
|
|
556
|
+
return this._tokenMeter;
|
|
557
|
+
}
|
|
363
558
|
get isLoadedToMemory() {
|
|
364
559
|
return !this._disposed;
|
|
365
560
|
}
|
|
@@ -385,7 +580,7 @@ export class LlamaContextSequence {
|
|
|
385
580
|
}
|
|
386
581
|
/**
|
|
387
582
|
* Erase context tokens in the provided ranges to free up space for new tokens to be generated.
|
|
388
|
-
*
|
|
583
|
+
* The start of each range is inclusive, and the end of each range is exclusive.
|
|
389
584
|
* For example, the range `{start: 0, end: 1}` will remove the token at the `0` index only.
|
|
390
585
|
*/
|
|
391
586
|
async eraseContextTokenRanges(ranges) {
|
|
@@ -394,6 +589,8 @@ export class LlamaContextSequence {
|
|
|
394
589
|
this._ensureNotDisposed();
|
|
395
590
|
if (ranges.length === 0)
|
|
396
591
|
return;
|
|
592
|
+
// if the deletion fails, we'll have to dispose the sequence and fill it up again
|
|
593
|
+
let deletionSuccessful = true;
|
|
397
594
|
const resolvedRanges = ranges
|
|
398
595
|
.map(({ start, end }) => {
|
|
399
596
|
if (start === end)
|
|
@@ -423,34 +620,41 @@ export class LlamaContextSequence {
|
|
|
423
620
|
let lastDeleteRangeEndPos = null;
|
|
424
621
|
for (const range of resolvedRanges) {
|
|
425
622
|
this._contextTokens.splice(range.start - removedTokens, range.end - range.start);
|
|
426
|
-
|
|
427
|
-
|
|
623
|
+
if (deletionSuccessful)
|
|
624
|
+
deletionSuccessful &&= this._context._ctx.removeTokenCellsFromSequence(this._sequenceId, range.start, range.end);
|
|
625
|
+
if (deletionSuccessful && lastDeleteRangeEndPos != null && removedTokens > 0 && lastDeleteRangeEndPos !== range.start)
|
|
428
626
|
this._context._ctx.shiftSequenceTokenCells(this._sequenceId, lastDeleteRangeEndPos, range.start, -removedTokens);
|
|
429
627
|
removedTokens += range.end - range.start;
|
|
430
628
|
lastDeleteRangeEndPos = range.end;
|
|
431
629
|
}
|
|
432
|
-
if (lastDeleteRangeEndPos != null && removedTokens > 0 && lastDeleteRangeEndPos !== this._nextTokenIndex)
|
|
630
|
+
if (deletionSuccessful && lastDeleteRangeEndPos != null && removedTokens > 0 && lastDeleteRangeEndPos !== this._nextTokenIndex)
|
|
433
631
|
this._context._ctx.shiftSequenceTokenCells(this._sequenceId, lastDeleteRangeEndPos, this._nextTokenIndex, -removedTokens);
|
|
434
632
|
this._nextTokenIndex -= removedTokens;
|
|
633
|
+
if (deletionSuccessful)
|
|
634
|
+
return;
|
|
635
|
+
const newSequenceTokens = this._contextTokens.slice();
|
|
636
|
+
this._nextTokenIndex = 0;
|
|
637
|
+
this._context._ctx.disposeSequence(this._sequenceId);
|
|
638
|
+
await this.evaluateWithoutGeneratingNewTokens(newSequenceTokens);
|
|
435
639
|
});
|
|
436
640
|
}
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
* @param [options]
|
|
440
|
-
*/
|
|
441
|
-
evaluate(tokens, { temperature = 0, topK = 40, topP = 0.95, grammarEvaluationState, repeatPenalty, evaluationPriority = 5, contextShift: { size: contextShiftSize = this._contextShift.size, strategy: contextShiftStrategy = this._contextShift.strategy } = {}, yieldEosToken = false } = {}) {
|
|
641
|
+
evaluate(tokens, options = {}) {
|
|
642
|
+
const { temperature = 0, minP = 0, topK = 40, topP = 0.95, grammarEvaluationState, repeatPenalty, tokenBias, evaluationPriority = 5, contextShift: { size: contextShiftSize = this._contextShift.size, strategy: contextShiftStrategy = this._contextShift.strategy } = {}, yieldEogToken = false, _noSampling = false } = options;
|
|
442
643
|
return this._evaluate(tokens, {
|
|
443
644
|
temperature,
|
|
645
|
+
minP,
|
|
444
646
|
topK,
|
|
445
647
|
topP,
|
|
446
648
|
grammarEvaluationState,
|
|
447
649
|
repeatPenalty,
|
|
650
|
+
tokenBias,
|
|
448
651
|
evaluationPriority,
|
|
449
652
|
contextShiftOptions: {
|
|
450
653
|
size: contextShiftSize,
|
|
451
654
|
strategy: contextShiftStrategy
|
|
452
655
|
},
|
|
453
|
-
|
|
656
|
+
yieldEogToken,
|
|
657
|
+
_noSampling
|
|
454
658
|
});
|
|
455
659
|
}
|
|
456
660
|
/**
|
|
@@ -473,24 +677,29 @@ export class LlamaContextSequence {
|
|
|
473
677
|
}
|
|
474
678
|
}
|
|
475
679
|
/** @internal */
|
|
476
|
-
async *_evaluate(tokens, { temperature = 0, topK = 40, topP = 0.95, grammarEvaluationState, repeatPenalty, evaluationPriority = 5, generateNewTokens = true, contextShiftOptions,
|
|
680
|
+
async *_evaluate(tokens, { temperature = 0, minP = 0, topK = 40, topP = 0.95, grammarEvaluationState, repeatPenalty, tokenBias, evaluationPriority = 5, generateNewTokens = true, contextShiftOptions, yieldEogToken = false, _noSampling = false }) {
|
|
477
681
|
this._ensureNotDisposed();
|
|
478
682
|
let evalTokens = tokens;
|
|
479
683
|
if (evalTokens.length === 0)
|
|
480
684
|
return;
|
|
481
|
-
// eslint-disable-next-line no-constant-condition
|
|
482
685
|
while (true) {
|
|
483
686
|
this._ensureNotDisposed();
|
|
484
687
|
// Evaluate to get the next token.
|
|
485
|
-
const nextToken = await this._decodeTokens(evalTokens, generateNewTokens, evaluationPriority, contextShiftOptions, (batchLogitIndex) => {
|
|
688
|
+
const nextToken = await this._decodeTokens(evalTokens, generateNewTokens, evaluationPriority, this._tokenMeter, contextShiftOptions, (batchLogitIndex) => {
|
|
689
|
+
if (_noSampling)
|
|
690
|
+
return null;
|
|
486
691
|
const repeatPenaltyTokens = repeatPenalty?.punishTokens instanceof Function
|
|
487
692
|
? repeatPenalty.punishTokens()
|
|
488
693
|
: repeatPenalty?.punishTokens;
|
|
489
694
|
const resolvedGrammarEvaluationState = grammarEvaluationState instanceof Function
|
|
490
695
|
? grammarEvaluationState()
|
|
491
696
|
: grammarEvaluationState;
|
|
697
|
+
if (resolvedGrammarEvaluationState != null && resolvedGrammarEvaluationState._llama !== this.model._llama)
|
|
698
|
+
throw new Error("The LlamaGrammar used by passed to this function was created with a different Llama instance than the one used by this sequence's model. Make sure you use the same Llama instance for both the model and the grammar.");
|
|
699
|
+
const { tokenBiasKeys, tokenBiasValues } = getTokenBiasesForAddon(tokenBias, this.model);
|
|
492
700
|
return this._context._ctx.sampleToken(batchLogitIndex, removeNullFields({
|
|
493
701
|
temperature,
|
|
702
|
+
minP,
|
|
494
703
|
topK,
|
|
495
704
|
topP,
|
|
496
705
|
repeatPenalty: repeatPenalty?.penalty,
|
|
@@ -499,31 +708,36 @@ export class LlamaContextSequence {
|
|
|
499
708
|
: undefined,
|
|
500
709
|
repeatPenaltyPresencePenalty: repeatPenalty?.presencePenalty,
|
|
501
710
|
repeatPenaltyFrequencyPenalty: repeatPenalty?.frequencyPenalty,
|
|
711
|
+
tokenBiasKeys,
|
|
712
|
+
tokenBiasValues,
|
|
502
713
|
grammarEvaluationState: resolvedGrammarEvaluationState?._state
|
|
503
714
|
}));
|
|
504
715
|
});
|
|
505
716
|
if (nextToken == null)
|
|
506
717
|
return;
|
|
507
718
|
// the model finished generating text
|
|
508
|
-
if (!
|
|
719
|
+
if (!yieldEogToken && this._context.model.isEogToken(nextToken))
|
|
509
720
|
break;
|
|
510
|
-
yield nextToken;
|
|
511
|
-
//
|
|
512
|
-
|
|
721
|
+
const replacementToken = (yield nextToken);
|
|
722
|
+
// set the tokens for the next evaluation
|
|
723
|
+
if (replacementToken != null)
|
|
724
|
+
evalTokens = [replacementToken];
|
|
725
|
+
else
|
|
726
|
+
evalTokens = [nextToken];
|
|
513
727
|
}
|
|
514
728
|
}
|
|
515
729
|
/** @internal */
|
|
516
|
-
async _decodeTokens(tokens, generateLogit, evaluationPriority, contextShiftOptions, onDecodeDone) {
|
|
730
|
+
async _decodeTokens(tokens, generateLogit, evaluationPriority, tokenMeter, contextShiftOptions, onDecodeDone) {
|
|
517
731
|
this._ensureNotDisposed();
|
|
518
732
|
const tokensLeftToDecode = tokens.slice();
|
|
519
733
|
return await withLock(this, "evaluate", async () => {
|
|
520
734
|
while (tokensLeftToDecode.length > 0) {
|
|
521
735
|
this._ensureNotDisposed();
|
|
522
|
-
let freeSpace = this._context.contextSize - this._nextTokenIndex;
|
|
523
|
-
if (freeSpace <=
|
|
736
|
+
let freeSpace = this._context.contextSize - 1 - this._nextTokenIndex;
|
|
737
|
+
if (freeSpace <= 0) {
|
|
524
738
|
await this._freeUpSpaceForTokens(contextShiftOptions);
|
|
525
|
-
freeSpace = this._context.contextSize - this._nextTokenIndex;
|
|
526
|
-
if (freeSpace <=
|
|
739
|
+
freeSpace = this._context.contextSize - 1 - this._nextTokenIndex;
|
|
740
|
+
if (freeSpace <= 0)
|
|
527
741
|
throw new Error("Failed to free up space for new tokens");
|
|
528
742
|
}
|
|
529
743
|
const tokensToDecode = tokensLeftToDecode.splice(0, freeSpace);
|
|
@@ -533,7 +747,8 @@ export class LlamaContextSequence {
|
|
|
533
747
|
tokens: tokensToDecode,
|
|
534
748
|
firstTokenSequenceIndex: this._nextTokenIndex,
|
|
535
749
|
generateLogitAtTheEnd,
|
|
536
|
-
evaluationPriority
|
|
750
|
+
evaluationPriority,
|
|
751
|
+
tokenMeter
|
|
537
752
|
}, !generateLogitAtTheEnd
|
|
538
753
|
? undefined
|
|
539
754
|
: onDecodeDone);
|
|
@@ -553,7 +768,10 @@ export class LlamaContextSequence {
|
|
|
553
768
|
: contextShiftOptions.size));
|
|
554
769
|
this._ensureNotDisposed();
|
|
555
770
|
if (contextShiftOptions.strategy === "eraseBeginning") {
|
|
556
|
-
|
|
771
|
+
let eraseStartIndex = 0;
|
|
772
|
+
if (this.model.tokens.bos != null && this._contextTokens[0] === this.model.tokens.bos)
|
|
773
|
+
eraseStartIndex = 1;
|
|
774
|
+
await this.eraseContextTokenRanges([{ start: eraseStartIndex, end: size + eraseStartIndex }]);
|
|
557
775
|
}
|
|
558
776
|
else {
|
|
559
777
|
const ranges = await contextShiftOptions.strategy({
|
|
@@ -563,7 +781,7 @@ export class LlamaContextSequence {
|
|
|
563
781
|
if (ranges == null)
|
|
564
782
|
throw new Error("Invalid delete ranges");
|
|
565
783
|
await this.eraseContextTokenRanges(ranges);
|
|
566
|
-
if (this.nextTokenIndex >= this._context.contextSize)
|
|
784
|
+
if (this.nextTokenIndex >= this._context.contextSize - 1)
|
|
567
785
|
await this.eraseContextTokenRanges([{ start: 0, end: size }]);
|
|
568
786
|
}
|
|
569
787
|
}
|
|
@@ -576,10 +794,11 @@ export class LlamaContextSequence {
|
|
|
576
794
|
* We need this to make it impossible to manually create instances of this class outside the code of this library
|
|
577
795
|
* @internal
|
|
578
796
|
*/
|
|
579
|
-
static _create({ sequenceId, context, contextShift: { size: contextShiftSize = Math.min(100, Math.ceil(context.contextSize / 2)), strategy: contextShiftStrategy = "eraseBeginning" } = {} }) {
|
|
797
|
+
static _create({ sequenceId, context, tokenMeter, contextShift: { size: contextShiftSize = Math.min(100, Math.ceil(context.contextSize / 2)), strategy: contextShiftStrategy = "eraseBeginning" } = {} }) {
|
|
580
798
|
return new LlamaContextSequence({
|
|
581
799
|
sequenceId,
|
|
582
800
|
context,
|
|
801
|
+
tokenMeter,
|
|
583
802
|
contextShift: {
|
|
584
803
|
size: contextShiftSize,
|
|
585
804
|
strategy: contextShiftStrategy
|
|
@@ -587,14 +806,52 @@ export class LlamaContextSequence {
|
|
|
587
806
|
});
|
|
588
807
|
}
|
|
589
808
|
}
|
|
809
|
+
/**
 * Converts a token bias into the two parallel typed arrays passed to
 * `sampleToken` as `tokenBiasKeys` / `tokenBiasValues`.
 *
 * @param tokenBias - a token bias object (read via its `_model` and `_biases`
 * members), a function returning one, or `null`/`undefined` for no bias
 * @param currentModel - the model the bias must have been created with
 * @returns `{ tokenBiasKeys, tokenBiasValues }`; both are `undefined` when there
 * is no bias to apply, otherwise a `Uint32Array` of tokens and a `Float32Array`
 * of the matching bias values, in the iteration order of `_biases`
 * @throws {Error} when the bias' `_model` is not `currentModel`
 */
function getTokenBiasesForAddon(tokenBias, currentModel) {
    const noBias = {
        tokenBiasKeys: undefined,
        tokenBiasValues: undefined
    };

    // resolve lazily-provided biases without reassigning the parameter
    const resolvedTokenBias = tokenBias instanceof Function
        ? tokenBias()
        : tokenBias;

    // covers both "no bias passed" and "the provider function returned nothing"
    if (resolvedTokenBias == null)
        return noBias;

    if (resolvedTokenBias._model !== currentModel)
        throw new Error("This TokenBias instance was created with a different model than the one used by this context. " +
            "Make sure you use the model instance of the context sequence for the TokenBias you use it with.");

    // `_biases` is iterated as [token, bias] pairs
    const biasEntries = [...resolvedTokenBias._biases];
    if (biasEntries.length === 0)
        return noBias;

    return {
        tokenBiasKeys: Uint32Array.from(biasEntries, ([token]) => token),
        tokenBiasValues: Float32Array.from(biasEntries, ([, bias]) => bias)
    };
}
|
|
590
837
|
/**
 * Disposes the context behind `contextRef` if it can still be dereferenced.
 *
 * @param contextRef - an object exposing `deref()` (presumably a `WeakRef` to a
 * context - confirm against the caller)
 */
function disposeContextIfReferenced(contextRef) {
    const context = contextRef.deref();
    if (context == null)
        return;

    // `void` marks the result of `dispose()` as intentionally not awaited
    void context.dispose();
}
|
|
595
842
|
/**
 * Disposes the object behind `contextRef` if it can still be dereferenced.
 *
 * @param contextRef - an object exposing `deref()` (presumably a `WeakRef` to a
 * context sequence - confirm against the caller)
 */
function disposeContextSequenceIfReferenced(contextRef) {
    const context = contextRef.deref();
    if (context == null)
        return;

    context.dispose();
}
|
|
847
|
+
/**
 * Default batch size for a context: the total token capacity across all
 * sequences (`contextSize * sequences`), capped at 512.
 *
 * @param {{contextSize: number, sequences: number}} options
 * @returns {number}
 */
export function getDefaultContextBatchSize({ contextSize, sequences }) {
    const maxDefaultBatchSize = 512;
    return Math.min(contextSize * sequences, maxDefaultBatchSize);
}
|
|
850
|
+
/**
 * Default number of sequences for a context.
 *
 * @returns {number} always `1`
 */
export function getDefaultContextSequences() {
    return 1;
}
|
|
853
|
+
// Context size used when the train context size is null/undefined
const defaultFallbackContextSize = 4096;

/**
 * Default context size for a model: its train context size, or
 * `defaultFallbackContextSize` when that value is `null`/`undefined`.
 *
 * @param {{trainContextSize?: number | null}} options
 * @returns {number}
 */
export function getDefaultModelContextSize({ trainContextSize }) {
    return trainContextSize ?? defaultFallbackContextSize;
}
|
|
600
857
|
//# sourceMappingURL=LlamaContext.js.map
|