node-llama-cpp 3.0.0-beta.44 → 3.0.0-beta.46
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +33 -21
- package/bins/_linux-arm64.moved.txt +1 -0
- package/bins/_linux-armv7l.moved.txt +1 -0
- package/bins/_linux-x64-vulkan.moved.txt +1 -0
- package/bins/_linux-x64.moved.txt +1 -0
- package/bins/_mac-arm64-metal.moved.txt +1 -0
- package/bins/_mac-x64.moved.txt +1 -0
- package/bins/_win-arm64.moved.txt +1 -0
- package/bins/_win-x64-vulkan.moved.txt +1 -0
- package/bins/_win-x64.moved.txt +1 -0
- package/dist/ChatWrapper.d.ts +11 -1
- package/dist/ChatWrapper.js +1 -1
- package/dist/ChatWrapper.js.map +1 -1
- package/dist/bindings/AddonTypes.d.ts +30 -19
- package/dist/bindings/Llama.d.ts +11 -0
- package/dist/bindings/Llama.js +37 -6
- package/dist/bindings/Llama.js.map +1 -1
- package/dist/bindings/consts.d.ts +1 -1
- package/dist/bindings/consts.js +2 -0
- package/dist/bindings/consts.js.map +1 -1
- package/dist/bindings/getLlama.d.ts +41 -5
- package/dist/bindings/getLlama.js +14 -3
- package/dist/bindings/getLlama.js.map +1 -1
- package/dist/bindings/types.d.ts +2 -2
- package/dist/bindings/types.js +2 -0
- package/dist/bindings/types.js.map +1 -1
- package/dist/bindings/utils/cloneLlamaCppRepo.js.map +1 -1
- package/dist/bindings/utils/compileLLamaCpp.d.ts +0 -1
- package/dist/bindings/utils/compileLLamaCpp.js +45 -7
- package/dist/bindings/utils/compileLLamaCpp.js.map +1 -1
- package/dist/bindings/utils/getBestComputeLayersAvailable.d.ts +0 -1
- package/dist/bindings/utils/getBuildFolderNameForBuildOptions.js +2 -2
- package/dist/bindings/utils/getBuildFolderNameForBuildOptions.js.map +1 -1
- package/dist/bindings/utils/getGpuTypesToUseForOption.d.ts +0 -1
- package/dist/bindings/utils/testCmakeBinary.d.ts +0 -1
- package/dist/chatWrappers/AlpacaChatWrapper.js +4 -3
- package/dist/chatWrappers/AlpacaChatWrapper.js.map +1 -1
- package/dist/chatWrappers/ChatMLChatWrapper.js +1 -1
- package/dist/chatWrappers/ChatMLChatWrapper.js.map +1 -1
- package/dist/chatWrappers/FalconChatWrapper.js +5 -4
- package/dist/chatWrappers/FalconChatWrapper.js.map +1 -1
- package/dist/chatWrappers/FunctionaryChatWrapper.d.ts +2 -2
- package/dist/chatWrappers/FunctionaryChatWrapper.js +200 -12
- package/dist/chatWrappers/FunctionaryChatWrapper.js.map +1 -1
- package/dist/chatWrappers/GemmaChatWrapper.js +1 -1
- package/dist/chatWrappers/GemmaChatWrapper.js.map +1 -1
- package/dist/chatWrappers/GeneralChatWrapper.js +5 -4
- package/dist/chatWrappers/GeneralChatWrapper.js.map +1 -1
- package/dist/chatWrappers/Llama2ChatWrapper.js +5 -6
- package/dist/chatWrappers/Llama2ChatWrapper.js.map +1 -1
- package/dist/chatWrappers/Llama3ChatWrapper.js +1 -1
- package/dist/chatWrappers/Llama3ChatWrapper.js.map +1 -1
- package/dist/chatWrappers/Llama3_1ChatWrapper.d.ts +13 -9
- package/dist/chatWrappers/Llama3_1ChatWrapper.js +92 -38
- package/dist/chatWrappers/Llama3_1ChatWrapper.js.map +1 -1
- package/dist/chatWrappers/MistralChatWrapper.d.ts +15 -0
- package/dist/chatWrappers/MistralChatWrapper.js +169 -0
- package/dist/chatWrappers/MistralChatWrapper.js.map +1 -0
- package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.d.ts +25 -1
- package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.js +50 -12
- package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.js.map +1 -1
- package/dist/chatWrappers/generic/TemplateChatWrapper.d.ts +22 -16
- package/dist/chatWrappers/generic/TemplateChatWrapper.js +28 -24
- package/dist/chatWrappers/generic/TemplateChatWrapper.js.map +1 -1
- package/dist/chatWrappers/generic/utils/chatHistoryFunctionCallMessageTemplate.d.ts +1 -1
- package/dist/chatWrappers/utils/chunkChatItems.d.ts +10 -0
- package/dist/chatWrappers/utils/chunkChatItems.js +44 -0
- package/dist/chatWrappers/utils/chunkChatItems.js.map +1 -0
- package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.js +37 -26
- package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.js.map +1 -1
- package/dist/chatWrappers/utils/jsonDumps.d.ts +1 -1
- package/dist/chatWrappers/utils/jsonDumps.js +2 -2
- package/dist/chatWrappers/utils/jsonDumps.js.map +1 -1
- package/dist/chatWrappers/utils/resolveChatWrapper.d.ts +30 -6
- package/dist/chatWrappers/utils/resolveChatWrapper.js +71 -25
- package/dist/chatWrappers/utils/resolveChatWrapper.js.map +1 -1
- package/dist/cli/cli.js +2 -6
- package/dist/cli/cli.js.map +1 -1
- package/dist/cli/commands/ChatCommand.d.ts +2 -1
- package/dist/cli/commands/ChatCommand.js +83 -53
- package/dist/cli/commands/ChatCommand.js.map +1 -1
- package/dist/cli/commands/CompleteCommand.d.ts +2 -1
- package/dist/cli/commands/CompleteCommand.js +58 -30
- package/dist/cli/commands/CompleteCommand.js.map +1 -1
- package/dist/cli/commands/DebugCommand.js +1 -1
- package/dist/cli/commands/DebugCommand.js.map +1 -1
- package/dist/cli/commands/InfillCommand.d.ts +2 -1
- package/dist/cli/commands/InfillCommand.js +58 -30
- package/dist/cli/commands/InfillCommand.js.map +1 -1
- package/dist/cli/commands/InitCommand.js +1 -1
- package/dist/cli/commands/PullCommand.d.ts +2 -1
- package/dist/cli/commands/PullCommand.js +85 -44
- package/dist/cli/commands/PullCommand.js.map +1 -1
- package/dist/cli/commands/inspect/InspectCommand.js +5 -3
- package/dist/cli/commands/inspect/InspectCommand.js.map +1 -1
- package/dist/cli/commands/inspect/commands/InspectEstimateCommand.d.ts +12 -0
- package/dist/cli/commands/inspect/commands/InspectEstimateCommand.js +225 -0
- package/dist/cli/commands/inspect/commands/InspectEstimateCommand.js.map +1 -0
- package/dist/cli/commands/inspect/commands/InspectGgufCommand.js +17 -4
- package/dist/cli/commands/inspect/commands/InspectGgufCommand.js.map +1 -1
- package/dist/cli/commands/inspect/commands/InspectGpuCommand.js +31 -9
- package/dist/cli/commands/inspect/commands/InspectGpuCommand.js.map +1 -1
- package/dist/cli/commands/inspect/commands/InspectMeasureCommand.js +7 -4
- package/dist/cli/commands/inspect/commands/InspectMeasureCommand.js.map +1 -1
- package/dist/cli/commands/source/SourceCommand.d.ts +4 -0
- package/dist/cli/commands/source/SourceCommand.js +19 -0
- package/dist/cli/commands/source/SourceCommand.js.map +1 -0
- package/dist/cli/commands/{BuildCommand.d.ts → source/commands/BuildCommand.d.ts} +1 -2
- package/dist/cli/commands/{BuildCommand.js → source/commands/BuildCommand.js} +21 -19
- package/dist/cli/commands/source/commands/BuildCommand.js.map +1 -0
- package/dist/cli/commands/{ClearCommand.js → source/commands/ClearCommand.js} +6 -6
- package/dist/cli/commands/source/commands/ClearCommand.js.map +1 -0
- package/dist/cli/commands/{DownloadCommand.d.ts → source/commands/DownloadCommand.d.ts} +1 -2
- package/dist/cli/commands/{DownloadCommand.js → source/commands/DownloadCommand.js} +26 -22
- package/dist/cli/commands/source/commands/DownloadCommand.js.map +1 -0
- package/dist/cli/recommendedModels.js +192 -23
- package/dist/cli/recommendedModels.js.map +1 -1
- package/dist/cli/utils/ConsoleInteraction.d.ts +0 -1
- package/dist/cli/utils/ConsoleTable.js.map +1 -1
- package/dist/cli/utils/basicChooseFromListConsoleInteraction.js.map +1 -1
- package/dist/cli/utils/interactivelyAskForModel.js +6 -17
- package/dist/cli/utils/interactivelyAskForModel.js.map +1 -1
- package/dist/cli/utils/printCommonInfoLines.js +6 -3
- package/dist/cli/utils/printCommonInfoLines.js.map +1 -1
- package/dist/cli/utils/renderModelCompatibilityPercentageWithColors.d.ts +6 -0
- package/dist/cli/utils/renderModelCompatibilityPercentageWithColors.js +14 -0
- package/dist/cli/utils/renderModelCompatibilityPercentageWithColors.js.map +1 -0
- package/dist/cli/utils/resolveModelRecommendationFileOptions.d.ts +1 -1
- package/dist/cli/utils/withCliCommandDescriptionDocsUrl.js.map +1 -1
- package/dist/commands.d.ts +3 -3
- package/dist/commands.js +3 -3
- package/dist/commands.js.map +1 -1
- package/dist/config.d.ts +7 -3
- package/dist/config.js +10 -6
- package/dist/config.js.map +1 -1
- package/dist/evaluator/LlamaChat/LlamaChat.d.ts +17 -2
- package/dist/evaluator/LlamaChat/LlamaChat.js +24 -12
- package/dist/evaluator/LlamaChat/LlamaChat.js.map +1 -1
- package/dist/evaluator/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.js +3 -1
- package/dist/evaluator/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.js.map +1 -1
- package/dist/evaluator/LlamaChatSession/LlamaChatSession.d.ts +21 -13
- package/dist/evaluator/LlamaChatSession/LlamaChatSession.js +15 -14
- package/dist/evaluator/LlamaChatSession/LlamaChatSession.js.map +1 -1
- package/dist/evaluator/LlamaChatSession/utils/LlamaChatSessionPromptCompletionEngine.d.ts +1 -0
- package/dist/evaluator/LlamaChatSession/utils/LlamaChatSessionPromptCompletionEngine.js.map +1 -1
- package/dist/evaluator/LlamaChatSession/utils/defineChatSessionFunction.d.ts +7 -2
- package/dist/evaluator/LlamaChatSession/utils/defineChatSessionFunction.js +5 -0
- package/dist/evaluator/LlamaChatSession/utils/defineChatSessionFunction.js.map +1 -1
- package/dist/evaluator/LlamaCompletion.d.ts +18 -4
- package/dist/evaluator/LlamaCompletion.js +51 -22
- package/dist/evaluator/LlamaCompletion.js.map +1 -1
- package/dist/evaluator/LlamaContext/LlamaContext.d.ts +21 -0
- package/dist/evaluator/LlamaContext/LlamaContext.js +261 -133
- package/dist/evaluator/LlamaContext/LlamaContext.js.map +1 -1
- package/dist/evaluator/LlamaContext/LlamaSampler.d.ts +1 -0
- package/dist/evaluator/LlamaContext/LlamaSampler.js +31 -0
- package/dist/evaluator/LlamaContext/LlamaSampler.js.map +1 -0
- package/dist/evaluator/LlamaContext/types.d.ts +77 -9
- package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies/maximumParallelismStrategy.js.map +1 -1
- package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizationStrategy.js +1 -1
- package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizationStrategy.js.map +1 -1
- package/dist/evaluator/LlamaEmbedding.d.ts +21 -0
- package/dist/evaluator/LlamaEmbedding.js +53 -0
- package/dist/evaluator/LlamaEmbedding.js.map +1 -0
- package/dist/evaluator/LlamaEmbeddingContext.d.ts +1 -5
- package/dist/evaluator/LlamaEmbeddingContext.js +6 -8
- package/dist/evaluator/LlamaEmbeddingContext.js.map +1 -1
- package/dist/evaluator/LlamaGrammar.d.ts +9 -10
- package/dist/evaluator/LlamaGrammar.js +10 -5
- package/dist/evaluator/LlamaGrammar.js.map +1 -1
- package/dist/evaluator/LlamaGrammarEvaluationState.d.ts +7 -3
- package/dist/evaluator/LlamaGrammarEvaluationState.js +8 -4
- package/dist/evaluator/LlamaGrammarEvaluationState.js.map +1 -1
- package/dist/evaluator/LlamaJsonSchemaGrammar.d.ts +3 -0
- package/dist/evaluator/LlamaJsonSchemaGrammar.js +3 -0
- package/dist/evaluator/LlamaJsonSchemaGrammar.js.map +1 -1
- package/dist/evaluator/LlamaModel/LlamaModel.d.ts +28 -15
- package/dist/evaluator/LlamaModel/LlamaModel.js +66 -51
- package/dist/evaluator/LlamaModel/LlamaModel.js.map +1 -1
- package/dist/evaluator/LlamaModel/utils/TokenAttributes.d.ts +10 -10
- package/dist/evaluator/LlamaModel/utils/TokenAttributes.js +10 -10
- package/dist/evaluator/LlamaModel/utils/TokenAttributes.js.map +1 -1
- package/dist/evaluator/TokenBias.d.ts +20 -8
- package/dist/evaluator/TokenBias.js +44 -12
- package/dist/evaluator/TokenBias.js.map +1 -1
- package/dist/evaluator/TokenMeter.d.ts +3 -12
- package/dist/evaluator/TokenMeter.js +4 -16
- package/dist/evaluator/TokenMeter.js.map +1 -1
- package/dist/gguf/fileReaders/GgufFileReader.d.ts +0 -1
- package/dist/gguf/fileReaders/GgufFileReader.js.map +1 -1
- package/dist/gguf/fileReaders/GgufFsFileReader.d.ts +0 -2
- package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.d.ts +5 -3
- package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.js +26 -13
- package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.js.map +1 -1
- package/dist/gguf/insights/GgufInsightsConfigurationResolver.d.ts +57 -1
- package/dist/gguf/insights/GgufInsightsConfigurationResolver.js +86 -4
- package/dist/gguf/insights/GgufInsightsConfigurationResolver.js.map +1 -1
- package/dist/gguf/insights/utils/scoreLevels.js.map +1 -1
- package/dist/gguf/readGgufFileInfo.d.ts +18 -6
- package/dist/gguf/readGgufFileInfo.js +8 -3
- package/dist/gguf/readGgufFileInfo.js.map +1 -1
- package/dist/gguf/types/GgufMetadataTypes.d.ts +18 -2
- package/dist/gguf/types/GgufMetadataTypes.js +16 -1
- package/dist/gguf/types/GgufMetadataTypes.js.map +1 -1
- package/dist/gguf/utils/convertMetadataKeyValueRecordToNestedObject.js +2 -0
- package/dist/gguf/utils/convertMetadataKeyValueRecordToNestedObject.js.map +1 -1
- package/dist/gguf/utils/getGgufFileTypeName.d.ts +1 -1
- package/dist/gguf/utils/resolveBinarySplitGgufPartUrls.js +1 -1
- package/dist/gguf/utils/resolveBinarySplitGgufPartUrls.js.map +1 -1
- package/dist/index.d.ts +8 -4
- package/dist/index.js +5 -3
- package/dist/index.js.map +1 -1
- package/dist/tsconfig.tsbuildinfo +1 -0
- package/dist/types.d.ts +1 -0
- package/dist/types.js.map +1 -1
- package/dist/utils/LlamaText.d.ts +3 -0
- package/dist/utils/LlamaText.js +7 -4
- package/dist/utils/LlamaText.js.map +1 -1
- package/dist/utils/LruCache.d.ts +2 -2
- package/dist/utils/LruCache.js.map +1 -1
- package/dist/utils/OverridesObject.d.ts +7 -0
- package/dist/utils/OverridesObject.js +2 -0
- package/dist/utils/OverridesObject.js.map +1 -0
- package/dist/utils/StopGenerationDetector.js.map +1 -1
- package/dist/utils/ThreadsSplitter.d.ts +32 -0
- package/dist/utils/ThreadsSplitter.js +177 -0
- package/dist/utils/ThreadsSplitter.js.map +1 -0
- package/dist/utils/TokenStreamRegulator.js.map +1 -1
- package/dist/utils/appendUserMessageToChatHistory.d.ts +4 -0
- package/dist/utils/appendUserMessageToChatHistory.js +4 -0
- package/dist/utils/appendUserMessageToChatHistory.js.map +1 -1
- package/dist/utils/compareTokens.d.ts +1 -1
- package/dist/utils/compareTokens.js.map +1 -1
- package/dist/utils/createModelDownloader.d.ts +94 -6
- package/dist/utils/createModelDownloader.js +174 -46
- package/dist/utils/createModelDownloader.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfOr.js.map +1 -1
- package/dist/utils/gbnfJson/utils/validateObjectAgainstGbnfSchema.js +1 -1
- package/dist/utils/gbnfJson/utils/validateObjectAgainstGbnfSchema.js.map +1 -1
- package/dist/utils/getGrammarsFolder.js +1 -1
- package/dist/utils/getGrammarsFolder.js.map +1 -1
- package/dist/utils/gitReleaseBundles.js.map +1 -1
- package/dist/utils/modelFileAccesTokens.d.ts +4 -0
- package/dist/utils/modelFileAccesTokens.js +40 -0
- package/dist/utils/modelFileAccesTokens.js.map +1 -0
- package/dist/utils/parseModelFileName.js.map +1 -1
- package/dist/utils/parseTextTemplate.js.map +1 -1
- package/dist/utils/resolveGithubRelease.d.ts +1 -1
- package/dist/utils/resolveLastTokens.js.map +1 -1
- package/dist/utils/spawnCommand.d.ts +0 -1
- package/dist/utils/truncateTextAndRoundToWords.js +3 -1
- package/dist/utils/truncateTextAndRoundToWords.js.map +1 -1
- package/dist/utils/withOra.js +1 -1
- package/dist/utils/withOra.js.map +1 -1
- package/dist/utils/withProgressLog.d.ts +0 -1
- package/dist/utils/wrapAbortSignal.d.ts +0 -1
- package/llama/CMakeLists.txt +20 -12
- package/llama/addon/AddonContext.cpp +69 -202
- package/llama/addon/AddonContext.h +4 -5
- package/llama/addon/AddonGrammar.cpp +8 -11
- package/llama/addon/AddonGrammar.h +4 -3
- package/llama/addon/AddonGrammarEvaluationState.cpp +9 -10
- package/llama/addon/AddonGrammarEvaluationState.h +3 -1
- package/llama/addon/AddonModel.cpp +6 -5
- package/llama/addon/AddonSampler.cpp +513 -0
- package/llama/addon/AddonSampler.h +65 -0
- package/llama/addon/RingBuffer.h +109 -0
- package/llama/addon/addon.cpp +7 -0
- package/llama/addon/globals/addonLog.cpp +2 -1
- package/llama/binariesGithubRelease.json +1 -1
- package/llama/gitRelease.bundle +0 -0
- package/llama/grammars/README.md +1 -1
- package/llama/llama.cpp.info.json +1 -1
- package/package.json +71 -46
- package/templates/packed/electron-typescript-react.json +1 -1
- package/templates/packed/node-typescript.json +1 -1
- package/bins/linux-arm64/_nlcBuildMetadata.json +0 -1
- package/bins/linux-arm64/libggml.so +0 -0
- package/bins/linux-arm64/libllama.so +0 -0
- package/bins/linux-arm64/llama-addon.node +0 -0
- package/bins/linux-armv7l/_nlcBuildMetadata.json +0 -1
- package/bins/linux-armv7l/libggml.so +0 -0
- package/bins/linux-armv7l/libllama.so +0 -0
- package/bins/linux-armv7l/llama-addon.node +0 -0
- package/bins/linux-x64/_nlcBuildMetadata.json +0 -1
- package/bins/linux-x64/libggml.so +0 -0
- package/bins/linux-x64/libllama.so +0 -0
- package/bins/linux-x64/llama-addon.node +0 -0
- package/bins/linux-x64-vulkan/_nlcBuildMetadata.json +0 -1
- package/bins/linux-x64-vulkan/libggml.so +0 -0
- package/bins/linux-x64-vulkan/libllama.so +0 -0
- package/bins/linux-x64-vulkan/llama-addon.node +0 -0
- package/bins/linux-x64-vulkan/vulkan-shaders-gen +0 -0
- package/bins/mac-arm64-metal/_nlcBuildMetadata.json +0 -1
- package/bins/mac-arm64-metal/ggml-common.h +0 -1833
- package/bins/mac-arm64-metal/ggml-metal.metal +0 -6168
- package/bins/mac-arm64-metal/libggml.dylib +0 -0
- package/bins/mac-arm64-metal/libllama.dylib +0 -0
- package/bins/mac-arm64-metal/llama-addon.node +0 -0
- package/bins/mac-x64/_nlcBuildMetadata.json +0 -1
- package/bins/mac-x64/libggml.dylib +0 -0
- package/bins/mac-x64/libllama.dylib +0 -0
- package/bins/mac-x64/llama-addon.node +0 -0
- package/bins/win-arm64/_nlcBuildMetadata.json +0 -1
- package/bins/win-arm64/ggml.dll +0 -0
- package/bins/win-arm64/llama-addon.exp +0 -0
- package/bins/win-arm64/llama-addon.lib +0 -0
- package/bins/win-arm64/llama-addon.node +0 -0
- package/bins/win-arm64/llama.dll +0 -0
- package/bins/win-x64/_nlcBuildMetadata.json +0 -1
- package/bins/win-x64/ggml.dll +0 -0
- package/bins/win-x64/llama-addon.exp +0 -0
- package/bins/win-x64/llama-addon.lib +0 -0
- package/bins/win-x64/llama-addon.node +0 -0
- package/bins/win-x64/llama.dll +0 -0
- package/bins/win-x64-vulkan/_nlcBuildMetadata.json +0 -1
- package/bins/win-x64-vulkan/ggml.dll +0 -0
- package/bins/win-x64-vulkan/llama-addon.exp +0 -0
- package/bins/win-x64-vulkan/llama-addon.lib +0 -0
- package/bins/win-x64-vulkan/llama-addon.node +0 -0
- package/bins/win-x64-vulkan/llama.dll +0 -0
- package/bins/win-x64-vulkan/vulkan-shaders-gen.exe +0 -0
- package/dist/cli/commands/BuildCommand.js.map +0 -1
- package/dist/cli/commands/ClearCommand.js.map +0 -1
- package/dist/cli/commands/DownloadCommand.js.map +0 -1
- package/dist/utils/DeepPartialObject.d.ts +0 -3
- package/dist/utils/DeepPartialObject.js +0 -2
- package/dist/utils/DeepPartialObject.js.map +0 -1
- /package/dist/cli/commands/{ClearCommand.d.ts → source/commands/ClearCommand.d.ts} +0 -0
|
@@ -3,8 +3,16 @@ import { removeNullFields } from "../../utils/removeNullFields.js";
|
|
|
3
3
|
import { compareTokens } from "../../utils/compareTokens.js";
|
|
4
4
|
import { DisposeGuard } from "../../utils/DisposeGuard.js";
|
|
5
5
|
import { TokenMeter } from "../TokenMeter.js";
|
|
6
|
+
import { UnsupportedError } from "../../utils/UnsupportedError.js";
|
|
6
7
|
import { resolveBatchItemsPrioritizationStrategy } from "./utils/resolveBatchItemsPrioritizationStrategy.js";
|
|
8
|
+
import { LlamaSampler } from "./LlamaSampler.js";
|
|
7
9
|
const defaultLoraScale = 1;
|
|
10
|
+
const shrinkRetriesMinContextSize = 4096;
|
|
11
|
+
const defaultMaxPunishTokens = 64;
|
|
12
|
+
const defaultFailedCreationRemedy = {
|
|
13
|
+
retries: 6,
|
|
14
|
+
autoContextSizeShrink: 0.16
|
|
15
|
+
};
|
|
8
16
|
export class LlamaContext {
|
|
9
17
|
/** @internal */ _llama;
|
|
10
18
|
/** @internal */ _ctx;
|
|
@@ -14,6 +22,9 @@ export class LlamaContext {
|
|
|
14
22
|
/** @internal */ _contextSize;
|
|
15
23
|
/** @internal */ _batchSize;
|
|
16
24
|
/** @internal */ _flashAttention;
|
|
25
|
+
/** @internal */ _idealThreads;
|
|
26
|
+
/** @internal */ _minThreads;
|
|
27
|
+
/** @internal */ _performanceTracking;
|
|
17
28
|
/** @internal */ _totalSequences;
|
|
18
29
|
/** @internal */ _unusedSequenceIds = [];
|
|
19
30
|
/** @internal */ _batchingOptions;
|
|
@@ -26,11 +37,13 @@ export class LlamaContext {
|
|
|
26
37
|
/** @internal */ _nextGeneratedSequenceId = 0;
|
|
27
38
|
/** @internal */ _dispatchDecodeScheduled = false;
|
|
28
39
|
/** @internal */ _batchDispatchPending = false;
|
|
40
|
+
/** @internal */ _threadSplitterConsumer;
|
|
41
|
+
/** @internal */ _freeReservedThreadsTimeout;
|
|
29
42
|
/** @internal */ _currentDispatchBatchHandle = {};
|
|
30
43
|
/** @internal */ _allocatedContextSize;
|
|
31
44
|
/** @internal */ _disposed = false;
|
|
32
45
|
onDispose = new EventRelay();
|
|
33
|
-
constructor({ _model }, { sequences,
|
|
46
|
+
constructor({ _model }, { sequences, contextSize, batchSize, flashAttention = _model.defaultContextFlashAttention, threads, batching: { dispatchSchedule: batchingDispatchSchedule = "nextTick", itemPrioritizationStrategy: batchingItemsPrioritizationStrategy = "maximumParallelism" } = {}, performanceTracking = false, _embeddings }) {
|
|
34
47
|
if (_model.disposed)
|
|
35
48
|
throw new DisposedError();
|
|
36
49
|
this._llama = _model._llama;
|
|
@@ -41,15 +54,23 @@ export class LlamaContext {
|
|
|
41
54
|
this._contextSize = Math.max(2, contextSize);
|
|
42
55
|
this._batchSize = Math.max(batchSize, this._totalSequences);
|
|
43
56
|
this._flashAttention = flashAttention;
|
|
57
|
+
this._idealThreads = typeof threads === "number"
|
|
58
|
+
? this._llama._threadsSplitter.normalizeThreadsValue(threads)
|
|
59
|
+
: this._llama._threadsSplitter.normalizeThreadsValue(threads?.ideal ?? (this._llama.maxThreads === 0
|
|
60
|
+
? this._llama.cpuMathCores
|
|
61
|
+
: this._llama.maxThreads));
|
|
62
|
+
this._minThreads = Math.max(1, typeof threads === "number"
|
|
63
|
+
? 1
|
|
64
|
+
: this._llama._threadsSplitter.normalizeThreadsValue(threads?.min ?? 1));
|
|
65
|
+
this._performanceTracking = !!performanceTracking;
|
|
44
66
|
this._ctx = new this._llama._bindings.AddonContext(this._model._model, removeNullFields({
|
|
45
|
-
seed: seed != null ? Math.max(-1, Math.floor(seed)) : undefined,
|
|
46
67
|
contextSize: this._contextSize * this._totalSequences, // each sequence needs its own <contextSize> of cells
|
|
47
68
|
batchSize: this._batchSize,
|
|
48
69
|
sequences: this._totalSequences,
|
|
49
70
|
flashAttention: this._flashAttention,
|
|
50
|
-
threads:
|
|
71
|
+
threads: this._idealThreads,
|
|
51
72
|
embeddings: _embeddings,
|
|
52
|
-
|
|
73
|
+
performanceTracking: this._performanceTracking
|
|
53
74
|
}));
|
|
54
75
|
this._batchingOptions = {
|
|
55
76
|
dispatchSchedule: batchingDispatchSchedule,
|
|
@@ -58,6 +79,7 @@ export class LlamaContext {
|
|
|
58
79
|
this._gcRegistry = new FinalizationRegistry(this._model._removeLoraUsage);
|
|
59
80
|
this._gcRegistry.register(this, this._loraAdapters);
|
|
60
81
|
this._reclaimUnusedSequenceId = this._reclaimUnusedSequenceId.bind(this);
|
|
82
|
+
this._freeReservedThreads = this._freeReservedThreads.bind(this);
|
|
61
83
|
this._disposeAggregator.add(() => {
|
|
62
84
|
this._disposed = true;
|
|
63
85
|
});
|
|
@@ -111,6 +133,19 @@ export class LlamaContext {
|
|
|
111
133
|
this._ensureNotDisposed();
|
|
112
134
|
return this._ctx.getStateSize();
|
|
113
135
|
}
|
|
136
|
+
/** The number of threads currently used to evaluate tokens */
|
|
137
|
+
get currentThreads() {
|
|
138
|
+
this._ensureNotDisposed();
|
|
139
|
+
return this._ctx.getThreads();
|
|
140
|
+
}
|
|
141
|
+
/**
|
|
142
|
+
* The number of threads that are preferred to be used to evaluate tokens.
|
|
143
|
+
*
|
|
144
|
+
* The actual number of threads used may be lower when other evaluations are running in parallel.
|
|
145
|
+
*/
|
|
146
|
+
get idealThreads() {
|
|
147
|
+
return this._idealThreads;
|
|
148
|
+
}
|
|
114
149
|
getAllocatedContextSize() {
|
|
115
150
|
this._ensureNotDisposed();
|
|
116
151
|
if (this._allocatedContextSize == null)
|
|
@@ -263,13 +298,22 @@ export class LlamaContext {
|
|
|
263
298
|
i--;
|
|
264
299
|
}
|
|
265
300
|
}
|
|
266
|
-
|
|
267
|
-
|
|
301
|
+
if (currentBatchSize !== 0) {
|
|
302
|
+
const allocationResult = this._threadSplitterConsumer?.getAllocationToConsume();
|
|
303
|
+
const [threadsToUse, consumerHandle] = allocationResult instanceof Promise
|
|
304
|
+
? await allocationResult ?? []
|
|
305
|
+
: allocationResult ?? [];
|
|
306
|
+
try {
|
|
307
|
+
if (threadsToUse != null)
|
|
308
|
+
this._ctx.setThreads(threadsToUse);
|
|
268
309
|
await this._ctx.decodeBatch();
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
310
|
+
consumerHandle?.dispose();
|
|
311
|
+
}
|
|
312
|
+
catch (err) {
|
|
313
|
+
consumerHandle?.dispose();
|
|
314
|
+
this._dispatchErrorForQueuedDecodesAndDequeue(currentQueuedDecodeItems, err);
|
|
315
|
+
return;
|
|
316
|
+
}
|
|
273
317
|
}
|
|
274
318
|
for (const action of afterDecodeActions) {
|
|
275
319
|
const [accept, reject] = action.response;
|
|
@@ -287,36 +331,47 @@ export class LlamaContext {
|
|
|
287
331
|
const prioritizationStrategy = resolvePrioritizationStrategy();
|
|
288
332
|
if (prioritizationStrategy == null)
|
|
289
333
|
return; // all queued items are rejected and dequeued when we get here
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
preventDisposalHandle
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
334
|
+
this._reserveThreads();
|
|
335
|
+
try {
|
|
336
|
+
while (shouldHaveAnotherLoop) {
|
|
337
|
+
const orderedQueuedDecodes = getOrderedQueuedDecodes(prioritizationStrategy);
|
|
338
|
+
if (orderedQueuedDecodes == null)
|
|
339
|
+
return; // all queued items are rejected and dequeued when we get here
|
|
340
|
+
const { currentBatchItems, currentBatchSize } = fitQueuedDecodesToABatch(orderedQueuedDecodes, this._batchSize);
|
|
341
|
+
let preventDisposalHandle;
|
|
342
|
+
try {
|
|
343
|
+
preventDisposalHandle = this._backendContextDisposeGuard.createPreventDisposalHandle();
|
|
344
|
+
}
|
|
345
|
+
catch (err) {
|
|
346
|
+
this._dispatchErrorForQueuedDecodesAndDequeue(new Set(this._queuedDecodes), err);
|
|
347
|
+
return;
|
|
348
|
+
}
|
|
349
|
+
try {
|
|
350
|
+
await decodeTokenBatchItems(currentBatchItems, currentBatchSize);
|
|
351
|
+
shouldHaveAnotherLoop = this._queuedDecodes.length > 0;
|
|
352
|
+
}
|
|
353
|
+
finally {
|
|
354
|
+
preventDisposalHandle.dispose();
|
|
355
|
+
}
|
|
309
356
|
}
|
|
310
357
|
}
|
|
358
|
+
finally {
|
|
359
|
+
this._scheduleToFreeReservedThreads();
|
|
360
|
+
}
|
|
311
361
|
});
|
|
312
362
|
}
|
|
313
363
|
/**
|
|
314
364
|
* Print the timings of token evaluation since that last print for this context.
|
|
365
|
+
*
|
|
366
|
+
* Requires the `performanceTracking` option to be enabled.
|
|
367
|
+
*
|
|
315
368
|
* > **Note:** it prints on the `LlamaLogLevel.info` level, so if you set the level of your `Llama` instance higher than that,
|
|
316
369
|
* it won't print anything.
|
|
317
370
|
*/
|
|
318
371
|
async printTimings() {
|
|
319
372
|
this._ensureNotDisposed();
|
|
373
|
+
if (!this._performanceTracking)
|
|
374
|
+
throw new UnsupportedError("Performance tracking is not enabled");
|
|
320
375
|
this._ctx.printTimings();
|
|
321
376
|
await new Promise((accept) => setTimeout(accept, 0)); // wait for the logs to finish printing
|
|
322
377
|
}
|
|
@@ -350,14 +405,6 @@ export class LlamaContext {
|
|
|
350
405
|
});
|
|
351
406
|
}
|
|
352
407
|
/** @internal */
|
|
353
|
-
_acceptTokenOnGrammarEvaluationState(grammarEvaluationState, token) {
|
|
354
|
-
this._ctx.acceptGrammarEvaluationStateToken(grammarEvaluationState._state, token);
|
|
355
|
-
}
|
|
356
|
-
/** @internal */
|
|
357
|
-
_canBeNextTokenForGrammarEvaluationState(grammarEvaluationState, token) {
|
|
358
|
-
return this._ctx.canBeNextTokenForGrammarEvaluationState(grammarEvaluationState._state, token);
|
|
359
|
-
}
|
|
360
|
-
/** @internal */
|
|
361
408
|
_popSequenceId() {
|
|
362
409
|
if (this._unusedSequenceIds.length > 0)
|
|
363
410
|
return this._unusedSequenceIds.shift();
|
|
@@ -417,6 +464,30 @@ export class LlamaContext {
|
|
|
417
464
|
}
|
|
418
465
|
}
|
|
419
466
|
/** @internal */
|
|
467
|
+
_reserveThreads() {
|
|
468
|
+
clearTimeout(this._freeReservedThreadsTimeout);
|
|
469
|
+
delete this._freeReservedThreadsTimeout;
|
|
470
|
+
if (this._threadSplitterConsumer != null)
|
|
471
|
+
return;
|
|
472
|
+
this._threadSplitterConsumer = this._llama._threadsSplitter.createConsumer(this._idealThreads, this._minThreads);
|
|
473
|
+
}
|
|
474
|
+
/** @internal */
|
|
475
|
+
_freeReservedThreads() {
|
|
476
|
+
clearTimeout(this._freeReservedThreadsTimeout);
|
|
477
|
+
delete this._freeReservedThreadsTimeout;
|
|
478
|
+
if (this._threadSplitterConsumer == null)
|
|
479
|
+
return;
|
|
480
|
+
this._threadSplitterConsumer.dispose();
|
|
481
|
+
delete this._threadSplitterConsumer;
|
|
482
|
+
}
|
|
483
|
+
/** @internal */
|
|
484
|
+
_scheduleToFreeReservedThreads() {
|
|
485
|
+
if (this._threadSplitterConsumer == null)
|
|
486
|
+
return;
|
|
487
|
+
clearTimeout(this._freeReservedThreadsTimeout);
|
|
488
|
+
this._freeReservedThreadsTimeout = setTimeout(this._freeReservedThreads, 0);
|
|
489
|
+
}
|
|
490
|
+
/** @internal */
|
|
420
491
|
static async _create(options, { _model }) {
|
|
421
492
|
const sequences = options.sequences ?? getDefaultContextSequences();
|
|
422
493
|
const flashAttention = _model.flashAttentionSupported
|
|
@@ -425,7 +496,13 @@ export class LlamaContext {
|
|
|
425
496
|
const loraOptions = typeof options.lora === "string"
|
|
426
497
|
? { adapters: [{ filePath: options.lora }] }
|
|
427
498
|
: options.lora;
|
|
428
|
-
|
|
499
|
+
let failedCreationRetries = options.failedCreationRemedy === false
|
|
500
|
+
? 0
|
|
501
|
+
: Math.max(0, options.failedCreationRemedy?.retries ?? defaultFailedCreationRemedy.retries);
|
|
502
|
+
const failedCreationAutoContextSizeShrink = options.failedCreationRemedy === false
|
|
503
|
+
? 0
|
|
504
|
+
: options.failedCreationRemedy?.autoContextSizeShrink ?? defaultFailedCreationRemedy.autoContextSizeShrink;
|
|
505
|
+
let contextSize = await _model.fileInsights.configurationResolver.resolveContextContextSize(options.contextSize, {
|
|
429
506
|
batchSize: options.batchSize,
|
|
430
507
|
sequences: sequences,
|
|
431
508
|
modelGpuLayers: _model.gpuLayers,
|
|
@@ -436,69 +513,101 @@ export class LlamaContext {
|
|
|
436
513
|
ignoreMemorySafetyChecks: options.ignoreMemorySafetyChecks,
|
|
437
514
|
isEmbeddingContext: options._embeddings
|
|
438
515
|
});
|
|
439
|
-
const
|
|
440
|
-
|
|
441
|
-
contextSize
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
flashAttention
|
|
447
|
-
}).gpuVram;
|
|
448
|
-
const context = new LlamaContext({ _model }, { ...options, contextSize, batchSize, sequences, flashAttention });
|
|
516
|
+
const minContextSize = options.contextSize === "auto"
|
|
517
|
+
? shrinkRetriesMinContextSize
|
|
518
|
+
: (typeof options.contextSize === "object" && typeof options.contextSize.min === "number")
|
|
519
|
+
? options.contextSize.min
|
|
520
|
+
: typeof options.contextSize === "number"
|
|
521
|
+
? options.contextSize
|
|
522
|
+
: shrinkRetriesMinContextSize;
|
|
449
523
|
const { createSignal } = options;
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
}
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
524
|
+
async function createContext(contextSize) {
|
|
525
|
+
const batchSize = options.batchSize ?? getDefaultContextBatchSize({ contextSize, sequences });
|
|
526
|
+
const vramRequiredEstimate = _model.fileInsights.estimateContextResourceRequirements({
|
|
527
|
+
contextSize,
|
|
528
|
+
sequences,
|
|
529
|
+
isEmbeddingContext: options._embeddings,
|
|
530
|
+
modelGpuLayers: _model.gpuLayers,
|
|
531
|
+
batchSize,
|
|
532
|
+
flashAttention
|
|
533
|
+
}).gpuVram;
|
|
534
|
+
const context = new LlamaContext({ _model }, { ...options, contextSize, batchSize, sequences, flashAttention });
|
|
535
|
+
const contextCreationMemoryReservation = options.ignoreMemorySafetyChecks
|
|
536
|
+
? null
|
|
537
|
+
: _model._llama._vramOrchestrator.reserveMemory(vramRequiredEstimate);
|
|
538
|
+
try {
|
|
539
|
+
if (createSignal?.aborted)
|
|
540
|
+
throw createSignal.reason;
|
|
541
|
+
const contextLoaded = await context._ctx.init();
|
|
542
|
+
if (createSignal?.aborted) {
|
|
543
|
+
if (contextLoaded)
|
|
544
|
+
await context._ctx.dispose();
|
|
545
|
+
throw createSignal.reason;
|
|
546
|
+
}
|
|
547
|
+
else if (!contextLoaded)
|
|
548
|
+
throw new Error("Failed to create context");
|
|
549
|
+
contextCreationMemoryReservation?.dispose?.();
|
|
550
|
+
if (loraOptions != null && loraOptions.adapters.length > 0) {
|
|
551
|
+
let loadedAdapters = 0;
|
|
552
|
+
for (const adapter of loraOptions.adapters) {
|
|
472
553
|
try {
|
|
473
|
-
|
|
554
|
+
await context._setLora({
|
|
555
|
+
filePath: adapter.filePath,
|
|
556
|
+
scale: adapter.scale
|
|
557
|
+
});
|
|
558
|
+
loadedAdapters++;
|
|
559
|
+
try {
|
|
560
|
+
loraOptions.onLoadProgress?.(loadedAdapters / loraOptions.adapters.length);
|
|
561
|
+
}
|
|
562
|
+
catch (err) {
|
|
563
|
+
console.error(err);
|
|
564
|
+
}
|
|
474
565
|
}
|
|
475
566
|
catch (err) {
|
|
476
|
-
|
|
567
|
+
await context.dispose();
|
|
568
|
+
throw err;
|
|
569
|
+
}
|
|
570
|
+
if (createSignal?.aborted) {
|
|
571
|
+
await context.dispose();
|
|
572
|
+
throw createSignal.reason;
|
|
477
573
|
}
|
|
478
574
|
}
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
575
|
+
}
|
|
576
|
+
else if (loraOptions?.onLoadProgress != null) {
|
|
577
|
+
try {
|
|
578
|
+
loraOptions.onLoadProgress(1);
|
|
482
579
|
}
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
throw createSignal.reason;
|
|
580
|
+
catch (err) {
|
|
581
|
+
console.error(err);
|
|
486
582
|
}
|
|
487
583
|
}
|
|
584
|
+
return context;
|
|
488
585
|
}
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
loraOptions.onLoadProgress(1);
|
|
492
|
-
}
|
|
493
|
-
catch (err) {
|
|
494
|
-
console.error(err);
|
|
495
|
-
}
|
|
586
|
+
finally {
|
|
587
|
+
contextCreationMemoryReservation?.dispose?.();
|
|
496
588
|
}
|
|
497
|
-
return context;
|
|
498
589
|
}
|
|
499
|
-
|
|
500
|
-
|
|
590
|
+
while (failedCreationRetries >= 0) {
|
|
591
|
+
try {
|
|
592
|
+
return await createContext(contextSize);
|
|
593
|
+
}
|
|
594
|
+
catch (err) {
|
|
595
|
+
if (failedCreationRetries === 0 || (createSignal?.aborted && err === createSignal.reason))
|
|
596
|
+
throw err;
|
|
597
|
+
failedCreationRetries--;
|
|
598
|
+
let newContextSize = typeof failedCreationAutoContextSizeShrink === "number"
|
|
599
|
+
? Math.floor(contextSize * (1 - failedCreationAutoContextSizeShrink))
|
|
600
|
+
: Math.floor(failedCreationAutoContextSizeShrink(contextSize));
|
|
601
|
+
if (!Number.isFinite(newContextSize))
|
|
602
|
+
throw err;
|
|
603
|
+
if (newContextSize < minContextSize)
|
|
604
|
+
newContextSize = minContextSize;
|
|
605
|
+
if (newContextSize >= contextSize)
|
|
606
|
+
throw err;
|
|
607
|
+
contextSize = newContextSize;
|
|
608
|
+
}
|
|
501
609
|
}
|
|
610
|
+
throw new Error("Failed to create context");
|
|
502
611
|
}
|
|
503
612
|
}
|
|
504
613
|
export class LlamaContextSequence {
|
|
@@ -639,12 +748,13 @@ export class LlamaContextSequence {
|
|
|
639
748
|
});
|
|
640
749
|
}
|
|
641
750
|
evaluate(tokens, options = {}) {
|
|
642
|
-
const { temperature = 0, minP = 0, topK = 40, topP = 0.95, grammarEvaluationState, repeatPenalty, tokenBias, evaluationPriority = 5, contextShift: { size: contextShiftSize = this._contextShift.size, strategy: contextShiftStrategy = this._contextShift.strategy } = {}, yieldEogToken = false, _noSampling = false } = options;
|
|
751
|
+
const { temperature = 0, minP = 0, topK = 40, topP = 0.95, seed, grammarEvaluationState, repeatPenalty, tokenBias, evaluationPriority = 5, contextShift: { size: contextShiftSize = this._contextShift.size, strategy: contextShiftStrategy = this._contextShift.strategy } = {}, yieldEogToken = false, _noSampling = false } = options;
|
|
643
752
|
return this._evaluate(tokens, {
|
|
644
753
|
temperature,
|
|
645
754
|
minP,
|
|
646
755
|
topK,
|
|
647
756
|
topP,
|
|
757
|
+
seed,
|
|
648
758
|
grammarEvaluationState,
|
|
649
759
|
repeatPenalty,
|
|
650
760
|
tokenBias,
|
|
@@ -677,53 +787,71 @@ export class LlamaContextSequence {
|
|
|
677
787
|
}
|
|
678
788
|
}
|
|
679
789
|
/** @internal */
|
|
680
|
-
async *_evaluate(tokens, { temperature = 0, minP = 0, topK = 40, topP = 0.95, grammarEvaluationState, repeatPenalty, tokenBias, evaluationPriority = 5, generateNewTokens = true, contextShiftOptions, yieldEogToken = false, _noSampling = false }) {
|
|
790
|
+
async *_evaluate(tokens, { temperature = 0, minP = 0, topK = 40, topP = 0.95, seed, grammarEvaluationState, repeatPenalty, tokenBias, evaluationPriority = 5, generateNewTokens = true, contextShiftOptions, yieldEogToken = false, _noSampling = false }) {
|
|
681
791
|
this._ensureNotDisposed();
|
|
682
792
|
let evalTokens = tokens;
|
|
683
793
|
if (evalTokens.length === 0)
|
|
684
794
|
return;
|
|
685
|
-
|
|
686
|
-
|
|
687
|
-
|
|
688
|
-
|
|
689
|
-
|
|
690
|
-
|
|
691
|
-
|
|
692
|
-
|
|
693
|
-
|
|
694
|
-
|
|
695
|
-
|
|
696
|
-
|
|
697
|
-
|
|
698
|
-
|
|
699
|
-
|
|
700
|
-
|
|
701
|
-
|
|
702
|
-
|
|
703
|
-
|
|
704
|
-
|
|
705
|
-
|
|
706
|
-
|
|
707
|
-
|
|
708
|
-
:
|
|
709
|
-
|
|
710
|
-
|
|
711
|
-
|
|
712
|
-
|
|
713
|
-
|
|
714
|
-
|
|
715
|
-
|
|
716
|
-
|
|
717
|
-
|
|
718
|
-
|
|
719
|
-
|
|
720
|
-
|
|
721
|
-
|
|
722
|
-
|
|
723
|
-
|
|
724
|
-
|
|
725
|
-
|
|
726
|
-
|
|
795
|
+
const sampler = new LlamaSampler(this.model);
|
|
796
|
+
try {
|
|
797
|
+
while (true) {
|
|
798
|
+
this._ensureNotDisposed();
|
|
799
|
+
// Evaluate to get the next token.
|
|
800
|
+
const nextToken = await this._decodeTokens(evalTokens, generateNewTokens, evaluationPriority, this._tokenMeter, contextShiftOptions, (batchLogitIndex) => {
|
|
801
|
+
if (_noSampling)
|
|
802
|
+
return null;
|
|
803
|
+
const repeatPenaltyTokens = repeatPenalty?.punishTokens instanceof Function
|
|
804
|
+
? repeatPenalty.punishTokens()
|
|
805
|
+
: repeatPenalty?.punishTokens;
|
|
806
|
+
const maxPunishTokens = Math.max(repeatPenalty?.maxPunishTokens ?? defaultMaxPunishTokens, repeatPenaltyTokens?.length ?? 0);
|
|
807
|
+
const resolvedGrammarEvaluationState = grammarEvaluationState instanceof Function
|
|
808
|
+
? grammarEvaluationState()
|
|
809
|
+
: grammarEvaluationState;
|
|
810
|
+
if (resolvedGrammarEvaluationState != null && resolvedGrammarEvaluationState._llama !== this.model._llama)
|
|
811
|
+
throw new Error("The LlamaGrammar used by passed to this function was created with a different Llama instance than the one used by this sequence's model. Make sure you use the same Llama instance for both the model and the grammar.");
|
|
812
|
+
const { tokenBiasKeys, tokenBiasValues } = getTokenBiasesForAddon(tokenBias, this.model);
|
|
813
|
+
sampler.applyConfig(removeNullFields({
|
|
814
|
+
temperature,
|
|
815
|
+
minP,
|
|
816
|
+
topK,
|
|
817
|
+
topP,
|
|
818
|
+
seed: Math.max(0, Number.isFinite(seed)
|
|
819
|
+
? Math.floor(seed ?? (Date.now() / 1000))
|
|
820
|
+
: Math.floor(Date.now() / 1000)),
|
|
821
|
+
repeatPenalty: repeatPenalty?.penalty,
|
|
822
|
+
repeatPenaltyMaxTokens: maxPunishTokens,
|
|
823
|
+
repeatPenaltyTokens: repeatPenaltyTokens != null
|
|
824
|
+
? Uint32Array.from(repeatPenaltyTokens)
|
|
825
|
+
: undefined,
|
|
826
|
+
repeatPenaltyPresencePenalty: repeatPenalty?.presencePenalty,
|
|
827
|
+
repeatPenaltyFrequencyPenalty: repeatPenalty?.frequencyPenalty,
|
|
828
|
+
tokenBiasKeys,
|
|
829
|
+
tokenBiasValues,
|
|
830
|
+
grammarEvaluationState: resolvedGrammarEvaluationState?._state
|
|
831
|
+
}));
|
|
832
|
+
return withLock(sampler, "sample", async () => {
|
|
833
|
+
if (sampler.disposed)
|
|
834
|
+
return null;
|
|
835
|
+
return this._context._ctx.sampleToken(batchLogitIndex, sampler._sampler);
|
|
836
|
+
});
|
|
837
|
+
});
|
|
838
|
+
if (nextToken === -1)
|
|
839
|
+
throw new Error("Failed to sample next token");
|
|
840
|
+
if (nextToken == null)
|
|
841
|
+
return;
|
|
842
|
+
// the model finished generating text
|
|
843
|
+
if (!yieldEogToken && this._context.model.isEogToken(nextToken))
|
|
844
|
+
break;
|
|
845
|
+
const replacementToken = (yield nextToken);
|
|
846
|
+
// set the tokens for the next evaluation
|
|
847
|
+
if (replacementToken != null)
|
|
848
|
+
evalTokens = [replacementToken];
|
|
849
|
+
else
|
|
850
|
+
evalTokens = [nextToken];
|
|
851
|
+
}
|
|
852
|
+
}
|
|
853
|
+
finally {
|
|
854
|
+
void withLock(sampler, "sample", sampler.asyncDispose);
|
|
727
855
|
}
|
|
728
856
|
}
|
|
729
857
|
/** @internal */
|
|
@@ -814,7 +942,7 @@ function getTokenBiasesForAddon(tokenBias, currentModel) {
|
|
|
814
942
|
};
|
|
815
943
|
if (tokenBias instanceof Function)
|
|
816
944
|
tokenBias = tokenBias();
|
|
817
|
-
if (tokenBias.
|
|
945
|
+
if (tokenBias._tokenizer !== currentModel.tokenizer)
|
|
818
946
|
throw new Error("This TokenBias instance was created with a different model than the one used by this context. " +
|
|
819
947
|
"Make sure you use the model instance of the context sequence for the TokenBias you use it with.");
|
|
820
948
|
const tokenBiasKeys = [];
|