node-llama-cpp 3.0.0-beta.44 → 3.0.0-beta.45
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +33 -21
- package/bins/_linux-arm64.moved.txt +1 -0
- package/bins/_linux-armv7l.moved.txt +1 -0
- package/bins/_linux-x64-vulkan.moved.txt +1 -0
- package/bins/_linux-x64.moved.txt +1 -0
- package/bins/_mac-arm64-metal.moved.txt +1 -0
- package/bins/_mac-x64.moved.txt +1 -0
- package/bins/_win-arm64.moved.txt +1 -0
- package/bins/_win-x64-vulkan.moved.txt +1 -0
- package/bins/_win-x64.moved.txt +1 -0
- package/dist/ChatWrapper.d.ts +11 -1
- package/dist/ChatWrapper.js +1 -1
- package/dist/ChatWrapper.js.map +1 -1
- package/dist/bindings/AddonTypes.d.ts +30 -19
- package/dist/bindings/Llama.d.ts +9 -0
- package/dist/bindings/Llama.js +33 -6
- package/dist/bindings/Llama.js.map +1 -1
- package/dist/bindings/consts.d.ts +1 -1
- package/dist/bindings/consts.js +2 -0
- package/dist/bindings/consts.js.map +1 -1
- package/dist/bindings/getLlama.d.ts +33 -5
- package/dist/bindings/getLlama.js +14 -3
- package/dist/bindings/getLlama.js.map +1 -1
- package/dist/bindings/types.d.ts +2 -2
- package/dist/bindings/types.js +2 -0
- package/dist/bindings/types.js.map +1 -1
- package/dist/bindings/utils/cloneLlamaCppRepo.js.map +1 -1
- package/dist/bindings/utils/compileLLamaCpp.d.ts +0 -1
- package/dist/bindings/utils/compileLLamaCpp.js +45 -7
- package/dist/bindings/utils/compileLLamaCpp.js.map +1 -1
- package/dist/bindings/utils/getBestComputeLayersAvailable.d.ts +0 -1
- package/dist/bindings/utils/getBuildFolderNameForBuildOptions.js +2 -2
- package/dist/bindings/utils/getBuildFolderNameForBuildOptions.js.map +1 -1
- package/dist/bindings/utils/getGpuTypesToUseForOption.d.ts +0 -1
- package/dist/bindings/utils/testCmakeBinary.d.ts +0 -1
- package/dist/chatWrappers/AlpacaChatWrapper.js +4 -3
- package/dist/chatWrappers/AlpacaChatWrapper.js.map +1 -1
- package/dist/chatWrappers/ChatMLChatWrapper.js +1 -1
- package/dist/chatWrappers/ChatMLChatWrapper.js.map +1 -1
- package/dist/chatWrappers/FalconChatWrapper.js +5 -4
- package/dist/chatWrappers/FalconChatWrapper.js.map +1 -1
- package/dist/chatWrappers/FunctionaryChatWrapper.d.ts +2 -2
- package/dist/chatWrappers/FunctionaryChatWrapper.js +200 -12
- package/dist/chatWrappers/FunctionaryChatWrapper.js.map +1 -1
- package/dist/chatWrappers/GemmaChatWrapper.js +1 -1
- package/dist/chatWrappers/GemmaChatWrapper.js.map +1 -1
- package/dist/chatWrappers/GeneralChatWrapper.js +5 -4
- package/dist/chatWrappers/GeneralChatWrapper.js.map +1 -1
- package/dist/chatWrappers/Llama2ChatWrapper.js +5 -6
- package/dist/chatWrappers/Llama2ChatWrapper.js.map +1 -1
- package/dist/chatWrappers/Llama3ChatWrapper.js +1 -1
- package/dist/chatWrappers/Llama3ChatWrapper.js.map +1 -1
- package/dist/chatWrappers/Llama3_1ChatWrapper.d.ts +13 -9
- package/dist/chatWrappers/Llama3_1ChatWrapper.js +92 -38
- package/dist/chatWrappers/Llama3_1ChatWrapper.js.map +1 -1
- package/dist/chatWrappers/MistralChatWrapper.d.ts +15 -0
- package/dist/chatWrappers/MistralChatWrapper.js +169 -0
- package/dist/chatWrappers/MistralChatWrapper.js.map +1 -0
- package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.d.ts +25 -1
- package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.js +50 -12
- package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.js.map +1 -1
- package/dist/chatWrappers/generic/TemplateChatWrapper.d.ts +22 -16
- package/dist/chatWrappers/generic/TemplateChatWrapper.js +28 -24
- package/dist/chatWrappers/generic/TemplateChatWrapper.js.map +1 -1
- package/dist/chatWrappers/generic/utils/chatHistoryFunctionCallMessageTemplate.d.ts +1 -1
- package/dist/chatWrappers/utils/chunkChatItems.d.ts +10 -0
- package/dist/chatWrappers/utils/chunkChatItems.js +44 -0
- package/dist/chatWrappers/utils/chunkChatItems.js.map +1 -0
- package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.js +37 -26
- package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.js.map +1 -1
- package/dist/chatWrappers/utils/jsonDumps.d.ts +1 -1
- package/dist/chatWrappers/utils/jsonDumps.js +2 -2
- package/dist/chatWrappers/utils/jsonDumps.js.map +1 -1
- package/dist/chatWrappers/utils/resolveChatWrapper.d.ts +30 -6
- package/dist/chatWrappers/utils/resolveChatWrapper.js +71 -25
- package/dist/chatWrappers/utils/resolveChatWrapper.js.map +1 -1
- package/dist/cli/cli.js +2 -6
- package/dist/cli/cli.js.map +1 -1
- package/dist/cli/commands/ChatCommand.d.ts +2 -1
- package/dist/cli/commands/ChatCommand.js +83 -53
- package/dist/cli/commands/ChatCommand.js.map +1 -1
- package/dist/cli/commands/CompleteCommand.d.ts +2 -1
- package/dist/cli/commands/CompleteCommand.js +58 -30
- package/dist/cli/commands/CompleteCommand.js.map +1 -1
- package/dist/cli/commands/DebugCommand.js +1 -1
- package/dist/cli/commands/DebugCommand.js.map +1 -1
- package/dist/cli/commands/InfillCommand.d.ts +2 -1
- package/dist/cli/commands/InfillCommand.js +58 -30
- package/dist/cli/commands/InfillCommand.js.map +1 -1
- package/dist/cli/commands/InitCommand.js +1 -1
- package/dist/cli/commands/PullCommand.d.ts +2 -1
- package/dist/cli/commands/PullCommand.js +85 -44
- package/dist/cli/commands/PullCommand.js.map +1 -1
- package/dist/cli/commands/inspect/InspectCommand.js +5 -3
- package/dist/cli/commands/inspect/InspectCommand.js.map +1 -1
- package/dist/cli/commands/inspect/commands/InspectEstimateCommand.d.ts +12 -0
- package/dist/cli/commands/inspect/commands/InspectEstimateCommand.js +225 -0
- package/dist/cli/commands/inspect/commands/InspectEstimateCommand.js.map +1 -0
- package/dist/cli/commands/inspect/commands/InspectGgufCommand.js +17 -4
- package/dist/cli/commands/inspect/commands/InspectGgufCommand.js.map +1 -1
- package/dist/cli/commands/inspect/commands/InspectGpuCommand.js +31 -9
- package/dist/cli/commands/inspect/commands/InspectGpuCommand.js.map +1 -1
- package/dist/cli/commands/inspect/commands/InspectMeasureCommand.js +7 -4
- package/dist/cli/commands/inspect/commands/InspectMeasureCommand.js.map +1 -1
- package/dist/cli/commands/source/SourceCommand.d.ts +4 -0
- package/dist/cli/commands/source/SourceCommand.js +19 -0
- package/dist/cli/commands/source/SourceCommand.js.map +1 -0
- package/dist/cli/commands/{BuildCommand.d.ts → source/commands/BuildCommand.d.ts} +1 -2
- package/dist/cli/commands/{BuildCommand.js → source/commands/BuildCommand.js} +21 -19
- package/dist/cli/commands/source/commands/BuildCommand.js.map +1 -0
- package/dist/cli/commands/{ClearCommand.js → source/commands/ClearCommand.js} +6 -6
- package/dist/cli/commands/source/commands/ClearCommand.js.map +1 -0
- package/dist/cli/commands/{DownloadCommand.d.ts → source/commands/DownloadCommand.d.ts} +1 -2
- package/dist/cli/commands/{DownloadCommand.js → source/commands/DownloadCommand.js} +26 -22
- package/dist/cli/commands/source/commands/DownloadCommand.js.map +1 -0
- package/dist/cli/recommendedModels.js +192 -23
- package/dist/cli/recommendedModels.js.map +1 -1
- package/dist/cli/utils/ConsoleInteraction.d.ts +0 -1
- package/dist/cli/utils/ConsoleTable.js.map +1 -1
- package/dist/cli/utils/basicChooseFromListConsoleInteraction.js.map +1 -1
- package/dist/cli/utils/interactivelyAskForModel.js +6 -17
- package/dist/cli/utils/interactivelyAskForModel.js.map +1 -1
- package/dist/cli/utils/printCommonInfoLines.js +3 -0
- package/dist/cli/utils/printCommonInfoLines.js.map +1 -1
- package/dist/cli/utils/renderModelCompatibilityPercentageWithColors.d.ts +6 -0
- package/dist/cli/utils/renderModelCompatibilityPercentageWithColors.js +14 -0
- package/dist/cli/utils/renderModelCompatibilityPercentageWithColors.js.map +1 -0
- package/dist/cli/utils/resolveModelRecommendationFileOptions.d.ts +1 -1
- package/dist/cli/utils/withCliCommandDescriptionDocsUrl.js.map +1 -1
- package/dist/commands.d.ts +3 -3
- package/dist/commands.js +3 -3
- package/dist/commands.js.map +1 -1
- package/dist/config.d.ts +7 -3
- package/dist/config.js +10 -6
- package/dist/config.js.map +1 -1
- package/dist/evaluator/LlamaChat/LlamaChat.d.ts +17 -2
- package/dist/evaluator/LlamaChat/LlamaChat.js +24 -12
- package/dist/evaluator/LlamaChat/LlamaChat.js.map +1 -1
- package/dist/evaluator/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.js +3 -1
- package/dist/evaluator/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.js.map +1 -1
- package/dist/evaluator/LlamaChatSession/LlamaChatSession.d.ts +21 -13
- package/dist/evaluator/LlamaChatSession/LlamaChatSession.js +15 -14
- package/dist/evaluator/LlamaChatSession/LlamaChatSession.js.map +1 -1
- package/dist/evaluator/LlamaChatSession/utils/LlamaChatSessionPromptCompletionEngine.d.ts +1 -0
- package/dist/evaluator/LlamaChatSession/utils/LlamaChatSessionPromptCompletionEngine.js.map +1 -1
- package/dist/evaluator/LlamaChatSession/utils/defineChatSessionFunction.d.ts +3 -0
- package/dist/evaluator/LlamaChatSession/utils/defineChatSessionFunction.js +3 -0
- package/dist/evaluator/LlamaChatSession/utils/defineChatSessionFunction.js.map +1 -1
- package/dist/evaluator/LlamaCompletion.d.ts +18 -4
- package/dist/evaluator/LlamaCompletion.js +51 -22
- package/dist/evaluator/LlamaCompletion.js.map +1 -1
- package/dist/evaluator/LlamaContext/LlamaContext.d.ts +21 -0
- package/dist/evaluator/LlamaContext/LlamaContext.js +256 -133
- package/dist/evaluator/LlamaContext/LlamaContext.js.map +1 -1
- package/dist/evaluator/LlamaContext/LlamaSampler.d.ts +1 -0
- package/dist/evaluator/LlamaContext/LlamaSampler.js +31 -0
- package/dist/evaluator/LlamaContext/LlamaSampler.js.map +1 -0
- package/dist/evaluator/LlamaContext/types.d.ts +71 -9
- package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies/maximumParallelismStrategy.js.map +1 -1
- package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizationStrategy.js +1 -1
- package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizationStrategy.js.map +1 -1
- package/dist/evaluator/LlamaEmbedding.d.ts +21 -0
- package/dist/evaluator/LlamaEmbedding.js +53 -0
- package/dist/evaluator/LlamaEmbedding.js.map +1 -0
- package/dist/evaluator/LlamaEmbeddingContext.d.ts +1 -5
- package/dist/evaluator/LlamaEmbeddingContext.js +6 -8
- package/dist/evaluator/LlamaEmbeddingContext.js.map +1 -1
- package/dist/evaluator/LlamaGrammar.d.ts +9 -10
- package/dist/evaluator/LlamaGrammar.js +10 -5
- package/dist/evaluator/LlamaGrammar.js.map +1 -1
- package/dist/evaluator/LlamaGrammarEvaluationState.d.ts +7 -3
- package/dist/evaluator/LlamaGrammarEvaluationState.js +8 -4
- package/dist/evaluator/LlamaGrammarEvaluationState.js.map +1 -1
- package/dist/evaluator/LlamaJsonSchemaGrammar.d.ts +3 -0
- package/dist/evaluator/LlamaJsonSchemaGrammar.js +3 -0
- package/dist/evaluator/LlamaJsonSchemaGrammar.js.map +1 -1
- package/dist/evaluator/LlamaModel/LlamaModel.d.ts +28 -15
- package/dist/evaluator/LlamaModel/LlamaModel.js +66 -51
- package/dist/evaluator/LlamaModel/LlamaModel.js.map +1 -1
- package/dist/evaluator/LlamaModel/utils/TokenAttributes.d.ts +10 -10
- package/dist/evaluator/LlamaModel/utils/TokenAttributes.js +10 -10
- package/dist/evaluator/LlamaModel/utils/TokenAttributes.js.map +1 -1
- package/dist/evaluator/TokenBias.d.ts +20 -8
- package/dist/evaluator/TokenBias.js +44 -12
- package/dist/evaluator/TokenBias.js.map +1 -1
- package/dist/evaluator/TokenMeter.d.ts +3 -12
- package/dist/evaluator/TokenMeter.js +4 -16
- package/dist/evaluator/TokenMeter.js.map +1 -1
- package/dist/gguf/fileReaders/GgufFileReader.d.ts +0 -1
- package/dist/gguf/fileReaders/GgufFileReader.js.map +1 -1
- package/dist/gguf/fileReaders/GgufFsFileReader.d.ts +0 -2
- package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.d.ts +5 -3
- package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.js +26 -13
- package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.js.map +1 -1
- package/dist/gguf/insights/GgufInsightsConfigurationResolver.d.ts +57 -1
- package/dist/gguf/insights/GgufInsightsConfigurationResolver.js +86 -4
- package/dist/gguf/insights/GgufInsightsConfigurationResolver.js.map +1 -1
- package/dist/gguf/insights/utils/scoreLevels.js.map +1 -1
- package/dist/gguf/readGgufFileInfo.d.ts +18 -6
- package/dist/gguf/readGgufFileInfo.js +8 -3
- package/dist/gguf/readGgufFileInfo.js.map +1 -1
- package/dist/gguf/types/GgufMetadataTypes.d.ts +18 -2
- package/dist/gguf/types/GgufMetadataTypes.js +16 -1
- package/dist/gguf/types/GgufMetadataTypes.js.map +1 -1
- package/dist/gguf/utils/convertMetadataKeyValueRecordToNestedObject.js +2 -0
- package/dist/gguf/utils/convertMetadataKeyValueRecordToNestedObject.js.map +1 -1
- package/dist/gguf/utils/getGgufFileTypeName.d.ts +1 -1
- package/dist/gguf/utils/resolveBinarySplitGgufPartUrls.js +1 -1
- package/dist/gguf/utils/resolveBinarySplitGgufPartUrls.js.map +1 -1
- package/dist/index.d.ts +8 -4
- package/dist/index.js +5 -3
- package/dist/index.js.map +1 -1
- package/dist/tsconfig.tsbuildinfo +1 -0
- package/dist/types.d.ts +1 -0
- package/dist/types.js.map +1 -1
- package/dist/utils/LlamaText.d.ts +3 -0
- package/dist/utils/LlamaText.js +7 -4
- package/dist/utils/LlamaText.js.map +1 -1
- package/dist/utils/LruCache.d.ts +2 -2
- package/dist/utils/LruCache.js.map +1 -1
- package/dist/utils/OverridesObject.d.ts +7 -0
- package/dist/utils/OverridesObject.js +2 -0
- package/dist/utils/OverridesObject.js.map +1 -0
- package/dist/utils/StopGenerationDetector.js.map +1 -1
- package/dist/utils/ThreadsSplitter.d.ts +26 -0
- package/dist/utils/ThreadsSplitter.js +164 -0
- package/dist/utils/ThreadsSplitter.js.map +1 -0
- package/dist/utils/TokenStreamRegulator.js.map +1 -1
- package/dist/utils/appendUserMessageToChatHistory.d.ts +4 -0
- package/dist/utils/appendUserMessageToChatHistory.js +4 -0
- package/dist/utils/appendUserMessageToChatHistory.js.map +1 -1
- package/dist/utils/compareTokens.d.ts +1 -1
- package/dist/utils/compareTokens.js.map +1 -1
- package/dist/utils/createModelDownloader.d.ts +94 -6
- package/dist/utils/createModelDownloader.js +174 -46
- package/dist/utils/createModelDownloader.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfOr.js.map +1 -1
- package/dist/utils/gbnfJson/utils/validateObjectAgainstGbnfSchema.js +1 -1
- package/dist/utils/gbnfJson/utils/validateObjectAgainstGbnfSchema.js.map +1 -1
- package/dist/utils/getGrammarsFolder.js +1 -1
- package/dist/utils/getGrammarsFolder.js.map +1 -1
- package/dist/utils/gitReleaseBundles.js.map +1 -1
- package/dist/utils/modelFileAccesTokens.d.ts +4 -0
- package/dist/utils/modelFileAccesTokens.js +40 -0
- package/dist/utils/modelFileAccesTokens.js.map +1 -0
- package/dist/utils/parseModelFileName.js.map +1 -1
- package/dist/utils/parseTextTemplate.js.map +1 -1
- package/dist/utils/resolveGithubRelease.d.ts +1 -1
- package/dist/utils/resolveLastTokens.js.map +1 -1
- package/dist/utils/spawnCommand.d.ts +0 -1
- package/dist/utils/truncateTextAndRoundToWords.js +3 -1
- package/dist/utils/truncateTextAndRoundToWords.js.map +1 -1
- package/dist/utils/withOra.js +1 -1
- package/dist/utils/withOra.js.map +1 -1
- package/dist/utils/withProgressLog.d.ts +0 -1
- package/dist/utils/wrapAbortSignal.d.ts +0 -1
- package/llama/CMakeLists.txt +20 -12
- package/llama/addon/AddonContext.cpp +69 -202
- package/llama/addon/AddonContext.h +4 -5
- package/llama/addon/AddonGrammar.cpp +8 -11
- package/llama/addon/AddonGrammar.h +4 -3
- package/llama/addon/AddonGrammarEvaluationState.cpp +9 -10
- package/llama/addon/AddonGrammarEvaluationState.h +3 -1
- package/llama/addon/AddonModel.cpp +6 -5
- package/llama/addon/AddonSampler.cpp +513 -0
- package/llama/addon/AddonSampler.h +65 -0
- package/llama/addon/RingBuffer.h +109 -0
- package/llama/addon/addon.cpp +7 -0
- package/llama/addon/globals/addonLog.cpp +2 -1
- package/llama/binariesGithubRelease.json +1 -1
- package/llama/gitRelease.bundle +0 -0
- package/llama/grammars/README.md +1 -1
- package/llama/llama.cpp.info.json +1 -1
- package/package.json +71 -46
- package/templates/packed/electron-typescript-react.json +1 -1
- package/templates/packed/node-typescript.json +1 -1
- package/bins/linux-arm64/_nlcBuildMetadata.json +0 -1
- package/bins/linux-arm64/libggml.so +0 -0
- package/bins/linux-arm64/libllama.so +0 -0
- package/bins/linux-arm64/llama-addon.node +0 -0
- package/bins/linux-armv7l/_nlcBuildMetadata.json +0 -1
- package/bins/linux-armv7l/libggml.so +0 -0
- package/bins/linux-armv7l/libllama.so +0 -0
- package/bins/linux-armv7l/llama-addon.node +0 -0
- package/bins/linux-x64/_nlcBuildMetadata.json +0 -1
- package/bins/linux-x64/libggml.so +0 -0
- package/bins/linux-x64/libllama.so +0 -0
- package/bins/linux-x64/llama-addon.node +0 -0
- package/bins/linux-x64-vulkan/_nlcBuildMetadata.json +0 -1
- package/bins/linux-x64-vulkan/libggml.so +0 -0
- package/bins/linux-x64-vulkan/libllama.so +0 -0
- package/bins/linux-x64-vulkan/llama-addon.node +0 -0
- package/bins/linux-x64-vulkan/vulkan-shaders-gen +0 -0
- package/bins/mac-arm64-metal/_nlcBuildMetadata.json +0 -1
- package/bins/mac-arm64-metal/ggml-common.h +0 -1833
- package/bins/mac-arm64-metal/ggml-metal.metal +0 -6168
- package/bins/mac-arm64-metal/libggml.dylib +0 -0
- package/bins/mac-arm64-metal/libllama.dylib +0 -0
- package/bins/mac-arm64-metal/llama-addon.node +0 -0
- package/bins/mac-x64/_nlcBuildMetadata.json +0 -1
- package/bins/mac-x64/libggml.dylib +0 -0
- package/bins/mac-x64/libllama.dylib +0 -0
- package/bins/mac-x64/llama-addon.node +0 -0
- package/bins/win-arm64/_nlcBuildMetadata.json +0 -1
- package/bins/win-arm64/ggml.dll +0 -0
- package/bins/win-arm64/llama-addon.exp +0 -0
- package/bins/win-arm64/llama-addon.lib +0 -0
- package/bins/win-arm64/llama-addon.node +0 -0
- package/bins/win-arm64/llama.dll +0 -0
- package/bins/win-x64/_nlcBuildMetadata.json +0 -1
- package/bins/win-x64/ggml.dll +0 -0
- package/bins/win-x64/llama-addon.exp +0 -0
- package/bins/win-x64/llama-addon.lib +0 -0
- package/bins/win-x64/llama-addon.node +0 -0
- package/bins/win-x64/llama.dll +0 -0
- package/bins/win-x64-vulkan/_nlcBuildMetadata.json +0 -1
- package/bins/win-x64-vulkan/ggml.dll +0 -0
- package/bins/win-x64-vulkan/llama-addon.exp +0 -0
- package/bins/win-x64-vulkan/llama-addon.lib +0 -0
- package/bins/win-x64-vulkan/llama-addon.node +0 -0
- package/bins/win-x64-vulkan/llama.dll +0 -0
- package/bins/win-x64-vulkan/vulkan-shaders-gen.exe +0 -0
- package/dist/cli/commands/BuildCommand.js.map +0 -1
- package/dist/cli/commands/ClearCommand.js.map +0 -1
- package/dist/cli/commands/DownloadCommand.js.map +0 -1
- package/dist/utils/DeepPartialObject.d.ts +0 -3
- package/dist/utils/DeepPartialObject.js +0 -2
- package/dist/utils/DeepPartialObject.js.map +0 -1
- /package/dist/cli/commands/{ClearCommand.d.ts → source/commands/ClearCommand.d.ts} +0 -0
|
@@ -3,8 +3,16 @@ import { removeNullFields } from "../../utils/removeNullFields.js";
|
|
|
3
3
|
import { compareTokens } from "../../utils/compareTokens.js";
|
|
4
4
|
import { DisposeGuard } from "../../utils/DisposeGuard.js";
|
|
5
5
|
import { TokenMeter } from "../TokenMeter.js";
|
|
6
|
+
import { UnsupportedError } from "../../utils/UnsupportedError.js";
|
|
6
7
|
import { resolveBatchItemsPrioritizationStrategy } from "./utils/resolveBatchItemsPrioritizationStrategy.js";
|
|
8
|
+
import { LlamaSampler } from "./LlamaSampler.js";
|
|
7
9
|
const defaultLoraScale = 1;
|
|
10
|
+
const shrinkRetriesMinContextSize = 4096;
|
|
11
|
+
const defaultMaxPunishTokens = 64;
|
|
12
|
+
const defaultFailedCreationRemedy = {
|
|
13
|
+
retries: 6,
|
|
14
|
+
autoContextSizeShrink: 0.16
|
|
15
|
+
};
|
|
8
16
|
export class LlamaContext {
|
|
9
17
|
/** @internal */ _llama;
|
|
10
18
|
/** @internal */ _ctx;
|
|
@@ -14,6 +22,9 @@ export class LlamaContext {
|
|
|
14
22
|
/** @internal */ _contextSize;
|
|
15
23
|
/** @internal */ _batchSize;
|
|
16
24
|
/** @internal */ _flashAttention;
|
|
25
|
+
/** @internal */ _idealThreads;
|
|
26
|
+
/** @internal */ _minThreads;
|
|
27
|
+
/** @internal */ _performanceTracking;
|
|
17
28
|
/** @internal */ _totalSequences;
|
|
18
29
|
/** @internal */ _unusedSequenceIds = [];
|
|
19
30
|
/** @internal */ _batchingOptions;
|
|
@@ -26,11 +37,13 @@ export class LlamaContext {
|
|
|
26
37
|
/** @internal */ _nextGeneratedSequenceId = 0;
|
|
27
38
|
/** @internal */ _dispatchDecodeScheduled = false;
|
|
28
39
|
/** @internal */ _batchDispatchPending = false;
|
|
40
|
+
/** @internal */ _threadSplitterConsumer;
|
|
41
|
+
/** @internal */ _freeReservedThreadsTimeout;
|
|
29
42
|
/** @internal */ _currentDispatchBatchHandle = {};
|
|
30
43
|
/** @internal */ _allocatedContextSize;
|
|
31
44
|
/** @internal */ _disposed = false;
|
|
32
45
|
onDispose = new EventRelay();
|
|
33
|
-
constructor({ _model }, { sequences,
|
|
46
|
+
constructor({ _model }, { sequences, contextSize, batchSize, flashAttention = _model.defaultContextFlashAttention, threads, batching: { dispatchSchedule: batchingDispatchSchedule = "nextTick", itemPrioritizationStrategy: batchingItemsPrioritizationStrategy = "maximumParallelism" } = {}, performanceTracking = false, _embeddings }) {
|
|
34
47
|
if (_model.disposed)
|
|
35
48
|
throw new DisposedError();
|
|
36
49
|
this._llama = _model._llama;
|
|
@@ -41,15 +54,21 @@ export class LlamaContext {
|
|
|
41
54
|
this._contextSize = Math.max(2, contextSize);
|
|
42
55
|
this._batchSize = Math.max(batchSize, this._totalSequences);
|
|
43
56
|
this._flashAttention = flashAttention;
|
|
57
|
+
this._idealThreads = typeof threads === "number"
|
|
58
|
+
? this._llama._threadsSplitter.normalizeThreadsValue(threads)
|
|
59
|
+
: this._llama._threadsSplitter.normalizeThreadsValue(threads?.ideal ?? this._llama.maxThreads);
|
|
60
|
+
this._minThreads = typeof threads === "number"
|
|
61
|
+
? 1
|
|
62
|
+
: this._llama._threadsSplitter.normalizeThreadsValue(threads?.min ?? 1);
|
|
63
|
+
this._performanceTracking = !!performanceTracking;
|
|
44
64
|
this._ctx = new this._llama._bindings.AddonContext(this._model._model, removeNullFields({
|
|
45
|
-
seed: seed != null ? Math.max(-1, Math.floor(seed)) : undefined,
|
|
46
65
|
contextSize: this._contextSize * this._totalSequences, // each sequence needs its own <contextSize> of cells
|
|
47
66
|
batchSize: this._batchSize,
|
|
48
67
|
sequences: this._totalSequences,
|
|
49
68
|
flashAttention: this._flashAttention,
|
|
50
|
-
threads:
|
|
69
|
+
threads: this._idealThreads,
|
|
51
70
|
embeddings: _embeddings,
|
|
52
|
-
|
|
71
|
+
performanceTracking: this._performanceTracking
|
|
53
72
|
}));
|
|
54
73
|
this._batchingOptions = {
|
|
55
74
|
dispatchSchedule: batchingDispatchSchedule,
|
|
@@ -58,6 +77,7 @@ export class LlamaContext {
|
|
|
58
77
|
this._gcRegistry = new FinalizationRegistry(this._model._removeLoraUsage);
|
|
59
78
|
this._gcRegistry.register(this, this._loraAdapters);
|
|
60
79
|
this._reclaimUnusedSequenceId = this._reclaimUnusedSequenceId.bind(this);
|
|
80
|
+
this._freeReservedThreads = this._freeReservedThreads.bind(this);
|
|
61
81
|
this._disposeAggregator.add(() => {
|
|
62
82
|
this._disposed = true;
|
|
63
83
|
});
|
|
@@ -111,6 +131,19 @@ export class LlamaContext {
|
|
|
111
131
|
this._ensureNotDisposed();
|
|
112
132
|
return this._ctx.getStateSize();
|
|
113
133
|
}
|
|
134
|
+
/** The number of threads currently used to evaluate tokens */
|
|
135
|
+
get currentThreads() {
|
|
136
|
+
this._ensureNotDisposed();
|
|
137
|
+
return this._ctx.getThreads();
|
|
138
|
+
}
|
|
139
|
+
/**
|
|
140
|
+
* The number of threads that are preferred to be used to evaluate tokens.
|
|
141
|
+
*
|
|
142
|
+
* The actual number of threads used may be lower when other evaluations are running in parallel.
|
|
143
|
+
*/
|
|
144
|
+
get idealThreads() {
|
|
145
|
+
return this._idealThreads;
|
|
146
|
+
}
|
|
114
147
|
getAllocatedContextSize() {
|
|
115
148
|
this._ensureNotDisposed();
|
|
116
149
|
if (this._allocatedContextSize == null)
|
|
@@ -263,13 +296,19 @@ export class LlamaContext {
|
|
|
263
296
|
i--;
|
|
264
297
|
}
|
|
265
298
|
}
|
|
266
|
-
|
|
267
|
-
|
|
299
|
+
if (currentBatchSize !== 0) {
|
|
300
|
+
const [threadsToUse, consumerHandle] = await this._threadSplitterConsumer?.getAllocationToConsume() ?? [];
|
|
301
|
+
try {
|
|
302
|
+
if (threadsToUse != null)
|
|
303
|
+
this._ctx.setThreads(threadsToUse);
|
|
268
304
|
await this._ctx.decodeBatch();
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
305
|
+
consumerHandle?.dispose();
|
|
306
|
+
}
|
|
307
|
+
catch (err) {
|
|
308
|
+
consumerHandle?.dispose();
|
|
309
|
+
this._dispatchErrorForQueuedDecodesAndDequeue(currentQueuedDecodeItems, err);
|
|
310
|
+
return;
|
|
311
|
+
}
|
|
273
312
|
}
|
|
274
313
|
for (const action of afterDecodeActions) {
|
|
275
314
|
const [accept, reject] = action.response;
|
|
@@ -287,36 +326,47 @@ export class LlamaContext {
|
|
|
287
326
|
const prioritizationStrategy = resolvePrioritizationStrategy();
|
|
288
327
|
if (prioritizationStrategy == null)
|
|
289
328
|
return; // all queued items are rejected and dequeued when we get here
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
preventDisposalHandle
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
329
|
+
this._reserveThreads();
|
|
330
|
+
try {
|
|
331
|
+
while (shouldHaveAnotherLoop) {
|
|
332
|
+
const orderedQueuedDecodes = getOrderedQueuedDecodes(prioritizationStrategy);
|
|
333
|
+
if (orderedQueuedDecodes == null)
|
|
334
|
+
return; // all queued items are rejected and dequeued when we get here
|
|
335
|
+
const { currentBatchItems, currentBatchSize } = fitQueuedDecodesToABatch(orderedQueuedDecodes, this._batchSize);
|
|
336
|
+
let preventDisposalHandle;
|
|
337
|
+
try {
|
|
338
|
+
preventDisposalHandle = this._backendContextDisposeGuard.createPreventDisposalHandle();
|
|
339
|
+
}
|
|
340
|
+
catch (err) {
|
|
341
|
+
this._dispatchErrorForQueuedDecodesAndDequeue(new Set(this._queuedDecodes), err);
|
|
342
|
+
return;
|
|
343
|
+
}
|
|
344
|
+
try {
|
|
345
|
+
await decodeTokenBatchItems(currentBatchItems, currentBatchSize);
|
|
346
|
+
shouldHaveAnotherLoop = this._queuedDecodes.length > 0;
|
|
347
|
+
}
|
|
348
|
+
finally {
|
|
349
|
+
preventDisposalHandle.dispose();
|
|
350
|
+
}
|
|
309
351
|
}
|
|
310
352
|
}
|
|
353
|
+
finally {
|
|
354
|
+
this._scheduleToFreeReservedThreads();
|
|
355
|
+
}
|
|
311
356
|
});
|
|
312
357
|
}
|
|
313
358
|
/**
|
|
314
359
|
* Print the timings of token evaluation since that last print for this context.
|
|
360
|
+
*
|
|
361
|
+
* Requires the `performanceTracking` option to be enabled.
|
|
362
|
+
*
|
|
315
363
|
* > **Note:** it prints on the `LlamaLogLevel.info` level, so if you set the level of your `Llama` instance higher than that,
|
|
316
364
|
* it won't print anything.
|
|
317
365
|
*/
|
|
318
366
|
async printTimings() {
|
|
319
367
|
this._ensureNotDisposed();
|
|
368
|
+
if (!this._performanceTracking)
|
|
369
|
+
throw new UnsupportedError("Performance tracking is not enabled");
|
|
320
370
|
this._ctx.printTimings();
|
|
321
371
|
await new Promise((accept) => setTimeout(accept, 0)); // wait for the logs to finish printing
|
|
322
372
|
}
|
|
@@ -350,14 +400,6 @@ export class LlamaContext {
|
|
|
350
400
|
});
|
|
351
401
|
}
|
|
352
402
|
/** @internal */
|
|
353
|
-
_acceptTokenOnGrammarEvaluationState(grammarEvaluationState, token) {
|
|
354
|
-
this._ctx.acceptGrammarEvaluationStateToken(grammarEvaluationState._state, token);
|
|
355
|
-
}
|
|
356
|
-
/** @internal */
|
|
357
|
-
_canBeNextTokenForGrammarEvaluationState(grammarEvaluationState, token) {
|
|
358
|
-
return this._ctx.canBeNextTokenForGrammarEvaluationState(grammarEvaluationState._state, token);
|
|
359
|
-
}
|
|
360
|
-
/** @internal */
|
|
361
403
|
_popSequenceId() {
|
|
362
404
|
if (this._unusedSequenceIds.length > 0)
|
|
363
405
|
return this._unusedSequenceIds.shift();
|
|
@@ -417,6 +459,30 @@ export class LlamaContext {
|
|
|
417
459
|
}
|
|
418
460
|
}
|
|
419
461
|
/** @internal */
|
|
462
|
+
_reserveThreads() {
|
|
463
|
+
clearTimeout(this._freeReservedThreadsTimeout);
|
|
464
|
+
delete this._freeReservedThreadsTimeout;
|
|
465
|
+
if (this._threadSplitterConsumer != null)
|
|
466
|
+
return;
|
|
467
|
+
this._threadSplitterConsumer = this._llama._threadsSplitter.createConsumer(this._idealThreads, this._minThreads);
|
|
468
|
+
}
|
|
469
|
+
/** @internal */
|
|
470
|
+
_freeReservedThreads() {
|
|
471
|
+
clearTimeout(this._freeReservedThreadsTimeout);
|
|
472
|
+
delete this._freeReservedThreadsTimeout;
|
|
473
|
+
if (this._threadSplitterConsumer == null)
|
|
474
|
+
return;
|
|
475
|
+
this._threadSplitterConsumer.dispose();
|
|
476
|
+
delete this._threadSplitterConsumer;
|
|
477
|
+
}
|
|
478
|
+
/** @internal */
|
|
479
|
+
_scheduleToFreeReservedThreads() {
|
|
480
|
+
if (this._threadSplitterConsumer == null)
|
|
481
|
+
return;
|
|
482
|
+
clearTimeout(this._freeReservedThreadsTimeout);
|
|
483
|
+
this._freeReservedThreadsTimeout = setTimeout(this._freeReservedThreads, 0);
|
|
484
|
+
}
|
|
485
|
+
/** @internal */
|
|
420
486
|
static async _create(options, { _model }) {
|
|
421
487
|
const sequences = options.sequences ?? getDefaultContextSequences();
|
|
422
488
|
const flashAttention = _model.flashAttentionSupported
|
|
@@ -425,7 +491,13 @@ export class LlamaContext {
|
|
|
425
491
|
const loraOptions = typeof options.lora === "string"
|
|
426
492
|
? { adapters: [{ filePath: options.lora }] }
|
|
427
493
|
: options.lora;
|
|
428
|
-
|
|
494
|
+
let failedCreationRetries = options.failedCreationRemedy === false
|
|
495
|
+
? 0
|
|
496
|
+
: Math.max(0, options.failedCreationRemedy?.retries ?? defaultFailedCreationRemedy.retries);
|
|
497
|
+
const failedCreationAutoContextSizeShrink = options.failedCreationRemedy === false
|
|
498
|
+
? 0
|
|
499
|
+
: options.failedCreationRemedy?.autoContextSizeShrink ?? defaultFailedCreationRemedy.autoContextSizeShrink;
|
|
500
|
+
let contextSize = await _model.fileInsights.configurationResolver.resolveContextContextSize(options.contextSize, {
|
|
429
501
|
batchSize: options.batchSize,
|
|
430
502
|
sequences: sequences,
|
|
431
503
|
modelGpuLayers: _model.gpuLayers,
|
|
@@ -436,69 +508,101 @@ export class LlamaContext {
|
|
|
436
508
|
ignoreMemorySafetyChecks: options.ignoreMemorySafetyChecks,
|
|
437
509
|
isEmbeddingContext: options._embeddings
|
|
438
510
|
});
|
|
439
|
-
const
|
|
440
|
-
|
|
441
|
-
contextSize
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
flashAttention
|
|
447
|
-
}).gpuVram;
|
|
448
|
-
const context = new LlamaContext({ _model }, { ...options, contextSize, batchSize, sequences, flashAttention });
|
|
511
|
+
const minContextSize = options.contextSize === "auto"
|
|
512
|
+
? shrinkRetriesMinContextSize
|
|
513
|
+
: (typeof options.contextSize === "object" && typeof options.contextSize.min === "number")
|
|
514
|
+
? options.contextSize.min
|
|
515
|
+
: typeof options.contextSize === "number"
|
|
516
|
+
? options.contextSize
|
|
517
|
+
: shrinkRetriesMinContextSize;
|
|
449
518
|
const { createSignal } = options;
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
}
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
519
|
+
async function createContext(contextSize) {
|
|
520
|
+
const batchSize = options.batchSize ?? getDefaultContextBatchSize({ contextSize, sequences });
|
|
521
|
+
const vramRequiredEstimate = _model.fileInsights.estimateContextResourceRequirements({
|
|
522
|
+
contextSize,
|
|
523
|
+
sequences,
|
|
524
|
+
isEmbeddingContext: options._embeddings,
|
|
525
|
+
modelGpuLayers: _model.gpuLayers,
|
|
526
|
+
batchSize,
|
|
527
|
+
flashAttention
|
|
528
|
+
}).gpuVram;
|
|
529
|
+
const context = new LlamaContext({ _model }, { ...options, contextSize, batchSize, sequences, flashAttention });
|
|
530
|
+
const contextCreationMemoryReservation = options.ignoreMemorySafetyChecks
|
|
531
|
+
? null
|
|
532
|
+
: _model._llama._vramOrchestrator.reserveMemory(vramRequiredEstimate);
|
|
533
|
+
try {
|
|
534
|
+
if (createSignal?.aborted)
|
|
535
|
+
throw createSignal.reason;
|
|
536
|
+
const contextLoaded = await context._ctx.init();
|
|
537
|
+
if (createSignal?.aborted) {
|
|
538
|
+
if (contextLoaded)
|
|
539
|
+
await context._ctx.dispose();
|
|
540
|
+
throw createSignal.reason;
|
|
541
|
+
}
|
|
542
|
+
else if (!contextLoaded)
|
|
543
|
+
throw new Error("Failed to create context");
|
|
544
|
+
contextCreationMemoryReservation?.dispose?.();
|
|
545
|
+
if (loraOptions != null && loraOptions.adapters.length > 0) {
|
|
546
|
+
let loadedAdapters = 0;
|
|
547
|
+
for (const adapter of loraOptions.adapters) {
|
|
472
548
|
try {
|
|
473
|
-
|
|
549
|
+
await context._setLora({
|
|
550
|
+
filePath: adapter.filePath,
|
|
551
|
+
scale: adapter.scale
|
|
552
|
+
});
|
|
553
|
+
loadedAdapters++;
|
|
554
|
+
try {
|
|
555
|
+
loraOptions.onLoadProgress?.(loadedAdapters / loraOptions.adapters.length);
|
|
556
|
+
}
|
|
557
|
+
catch (err) {
|
|
558
|
+
console.error(err);
|
|
559
|
+
}
|
|
474
560
|
}
|
|
475
561
|
catch (err) {
|
|
476
|
-
|
|
562
|
+
await context.dispose();
|
|
563
|
+
throw err;
|
|
564
|
+
}
|
|
565
|
+
if (createSignal?.aborted) {
|
|
566
|
+
await context.dispose();
|
|
567
|
+
throw createSignal.reason;
|
|
477
568
|
}
|
|
478
569
|
}
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
570
|
+
}
|
|
571
|
+
else if (loraOptions?.onLoadProgress != null) {
|
|
572
|
+
try {
|
|
573
|
+
loraOptions.onLoadProgress(1);
|
|
482
574
|
}
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
throw createSignal.reason;
|
|
575
|
+
catch (err) {
|
|
576
|
+
console.error(err);
|
|
486
577
|
}
|
|
487
578
|
}
|
|
579
|
+
return context;
|
|
488
580
|
}
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
loraOptions.onLoadProgress(1);
|
|
492
|
-
}
|
|
493
|
-
catch (err) {
|
|
494
|
-
console.error(err);
|
|
495
|
-
}
|
|
581
|
+
finally {
|
|
582
|
+
contextCreationMemoryReservation?.dispose?.();
|
|
496
583
|
}
|
|
497
|
-
return context;
|
|
498
584
|
}
|
|
499
|
-
|
|
500
|
-
|
|
585
|
+
while (failedCreationRetries >= 0) {
|
|
586
|
+
try {
|
|
587
|
+
return await createContext(contextSize);
|
|
588
|
+
}
|
|
589
|
+
catch (err) {
|
|
590
|
+
if (failedCreationRetries === 0 || (createSignal?.aborted && err === createSignal.reason))
|
|
591
|
+
throw err;
|
|
592
|
+
failedCreationRetries--;
|
|
593
|
+
let newContextSize = typeof failedCreationAutoContextSizeShrink === "number"
|
|
594
|
+
? Math.floor(contextSize * (1 - failedCreationAutoContextSizeShrink))
|
|
595
|
+
: Math.floor(failedCreationAutoContextSizeShrink(contextSize));
|
|
596
|
+
if (!Number.isFinite(newContextSize))
|
|
597
|
+
throw err;
|
|
598
|
+
if (newContextSize < minContextSize)
|
|
599
|
+
newContextSize = minContextSize;
|
|
600
|
+
if (newContextSize >= contextSize)
|
|
601
|
+
throw err;
|
|
602
|
+
contextSize = newContextSize;
|
|
603
|
+
}
|
|
501
604
|
}
|
|
605
|
+
throw new Error("Failed to create context");
|
|
502
606
|
}
|
|
503
607
|
}
|
|
504
608
|
export class LlamaContextSequence {
|
|
@@ -639,12 +743,13 @@ export class LlamaContextSequence {
|
|
|
639
743
|
});
|
|
640
744
|
}
|
|
641
745
|
evaluate(tokens, options = {}) {
|
|
642
|
-
const { temperature = 0, minP = 0, topK = 40, topP = 0.95, grammarEvaluationState, repeatPenalty, tokenBias, evaluationPriority = 5, contextShift: { size: contextShiftSize = this._contextShift.size, strategy: contextShiftStrategy = this._contextShift.strategy } = {}, yieldEogToken = false, _noSampling = false } = options;
|
|
746
|
+
const { temperature = 0, minP = 0, topK = 40, topP = 0.95, seed, grammarEvaluationState, repeatPenalty, tokenBias, evaluationPriority = 5, contextShift: { size: contextShiftSize = this._contextShift.size, strategy: contextShiftStrategy = this._contextShift.strategy } = {}, yieldEogToken = false, _noSampling = false } = options;
|
|
643
747
|
return this._evaluate(tokens, {
|
|
644
748
|
temperature,
|
|
645
749
|
minP,
|
|
646
750
|
topK,
|
|
647
751
|
topP,
|
|
752
|
+
seed,
|
|
648
753
|
grammarEvaluationState,
|
|
649
754
|
repeatPenalty,
|
|
650
755
|
tokenBias,
|
|
@@ -677,53 +782,71 @@ export class LlamaContextSequence {
|
|
|
677
782
|
}
|
|
678
783
|
}
|
|
679
784
|
/** @internal */
|
|
680
|
-
async *_evaluate(tokens, { temperature = 0, minP = 0, topK = 40, topP = 0.95, grammarEvaluationState, repeatPenalty, tokenBias, evaluationPriority = 5, generateNewTokens = true, contextShiftOptions, yieldEogToken = false, _noSampling = false }) {
|
|
785
|
+
async *_evaluate(tokens, { temperature = 0, minP = 0, topK = 40, topP = 0.95, seed, grammarEvaluationState, repeatPenalty, tokenBias, evaluationPriority = 5, generateNewTokens = true, contextShiftOptions, yieldEogToken = false, _noSampling = false }) {
|
|
681
786
|
this._ensureNotDisposed();
|
|
682
787
|
let evalTokens = tokens;
|
|
683
788
|
if (evalTokens.length === 0)
|
|
684
789
|
return;
|
|
685
|
-
|
|
686
|
-
|
|
687
|
-
|
|
688
|
-
|
|
689
|
-
|
|
690
|
-
|
|
691
|
-
|
|
692
|
-
|
|
693
|
-
|
|
694
|
-
|
|
695
|
-
|
|
696
|
-
|
|
697
|
-
|
|
698
|
-
|
|
699
|
-
|
|
700
|
-
|
|
701
|
-
|
|
702
|
-
|
|
703
|
-
|
|
704
|
-
|
|
705
|
-
|
|
706
|
-
|
|
707
|
-
|
|
708
|
-
:
|
|
709
|
-
|
|
710
|
-
|
|
711
|
-
|
|
712
|
-
|
|
713
|
-
|
|
714
|
-
|
|
715
|
-
|
|
716
|
-
|
|
717
|
-
|
|
718
|
-
|
|
719
|
-
|
|
720
|
-
|
|
721
|
-
|
|
722
|
-
|
|
723
|
-
|
|
724
|
-
|
|
725
|
-
|
|
726
|
-
|
|
790
|
+
const sampler = new LlamaSampler(this.model);
|
|
791
|
+
try {
|
|
792
|
+
while (true) {
|
|
793
|
+
this._ensureNotDisposed();
|
|
794
|
+
// Evaluate to get the next token.
|
|
795
|
+
const nextToken = await this._decodeTokens(evalTokens, generateNewTokens, evaluationPriority, this._tokenMeter, contextShiftOptions, (batchLogitIndex) => {
|
|
796
|
+
if (_noSampling)
|
|
797
|
+
return null;
|
|
798
|
+
const repeatPenaltyTokens = repeatPenalty?.punishTokens instanceof Function
|
|
799
|
+
? repeatPenalty.punishTokens()
|
|
800
|
+
: repeatPenalty?.punishTokens;
|
|
801
|
+
const maxPunishTokens = Math.max(repeatPenalty?.maxPunishTokens ?? defaultMaxPunishTokens, repeatPenaltyTokens?.length ?? 0);
|
|
802
|
+
const resolvedGrammarEvaluationState = grammarEvaluationState instanceof Function
|
|
803
|
+
? grammarEvaluationState()
|
|
804
|
+
: grammarEvaluationState;
|
|
805
|
+
if (resolvedGrammarEvaluationState != null && resolvedGrammarEvaluationState._llama !== this.model._llama)
|
|
806
|
+
throw new Error("The LlamaGrammar used by passed to this function was created with a different Llama instance than the one used by this sequence's model. Make sure you use the same Llama instance for both the model and the grammar.");
|
|
807
|
+
const { tokenBiasKeys, tokenBiasValues } = getTokenBiasesForAddon(tokenBias, this.model);
|
|
808
|
+
sampler.applyConfig(removeNullFields({
|
|
809
|
+
temperature,
|
|
810
|
+
minP,
|
|
811
|
+
topK,
|
|
812
|
+
topP,
|
|
813
|
+
seed: Math.max(0, Number.isFinite(seed)
|
|
814
|
+
? Math.floor(seed ?? (Date.now() / 1000))
|
|
815
|
+
: Math.floor(Date.now() / 1000)),
|
|
816
|
+
repeatPenalty: repeatPenalty?.penalty,
|
|
817
|
+
repeatPenaltyMaxTokens: maxPunishTokens,
|
|
818
|
+
repeatPenaltyTokens: repeatPenaltyTokens != null
|
|
819
|
+
? Uint32Array.from(repeatPenaltyTokens)
|
|
820
|
+
: undefined,
|
|
821
|
+
repeatPenaltyPresencePenalty: repeatPenalty?.presencePenalty,
|
|
822
|
+
repeatPenaltyFrequencyPenalty: repeatPenalty?.frequencyPenalty,
|
|
823
|
+
tokenBiasKeys,
|
|
824
|
+
tokenBiasValues,
|
|
825
|
+
grammarEvaluationState: resolvedGrammarEvaluationState?._state
|
|
826
|
+
}));
|
|
827
|
+
return withLock(sampler, "sample", async () => {
|
|
828
|
+
if (sampler.disposed)
|
|
829
|
+
return null;
|
|
830
|
+
return this._context._ctx.sampleToken(batchLogitIndex, sampler._sampler);
|
|
831
|
+
});
|
|
832
|
+
});
|
|
833
|
+
if (nextToken === -1)
|
|
834
|
+
throw new Error("Failed to sample next token");
|
|
835
|
+
if (nextToken == null)
|
|
836
|
+
return;
|
|
837
|
+
// the model finished generating text
|
|
838
|
+
if (!yieldEogToken && this._context.model.isEogToken(nextToken))
|
|
839
|
+
break;
|
|
840
|
+
const replacementToken = (yield nextToken);
|
|
841
|
+
// set the tokens for the next evaluation
|
|
842
|
+
if (replacementToken != null)
|
|
843
|
+
evalTokens = [replacementToken];
|
|
844
|
+
else
|
|
845
|
+
evalTokens = [nextToken];
|
|
846
|
+
}
|
|
847
|
+
}
|
|
848
|
+
finally {
|
|
849
|
+
void withLock(sampler, "sample", sampler.asyncDispose);
|
|
727
850
|
}
|
|
728
851
|
}
|
|
729
852
|
/** @internal */
|
|
@@ -814,7 +937,7 @@ function getTokenBiasesForAddon(tokenBias, currentModel) {
|
|
|
814
937
|
};
|
|
815
938
|
if (tokenBias instanceof Function)
|
|
816
939
|
tokenBias = tokenBias();
|
|
817
|
-
if (tokenBias.
|
|
940
|
+
if (tokenBias._tokenizer !== currentModel.tokenizer)
|
|
818
941
|
throw new Error("This TokenBias instance was created with a different model than the one used by this context. " +
|
|
819
942
|
"Make sure you use the model instance of the context sequence for the TokenBias you use it with.");
|
|
820
943
|
const tokenBiasKeys = [];
|