node-llama-cpp 3.0.0-beta.1 → 3.0.0-beta.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -0
- package/dist/ChatWrapper.d.ts +49 -0
- package/dist/ChatWrapper.js +120 -0
- package/dist/ChatWrapper.js.map +1 -0
- package/dist/{utils/getBin.d.ts → bindings/AddonTypes.d.ts} +14 -4
- package/dist/bindings/AddonTypes.js +2 -0
- package/dist/bindings/AddonTypes.js.map +1 -0
- package/dist/bindings/Llama.d.ts +23 -0
- package/dist/bindings/Llama.js +225 -0
- package/dist/bindings/Llama.js.map +1 -0
- package/dist/bindings/getLlama.d.ts +103 -0
- package/dist/bindings/getLlama.js +228 -0
- package/dist/bindings/getLlama.js.map +1 -0
- package/dist/bindings/types.d.ts +33 -0
- package/dist/bindings/types.js +30 -0
- package/dist/bindings/types.js.map +1 -0
- package/dist/bindings/utils/NoBinaryFoundError.d.ts +2 -0
- package/dist/bindings/utils/NoBinaryFoundError.js +7 -0
- package/dist/bindings/utils/NoBinaryFoundError.js.map +1 -0
- package/dist/{utils → bindings/utils}/binariesGithubRelease.js +1 -1
- package/dist/bindings/utils/binariesGithubRelease.js.map +1 -0
- package/dist/bindings/utils/clearAllLocalBuilds.d.ts +1 -0
- package/dist/bindings/utils/clearAllLocalBuilds.js +47 -0
- package/dist/bindings/utils/clearAllLocalBuilds.js.map +1 -0
- package/dist/bindings/utils/cloneLlamaCppRepo.d.ts +11 -0
- package/dist/bindings/utils/cloneLlamaCppRepo.js +155 -0
- package/dist/bindings/utils/cloneLlamaCppRepo.js.map +1 -0
- package/dist/bindings/utils/compileLLamaCpp.d.ts +12 -0
- package/dist/bindings/utils/compileLLamaCpp.js +157 -0
- package/dist/bindings/utils/compileLLamaCpp.js.map +1 -0
- package/dist/bindings/utils/getBuildFolderNameForBuildOptions.d.ts +5 -0
- package/dist/bindings/utils/getBuildFolderNameForBuildOptions.js +85 -0
- package/dist/bindings/utils/getBuildFolderNameForBuildOptions.js.map +1 -0
- package/dist/bindings/utils/getCanUsePrebuiltBinaries.d.ts +1 -0
- package/dist/bindings/utils/getCanUsePrebuiltBinaries.js +8 -0
- package/dist/bindings/utils/getCanUsePrebuiltBinaries.js.map +1 -0
- package/dist/bindings/utils/getExampleUsageCodeOfGetLlama.d.ts +2 -0
- package/dist/bindings/utils/getExampleUsageCodeOfGetLlama.js +21 -0
- package/dist/bindings/utils/getExampleUsageCodeOfGetLlama.js.map +1 -0
- package/dist/bindings/utils/getPlatform.d.ts +2 -0
- package/dist/bindings/utils/getPlatform.js +15 -0
- package/dist/bindings/utils/getPlatform.js.map +1 -0
- package/dist/bindings/utils/lastBuildInfo.d.ts +6 -0
- package/dist/bindings/utils/lastBuildInfo.js +17 -0
- package/dist/bindings/utils/lastBuildInfo.js.map +1 -0
- package/dist/bindings/utils/logBinaryUsageExampleToConsole.d.ts +2 -0
- package/dist/bindings/utils/logBinaryUsageExampleToConsole.js +28 -0
- package/dist/bindings/utils/logBinaryUsageExampleToConsole.js.map +1 -0
- package/dist/bindings/utils/resolveChatWrapperBasedOnWrapperTypeName.d.ts +26 -0
- package/dist/bindings/utils/resolveChatWrapperBasedOnWrapperTypeName.js +43 -0
- package/dist/bindings/utils/resolveChatWrapperBasedOnWrapperTypeName.js.map +1 -0
- package/dist/bindings/utils/resolveCustomCmakeOptions.d.ts +1 -0
- package/dist/bindings/utils/resolveCustomCmakeOptions.js +43 -0
- package/dist/bindings/utils/resolveCustomCmakeOptions.js.map +1 -0
- package/dist/chatWrappers/AlpacaChatWrapper.d.ts +12 -0
- package/dist/chatWrappers/AlpacaChatWrapper.js +21 -0
- package/dist/chatWrappers/AlpacaChatWrapper.js.map +1 -0
- package/dist/chatWrappers/ChatMLChatWrapper.d.ts +18 -0
- package/dist/chatWrappers/ChatMLChatWrapper.js +83 -0
- package/dist/chatWrappers/ChatMLChatWrapper.js.map +1 -0
- package/dist/chatWrappers/EmptyChatWrapper.d.ts +4 -0
- package/dist/chatWrappers/EmptyChatWrapper.js +5 -0
- package/dist/chatWrappers/EmptyChatWrapper.js.map +1 -0
- package/dist/chatWrappers/FalconChatWrapper.d.ts +21 -0
- package/dist/chatWrappers/FalconChatWrapper.js +104 -0
- package/dist/chatWrappers/FalconChatWrapper.js.map +1 -0
- package/dist/chatWrappers/FunctionaryChatWrapper.d.ts +41 -0
- package/dist/chatWrappers/FunctionaryChatWrapper.js +200 -0
- package/dist/chatWrappers/FunctionaryChatWrapper.js.map +1 -0
- package/dist/chatWrappers/GeneralChatWrapper.d.ts +21 -0
- package/dist/chatWrappers/GeneralChatWrapper.js +112 -0
- package/dist/chatWrappers/GeneralChatWrapper.js.map +1 -0
- package/dist/chatWrappers/LlamaChatWrapper.d.ts +13 -0
- package/dist/chatWrappers/LlamaChatWrapper.js +78 -0
- package/dist/chatWrappers/LlamaChatWrapper.js.map +1 -0
- package/dist/chatWrappers/resolveChatWrapperBasedOnModel.d.ts +5 -5
- package/dist/chatWrappers/resolveChatWrapperBasedOnModel.js +28 -17
- package/dist/chatWrappers/resolveChatWrapperBasedOnModel.js.map +1 -1
- package/dist/cli/cli.js +8 -0
- package/dist/cli/cli.js.map +1 -1
- package/dist/cli/commands/BuildCommand.d.ts +2 -1
- package/dist/cli/commands/BuildCommand.js +50 -10
- package/dist/cli/commands/BuildCommand.js.map +1 -1
- package/dist/cli/commands/ChatCommand.d.ts +11 -4
- package/dist/cli/commands/ChatCommand.js +138 -64
- package/dist/cli/commands/ChatCommand.js.map +1 -1
- package/dist/cli/commands/ClearCommand.js +4 -6
- package/dist/cli/commands/ClearCommand.js.map +1 -1
- package/dist/cli/commands/CompleteCommand.d.ts +25 -0
- package/dist/cli/commands/CompleteCommand.js +278 -0
- package/dist/cli/commands/CompleteCommand.js.map +1 -0
- package/dist/cli/commands/DebugCommand.d.ts +7 -0
- package/dist/cli/commands/DebugCommand.js +59 -0
- package/dist/cli/commands/DebugCommand.js.map +1 -0
- package/dist/cli/commands/DownloadCommand.d.ts +2 -1
- package/dist/cli/commands/DownloadCommand.js +47 -40
- package/dist/cli/commands/DownloadCommand.js.map +1 -1
- package/dist/cli/commands/InfillCommand.d.ts +27 -0
- package/dist/cli/commands/InfillCommand.js +316 -0
- package/dist/cli/commands/InfillCommand.js.map +1 -0
- package/dist/cli/commands/OnPostInstallCommand.js +7 -10
- package/dist/cli/commands/OnPostInstallCommand.js.map +1 -1
- package/dist/config.d.ts +10 -3
- package/dist/config.js +18 -7
- package/dist/config.js.map +1 -1
- package/dist/consts.d.ts +1 -0
- package/dist/consts.js +2 -0
- package/dist/consts.js.map +1 -0
- package/dist/evaluator/LlamaChat/LlamaChat.d.ts +154 -0
- package/dist/evaluator/LlamaChat/LlamaChat.js +684 -0
- package/dist/evaluator/LlamaChat/LlamaChat.js.map +1 -0
- package/dist/evaluator/LlamaChat/utils/FunctionCallGrammar.d.ts +22 -0
- package/dist/evaluator/LlamaChat/utils/FunctionCallGrammar.js +121 -0
- package/dist/evaluator/LlamaChat/utils/FunctionCallGrammar.js.map +1 -0
- package/dist/evaluator/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.d.ts +16 -0
- package/dist/evaluator/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.js +135 -0
- package/dist/evaluator/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.js.map +1 -0
- package/dist/{llamaEvaluator → evaluator/LlamaChatSession}/LlamaChatSession.d.ts +59 -25
- package/dist/evaluator/LlamaChatSession/LlamaChatSession.js +219 -0
- package/dist/evaluator/LlamaChatSession/LlamaChatSession.js.map +1 -0
- package/dist/evaluator/LlamaChatSession/utils/defineChatSessionFunction.d.ts +7 -0
- package/dist/evaluator/LlamaChatSession/utils/defineChatSessionFunction.js +8 -0
- package/dist/evaluator/LlamaChatSession/utils/defineChatSessionFunction.js.map +1 -0
- package/dist/evaluator/LlamaCompletion.d.ts +148 -0
- package/dist/evaluator/LlamaCompletion.js +402 -0
- package/dist/evaluator/LlamaCompletion.js.map +1 -0
- package/dist/{llamaEvaluator → evaluator}/LlamaContext/LlamaContext.d.ts +20 -23
- package/dist/{llamaEvaluator → evaluator}/LlamaContext/LlamaContext.js +77 -107
- package/dist/evaluator/LlamaContext/LlamaContext.js.map +1 -0
- package/dist/{llamaEvaluator → evaluator}/LlamaContext/types.d.ts +6 -14
- package/dist/evaluator/LlamaContext/types.js.map +1 -0
- package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/firstInFirstOutStrategy.js.map +1 -0
- package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/maximumParallelismStrategy.js.map +1 -0
- package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizingStrategy.js.map +1 -0
- package/dist/evaluator/LlamaEmbeddingContext.d.ts +37 -0
- package/dist/evaluator/LlamaEmbeddingContext.js +78 -0
- package/dist/evaluator/LlamaEmbeddingContext.js.map +1 -0
- package/dist/evaluator/LlamaGrammar.d.ts +30 -0
- package/dist/{llamaEvaluator → evaluator}/LlamaGrammar.js +14 -18
- package/dist/evaluator/LlamaGrammar.js.map +1 -0
- package/dist/{llamaEvaluator → evaluator}/LlamaGrammarEvaluationState.js +4 -4
- package/dist/evaluator/LlamaGrammarEvaluationState.js.map +1 -0
- package/dist/{llamaEvaluator → evaluator}/LlamaJsonSchemaGrammar.d.ts +2 -1
- package/dist/{llamaEvaluator → evaluator}/LlamaJsonSchemaGrammar.js +4 -2
- package/dist/evaluator/LlamaJsonSchemaGrammar.js.map +1 -0
- package/dist/{llamaEvaluator → evaluator}/LlamaModel.d.ts +24 -6
- package/dist/{llamaEvaluator → evaluator}/LlamaModel.js +51 -12
- package/dist/evaluator/LlamaModel.js.map +1 -0
- package/dist/index.d.ts +30 -17
- package/dist/index.js +29 -15
- package/dist/index.js.map +1 -1
- package/dist/state.d.ts +2 -0
- package/dist/state.js +7 -0
- package/dist/state.js.map +1 -1
- package/dist/types.d.ts +72 -3
- package/dist/types.js +5 -1
- package/dist/types.js.map +1 -1
- package/dist/utils/LlamaText.d.ts +42 -0
- package/dist/utils/LlamaText.js +207 -0
- package/dist/utils/LlamaText.js.map +1 -0
- package/dist/utils/StopGenerationDetector.d.ts +28 -0
- package/dist/utils/StopGenerationDetector.js +205 -0
- package/dist/utils/StopGenerationDetector.js.map +1 -0
- package/dist/utils/TokenStreamRegulator.d.ts +30 -0
- package/dist/utils/TokenStreamRegulator.js +96 -0
- package/dist/utils/TokenStreamRegulator.js.map +1 -0
- package/dist/utils/UnsupportedError.d.ts +2 -0
- package/dist/utils/UnsupportedError.js +7 -0
- package/dist/utils/UnsupportedError.js.map +1 -0
- package/dist/utils/appendUserMessageToChatHistory.d.ts +2 -0
- package/dist/utils/appendUserMessageToChatHistory.js +18 -0
- package/dist/utils/appendUserMessageToChatHistory.js.map +1 -0
- package/dist/utils/cmake.js +16 -11
- package/dist/utils/cmake.js.map +1 -1
- package/dist/utils/compareTokens.d.ts +2 -0
- package/dist/utils/compareTokens.js +4 -0
- package/dist/utils/compareTokens.js.map +1 -0
- package/dist/utils/findCharacterRemovalCountToFitChatHistoryInContext.d.ts +18 -0
- package/dist/utils/findCharacterRemovalCountToFitChatHistoryInContext.js +61 -0
- package/dist/utils/findCharacterRemovalCountToFitChatHistoryInContext.js.map +1 -0
- package/dist/utils/gbnfJson/GbnfGrammarGenerator.d.ts +1 -0
- package/dist/utils/gbnfJson/GbnfGrammarGenerator.js +17 -0
- package/dist/utils/gbnfJson/GbnfGrammarGenerator.js.map +1 -1
- package/dist/utils/gbnfJson/GbnfTerminal.d.ts +1 -1
- package/dist/utils/gbnfJson/GbnfTerminal.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfVerbatimText.d.ts +6 -0
- package/dist/utils/gbnfJson/terminals/GbnfVerbatimText.js +21 -0
- package/dist/utils/gbnfJson/terminals/GbnfVerbatimText.js.map +1 -0
- package/dist/utils/gbnfJson/types.d.ts +1 -1
- package/dist/utils/gbnfJson/types.js.map +1 -1
- package/dist/utils/gbnfJson/utils/validateObjectAgainstGbnfSchema.d.ts +1 -0
- package/dist/utils/gbnfJson/utils/validateObjectAgainstGbnfSchema.js.map +1 -1
- package/dist/utils/getConsoleLogPrefix.d.ts +1 -0
- package/dist/utils/getConsoleLogPrefix.js +9 -0
- package/dist/utils/getConsoleLogPrefix.js.map +1 -0
- package/dist/utils/getGbnfGrammarForGbnfJsonSchema.js +1 -15
- package/dist/utils/getGbnfGrammarForGbnfJsonSchema.js.map +1 -1
- package/dist/utils/getGrammarsFolder.d.ts +2 -1
- package/dist/utils/getGrammarsFolder.js +8 -7
- package/dist/utils/getGrammarsFolder.js.map +1 -1
- package/dist/utils/getModuleVersion.d.ts +1 -0
- package/dist/utils/getModuleVersion.js +13 -0
- package/dist/utils/getModuleVersion.js.map +1 -0
- package/dist/utils/getQueuedTokensBeforeStopTrigger.d.ts +6 -0
- package/dist/utils/getQueuedTokensBeforeStopTrigger.js +22 -0
- package/dist/utils/getQueuedTokensBeforeStopTrigger.js.map +1 -0
- package/dist/utils/getTypeScriptTypeStringForGbnfJsonSchema.d.ts +2 -0
- package/dist/utils/getTypeScriptTypeStringForGbnfJsonSchema.js +49 -0
- package/dist/utils/getTypeScriptTypeStringForGbnfJsonSchema.js.map +1 -0
- package/dist/utils/gitReleaseBundles.js +6 -5
- package/dist/utils/gitReleaseBundles.js.map +1 -1
- package/dist/utils/hashString.d.ts +1 -0
- package/dist/utils/hashString.js +8 -0
- package/dist/utils/hashString.js.map +1 -0
- package/dist/utils/isLockfileActive.d.ts +4 -0
- package/dist/utils/isLockfileActive.js +12 -0
- package/dist/utils/isLockfileActive.js.map +1 -0
- package/dist/utils/parseModelTypeDescription.d.ts +1 -1
- package/dist/utils/prettyPrintObject.d.ts +1 -0
- package/dist/utils/prettyPrintObject.js +40 -0
- package/dist/utils/prettyPrintObject.js.map +1 -0
- package/dist/utils/removeNullFields.d.ts +1 -0
- package/dist/utils/removeNullFields.js +8 -0
- package/dist/utils/removeNullFields.js.map +1 -1
- package/dist/utils/resolveChatWrapper.d.ts +4 -0
- package/dist/utils/resolveChatWrapper.js +16 -0
- package/dist/utils/resolveChatWrapper.js.map +1 -0
- package/dist/utils/resolveGithubRelease.d.ts +2 -0
- package/dist/utils/resolveGithubRelease.js +36 -0
- package/dist/utils/resolveGithubRelease.js.map +1 -0
- package/dist/utils/spawnCommand.d.ts +1 -1
- package/dist/utils/spawnCommand.js +4 -2
- package/dist/utils/spawnCommand.js.map +1 -1
- package/dist/utils/tokenizeInput.d.ts +3 -0
- package/dist/utils/tokenizeInput.js +9 -0
- package/dist/utils/tokenizeInput.js.map +1 -0
- package/dist/utils/truncateTextAndRoundToWords.d.ts +8 -0
- package/dist/utils/truncateTextAndRoundToWords.js +27 -0
- package/dist/utils/truncateTextAndRoundToWords.js.map +1 -0
- package/dist/utils/waitForLockfileRelease.d.ts +5 -0
- package/dist/utils/waitForLockfileRelease.js +20 -0
- package/dist/utils/waitForLockfileRelease.js.map +1 -0
- package/dist/utils/withLockfile.d.ts +7 -0
- package/dist/utils/withLockfile.js +44 -0
- package/dist/utils/withLockfile.js.map +1 -0
- package/dist/utils/withOra.js +11 -1
- package/dist/utils/withOra.js.map +1 -1
- package/dist/utils/withStatusLogs.d.ts +2 -1
- package/dist/utils/withStatusLogs.js +11 -8
- package/dist/utils/withStatusLogs.js.map +1 -1
- package/llama/.clang-format +1 -2
- package/llama/CMakeLists.txt +87 -2
- package/llama/addon.cpp +319 -31
- package/llama/binariesGithubRelease.json +1 -1
- package/llama/gitRelease.bundle +0 -0
- package/llama/gpuInfo/cuda-gpu-info.cu +99 -0
- package/llama/gpuInfo/cuda-gpu-info.h +7 -0
- package/llama/gpuInfo/metal-gpu-info.h +5 -0
- package/llama/gpuInfo/metal-gpu-info.mm +17 -0
- package/llama/llama.cpp.info.json +4 -0
- package/llamaBins/linux-arm64/.buildMetadata.json +1 -0
- package/llamaBins/linux-arm64/llama-addon.node +0 -0
- package/llamaBins/linux-armv7l/.buildMetadata.json +1 -0
- package/llamaBins/linux-armv7l/llama-addon.node +0 -0
- package/llamaBins/linux-x64/.buildMetadata.json +1 -0
- package/llamaBins/linux-x64/llama-addon.node +0 -0
- package/llamaBins/linux-x64-cuda/.buildMetadata.json +1 -0
- package/llamaBins/linux-x64-cuda/llama-addon.node +0 -0
- package/llamaBins/mac-arm64-metal/.buildMetadata.json +1 -0
- package/llamaBins/mac-arm64-metal/ggml-metal.metal +6491 -0
- package/llamaBins/mac-arm64-metal/llama-addon.node +0 -0
- package/llamaBins/mac-x64/.buildMetadata.json +1 -0
- package/llamaBins/mac-x64/llama-addon.node +0 -0
- package/llamaBins/win-x64/.buildMetadata.json +1 -0
- package/llamaBins/win-x64/llama-addon.exp +0 -0
- package/llamaBins/win-x64/llama-addon.node +0 -0
- package/llamaBins/win-x64-cuda/.buildMetadata.json +1 -0
- package/llamaBins/win-x64-cuda/llama-addon.exp +0 -0
- package/llamaBins/win-x64-cuda/llama-addon.lib +0 -0
- package/llamaBins/win-x64-cuda/llama-addon.node +0 -0
- package/package.json +37 -15
- package/dist/AbortError.d.ts +0 -2
- package/dist/AbortError.js +0 -7
- package/dist/AbortError.js.map +0 -1
- package/dist/ChatPromptWrapper.d.ts +0 -11
- package/dist/ChatPromptWrapper.js +0 -20
- package/dist/ChatPromptWrapper.js.map +0 -1
- package/dist/chatWrappers/ChatMLChatPromptWrapper.d.ts +0 -12
- package/dist/chatWrappers/ChatMLChatPromptWrapper.js +0 -22
- package/dist/chatWrappers/ChatMLChatPromptWrapper.js.map +0 -1
- package/dist/chatWrappers/EmptyChatPromptWrapper.d.ts +0 -4
- package/dist/chatWrappers/EmptyChatPromptWrapper.js +0 -5
- package/dist/chatWrappers/EmptyChatPromptWrapper.js.map +0 -1
- package/dist/chatWrappers/FalconChatPromptWrapper.d.ts +0 -19
- package/dist/chatWrappers/FalconChatPromptWrapper.js +0 -33
- package/dist/chatWrappers/FalconChatPromptWrapper.js.map +0 -1
- package/dist/chatWrappers/GeneralChatPromptWrapper.d.ts +0 -19
- package/dist/chatWrappers/GeneralChatPromptWrapper.js +0 -38
- package/dist/chatWrappers/GeneralChatPromptWrapper.js.map +0 -1
- package/dist/chatWrappers/LlamaChatPromptWrapper.d.ts +0 -12
- package/dist/chatWrappers/LlamaChatPromptWrapper.js +0 -23
- package/dist/chatWrappers/LlamaChatPromptWrapper.js.map +0 -1
- package/dist/chatWrappers/generateContextTextFromConversationHistory.d.ts +0 -15
- package/dist/chatWrappers/generateContextTextFromConversationHistory.js +0 -39
- package/dist/chatWrappers/generateContextTextFromConversationHistory.js.map +0 -1
- package/dist/llamaEvaluator/LlamaBins.d.ts +0 -19
- package/dist/llamaEvaluator/LlamaBins.js +0 -5
- package/dist/llamaEvaluator/LlamaBins.js.map +0 -1
- package/dist/llamaEvaluator/LlamaChatSession.js +0 -290
- package/dist/llamaEvaluator/LlamaChatSession.js.map +0 -1
- package/dist/llamaEvaluator/LlamaContext/LlamaContext.js.map +0 -1
- package/dist/llamaEvaluator/LlamaContext/types.js.map +0 -1
- package/dist/llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/firstInFirstOutStrategy.js.map +0 -1
- package/dist/llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/maximumParallelismStrategy.js.map +0 -1
- package/dist/llamaEvaluator/LlamaContext/utils/resolveBatchItemsPrioritizingStrategy.js.map +0 -1
- package/dist/llamaEvaluator/LlamaGrammar.d.ts +0 -32
- package/dist/llamaEvaluator/LlamaGrammar.js.map +0 -1
- package/dist/llamaEvaluator/LlamaGrammarEvaluationState.js.map +0 -1
- package/dist/llamaEvaluator/LlamaJsonSchemaGrammar.js.map +0 -1
- package/dist/llamaEvaluator/LlamaModel.js.map +0 -1
- package/dist/utils/binariesGithubRelease.js.map +0 -1
- package/dist/utils/clearLlamaBuild.d.ts +0 -1
- package/dist/utils/clearLlamaBuild.js +0 -12
- package/dist/utils/clearLlamaBuild.js.map +0 -1
- package/dist/utils/cloneLlamaCppRepo.d.ts +0 -2
- package/dist/utils/cloneLlamaCppRepo.js +0 -102
- package/dist/utils/cloneLlamaCppRepo.js.map +0 -1
- package/dist/utils/compileLLamaCpp.d.ts +0 -8
- package/dist/utils/compileLLamaCpp.js +0 -127
- package/dist/utils/compileLLamaCpp.js.map +0 -1
- package/dist/utils/getBin.js +0 -78
- package/dist/utils/getBin.js.map +0 -1
- package/dist/utils/getReleaseInfo.d.ts +0 -7
- package/dist/utils/getReleaseInfo.js +0 -30
- package/dist/utils/getReleaseInfo.js.map +0 -1
- package/dist/utils/getTextCompletion.d.ts +0 -3
- package/dist/utils/getTextCompletion.js +0 -12
- package/dist/utils/getTextCompletion.js.map +0 -1
- package/dist/utils/usedBinFlag.d.ts +0 -6
- package/dist/utils/usedBinFlag.js +0 -15
- package/dist/utils/usedBinFlag.js.map +0 -1
- package/llama/usedBin.json +0 -3
- package/llamaBins/mac-arm64/ggml-metal.metal +0 -2929
- package/llamaBins/mac-arm64/llama-addon.node +0 -0
- package/llamaBins/mac-x64/ggml-metal.metal +0 -2929
- /package/dist/{utils → bindings/utils}/binariesGithubRelease.d.ts +0 -0
- /package/dist/{llamaEvaluator → evaluator}/LlamaContext/types.js +0 -0
- /package/dist/{llamaEvaluator → evaluator}/LlamaContext/utils/batchItemsPrioritizingStrategies/firstInFirstOutStrategy.d.ts +0 -0
- /package/dist/{llamaEvaluator → evaluator}/LlamaContext/utils/batchItemsPrioritizingStrategies/firstInFirstOutStrategy.js +0 -0
- /package/dist/{llamaEvaluator → evaluator}/LlamaContext/utils/batchItemsPrioritizingStrategies/maximumParallelismStrategy.d.ts +0 -0
- /package/dist/{llamaEvaluator → evaluator}/LlamaContext/utils/batchItemsPrioritizingStrategies/maximumParallelismStrategy.js +0 -0
- /package/dist/{llamaEvaluator → evaluator}/LlamaContext/utils/resolveBatchItemsPrioritizingStrategy.d.ts +0 -0
- /package/dist/{llamaEvaluator → evaluator}/LlamaContext/utils/resolveBatchItemsPrioritizingStrategy.js +0 -0
- /package/dist/{llamaEvaluator → evaluator}/LlamaGrammarEvaluationState.d.ts +0 -0
|
@@ -0,0 +1,402 @@
|
|
|
1
|
+
import { DisposeAggregator, DisposedError, EventRelay, withLock } from "lifecycle-utils";
|
|
2
|
+
import { tokenizeInput } from "../utils/tokenizeInput.js";
|
|
3
|
+
import { UnsupportedError } from "../utils/UnsupportedError.js";
|
|
4
|
+
import { removeNullFields } from "../utils/removeNullFields.js";
|
|
5
|
+
import { TokenStreamRegulator } from "../utils/TokenStreamRegulator.js";
|
|
6
|
+
import { StopGenerationDetector } from "../utils/StopGenerationDetector.js";
|
|
7
|
+
import { UNKNOWN_UNICODE_CHAR } from "../consts.js";
|
|
8
|
+
import { getQueuedTokensBeforeStopTrigger } from "../utils/getQueuedTokensBeforeStopTrigger.js";
|
|
9
|
+
import { LlamaGrammarEvaluationState } from "./LlamaGrammarEvaluationState.js";
|
|
10
|
+
/**
 * Default context shift size for a sequence: one tenth of the context size (rounded down),
 * and never less than a single token.
 */
const defaultContextShiftSize = (sequence) => {
    const tenthOfContextSize = Math.floor(sequence.context.contextSize / 10);
    return Math.max(1, tenthOfContextSize);
};
|
|
11
|
+
/**
 * Default minimum number of prefix tokens to keep for a sequence: one tenth of the
 * context size (rounded down), and never less than a single token.
 */
const defaultMinPrefixKeepTokens = (sequence) => {
    const tenthOfContextSize = Math.floor(sequence.context.contextSize / 10);
    return Math.max(1, tenthOfContextSize);
};
|
|
12
|
+
export class LlamaCompletion {
|
|
13
|
+
/** @internal */ _disposeAggregator = new DisposeAggregator();
|
|
14
|
+
/** @internal */ _autoDisposeSequence;
|
|
15
|
+
/** @internal */ _sequence;
|
|
16
|
+
onDispose = new EventRelay();
|
|
17
|
+
constructor({ contextSequence, autoDisposeSequence = true }) {
|
|
18
|
+
this._sequence = contextSequence;
|
|
19
|
+
this._autoDisposeSequence = autoDisposeSequence;
|
|
20
|
+
this._disposeAggregator.add(this._sequence.onDispose.createListener(() => {
|
|
21
|
+
this.dispose();
|
|
22
|
+
}));
|
|
23
|
+
this._disposeAggregator.add(this.onDispose.dispatchEvent);
|
|
24
|
+
}
|
|
25
|
+
dispose({ disposeSequence = this._autoDisposeSequence } = {}) {
|
|
26
|
+
if (this._sequence == null || this.disposed)
|
|
27
|
+
return;
|
|
28
|
+
if (disposeSequence)
|
|
29
|
+
this._sequence.dispose();
|
|
30
|
+
this._sequence = null;
|
|
31
|
+
this._disposeAggregator.dispose();
|
|
32
|
+
}
|
|
33
|
+
/** @hidden */
|
|
34
|
+
[Symbol.dispose]() {
|
|
35
|
+
return this.dispose();
|
|
36
|
+
}
|
|
37
|
+
get disposed() {
|
|
38
|
+
return this._sequence == null || this._sequence.disposed;
|
|
39
|
+
}
|
|
40
|
+
get infillSupported() {
|
|
41
|
+
if (this._sequence == null)
|
|
42
|
+
throw new DisposedError();
|
|
43
|
+
return this._sequence.model.tokens.infill.prefix != null &&
|
|
44
|
+
this._sequence.model.tokens.infill.suffix != null &&
|
|
45
|
+
this._sequence.model.tokens.infill.middle != null;
|
|
46
|
+
}
|
|
47
|
+
/**
|
|
48
|
+
* Generate a completion for an input.
|
|
49
|
+
*/
|
|
50
|
+
async generateCompletion(input, options = {}) {
|
|
51
|
+
const { response } = await this.generateCompletionWithMeta(input, options);
|
|
52
|
+
return response;
|
|
53
|
+
}
|
|
54
|
+
/**
|
|
55
|
+
* Same as `generateCompletion`, but returns additional metadata about the generation.
|
|
56
|
+
* See `generateCompletion` for more information.
|
|
57
|
+
*/
|
|
58
|
+
async generateCompletionWithMeta(input, { onToken, signal, maxTokens, temperature, minP, topK, topP, trimWhitespaceSuffix = false, repeatPenalty = {}, evaluationPriority = 5, grammar, stopGenerationTriggers, contextShiftSize = defaultContextShiftSize, disableContextShift } = {}) {
|
|
59
|
+
if (this._sequence == null || this.disposed)
|
|
60
|
+
throw new DisposedError();
|
|
61
|
+
const bosToken = this._sequence.model.tokens.bos;
|
|
62
|
+
const shouldPrependBosToken = this._sequence.model.tokens.shouldPrependBosToken;
|
|
63
|
+
async function fitInputIntoContext({ maxTokens, tokens }) {
|
|
64
|
+
const res = [];
|
|
65
|
+
if (shouldPrependBosToken && bosToken != null)
|
|
66
|
+
res.push(bosToken);
|
|
67
|
+
const inputTokensSize = Math.max(0, Math.min(maxTokens - res.length, tokens.length));
|
|
68
|
+
if (inputTokensSize === 0 && tokens.length > 0)
|
|
69
|
+
throw new Error("The context size is too small to generate a response for the given input");
|
|
70
|
+
const slicedTokens = tokens.slice(-inputTokensSize);
|
|
71
|
+
res.push(...slicedTokens);
|
|
72
|
+
return res;
|
|
73
|
+
}
|
|
74
|
+
const ensureNotAborted = () => {
|
|
75
|
+
if (signal?.aborted)
|
|
76
|
+
throw signal.reason;
|
|
77
|
+
if (this.disposed)
|
|
78
|
+
throw new DisposedError();
|
|
79
|
+
};
|
|
80
|
+
return await withLock(this, "generateCompletion", signal, async () => {
|
|
81
|
+
ensureNotAborted();
|
|
82
|
+
if (this._sequence == null || this.disposed)
|
|
83
|
+
throw new DisposedError();
|
|
84
|
+
const resolvedInput = tokenizeInput(input, this._sequence.model.tokenize);
|
|
85
|
+
const resolvedContextShiftSize = await resolveContextShiftSize(contextShiftSize, this._sequence);
|
|
86
|
+
ensureNotAborted();
|
|
87
|
+
const inputTokens = await fitInputIntoContext({
|
|
88
|
+
maxTokens: this._sequence.context.contextSize - resolvedContextShiftSize,
|
|
89
|
+
tokens: resolvedInput
|
|
90
|
+
});
|
|
91
|
+
ensureNotAborted();
|
|
92
|
+
const resolvedMaxTokens = !disableContextShift
|
|
93
|
+
? maxTokens
|
|
94
|
+
: (maxTokens != null && maxTokens > 0)
|
|
95
|
+
? Math.min(maxTokens, this._sequence.context.contextSize - inputTokens.length)
|
|
96
|
+
: this._sequence.context.contextSize - inputTokens.length;
|
|
97
|
+
return await this._generateResponse(inputTokens, {
|
|
98
|
+
onToken,
|
|
99
|
+
signal,
|
|
100
|
+
maxTokens: resolvedMaxTokens,
|
|
101
|
+
temperature,
|
|
102
|
+
minP,
|
|
103
|
+
topK,
|
|
104
|
+
topP,
|
|
105
|
+
trimWhitespaceSuffix,
|
|
106
|
+
repeatPenalty,
|
|
107
|
+
evaluationPriority,
|
|
108
|
+
grammar,
|
|
109
|
+
contextShiftSize,
|
|
110
|
+
stopGenerationTriggers
|
|
111
|
+
}, {
|
|
112
|
+
async contextShift({ shiftSize, res, pendingTokens, sequence }) {
|
|
113
|
+
return {
|
|
114
|
+
newContextState: await fitInputIntoContext({
|
|
115
|
+
maxTokens: sequence.context.contextSize - shiftSize,
|
|
116
|
+
tokens: [...resolvedInput, ...res, ...pendingTokens]
|
|
117
|
+
})
|
|
118
|
+
};
|
|
119
|
+
}
|
|
120
|
+
});
|
|
121
|
+
});
|
|
122
|
+
}
|
|
123
|
+
/**
|
|
124
|
+
* Infill (also known as Fill-In-Middle), generates a completion for an input (`prefixInput`) that
|
|
125
|
+
* should connect to a given continuation (`suffixInput`).
|
|
126
|
+
* For example, for `prefixInput: "123"` and `suffixInput: "789"`, the model is expected to generate `456`
|
|
127
|
+
* to make the final text be `123456789`.
|
|
128
|
+
*/
|
|
129
|
+
async generateInfillCompletion(prefixInput, suffixInput, options = {}) {
|
|
130
|
+
const { response } = await this.generateInfillCompletionWithMeta(prefixInput, suffixInput, options);
|
|
131
|
+
return response;
|
|
132
|
+
}
|
|
133
|
+
/**
|
|
134
|
+
* Same as `generateInfillCompletion`, but returns additional metadata about the generation.
|
|
135
|
+
* See `generateInfillCompletion` for more information.
|
|
136
|
+
*/
|
|
137
|
+
async generateInfillCompletionWithMeta(prefixInput, suffixInput, { onToken, signal, maxTokens, temperature, minP, topK, topP, trimWhitespaceSuffix = false, repeatPenalty = {}, evaluationPriority = 5, grammar, contextShiftSize = defaultContextShiftSize, stopGenerationTriggers, minPrefixKeepTokens = defaultMinPrefixKeepTokens, disableContextShift = false } = {}) {
|
|
138
|
+
if (this._sequence == null || this.disposed)
|
|
139
|
+
throw new DisposedError();
|
|
140
|
+
const prefixToken = this._sequence.model.tokens.infill.prefix;
|
|
141
|
+
const suffixToken = this._sequence.model.tokens.infill.suffix;
|
|
142
|
+
const middleToken = this._sequence.model.tokens.infill.middle;
|
|
143
|
+
const bosToken = this._sequence.model.tokens.bos;
|
|
144
|
+
const shouldPrependBosToken = this._sequence.model.tokens.shouldPrependBosToken;
|
|
145
|
+
if (prefixToken == null || suffixToken == null || middleToken == null)
|
|
146
|
+
throw new UnsupportedError("Infill completions are not supported by this model");
|
|
147
|
+
async function fitInputIntoContext({ maxTokens, prefixTokens, suffixTokens, sequence }) {
|
|
148
|
+
if (prefixToken == null || suffixToken == null || middleToken == null)
|
|
149
|
+
throw new UnsupportedError("Infill completions are not supported by this model");
|
|
150
|
+
// 3 - InfillPrefix token, InfillSuffix token, InfillMiddle token
|
|
151
|
+
const specialTokensInContext = 3 + ((shouldPrependBosToken && bosToken != null)
|
|
152
|
+
? 1
|
|
153
|
+
: 0);
|
|
154
|
+
const resolvedMaxTokens = maxTokens - specialTokensInContext;
|
|
155
|
+
let sizeLeftToFill = resolvedMaxTokens;
|
|
156
|
+
let suffixTokensSize = Math.min(sizeLeftToFill, suffixTokens.length);
|
|
157
|
+
sizeLeftToFill -= suffixTokensSize;
|
|
158
|
+
let prefixTokensSize = Math.min(sizeLeftToFill, prefixTokens.length);
|
|
159
|
+
sizeLeftToFill -= prefixTokensSize;
|
|
160
|
+
if (sizeLeftToFill <= 0 && disableContextShift)
|
|
161
|
+
throw new Error("The context size is too small to generate a response for the given input, and context shift is disabled. " +
|
|
162
|
+
"Consider removing `disableContextShift` or reducing the input size.");
|
|
163
|
+
const resolvedMinPrefixKeepTokens = Math.min(Math.min(resolvedMaxTokens, prefixTokens.length), Math.max(1, Math.floor(minPrefixKeepTokens instanceof Function
|
|
164
|
+
? await minPrefixKeepTokens(sequence)
|
|
165
|
+
: minPrefixKeepTokens)));
|
|
166
|
+
if (prefixTokensSize < resolvedMinPrefixKeepTokens) {
|
|
167
|
+
const diffToFill = Math.min(suffixTokensSize, resolvedMinPrefixKeepTokens - prefixTokensSize);
|
|
168
|
+
prefixTokensSize += diffToFill;
|
|
169
|
+
suffixTokensSize -= diffToFill;
|
|
170
|
+
}
|
|
171
|
+
const resolvedPrefixTokens = prefixTokens.slice(-prefixTokensSize);
|
|
172
|
+
const resolvedSuffixTokens = suffixTokens.slice(0, suffixTokensSize);
|
|
173
|
+
const newContextState = [];
|
|
174
|
+
if (shouldPrependBosToken && bosToken != null)
|
|
175
|
+
newContextState.push(bosToken);
|
|
176
|
+
newContextState.push(prefixToken);
|
|
177
|
+
newContextState.push(...resolvedPrefixTokens);
|
|
178
|
+
newContextState.push(suffixToken);
|
|
179
|
+
newContextState.push(...resolvedSuffixTokens);
|
|
180
|
+
newContextState.push(middleToken);
|
|
181
|
+
return newContextState;
|
|
182
|
+
}
|
|
183
|
+
const ensureNotAborted = () => {
|
|
184
|
+
if (signal?.aborted)
|
|
185
|
+
throw signal.reason;
|
|
186
|
+
if (this.disposed)
|
|
187
|
+
throw new DisposedError();
|
|
188
|
+
};
|
|
189
|
+
return await withLock(this, "generateCompletion", signal, async () => {
|
|
190
|
+
ensureNotAborted();
|
|
191
|
+
if (this._sequence == null || this.disposed)
|
|
192
|
+
throw new DisposedError();
|
|
193
|
+
const resolvedPrefixInputTokens = tokenizeInput(prefixInput, this._sequence.model.tokenize);
|
|
194
|
+
const resolvedSuffixInputTokens = tokenizeInput(suffixInput, this._sequence.model.tokenize);
|
|
195
|
+
const resolvedContextShiftSize = await resolveContextShiftSize(contextShiftSize, this._sequence);
|
|
196
|
+
ensureNotAborted();
|
|
197
|
+
const inputTokens = await fitInputIntoContext({
|
|
198
|
+
maxTokens: this._sequence.context.contextSize - resolvedContextShiftSize,
|
|
199
|
+
prefixTokens: resolvedPrefixInputTokens,
|
|
200
|
+
suffixTokens: resolvedSuffixInputTokens,
|
|
201
|
+
sequence: this._sequence
|
|
202
|
+
});
|
|
203
|
+
ensureNotAborted();
|
|
204
|
+
const resolvedMaxTokens = !disableContextShift
|
|
205
|
+
? maxTokens
|
|
206
|
+
: (maxTokens != null && maxTokens > 0)
|
|
207
|
+
? Math.min(maxTokens, this._sequence.context.contextSize - inputTokens.length)
|
|
208
|
+
: this._sequence.context.contextSize - inputTokens.length;
|
|
209
|
+
return await this._generateResponse(inputTokens, {
|
|
210
|
+
onToken,
|
|
211
|
+
signal,
|
|
212
|
+
maxTokens: resolvedMaxTokens,
|
|
213
|
+
temperature,
|
|
214
|
+
minP,
|
|
215
|
+
topK,
|
|
216
|
+
topP,
|
|
217
|
+
trimWhitespaceSuffix,
|
|
218
|
+
repeatPenalty,
|
|
219
|
+
evaluationPriority,
|
|
220
|
+
grammar,
|
|
221
|
+
contextShiftSize,
|
|
222
|
+
stopGenerationTriggers
|
|
223
|
+
}, {
|
|
224
|
+
async contextShift({ shiftSize, res, pendingTokens, sequence }) {
|
|
225
|
+
return {
|
|
226
|
+
newContextState: await fitInputIntoContext({
|
|
227
|
+
maxTokens: sequence.context.contextSize - shiftSize,
|
|
228
|
+
prefixTokens: [...resolvedPrefixInputTokens, ...res, ...pendingTokens],
|
|
229
|
+
suffixTokens: resolvedSuffixInputTokens,
|
|
230
|
+
sequence
|
|
231
|
+
})
|
|
232
|
+
};
|
|
233
|
+
}
|
|
234
|
+
});
|
|
235
|
+
});
|
|
236
|
+
}
|
|
237
|
+
/** @internal */
|
|
238
|
+
async _generateResponse(tokens, { onToken, signal, maxTokens, temperature, minP, topK, topP, trimWhitespaceSuffix = false, repeatPenalty = {}, evaluationPriority = 5, grammar, contextShiftSize = defaultContextShiftSize, stopGenerationTriggers }, { contextShift }) {
|
|
239
|
+
if (this._sequence == null)
|
|
240
|
+
throw new DisposedError();
|
|
241
|
+
const sequence = this._sequence;
|
|
242
|
+
const model = sequence.model;
|
|
243
|
+
const context = sequence.context;
|
|
244
|
+
const eosToken = model.tokens.eos;
|
|
245
|
+
const eotToken = model.tokens.infill.eot;
|
|
246
|
+
const res = [];
|
|
247
|
+
const pendingTokens = [];
|
|
248
|
+
const grammarEvaluationState = grammar != null
|
|
249
|
+
? new LlamaGrammarEvaluationState({ grammar })
|
|
250
|
+
: undefined;
|
|
251
|
+
const { lastTokens: repeatPenaltyLastTokens = 64, punishTokensFilter, penalizeNewLine, penalty, frequencyPenalty, presencePenalty } = repeatPenalty === false
|
|
252
|
+
? { lastTokens: 0 }
|
|
253
|
+
: repeatPenalty;
|
|
254
|
+
const streamRegulator = new TokenStreamRegulator();
|
|
255
|
+
const stopGenerationDetector = new StopGenerationDetector();
|
|
256
|
+
const locksToReleaseOnValidGeneration = [];
|
|
257
|
+
const repeatPenaltyEnabled = repeatPenaltyLastTokens > 0;
|
|
258
|
+
let inputTokens = tokens;
|
|
259
|
+
let generatedTokens = 0;
|
|
260
|
+
if (grammar != null)
|
|
261
|
+
StopGenerationDetector.resolveStopTriggers(grammar.stopGenerationTriggers, model.tokenize)
|
|
262
|
+
.map((stopTrigger) => stopGenerationDetector.addStopTrigger(stopTrigger));
|
|
263
|
+
if (stopGenerationTriggers != null)
|
|
264
|
+
StopGenerationDetector.resolveStopTriggers(stopGenerationTriggers, model.tokenize)
|
|
265
|
+
.map((stopTrigger) => stopGenerationDetector.addStopTrigger(stopTrigger));
|
|
266
|
+
const ensureNotAborted = () => {
|
|
267
|
+
if (signal?.aborted)
|
|
268
|
+
throw signal.reason;
|
|
269
|
+
if (this.disposed)
|
|
270
|
+
throw new DisposedError();
|
|
271
|
+
};
|
|
272
|
+
const getPenaltyTokens = () => {
|
|
273
|
+
if (this._sequence == null)
|
|
274
|
+
throw new DisposedError();
|
|
275
|
+
let punishTokens = res.slice(-repeatPenaltyLastTokens);
|
|
276
|
+
if (punishTokensFilter != null)
|
|
277
|
+
punishTokens = punishTokensFilter(punishTokens);
|
|
278
|
+
if (penalizeNewLine == null || !penalizeNewLine) {
|
|
279
|
+
const nlToken = model.tokens.nl;
|
|
280
|
+
if (nlToken != null)
|
|
281
|
+
punishTokens = punishTokens.filter(token => token !== nlToken);
|
|
282
|
+
}
|
|
283
|
+
return punishTokens;
|
|
284
|
+
};
|
|
285
|
+
// eslint-disable-next-line no-constant-condition
|
|
286
|
+
while (true) {
|
|
287
|
+
ensureNotAborted();
|
|
288
|
+
let shouldContextShift = false;
|
|
289
|
+
let { firstDifferentIndex } = sequence.compareContextTokens(inputTokens);
|
|
290
|
+
// we need to decode at least one token to generate a response
|
|
291
|
+
if (firstDifferentIndex === inputTokens.length && firstDifferentIndex > 0)
|
|
292
|
+
firstDifferentIndex -= 1;
|
|
293
|
+
inputTokens.splice(0, firstDifferentIndex);
|
|
294
|
+
if (firstDifferentIndex < sequence.nextTokenIndex) {
|
|
295
|
+
await sequence.eraseContextTokenRanges([{
|
|
296
|
+
start: firstDifferentIndex,
|
|
297
|
+
end: sequence.nextTokenIndex
|
|
298
|
+
}]);
|
|
299
|
+
ensureNotAborted();
|
|
300
|
+
}
|
|
301
|
+
const evaluationIterator = sequence.evaluate(inputTokens, removeNullFields({
|
|
302
|
+
temperature, minP, topK, topP,
|
|
303
|
+
grammarEvaluationState,
|
|
304
|
+
repeatPenalty: !repeatPenaltyEnabled ? undefined : {
|
|
305
|
+
punishTokens: getPenaltyTokens,
|
|
306
|
+
penalty,
|
|
307
|
+
frequencyPenalty,
|
|
308
|
+
presencePenalty
|
|
309
|
+
},
|
|
310
|
+
evaluationPriority,
|
|
311
|
+
yieldEosToken: true
|
|
312
|
+
}));
|
|
313
|
+
for await (const token of evaluationIterator) {
|
|
314
|
+
ensureNotAborted();
|
|
315
|
+
generatedTokens++;
|
|
316
|
+
const tokens = [token];
|
|
317
|
+
const text = model.detokenize([token]);
|
|
318
|
+
const queuedTokenRelease = streamRegulator.addChunk({ tokens, text });
|
|
319
|
+
if (text === UNKNOWN_UNICODE_CHAR || ((grammar?.trimWhitespaceSuffix || trimWhitespaceSuffix) && text.trim() === "")) {
|
|
320
|
+
locksToReleaseOnValidGeneration.push(queuedTokenRelease.createTextIndexLock(0));
|
|
321
|
+
}
|
|
322
|
+
else {
|
|
323
|
+
while (locksToReleaseOnValidGeneration.length > 0)
|
|
324
|
+
locksToReleaseOnValidGeneration.shift().dispose();
|
|
325
|
+
}
|
|
326
|
+
stopGenerationDetector.recordGeneration({ text, tokens, queuedTokenRelease });
|
|
327
|
+
pendingTokens.push(...streamRegulator.popFreeChunkTokens());
|
|
328
|
+
if (stopGenerationDetector.hasTriggeredStops || token === eosToken || token === eotToken) {
|
|
329
|
+
const triggeredStops = stopGenerationDetector.getTriggeredStops();
|
|
330
|
+
const partiallyFreeTokens = streamRegulator.getPartiallyFreeChunk();
|
|
331
|
+
const queuedTokensBeforeStopTrigger = getQueuedTokensBeforeStopTrigger(triggeredStops, partiallyFreeTokens, model.tokenize);
|
|
332
|
+
pendingTokens.push(...queuedTokensBeforeStopTrigger);
|
|
333
|
+
const [firstRemainingGenerationAfterStop] = triggeredStops
|
|
334
|
+
.map((stopTrigger) => stopTrigger.remainingGenerations)
|
|
335
|
+
.filter((remainingGenerations) => remainingGenerations.length > 0)
|
|
336
|
+
.flat(1);
|
|
337
|
+
if (pendingTokens.length > 0)
|
|
338
|
+
onToken?.(pendingTokens.slice());
|
|
339
|
+
res.push(...pendingTokens);
|
|
340
|
+
pendingTokens.length = 0;
|
|
341
|
+
let modelResponse = model.detokenize(res);
|
|
342
|
+
if (grammar?.trimWhitespaceSuffix || trimWhitespaceSuffix)
|
|
343
|
+
modelResponse = modelResponse.trimEnd();
|
|
344
|
+
return {
|
|
345
|
+
response: modelResponse,
|
|
346
|
+
metadata: {
|
|
347
|
+
remainingGenerationAfterStop: firstRemainingGenerationAfterStop,
|
|
348
|
+
stopReason: (token === eosToken || token === eotToken)
|
|
349
|
+
? "eosToken"
|
|
350
|
+
: "stopGenerationTrigger"
|
|
351
|
+
}
|
|
352
|
+
};
|
|
353
|
+
}
|
|
354
|
+
if (pendingTokens.length > 0) {
|
|
355
|
+
onToken?.(pendingTokens.slice());
|
|
356
|
+
res.push(...pendingTokens);
|
|
357
|
+
pendingTokens.length = 0;
|
|
358
|
+
}
|
|
359
|
+
if (maxTokens != null && maxTokens > 0 && generatedTokens >= maxTokens) {
|
|
360
|
+
let modelResponse = model.detokenize(res);
|
|
361
|
+
if (grammar?.trimWhitespaceSuffix || trimWhitespaceSuffix)
|
|
362
|
+
modelResponse = modelResponse.trimEnd();
|
|
363
|
+
return {
|
|
364
|
+
response: modelResponse,
|
|
365
|
+
metadata: {
|
|
366
|
+
stopReason: "maxTokens"
|
|
367
|
+
}
|
|
368
|
+
};
|
|
369
|
+
}
|
|
370
|
+
if (sequence.nextTokenIndex >= context.contextSize) {
|
|
371
|
+
shouldContextShift = true;
|
|
372
|
+
break;
|
|
373
|
+
}
|
|
374
|
+
}
|
|
375
|
+
if (shouldContextShift) {
|
|
376
|
+
const resolvedContextShiftSize = await resolveContextShiftSize(contextShiftSize, sequence);
|
|
377
|
+
ensureNotAborted();
|
|
378
|
+
const { newContextState } = await contextShift({
|
|
379
|
+
shiftSize: resolvedContextShiftSize,
|
|
380
|
+
res,
|
|
381
|
+
pendingTokens,
|
|
382
|
+
sequence
|
|
383
|
+
});
|
|
384
|
+
ensureNotAborted();
|
|
385
|
+
inputTokens = newContextState;
|
|
386
|
+
continue;
|
|
387
|
+
}
|
|
388
|
+
break;
|
|
389
|
+
}
|
|
390
|
+
throw new Error("The context size is too small to generate a response");
|
|
391
|
+
}
|
|
392
|
+
}
|
|
393
|
+
async function resolveContextShiftSize(contextShiftSize, sequence) {
|
|
394
|
+
if (typeof contextShiftSize === "number")
|
|
395
|
+
return contextShiftSize;
|
|
396
|
+
else if (contextShiftSize instanceof Function)
|
|
397
|
+
return Math.min(sequence.context.contextSize, Math.max(1, Math.floor(contextShiftSize instanceof Function
|
|
398
|
+
? await contextShiftSize(sequence)
|
|
399
|
+
: contextShiftSize)));
|
|
400
|
+
return defaultContextShiftSize(sequence);
|
|
401
|
+
}
|
|
402
|
+
//# sourceMappingURL=LlamaCompletion.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"LlamaCompletion.js","sourceRoot":"","sources":["../../src/evaluator/LlamaCompletion.ts"],"names":[],"mappings":"AAAA,OAAO,EAAC,iBAAiB,EAAE,aAAa,EAAE,UAAU,EAAE,QAAQ,EAAC,MAAM,iBAAiB,CAAC;AAGvF,OAAO,EAAC,aAAa,EAAC,MAAM,2BAA2B,CAAC;AACxD,OAAO,EAAC,gBAAgB,EAAC,MAAM,8BAA8B,CAAC;AAC9D,OAAO,EAAC,gBAAgB,EAAC,MAAM,8BAA8B,CAAC;AAC9D,OAAO,EAAyB,oBAAoB,EAAC,MAAM,kCAAkC,CAAC;AAC9F,OAAO,EAAC,sBAAsB,EAAwB,MAAM,oCAAoC,CAAC;AACjG,OAAO,EAAC,oBAAoB,EAAC,MAAM,cAAc,CAAC;AAClD,OAAO,EAAC,gCAAgC,EAAC,MAAM,8CAA8C,CAAC;AAC9F,OAAO,EAAC,2BAA2B,EAAC,MAAM,kCAAkC,CAAC;AAwG7E,MAAM,uBAAuB,GAAG,CAC5B,CAAC,QAAQ,EAAE,EAAE,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,OAAO,CAAC,WAAW,GAAG,EAAE,CAAC,CAAC,CACZ,CAAC;AACjE,MAAM,0BAA0B,GAAG,CAC/B,CAAC,QAAQ,EAAE,EAAE,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,OAAO,CAAC,WAAW,GAAG,EAAE,CAAC,CAAC,CACb,CAAC;AAEhE,MAAM,OAAO,eAAe;IACxB,gBAAgB,CAAkB,kBAAkB,GAAG,IAAI,iBAAiB,EAAE,CAAC;IAC/E,gBAAgB,CAAkB,oBAAoB,CAAU;IAChE,gBAAgB,CAAS,SAAS,CAA8B;IAChD,SAAS,GAAG,IAAI,UAAU,EAAQ,CAAC;IAEnD,YAAmB,EACf,eAAe,EACf,mBAAmB,GAAG,IAAI,EACL;QACrB,IAAI,CAAC,SAAS,GAAG,eAAe,CAAC;QACjC,IAAI,CAAC,oBAAoB,GAAG,mBAAmB,CAAC;QAEhD,IAAI,CAAC,kBAAkB,CAAC,GAAG,CACvB,IAAI,CAAC,SAAS,CAAC,SAAS,CAAC,cAAc,CAAC,GAAG,EAAE;YACzC,IAAI,CAAC,OAAO,EAAE,CAAC;QACnB,CAAC,CAAC,CACL,CAAC;QACF,IAAI,CAAC,kBAAkB,CAAC,GAAG,CAAC,IAAI,CAAC,SAAS,CAAC,aAAa,CAAC,CAAC;IAC9D,CAAC;IAEM,OAAO,CAAC,EAAC,eAAe,GAAG,IAAI,CAAC,oBAAoB,KAAiC,EAAE;QAC1F,IAAI,IAAI,CAAC,SAAS,IAAI,IAAI,IAAI,IAAI,CAAC,QAAQ;YACvC,OAAO;QAEX,IAAI,eAAe;YACf,IAAI,CAAC,SAAS,CAAC,OAAO,EAAE,CAAC;QAE7B,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC;QAEtB,IAAI,CAAC,kBAAkB,CAAC,OAAO,EAAE,CAAC;IACtC,CAAC;IAED,cAAc;IACP,CAAC,MAAM,CAAC,OAAO,CAAC;QACnB,OAAO,IAAI,CAAC,OAAO,EAAE,CAAC;IAC1B,CAAC;IAED,IAAW,QAAQ;QACf,OAAO,IAAI,CAAC,SAAS,IAAI,IAAI,IAAI,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC;IAC7D,CAAC;IAED,IAAW,eAAe;QACtB,IAAI,IAAI,CAAC,SAAS,IAAI,IAAI;YACtB,MAAM,IAAI,aAAa,EAAE,CAAC;QAE9B,OAAO,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,MAAM,CAAC,MAAM,C
AAC,MAAM,IAAI,IAAI;YACpD,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,MAAM,CAAC,MAAM,CAAC,MAAM,IAAI,IAAI;YACjD,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,MAAM,CAAC,MAAM,CAAC,MAAM,IAAI,IAAI,CAAC;IAC1D,CAAC;IAED;;OAEG;IACI,KAAK,CAAC,kBAAkB,CAAC,KAAmC,EAAE,UAA4C,EAAE;QAC/G,MAAM,EAAC,QAAQ,EAAC,GAAG,MAAM,IAAI,CAAC,0BAA0B,CAAC,KAAK,EAAE,OAAO,CAAC,CAAC;QAEzE,OAAO,QAAQ,CAAC;IACpB,CAAC;IAED;;;OAGG;IACI,KAAK,CAAC,0BAA0B,CACnC,KAAmC,EACnC,EACI,OAAO,EACP,MAAM,EACN,SAAS,EACT,WAAW,EACX,IAAI,EACJ,IAAI,EACJ,IAAI,EACJ,oBAAoB,GAAG,KAAK,EAC5B,aAAa,GAAG,EAAE,EAClB,kBAAkB,GAAG,CAAC,EACtB,OAAO,EACP,sBAAsB,EACtB,gBAAgB,GAAG,uBAAuB,EAC1C,mBAAmB,KACe,EAAE;QAExC,IAAI,IAAI,CAAC,SAAS,IAAI,IAAI,IAAI,IAAI,CAAC,QAAQ;YACvC,MAAM,IAAI,aAAa,EAAE,CAAC;QAE9B,MAAM,QAAQ,GAAG,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,MAAM,CAAC,GAAG,CAAC;QACjD,MAAM,qBAAqB,GAAG,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,MAAM,CAAC,qBAAqB,CAAC;QAEhF,KAAK,UAAU,mBAAmB,CAAC,EAC/B,SAAS,EAAE,MAAM,EAGpB;YACG,MAAM,GAAG,GAAG,EAAE,CAAC;YAEf,IAAI,qBAAqB,IAAI,QAAQ,IAAI,IAAI;gBACzC,GAAG,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;YAEvB,MAAM,eAAe,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,SAAS,GAAG,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC;YAErF,IAAI,eAAe,KAAK,CAAC,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC;gBAC1C,MAAM,IAAI,KAAK,CAAC,0EAA0E,CAAC,CAAC;YAEhG,MAAM,YAAY,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC,eAAe,CAAC,CAAC;YACpD,GAAG,CAAC,IAAI,CAAC,GAAG,YAAY,CAAC,CAAC;YAE1B,OAAO,GAAG,CAAC;QACf,CAAC;QAED,MAAM,gBAAgB,GAAG,GAAG,EAAE;YAC1B,IAAI,MAAM,EAAE,OAAO;gBACf,MAAM,MAAM,CAAC,MAAM,CAAC;YAExB,IAAI,IAAI,CAAC,QAAQ;gBACb,MAAM,IAAI,aAAa,EAAE,CAAC;QAClC,CAAC,CAAC;QAEF,OAAO,MAAM,QAAQ,CAAC,IAAI,EAAE,oBAAoB,EAAE,MAAM,EAAE,KAAK,IAAI,EAAE;YACjE,gBAAgB,EAAE,CAAC;YAEnB,IAAI,IAAI,CAAC,SAAS,IAAI,IAAI,IAAI,IAAI,CAAC,QAAQ;gBACvC,MAAM,IAAI,aAAa,EAAE,CAAC;YAE9B,MAAM,aAAa,GAAG,aAAa,CAAC,KAAK,EAAE,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC;YAC1E,MAAM,wBAAwB,GAAG,MAAM,uBAAuB,CAAC,gBAAgB,EAAE,IAAI,CAAC,SAAS,CAAC,CAAC;YACjG,gBAAgB,EAAE,CAAC;YAEnB,MAAM,WAAW,GAAG,MAAM,mBAAmB,CAAC;gBAC1C,SAAS,EAAE,IAAI,CAAC,SAAS,CAAC,OAAO,CAAC,WAAW,GA
AG,wBAAwB;gBACxE,MAAM,EAAE,aAAa;aACxB,CAAC,CAAC;YACH,gBAAgB,EAAE,CAAC;YACnB,MAAM,iBAAiB,GAAG,CAAC,mBAAmB;gBAC1C,CAAC,CAAC,SAAS;gBACX,CAAC,CAAC,CAAC,SAAS,IAAI,IAAI,IAAI,SAAS,GAAG,CAAC,CAAC;oBAClC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,SAAS,EAAE,IAAI,CAAC,SAAS,CAAC,OAAO,CAAC,WAAW,GAAG,WAAW,CAAC,MAAM,CAAC;oBAC9E,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,OAAO,CAAC,WAAW,GAAG,WAAW,CAAC,MAAM,CAAC;YAElE,OAAO,MAAM,IAAI,CAAC,iBAAiB,CAAC,WAAW,EAAE;gBAC7C,OAAO;gBACP,MAAM;gBACN,SAAS,EAAE,iBAAiB;gBAC5B,WAAW;gBACX,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,oBAAoB;gBACpB,aAAa;gBACb,kBAAkB;gBAClB,OAAO;gBACP,gBAAgB;gBAChB,sBAAsB;aACzB,EAAE;gBACC,KAAK,CAAC,YAAY,CAAC,EAAC,SAAS,EAAE,GAAG,EAAE,aAAa,EAAE,QAAQ,EAAC;oBAGxD,OAAO;wBACH,eAAe,EAAE,MAAM,mBAAmB,CAAC;4BACvC,SAAS,EAAE,QAAQ,CAAC,OAAO,CAAC,WAAW,GAAG,SAAS;4BACnD,MAAM,EAAE,CAAC,GAAG,aAAa,EAAE,GAAG,GAAG,EAAE,GAAG,aAAa,CAAC;yBACvD,CAAC;qBACL,CAAC;gBACN,CAAC;aACJ,CAAC,CAAC;QACP,CAAC,CAAC,CAAC;IACP,CAAC;IAED;;;;;OAKG;IACI,KAAK,CAAC,wBAAwB,CACjC,WAAyC,EACzC,WAAyC,EACzC,UAAwC,EAAE;QAE1C,MAAM,EAAC,QAAQ,EAAC,GAAG,MAAM,IAAI,CAAC,gCAAgC,CAAC,WAAW,EAAE,WAAW,EAAE,OAAO,CAAC,CAAC;QAElG,OAAO,QAAQ,CAAC;IACpB,CAAC;IAED;;;OAGG;IACI,KAAK,CAAC,gCAAgC,CACzC,WAAyC,EACzC,WAAyC,EACzC,EACI,OAAO,EACP,MAAM,EACN,SAAS,EACT,WAAW,EACX,IAAI,EACJ,IAAI,EACJ,IAAI,EACJ,oBAAoB,GAAG,KAAK,EAC5B,aAAa,GAAG,EAAE,EAClB,kBAAkB,GAAG,CAAC,EACtB,OAAO,EACP,gBAAgB,GAAG,uBAAuB,EAC1C,sBAAsB,EACtB,mBAAmB,GAAG,0BAA0B,EAChD,mBAAmB,GAAG,KAAK,KACG,EAAE;QAEpC,IAAI,IAAI,CAAC,SAAS,IAAI,IAAI,IAAI,IAAI,CAAC,QAAQ;YACvC,MAAM,IAAI,aAAa,EAAE,CAAC;QAE9B,MAAM,WAAW,GAAG,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC;QAC9D,MAAM,WAAW,GAAG,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC;QAC9D,MAAM,WAAW,GAAG,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC;QAC9D,MAAM,QAAQ,GAAG,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,MAAM,CAAC,GAAG,CAAC;QACjD,MAAM,qBAAqB,GAAG,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,MAAM,CAAC,qBAAqB,CAAC;QAEhF,IAAI,WAAW,IAAI,IAAI,IAAI,WAAW,IAAI,IAAI,IAAI,WAAW,IAAI,IAAI;YACjE,MAAM,IAAI,gBAAgB,CAAC,oDAAoD,CA
AC,CAAC;QAErF,KAAK,UAAU,mBAAmB,CAAC,EAC/B,SAAS,EAAE,YAAY,EAAE,YAAY,EAAE,QAAQ,EAGlD;YACG,IAAI,WAAW,IAAI,IAAI,IAAI,WAAW,IAAI,IAAI,IAAI,WAAW,IAAI,IAAI;gBACjE,MAAM,IAAI,gBAAgB,CAAC,oDAAoD,CAAC,CAAC;YAErF,iEAAiE;YACjE,MAAM,sBAAsB,GAAG,CAAC,GAAG,CAC/B,CAAC,qBAAqB,IAAI,QAAQ,IAAI,IAAI,CAAC;gBACvC,CAAC,CAAC,CAAC;gBACH,CAAC,CAAC,CAAC,CACV,CAAC;YACF,MAAM,iBAAiB,GAAG,SAAS,GAAG,sBAAsB,CAAC;YAC7D,IAAI,cAAc,GAAG,iBAAiB,CAAC;YAEvC,IAAI,gBAAgB,GAAG,IAAI,CAAC,GAAG,CAAC,cAAc,EAAE,YAAY,CAAC,MAAM,CAAC,CAAC;YACrE,cAAc,IAAI,gBAAgB,CAAC;YAEnC,IAAI,gBAAgB,GAAG,IAAI,CAAC,GAAG,CAAC,cAAc,EAAE,YAAY,CAAC,MAAM,CAAC,CAAC;YACrE,cAAc,IAAI,gBAAgB,CAAC;YAEnC,IAAI,cAAc,IAAI,CAAC,IAAI,mBAAmB;gBAC1C,MAAM,IAAI,KAAK,CACX,2GAA2G;oBAC3G,qEAAqE,CACxE,CAAC;YAEN,MAAM,2BAA2B,GAAG,IAAI,CAAC,GAAG,CACxC,IAAI,CAAC,GAAG,CAAC,iBAAiB,EAAE,YAAY,CAAC,MAAM,CAAC,EAChD,IAAI,CAAC,GAAG,CACJ,CAAC,EACD,IAAI,CAAC,KAAK,CACN,mBAAmB,YAAY,QAAQ;gBACnC,CAAC,CAAC,MAAM,mBAAmB,CAAC,QAAQ,CAAC;gBACrC,CAAC,CAAC,mBAAmB,CAC5B,CACJ,CACJ,CAAC;YAEF,IAAI,gBAAgB,GAAG,2BAA2B,EAAE;gBAChD,MAAM,UAAU,GAAG,IAAI,CAAC,GAAG,CAAC,gBAAgB,EAAE,2BAA2B,GAAG,gBAAgB,CAAC,CAAC;gBAC9F,gBAAgB,IAAI,UAAU,CAAC;gBAC/B,gBAAgB,IAAI,UAAU,CAAC;aAClC;YAED,MAAM,oBAAoB,GAAG,YAAY,CAAC,KAAK,CAAC,CAAC,gBAAgB,CAAC,CAAC;YACnE,MAAM,oBAAoB,GAAG,YAAY,CAAC,KAAK,CAAC,CAAC,EAAE,gBAAgB,CAAC,CAAC;YAErE,MAAM,eAAe,GAAY,EAAE,CAAC;YAEpC,IAAI,qBAAqB,IAAI,QAAQ,IAAI,IAAI;gBACzC,eAAe,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;YAEnC,eAAe,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;YAClC,eAAe,CAAC,IAAI,CAAC,GAAG,oBAAoB,CAAC,CAAC;YAE9C,eAAe,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;YAClC,eAAe,CAAC,IAAI,CAAC,GAAG,oBAAoB,CAAC,CAAC;YAE9C,eAAe,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;YAElC,OAAO,eAAe,CAAC;QAC3B,CAAC;QAED,MAAM,gBAAgB,GAAG,GAAG,EAAE;YAC1B,IAAI,MAAM,EAAE,OAAO;gBACf,MAAM,MAAM,CAAC,MAAM,CAAC;YAExB,IAAI,IAAI,CAAC,QAAQ;gBACb,MAAM,IAAI,aAAa,EAAE,CAAC;QAClC,CAAC,CAAC;QAEF,OAAO,MAAM,QAAQ,CAAC,IAAI,EAAE,oBAAoB,EAAE,MAAM,EAAE,KAAK,IAAI,EAAE;YACjE,gBAAgB,EAAE,CAAC;YAEnB,IAAI,IAAI,CAAC,SAAS,IAAI,IAAI,IAAI,IAAI,CAAC,QAAQ;gBACvC,MAAM,IAAI,aAAa,EAAE,CAAC
;YAE9B,MAAM,yBAAyB,GAAG,aAAa,CAAC,WAAW,EAAE,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC;YAC5F,MAAM,yBAAyB,GAAG,aAAa,CAAC,WAAW,EAAE,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC;YAC5F,MAAM,wBAAwB,GAAG,MAAM,uBAAuB,CAAC,gBAAgB,EAAE,IAAI,CAAC,SAAS,CAAC,CAAC;YACjG,gBAAgB,EAAE,CAAC;YAEnB,MAAM,WAAW,GAAG,MAAM,mBAAmB,CAAC;gBAC1C,SAAS,EAAE,IAAI,CAAC,SAAS,CAAC,OAAO,CAAC,WAAW,GAAG,wBAAwB;gBACxE,YAAY,EAAE,yBAAyB;gBACvC,YAAY,EAAE,yBAAyB;gBACvC,QAAQ,EAAE,IAAI,CAAC,SAAS;aAC3B,CAAC,CAAC;YACH,gBAAgB,EAAE,CAAC;YAEnB,MAAM,iBAAiB,GAAG,CAAC,mBAAmB;gBAC1C,CAAC,CAAC,SAAS;gBACX,CAAC,CAAC,CAAC,SAAS,IAAI,IAAI,IAAI,SAAS,GAAG,CAAC,CAAC;oBAClC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,SAAS,EAAE,IAAI,CAAC,SAAS,CAAC,OAAO,CAAC,WAAW,GAAG,WAAW,CAAC,MAAM,CAAC;oBAC9E,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,OAAO,CAAC,WAAW,GAAG,WAAW,CAAC,MAAM,CAAC;YAElE,OAAO,MAAM,IAAI,CAAC,iBAAiB,CAAC,WAAW,EAAE;gBAC7C,OAAO;gBACP,MAAM;gBACN,SAAS,EAAE,iBAAiB;gBAC5B,WAAW;gBACX,IAAI;gBACJ,IAAI;gBACJ,IAAI;gBACJ,oBAAoB;gBACpB,aAAa;gBACb,kBAAkB;gBAClB,OAAO;gBACP,gBAAgB;gBAChB,sBAAsB;aACzB,EAAE;gBACC,KAAK,CAAC,YAAY,CAAC,EAAC,SAAS,EAAE,GAAG,EAAE,aAAa,EAAE,QAAQ,EAAC;oBAGxD,OAAO;wBACH,eAAe,EAAE,MAAM,mBAAmB,CAAC;4BACvC,SAAS,EAAE,QAAQ,CAAC,OAAO,CAAC,WAAW,GAAG,SAAS;4BACnD,YAAY,EAAE,CAAC,GAAG,yBAAyB,EAAE,GAAG,GAAG,EAAE,GAAG,aAAa,CAAC;4BACtE,YAAY,EAAE,yBAAyB;4BACvC,QAAQ;yBACX,CAAC;qBACL,CAAC;gBACN,CAAC;aACJ,CAAC,CAAC;QACP,CAAC,CAAC,CAAC;IACP,CAAC;IAED,gBAAgB;IACR,KAAK,CAAC,iBAAiB,CAC3B,MAAe,EACf,EACI,OAAO,EACP,MAAM,EACN,SAAS,EACT,WAAW,EACX,IAAI,EACJ,IAAI,EACJ,IAAI,EACJ,oBAAoB,GAAG,KAAK,EAC5B,aAAa,GAAG,EAAE,EAClB,kBAAkB,GAAG,CAAC,EACtB,OAAO,EACP,gBAAgB,GAAG,uBAAuB,EAC1C,sBAAsB,EACS,EACnC,EACI,YAAY,EAQf;QAED,IAAI,IAAI,CAAC,SAAS,IAAI,IAAI;YACtB,MAAM,IAAI,aAAa,EAAE,CAAC;QAE9B,MAAM,QAAQ,GAAG,IAAI,CAAC,SAAS,CAAC;QAChC,MAAM,KAAK,GAAG,QAAQ,CAAC,KAAK,CAAC;QAC7B,MAAM,OAAO,GAAG,QAAQ,CAAC,OAAO,CAAC;QACjC,MAAM,QAAQ,GAAG,KAAK,CAAC,MAAM,CAAC,GAAG,CAAC;QAClC,MAAM,QAAQ,GAAG,KAAK,CAAC,MAAM,CAAC,MAAM,CAAC,GAAG,CAAC;QAEzC,MAAM,GAAG,GAAY,EAAE,CAAC;QACxB,MAAM,aAAa,GAAY,EAAE,CAAC;
QAClC,MAAM,sBAAsB,GAAG,OAAO,IAAI,IAAI;YAC1C,CAAC,CAAC,IAAI,2BAA2B,CAAC,EAAC,OAAO,EAAC,CAAC;YAC5C,CAAC,CAAC,SAAS,CAAC;QAChB,MAAM,EACF,UAAU,EAAE,uBAAuB,GAAG,EAAE,EACxC,kBAAkB,EAClB,eAAe,EACf,OAAO,EACP,gBAAgB,EAChB,eAAe,EAClB,GAAiC,aAAa,KAAK,KAAK;YACrD,CAAC,CAAC,EAAC,UAAU,EAAE,CAAC,EAAC;YACjB,CAAC,CAAC,aAAa,CAAC;QACpB,MAAM,eAAe,GAAG,IAAI,oBAAoB,EAAE,CAAC;QACnD,MAAM,sBAAsB,GAAG,IAAI,sBAAsB,EAAE,CAAC;QAC5D,MAAM,+BAA+B,GAA6B,EAAE,CAAC;QACrE,MAAM,oBAAoB,GAAG,uBAAuB,GAAG,CAAC,CAAC;QAEzD,IAAI,WAAW,GAAG,MAAM,CAAC;QACzB,IAAI,eAAe,GAAG,CAAC,CAAC;QAExB,IAAI,OAAO,IAAI,IAAI;YACf,sBAAsB,CAAC,mBAAmB,CAAC,OAAO,CAAC,sBAAsB,EAAE,KAAK,CAAC,QAAQ,CAAC;iBACrF,GAAG,CAAC,CAAC,WAAW,EAAE,EAAE,CAAC,sBAAsB,CAAC,cAAc,CAAC,WAAW,CAAC,CAAC,CAAC;QAElF,IAAI,sBAAsB,IAAI,IAAI;YAC9B,sBAAsB,CAAC,mBAAmB,CAAC,sBAAsB,EAAE,KAAK,CAAC,QAAQ,CAAC;iBAC7E,GAAG,CAAC,CAAC,WAAW,EAAE,EAAE,CAAC,sBAAsB,CAAC,cAAc,CAAC,WAAW,CAAC,CAAC,CAAC;QAElF,MAAM,gBAAgB,GAAG,GAAG,EAAE;YAC1B,IAAI,MAAM,EAAE,OAAO;gBACf,MAAM,MAAM,CAAC,MAAM,CAAC;YAExB,IAAI,IAAI,CAAC,QAAQ;gBACb,MAAM,IAAI,aAAa,EAAE,CAAC;QAClC,CAAC,CAAC;QAEF,MAAM,gBAAgB,GAAG,GAAG,EAAE;YAC1B,IAAI,IAAI,CAAC,SAAS,IAAI,IAAI;gBACtB,MAAM,IAAI,aAAa,EAAE,CAAC;YAE9B,IAAI,YAAY,GAAG,GAAG,CAAC,KAAK,CAAC,CAAC,uBAAuB,CAAC,CAAC;YAEvD,IAAI,kBAAkB,IAAI,IAAI;gBAC1B,YAAY,GAAG,kBAAkB,CAAC,YAAY,CAAC,CAAC;YAEpD,IAAI,eAAe,IAAI,IAAI,IAAI,CAAC,eAAe,EAAE;gBAC7C,MAAM,OAAO,GAAG,KAAK,CAAC,MAAM,CAAC,EAAE,CAAC;gBAEhC,IAAI,OAAO,IAAI,IAAI;oBACf,YAAY,GAAG,YAAY,CAAC,MAAM,CAAC,KAAK,CAAC,EAAE,CAAC,KAAK,KAAK,OAAO,CAAC,CAAC;aACtE;YAED,OAAO,YAAY,CAAC;QACxB,CAAC,CAAC;QAEF,iDAAiD;QACjD,OAAO,IAAI,EAAE;YACT,gBAAgB,EAAE,CAAC;YAEnB,IAAI,kBAAkB,GAAG,KAAK,CAAC;YAE/B,IAAI,EAAC,mBAAmB,EAAC,GAAG,QAAQ,CAAC,oBAAoB,CAAC,WAAW,CAAC,CAAC;YAEvE,8DAA8D;YAC9D,IAAI,mBAAmB,KAAK,WAAW,CAAC,MAAM,IAAI,mBAAmB,GAAG,CAAC;gBACrE,mBAAmB,IAAI,CAAC,CAAC;YAE7B,WAAW,CAAC,MAAM,CAAC,CAAC,EAAE,mBAAmB,CAAC,CAAC;YAE3C,IAAI,mBAAmB,GAAG,QAAQ,CAAC,cAAc,EAAE;gBAC/C,MAAM,QAAQ,CAAC,uBAAuB,CAAC,CAAC;wBACpC,KAAK,EAAE,mBAAmB;wBAC1B,GAAG,EAAE,QAAQ,CAAC,cAAc;qBAC/B,C
AAC,CAAC,CAAC;gBACJ,gBAAgB,EAAE,CAAC;aACtB;YAED,MAAM,kBAAkB,GAAG,QAAQ,CAAC,QAAQ,CAAC,WAAW,EAAE,gBAAgB,CAAC;gBACvE,WAAW,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI;gBAC7B,sBAAsB;gBACtB,aAAa,EAAE,CAAC,oBAAoB,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC;oBAC/C,YAAY,EAAE,gBAAgB;oBAC9B,OAAO;oBACP,gBAAgB;oBAChB,eAAe;iBAClB;gBACD,kBAAkB;gBAClB,aAAa,EAAE,IAAI;aACtB,CAAC,CAAC,CAAC;YAEJ,IAAI,KAAK,EAAE,MAAM,KAAK,IAAI,kBAAkB,EAAE;gBAC1C,gBAAgB,EAAE,CAAC;gBACnB,eAAe,EAAE,CAAC;gBAElB,MAAM,MAAM,GAAG,CAAC,KAAK,CAAC,CAAC;gBACvB,MAAM,IAAI,GAAG,KAAK,CAAC,UAAU,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC;gBACvC,MAAM,kBAAkB,GAAG,eAAe,CAAC,QAAQ,CAAC,EAAC,MAAM,EAAE,IAAI,EAAC,CAAC,CAAC;gBAEpE,IAAI,IAAI,KAAK,oBAAoB,IAAI,CACjC,CAAC,OAAO,EAAE,oBAAoB,IAAI,oBAAoB,CAAC,IAAI,IAAI,CAAC,IAAI,EAAE,KAAK,EAAE,CAChF,EAAE;oBACC,+BAA+B,CAAC,IAAI,CAAC,kBAAkB,CAAC,mBAAmB,CAAC,CAAC,CAAC,CAAC,CAAC;iBACnF;qBAAM;oBACH,OAAO,+BAA+B,CAAC,MAAM,GAAG,CAAC;wBAC7C,+BAA+B,CAAC,KAAK,EAAG,CAAC,OAAO,EAAE,CAAC;iBAC1D;gBAED,sBAAsB,CAAC,gBAAgB,CAAC,EAAC,IAAI,EAAE,MAAM,EAAE,kBAAkB,EAAC,CAAC,CAAC;gBAE5E,aAAa,CAAC,IAAI,CAAC,GAAG,eAAe,CAAC,kBAAkB,EAAE,CAAC,CAAC;gBAE5D,IAAI,sBAAsB,CAAC,iBAAiB,IAAI,KAAK,KAAK,QAAQ,IAAI,KAAK,KAAK,QAAQ,EAAE;oBACtF,MAAM,cAAc,GAAI,sBAAsB,CAAC,iBAAiB,EAAE,CAAC;oBACnE,MAAM,mBAAmB,GAAG,eAAe,CAAC,qBAAqB,EAAE,CAAC;oBAEpE,MAAM,6BAA6B,GAAG,gCAAgC,CAClE,cAAc,EACd,mBAAmB,EACnB,KAAK,CAAC,QAAQ,CACjB,CAAC;oBACF,aAAa,CAAC,IAAI,CAAC,GAAG,6BAA6B,CAAC,CAAC;oBAErD,MAAM,CAAC,iCAAiC,CAAC,GAAG,cAAc;yBACrD,GAAG,CAAC,CAAC,WAAW,EAAE,EAAE,CAAC,WAAW,CAAC,oBAAoB,CAAC;yBACtD,MAAM,CAAC,CAAC,oBAAoB,EAAE,EAAE,CAAC,oBAAoB,CAAC,MAAM,GAAG,CAAC,CAAC;yBACjE,IAAI,CAAC,CAAC,CAAC,CAAC;oBAEb,IAAI,aAAa,CAAC,MAAM,GAAG,CAAC;wBACxB,OAAO,EAAE,CAAC,aAAa,CAAC,KAAK,EAAE,CAAC,CAAC;oBAErC,GAAG,CAAC,IAAI,CAAC,GAAG,aAAa,CAAC,CAAC;oBAC3B,aAAa,CAAC,MAAM,GAAG,CAAC,CAAC;oBAEzB,IAAI,aAAa,GAAG,KAAK,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC;oBAE1C,IAAI,OAAO,EAAE,oBAAoB,IAAI,oBAAoB;wBACrD,aAAa,GAAG,aAAa,CAAC,OAAO,EAAE,CAAC;oBAE5C,OAAO;wBACH,QAAQ,EAAE,aAAa;wBACvB,QAAQ,EAAE;4BACN,4BAA4B,EAAE,iCAAiC;4BAC/D,UAAU,E
AAE,CAAC,KAAK,KAAK,QAAQ,IAAI,KAAK,KAAK,QAAQ,CAAC;gCAClD,CAAC,CAAC,UAAmB;gCACrB,CAAC,CAAC,uBAAgC;yBACzC;qBACJ,CAAC;iBACL;gBAED,IAAI,aAAa,CAAC,MAAM,GAAG,CAAC,EAAE;oBAC1B,OAAO,EAAE,CAAC,aAAa,CAAC,KAAK,EAAE,CAAC,CAAC;oBACjC,GAAG,CAAC,IAAI,CAAC,GAAG,aAAa,CAAC,CAAC;oBAC3B,aAAa,CAAC,MAAM,GAAG,CAAC,CAAC;iBAC5B;gBAED,IAAI,SAAS,IAAI,IAAI,IAAI,SAAS,GAAG,CAAC,IAAI,eAAe,IAAI,SAAS,EAAE;oBACpE,IAAI,aAAa,GAAG,KAAK,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC;oBAE1C,IAAI,OAAO,EAAE,oBAAoB,IAAI,oBAAoB;wBACrD,aAAa,GAAG,aAAa,CAAC,OAAO,EAAE,CAAC;oBAE5C,OAAO;wBACH,QAAQ,EAAE,aAAa;wBACvB,QAAQ,EAAE;4BACN,UAAU,EAAE,WAAoB;yBACnC;qBACJ,CAAC;iBACL;gBAED,IAAI,QAAQ,CAAC,cAAc,IAAI,OAAO,CAAC,WAAW,EAAE;oBAChD,kBAAkB,GAAG,IAAI,CAAC;oBAC1B,MAAM;iBACT;aACJ;YAED,IAAI,kBAAkB,EAAE;gBACpB,MAAM,wBAAwB,GAAG,MAAM,uBAAuB,CAAC,gBAAgB,EAAE,QAAQ,CAAC,CAAC;gBAC3F,gBAAgB,EAAE,CAAC;gBAEnB,MAAM,EAAC,eAAe,EAAC,GAAG,MAAM,YAAY,CAAC;oBACzC,SAAS,EAAE,wBAAwB;oBACnC,GAAG;oBACH,aAAa;oBACb,QAAQ;iBACX,CAAC,CAAC;gBACH,gBAAgB,EAAE,CAAC;gBACnB,WAAW,GAAG,eAAe,CAAC;gBAE9B,SAAS;aACZ;YAED,MAAM;SACT;QAED,MAAM,IAAI,KAAK,CAAC,sDAAsD,CAAC,CAAC;IAC5E,CAAC;CACJ;AAED,KAAK,UAAU,uBAAuB,CAClC,gBAAgF,EAChF,QAA8B;IAE9B,IAAI,OAAO,gBAAgB,KAAK,QAAQ;QACpC,OAAO,gBAAgB,CAAC;SACvB,IAAI,gBAAgB,YAAY,QAAQ;QACzC,OAAO,IAAI,CAAC,GAAG,CACX,QAAQ,CAAC,OAAO,CAAC,WAAW,EAC5B,IAAI,CAAC,GAAG,CACJ,CAAC,EACD,IAAI,CAAC,KAAK,CACN,gBAAgB,YAAY,QAAQ;YAChC,CAAC,CAAC,MAAM,gBAAgB,CAAC,QAAQ,CAAC;YAClC,CAAC,CAAC,gBAAgB,CACzB,CACJ,CACJ,CAAC;IAEN,OAAO,uBAAuB,CAAC,QAAQ,CAAC,CAAC;AAC7C,CAAC"}
|
|
@@ -2,13 +2,13 @@ import { EventRelay } from "lifecycle-utils";
|
|
|
2
2
|
import { Token } from "../../types.js";
|
|
3
3
|
import { LlamaModel } from "../LlamaModel.js";
|
|
4
4
|
import { LlamaGrammarEvaluationState } from "../LlamaGrammarEvaluationState.js";
|
|
5
|
-
import { ContextShiftOptions, ContextTokensDeleteRange, EvaluationPriority, LlamaContextOptions, LlamaContextSequenceRepeatPenalty
|
|
5
|
+
import { ContextShiftOptions, ContextTokensDeleteRange, EvaluationPriority, LlamaContextOptions, LlamaContextSequenceRepeatPenalty } from "./types.js";
|
|
6
6
|
export declare class LlamaContext {
|
|
7
7
|
readonly onDispose: EventRelay<void>;
|
|
8
8
|
/**
|
|
9
9
|
* @param options
|
|
10
10
|
*/
|
|
11
|
-
constructor({ model, sequences, seed, contextSize, batchSize,
|
|
11
|
+
constructor({ model, sequences, seed, contextSize, batchSize, threads, batching: { dispatchSchedule: batchingDispatchSchedule, itemsPrioritizingStrategy: batchingItemsPrioritizingStrategy }, _embedding, _noSeed }: LlamaContextOptions);
|
|
12
12
|
dispose(): void;
|
|
13
13
|
/** @hidden */
|
|
14
14
|
[Symbol.dispose](): void;
|
|
@@ -24,11 +24,11 @@ export declare class LlamaContext {
|
|
|
24
24
|
* When there are no sequences left, this method will throw an error.
|
|
25
25
|
* @param [options]
|
|
26
26
|
*/
|
|
27
|
-
getSequence({
|
|
28
|
-
prependBos?: boolean;
|
|
27
|
+
getSequence({ contextShift: { size: contextShiftSize, strategy: contextShiftStrategy } }?: {
|
|
29
28
|
contextShift?: ContextShiftOptions;
|
|
30
29
|
}): LlamaContextSequence;
|
|
31
30
|
dispatchPendingBatch(): void;
|
|
31
|
+
printTimings(): Promise<void>;
|
|
32
32
|
}
|
|
33
33
|
export declare class LlamaContextSequence {
|
|
34
34
|
readonly onDispose: EventRelay<void>;
|
|
@@ -39,10 +39,12 @@ export declare class LlamaContextSequence {
|
|
|
39
39
|
get disposed(): boolean;
|
|
40
40
|
get context(): LlamaContext;
|
|
41
41
|
get model(): LlamaModel;
|
|
42
|
-
get prependBos(): boolean;
|
|
43
42
|
get nextTokenIndex(): number;
|
|
44
43
|
get contextTokens(): Token[];
|
|
45
|
-
get
|
|
44
|
+
get isLoadedToMemory(): boolean;
|
|
45
|
+
compareContextTokens(tokens: Token[]): {
|
|
46
|
+
firstDifferentIndex: number;
|
|
47
|
+
};
|
|
46
48
|
/**
|
|
47
49
|
* Clear the history of the sequence.
|
|
48
50
|
* If `prependBos` was enabled, the BOS token will be prepended to the sequence again.
|
|
@@ -58,11 +60,12 @@ export declare class LlamaContextSequence {
|
|
|
58
60
|
* @param tokens
|
|
59
61
|
* @param [options]
|
|
60
62
|
*/
|
|
61
|
-
evaluate(tokens: Token[], { temperature, topK, topP, grammarEvaluationState, repeatPenalty, evaluationPriority,
|
|
63
|
+
evaluate(tokens: Token[], { temperature, minP, topK, topP, grammarEvaluationState, repeatPenalty, evaluationPriority, contextShift: { size: contextShiftSize, strategy: contextShiftStrategy }, yieldEosToken }?: {
|
|
62
64
|
temperature?: number;
|
|
65
|
+
minP?: number;
|
|
63
66
|
topK?: number;
|
|
64
67
|
topP?: number;
|
|
65
|
-
grammarEvaluationState?: LlamaGrammarEvaluationState;
|
|
68
|
+
grammarEvaluationState?: LlamaGrammarEvaluationState | (() => LlamaGrammarEvaluationState | undefined);
|
|
66
69
|
repeatPenalty?: LlamaContextSequenceRepeatPenalty;
|
|
67
70
|
/**
|
|
68
71
|
* When a lot of tokens are queued for the next batch, more than the configured `batchSize`, the tokens for each sequence will be
|
|
@@ -74,21 +77,21 @@ export declare class LlamaContextSequence {
|
|
|
74
77
|
* is, the more likely and more tokens will be evaluated for that sequence in the next queued batch.
|
|
75
78
|
*/
|
|
76
79
|
evaluationPriority?: EvaluationPriority;
|
|
80
|
+
/** Override the sequence context shift options for this evaluation */
|
|
81
|
+
contextShift?: ContextShiftOptions;
|
|
77
82
|
/**
|
|
78
|
-
*
|
|
79
|
-
*
|
|
80
|
-
*
|
|
81
|
-
* The higher the priority is, the less likely it will be erased.
|
|
82
|
-
* The default priority is `1`.
|
|
83
|
+
* Yield the EOS token when it's generated.
|
|
84
|
+
* When `false` the generation will stop when the EOS token is generated and the EOS token won't be yielded.
|
|
85
|
+
* Defaults to `false`.
|
|
83
86
|
*/
|
|
84
|
-
|
|
87
|
+
yieldEosToken?: boolean;
|
|
85
88
|
}): AsyncGenerator<Token, void>;
|
|
86
89
|
/**
|
|
87
90
|
* Evaluate the provided tokens into the context sequence without generating new tokens.
|
|
88
91
|
* @param tokens
|
|
89
92
|
* @param [options]
|
|
90
93
|
*/
|
|
91
|
-
evaluateWithoutGeneratingNewTokens(tokens: Token[], { evaluationPriority,
|
|
94
|
+
evaluateWithoutGeneratingNewTokens(tokens: Token[], { evaluationPriority, contextShift: { size: contextShiftSize, strategy: contextShiftStrategy } }?: {
|
|
92
95
|
grammarEvaluationState?: LlamaGrammarEvaluationState;
|
|
93
96
|
/**
|
|
94
97
|
* When a lot of tokens are queued for the next batch, more than the configured `batchSize`, the tokens for each sequence will be
|
|
@@ -100,13 +103,7 @@ export declare class LlamaContextSequence {
|
|
|
100
103
|
* is, the more likely and more tokens will be evaluated for that sequence in the next queued batch.
|
|
101
104
|
*/
|
|
102
105
|
evaluationPriority?: EvaluationPriority;
|
|
103
|
-
/**
|
|
104
|
-
|
|
105
|
-
* By default, the lowest priority tokens at the beginning of the context will be erased.
|
|
106
|
-
* To mark the priority of the evaluated tokens, use this option.
|
|
107
|
-
* The higher the priority is, the less likely it will be erased.
|
|
108
|
-
* The default priority is `1`.
|
|
109
|
-
*/
|
|
110
|
-
tokenPriority?: TokenPriority | TokenPriority[];
|
|
106
|
+
/** Override the sequence context shift options for this evaluation */
|
|
107
|
+
contextShift?: ContextShiftOptions;
|
|
111
108
|
}): Promise<void>;
|
|
112
109
|
}
|