node-llama-cpp 3.0.0-beta.2 → 3.0.0-beta.20
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +14 -11
- package/dist/ChatWrapper.d.ts +2 -15
- package/dist/ChatWrapper.js +28 -33
- package/dist/ChatWrapper.js.map +1 -1
- package/dist/apiDocsOverrides.d.ts +1 -0
- package/dist/apiDocsOverrides.js +5 -0
- package/dist/apiDocsOverrides.js.map +1 -0
- package/dist/{utils/getBin.d.ts → bindings/AddonTypes.d.ts} +54 -7
- package/dist/bindings/AddonTypes.js +2 -0
- package/dist/bindings/AddonTypes.js.map +1 -0
- package/dist/bindings/Llama.d.ts +47 -0
- package/dist/bindings/Llama.js +343 -0
- package/dist/bindings/Llama.js.map +1 -0
- package/dist/bindings/consts.d.ts +2 -0
- package/dist/bindings/consts.js +11 -0
- package/dist/bindings/consts.js.map +1 -0
- package/dist/bindings/getLlama.d.ts +145 -0
- package/dist/bindings/getLlama.js +389 -0
- package/dist/bindings/getLlama.js.map +1 -0
- package/dist/bindings/types.d.ts +55 -0
- package/dist/bindings/types.js +77 -0
- package/dist/bindings/types.js.map +1 -0
- package/dist/bindings/utils/MemoryOrchestrator.d.ts +21 -0
- package/dist/bindings/utils/MemoryOrchestrator.js +49 -0
- package/dist/bindings/utils/MemoryOrchestrator.js.map +1 -0
- package/dist/bindings/utils/NoBinaryFoundError.d.ts +2 -0
- package/dist/bindings/utils/NoBinaryFoundError.js +7 -0
- package/dist/bindings/utils/NoBinaryFoundError.js.map +1 -0
- package/dist/bindings/utils/asyncEvery.d.ts +5 -0
- package/dist/bindings/utils/asyncEvery.js +15 -0
- package/dist/bindings/utils/asyncEvery.js.map +1 -0
- package/dist/bindings/utils/asyncSome.d.ts +5 -0
- package/dist/bindings/utils/asyncSome.js +27 -0
- package/dist/bindings/utils/asyncSome.js.map +1 -0
- package/dist/{utils → bindings/utils}/binariesGithubRelease.js +1 -1
- package/dist/bindings/utils/binariesGithubRelease.js.map +1 -0
- package/dist/bindings/utils/clearAllLocalBuilds.d.ts +1 -0
- package/dist/bindings/utils/clearAllLocalBuilds.js +47 -0
- package/dist/bindings/utils/clearAllLocalBuilds.js.map +1 -0
- package/dist/bindings/utils/cloneLlamaCppRepo.d.ts +11 -0
- package/dist/bindings/utils/cloneLlamaCppRepo.js +166 -0
- package/dist/bindings/utils/cloneLlamaCppRepo.js.map +1 -0
- package/dist/bindings/utils/compileLLamaCpp.d.ts +15 -0
- package/dist/bindings/utils/compileLLamaCpp.js +221 -0
- package/dist/bindings/utils/compileLLamaCpp.js.map +1 -0
- package/dist/bindings/utils/detectAvailableComputeLayers.d.ts +14 -0
- package/dist/bindings/utils/detectAvailableComputeLayers.js +304 -0
- package/dist/bindings/utils/detectAvailableComputeLayers.js.map +1 -0
- package/dist/bindings/utils/detectGlibc.d.ts +4 -0
- package/dist/bindings/utils/detectGlibc.js +46 -0
- package/dist/bindings/utils/detectGlibc.js.map +1 -0
- package/dist/bindings/utils/getBestComputeLayersAvailable.d.ts +9 -0
- package/dist/bindings/utils/getBestComputeLayersAvailable.js +29 -0
- package/dist/bindings/utils/getBestComputeLayersAvailable.js.map +1 -0
- package/dist/bindings/utils/getBuildFolderNameForBuildOptions.d.ts +5 -0
- package/dist/bindings/utils/getBuildFolderNameForBuildOptions.js +93 -0
- package/dist/bindings/utils/getBuildFolderNameForBuildOptions.js.map +1 -0
- package/dist/bindings/utils/getCanUsePrebuiltBinaries.d.ts +1 -0
- package/dist/bindings/utils/getCanUsePrebuiltBinaries.js +8 -0
- package/dist/bindings/utils/getCanUsePrebuiltBinaries.js.map +1 -0
- package/dist/bindings/utils/getExampleUsageCodeOfGetLlama.d.ts +2 -0
- package/dist/bindings/utils/getExampleUsageCodeOfGetLlama.js +21 -0
- package/dist/bindings/utils/getExampleUsageCodeOfGetLlama.js.map +1 -0
- package/dist/bindings/utils/getGpuTypesToUseForOption.d.ts +11 -0
- package/dist/bindings/utils/getGpuTypesToUseForOption.js +30 -0
- package/dist/bindings/utils/getGpuTypesToUseForOption.js.map +1 -0
- package/dist/bindings/utils/getLinuxDistroInfo.d.ts +9 -0
- package/dist/bindings/utils/getLinuxDistroInfo.js +46 -0
- package/dist/bindings/utils/getLinuxDistroInfo.js.map +1 -0
- package/dist/bindings/utils/getLlamaWithoutBackend.d.ts +5 -0
- package/dist/bindings/utils/getLlamaWithoutBackend.js +27 -0
- package/dist/bindings/utils/getLlamaWithoutBackend.js.map +1 -0
- package/dist/bindings/utils/getPlatform.d.ts +2 -0
- package/dist/bindings/utils/getPlatform.js +15 -0
- package/dist/bindings/utils/getPlatform.js.map +1 -0
- package/dist/bindings/utils/getPlatformInfo.d.ts +5 -0
- package/dist/bindings/utils/getPlatformInfo.js +28 -0
- package/dist/bindings/utils/getPlatformInfo.js.map +1 -0
- package/dist/bindings/utils/hasBuildingFromSourceDependenciesInstalled.d.ts +3 -0
- package/dist/bindings/utils/hasBuildingFromSourceDependenciesInstalled.js +27 -0
- package/dist/bindings/utils/hasBuildingFromSourceDependenciesInstalled.js.map +1 -0
- package/dist/bindings/utils/hasFileInPath.d.ts +2 -0
- package/dist/bindings/utils/hasFileInPath.js +34 -0
- package/dist/bindings/utils/hasFileInPath.js.map +1 -0
- package/dist/bindings/utils/lastBuildInfo.d.ts +6 -0
- package/dist/bindings/utils/lastBuildInfo.js +17 -0
- package/dist/bindings/utils/lastBuildInfo.js.map +1 -0
- package/dist/bindings/utils/logBinaryUsageExampleToConsole.d.ts +2 -0
- package/dist/bindings/utils/logBinaryUsageExampleToConsole.js +22 -0
- package/dist/bindings/utils/logBinaryUsageExampleToConsole.js.map +1 -0
- package/dist/bindings/utils/logDistroInstallInstruction.d.ts +14 -0
- package/dist/bindings/utils/logDistroInstallInstruction.js +48 -0
- package/dist/bindings/utils/logDistroInstallInstruction.js.map +1 -0
- package/dist/bindings/utils/resolveCustomCmakeOptions.d.ts +1 -0
- package/dist/bindings/utils/resolveCustomCmakeOptions.js +45 -0
- package/dist/bindings/utils/resolveCustomCmakeOptions.js.map +1 -0
- package/dist/bindings/utils/testBindingBinary.d.ts +1 -0
- package/dist/bindings/utils/testBindingBinary.js +98 -0
- package/dist/bindings/utils/testBindingBinary.js.map +1 -0
- package/dist/bindings/utils/testCmakeBinary.d.ts +5 -0
- package/dist/bindings/utils/testCmakeBinary.js +32 -0
- package/dist/bindings/utils/testCmakeBinary.js.map +1 -0
- package/dist/chatWrappers/AlpacaChatWrapper.d.ts +2 -1
- package/dist/chatWrappers/AlpacaChatWrapper.js +9 -2
- package/dist/chatWrappers/AlpacaChatWrapper.js.map +1 -1
- package/dist/chatWrappers/ChatMLChatWrapper.d.ts +5 -0
- package/dist/chatWrappers/ChatMLChatWrapper.js +13 -11
- package/dist/chatWrappers/ChatMLChatWrapper.js.map +1 -1
- package/dist/chatWrappers/FalconChatWrapper.d.ts +2 -1
- package/dist/chatWrappers/FalconChatWrapper.js +28 -11
- package/dist/chatWrappers/FalconChatWrapper.js.map +1 -1
- package/dist/chatWrappers/FunctionaryChatWrapper.js +86 -73
- package/dist/chatWrappers/FunctionaryChatWrapper.js.map +1 -1
- package/dist/chatWrappers/{LlamaChatWrapper.d.ts → GemmaChatWrapper.d.ts} +6 -1
- package/dist/chatWrappers/GemmaChatWrapper.js +88 -0
- package/dist/chatWrappers/GemmaChatWrapper.js.map +1 -0
- package/dist/chatWrappers/GeneralChatWrapper.d.ts +2 -1
- package/dist/chatWrappers/GeneralChatWrapper.js +35 -12
- package/dist/chatWrappers/GeneralChatWrapper.js.map +1 -1
- package/dist/chatWrappers/Llama2ChatWrapper.d.ts +20 -0
- package/dist/chatWrappers/{LlamaChatWrapper.js → Llama2ChatWrapper.js} +29 -11
- package/dist/chatWrappers/Llama2ChatWrapper.js.map +1 -0
- package/dist/chatWrappers/Llama3ChatWrapper.d.ts +31 -0
- package/dist/chatWrappers/Llama3ChatWrapper.js +129 -0
- package/dist/chatWrappers/Llama3ChatWrapper.js.map +1 -0
- package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.d.ts +73 -0
- package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.js +359 -0
- package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.js.map +1 -0
- package/dist/chatWrappers/generic/TemplateChatWrapper.d.ts +64 -0
- package/dist/chatWrappers/generic/TemplateChatWrapper.js +200 -0
- package/dist/chatWrappers/generic/TemplateChatWrapper.js.map +1 -0
- package/dist/chatWrappers/generic/utils/chatHistoryFunctionCallMessageTemplate.d.ts +33 -0
- package/dist/chatWrappers/generic/utils/chatHistoryFunctionCallMessageTemplate.js +45 -0
- package/dist/chatWrappers/generic/utils/chatHistoryFunctionCallMessageTemplate.js.map +1 -0
- package/dist/chatWrappers/utils/ChatModelFunctionsDocumentationGenerator.d.ts +42 -0
- package/dist/chatWrappers/utils/ChatModelFunctionsDocumentationGenerator.js +82 -0
- package/dist/chatWrappers/utils/ChatModelFunctionsDocumentationGenerator.js.map +1 -0
- package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.d.ts +4 -0
- package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.js +206 -0
- package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.js.map +1 -0
- package/dist/chatWrappers/utils/resolveChatWrapper.d.ts +69 -0
- package/dist/chatWrappers/utils/resolveChatWrapper.js +214 -0
- package/dist/chatWrappers/utils/resolveChatWrapper.js.map +1 -0
- package/dist/cli/cli.js +21 -7
- package/dist/cli/cli.js.map +1 -1
- package/dist/cli/commands/BuildCommand.d.ts +6 -4
- package/dist/cli/commands/BuildCommand.js +103 -41
- package/dist/cli/commands/BuildCommand.js.map +1 -1
- package/dist/cli/commands/ChatCommand.d.ts +18 -6
- package/dist/cli/commands/ChatCommand.js +298 -142
- package/dist/cli/commands/ChatCommand.js.map +1 -1
- package/dist/cli/commands/ClearCommand.d.ts +1 -1
- package/dist/cli/commands/ClearCommand.js +11 -12
- package/dist/cli/commands/ClearCommand.js.map +1 -1
- package/dist/cli/commands/CompleteCommand.d.ts +29 -0
- package/dist/cli/commands/CompleteCommand.js +365 -0
- package/dist/cli/commands/CompleteCommand.js.map +1 -0
- package/dist/cli/commands/DebugCommand.d.ts +7 -0
- package/dist/cli/commands/DebugCommand.js +54 -0
- package/dist/cli/commands/DebugCommand.js.map +1 -0
- package/dist/cli/commands/DownloadCommand.d.ts +6 -4
- package/dist/cli/commands/DownloadCommand.js +120 -69
- package/dist/cli/commands/DownloadCommand.js.map +1 -1
- package/dist/cli/commands/InfillCommand.d.ts +31 -0
- package/dist/cli/commands/InfillCommand.js +401 -0
- package/dist/cli/commands/InfillCommand.js.map +1 -0
- package/dist/cli/commands/InitCommand.d.ts +11 -0
- package/dist/cli/commands/InitCommand.js +195 -0
- package/dist/cli/commands/InitCommand.js.map +1 -0
- package/dist/cli/commands/OnPostInstallCommand.js +9 -10
- package/dist/cli/commands/OnPostInstallCommand.js.map +1 -1
- package/dist/cli/commands/PullCommand.d.ts +12 -0
- package/dist/cli/commands/PullCommand.js +117 -0
- package/dist/cli/commands/PullCommand.js.map +1 -0
- package/dist/cli/commands/inspect/InspectCommand.d.ts +4 -0
- package/dist/cli/commands/inspect/InspectCommand.js +19 -0
- package/dist/cli/commands/inspect/InspectCommand.js.map +1 -0
- package/dist/cli/commands/inspect/commands/InspectGgufCommand.d.ts +12 -0
- package/dist/cli/commands/inspect/commands/InspectGgufCommand.js +136 -0
- package/dist/cli/commands/inspect/commands/InspectGgufCommand.js.map +1 -0
- package/dist/cli/commands/inspect/commands/InspectGpuCommand.d.ts +4 -0
- package/dist/cli/commands/inspect/commands/InspectGpuCommand.js +138 -0
- package/dist/cli/commands/inspect/commands/InspectGpuCommand.js.map +1 -0
- package/dist/cli/commands/inspect/commands/InspectMeasureCommand.d.ts +17 -0
- package/dist/cli/commands/inspect/commands/InspectMeasureCommand.js +613 -0
- package/dist/cli/commands/inspect/commands/InspectMeasureCommand.js.map +1 -0
- package/dist/cli/projectTemplates.d.ts +7 -0
- package/dist/cli/projectTemplates.js +10 -0
- package/dist/cli/projectTemplates.js.map +1 -0
- package/dist/cli/recommendedModels.d.ts +2 -0
- package/dist/cli/recommendedModels.js +342 -0
- package/dist/cli/recommendedModels.js.map +1 -0
- package/dist/cli/startCreateCli.d.ts +2 -0
- package/dist/cli/startCreateCli.js +26 -0
- package/dist/cli/startCreateCli.js.map +1 -0
- package/dist/cli/utils/ConsoleInteraction.d.ts +23 -0
- package/dist/cli/utils/ConsoleInteraction.js +122 -0
- package/dist/cli/utils/ConsoleInteraction.js.map +1 -0
- package/dist/cli/utils/ConsoleTable.d.ts +23 -0
- package/dist/cli/utils/ConsoleTable.js +86 -0
- package/dist/cli/utils/ConsoleTable.js.map +1 -0
- package/dist/cli/utils/basicChooseFromListConsoleInteraction.d.ts +13 -0
- package/dist/cli/utils/basicChooseFromListConsoleInteraction.js +111 -0
- package/dist/cli/utils/basicChooseFromListConsoleInteraction.js.map +1 -0
- package/dist/cli/utils/consolePromptQuestion.d.ts +6 -0
- package/dist/cli/utils/consolePromptQuestion.js +82 -0
- package/dist/cli/utils/consolePromptQuestion.js.map +1 -0
- package/dist/cli/utils/getReadablePath.d.ts +1 -0
- package/dist/cli/utils/getReadablePath.js +14 -0
- package/dist/cli/utils/getReadablePath.js.map +1 -0
- package/dist/cli/utils/interactivelyAskForModel.d.ts +7 -0
- package/dist/cli/utils/interactivelyAskForModel.js +451 -0
- package/dist/cli/utils/interactivelyAskForModel.js.map +1 -0
- package/dist/cli/utils/logUsedGpuTypeOption.d.ts +2 -0
- package/dist/cli/utils/logUsedGpuTypeOption.js +9 -0
- package/dist/cli/utils/logUsedGpuTypeOption.js.map +1 -0
- package/dist/cli/utils/printCommonInfoLines.d.ts +9 -0
- package/dist/cli/utils/printCommonInfoLines.js +71 -0
- package/dist/cli/utils/printCommonInfoLines.js.map +1 -0
- package/dist/cli/utils/printInfoLine.d.ts +12 -0
- package/dist/cli/utils/printInfoLine.js +54 -0
- package/dist/cli/utils/printInfoLine.js.map +1 -0
- package/dist/cli/utils/projectTemplates.d.ts +19 -0
- package/dist/cli/utils/projectTemplates.js +47 -0
- package/dist/cli/utils/projectTemplates.js.map +1 -0
- package/dist/cli/utils/resolveCommandGgufPath.d.ts +4 -0
- package/dist/cli/utils/resolveCommandGgufPath.js +71 -0
- package/dist/cli/utils/resolveCommandGgufPath.js.map +1 -0
- package/dist/cli/utils/resolveHeaderFlag.d.ts +1 -0
- package/dist/cli/utils/resolveHeaderFlag.js +21 -0
- package/dist/cli/utils/resolveHeaderFlag.js.map +1 -0
- package/dist/cli/utils/resolveModelRecommendationFileOptions.d.ts +19 -0
- package/dist/cli/utils/resolveModelRecommendationFileOptions.js +7 -0
- package/dist/cli/utils/resolveModelRecommendationFileOptions.js.map +1 -0
- package/dist/cli/utils/splitAnsiToLines.d.ts +1 -0
- package/dist/cli/utils/splitAnsiToLines.js +32 -0
- package/dist/cli/utils/splitAnsiToLines.js.map +1 -0
- package/dist/cli/utils/withCliCommandDescriptionDocsUrl.d.ts +2 -0
- package/dist/cli/utils/withCliCommandDescriptionDocsUrl.js +23 -0
- package/dist/cli/utils/withCliCommandDescriptionDocsUrl.js.map +1 -0
- package/dist/commands.d.ts +1 -0
- package/dist/commands.js +3 -0
- package/dist/commands.js.map +1 -1
- package/dist/config.d.ts +38 -5
- package/dist/config.js +61 -16
- package/dist/config.js.map +1 -1
- package/dist/consts.d.ts +3 -0
- package/dist/consts.js +10 -0
- package/dist/consts.js.map +1 -0
- package/dist/{llamaEvaluator → evaluator}/LlamaChat/LlamaChat.d.ts +37 -35
- package/dist/{llamaEvaluator → evaluator}/LlamaChat/LlamaChat.js +298 -221
- package/dist/evaluator/LlamaChat/LlamaChat.js.map +1 -0
- package/dist/{llamaEvaluator → evaluator}/LlamaChat/utils/FunctionCallGrammar.d.ts +2 -1
- package/dist/{llamaEvaluator → evaluator}/LlamaChat/utils/FunctionCallGrammar.js +5 -3
- package/dist/evaluator/LlamaChat/utils/FunctionCallGrammar.js.map +1 -0
- package/dist/{llamaEvaluator → evaluator}/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.js +18 -0
- package/dist/evaluator/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.js.map +1 -0
- package/dist/{llamaEvaluator → evaluator}/LlamaChatSession/LlamaChatSession.d.ts +40 -3
- package/dist/{llamaEvaluator → evaluator}/LlamaChatSession/LlamaChatSession.js +28 -7
- package/dist/evaluator/LlamaChatSession/LlamaChatSession.js.map +1 -0
- package/dist/{llamaEvaluator → evaluator}/LlamaChatSession/utils/defineChatSessionFunction.d.ts +3 -0
- package/dist/{llamaEvaluator → evaluator}/LlamaChatSession/utils/defineChatSessionFunction.js +3 -0
- package/dist/evaluator/LlamaChatSession/utils/defineChatSessionFunction.js.map +1 -0
- package/dist/evaluator/LlamaCompletion.d.ts +155 -0
- package/dist/evaluator/LlamaCompletion.js +405 -0
- package/dist/evaluator/LlamaCompletion.js.map +1 -0
- package/dist/{llamaEvaluator → evaluator}/LlamaContext/LlamaContext.d.ts +41 -20
- package/dist/{llamaEvaluator → evaluator}/LlamaContext/LlamaContext.js +271 -81
- package/dist/evaluator/LlamaContext/LlamaContext.js.map +1 -0
- package/dist/evaluator/LlamaContext/types.d.ts +140 -0
- package/dist/evaluator/LlamaContext/types.js.map +1 -0
- package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies/firstInFirstOutStrategy.js.map +1 -0
- package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies/maximumParallelismStrategy.js.map +1 -0
- package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizationStrategy.d.ts +2 -0
- package/dist/{llamaEvaluator/LlamaContext/utils/resolveBatchItemsPrioritizingStrategy.js → evaluator/LlamaContext/utils/resolveBatchItemsPrioritizationStrategy.js} +4 -4
- package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizationStrategy.js.map +1 -0
- package/dist/evaluator/LlamaEmbeddingContext.d.ts +51 -0
- package/dist/evaluator/LlamaEmbeddingContext.js +73 -0
- package/dist/evaluator/LlamaEmbeddingContext.js.map +1 -0
- package/dist/{llamaEvaluator → evaluator}/LlamaGrammar.d.ts +8 -5
- package/dist/{llamaEvaluator → evaluator}/LlamaGrammar.js +13 -10
- package/dist/evaluator/LlamaGrammar.js.map +1 -0
- package/dist/{llamaEvaluator → evaluator}/LlamaGrammarEvaluationState.js +4 -4
- package/dist/evaluator/LlamaGrammarEvaluationState.js.map +1 -0
- package/dist/{llamaEvaluator → evaluator}/LlamaJsonSchemaGrammar.d.ts +2 -1
- package/dist/{llamaEvaluator → evaluator}/LlamaJsonSchemaGrammar.js +3 -3
- package/dist/evaluator/LlamaJsonSchemaGrammar.js.map +1 -0
- package/dist/evaluator/LlamaModel.d.ts +230 -0
- package/dist/evaluator/LlamaModel.js +597 -0
- package/dist/evaluator/LlamaModel.js.map +1 -0
- package/dist/evaluator/TokenBias.d.ts +22 -0
- package/dist/evaluator/TokenBias.js +33 -0
- package/dist/evaluator/TokenBias.js.map +1 -0
- package/dist/evaluator/TokenMeter.d.ts +54 -0
- package/dist/evaluator/TokenMeter.js +86 -0
- package/dist/evaluator/TokenMeter.js.map +1 -0
- package/dist/gguf/consts.d.ts +3 -0
- package/dist/gguf/consts.js +8 -0
- package/dist/gguf/consts.js.map +1 -0
- package/dist/gguf/errors/InvalidGgufMagicError.d.ts +3 -0
- package/dist/gguf/errors/InvalidGgufMagicError.js +6 -0
- package/dist/gguf/errors/InvalidGgufMagicError.js.map +1 -0
- package/dist/gguf/errors/UnsupportedGgufValueTypeError.d.ts +4 -0
- package/dist/gguf/errors/UnsupportedGgufValueTypeError.js +9 -0
- package/dist/gguf/errors/UnsupportedGgufValueTypeError.js.map +1 -0
- package/dist/gguf/fileReaders/GgufFileReader.d.ts +33 -0
- package/dist/gguf/fileReaders/GgufFileReader.js +76 -0
- package/dist/gguf/fileReaders/GgufFileReader.js.map +1 -0
- package/dist/gguf/fileReaders/GgufFsFileReader.d.ts +17 -0
- package/dist/gguf/fileReaders/GgufFsFileReader.js +45 -0
- package/dist/gguf/fileReaders/GgufFsFileReader.js.map +1 -0
- package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.d.ts +22 -0
- package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.js +63 -0
- package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.js.map +1 -0
- package/dist/gguf/insights/GgufInsights.d.ts +42 -0
- package/dist/gguf/insights/GgufInsights.js +361 -0
- package/dist/gguf/insights/GgufInsights.js.map +1 -0
- package/dist/gguf/insights/GgufInsightsConfigurationResolver.d.ts +87 -0
- package/dist/gguf/insights/GgufInsightsConfigurationResolver.js +136 -0
- package/dist/gguf/insights/GgufInsightsConfigurationResolver.js.map +1 -0
- package/dist/gguf/insights/utils/resolveContextContextSizeOption.d.ts +18 -0
- package/dist/gguf/insights/utils/resolveContextContextSizeOption.js +76 -0
- package/dist/gguf/insights/utils/resolveContextContextSizeOption.js.map +1 -0
- package/dist/gguf/insights/utils/resolveModelGpuLayersOption.d.ts +14 -0
- package/dist/gguf/insights/utils/resolveModelGpuLayersOption.js +177 -0
- package/dist/gguf/insights/utils/resolveModelGpuLayersOption.js.map +1 -0
- package/dist/gguf/insights/utils/scoreLevels.d.ts +5 -0
- package/dist/gguf/insights/utils/scoreLevels.js +16 -0
- package/dist/gguf/insights/utils/scoreLevels.js.map +1 -0
- package/dist/gguf/parser/GgufV2Parser.d.ts +19 -0
- package/dist/gguf/parser/GgufV2Parser.js +115 -0
- package/dist/gguf/parser/GgufV2Parser.js.map +1 -0
- package/dist/gguf/parser/GgufV3Parser.d.ts +3 -0
- package/dist/gguf/parser/GgufV3Parser.js +4 -0
- package/dist/gguf/parser/GgufV3Parser.js.map +1 -0
- package/dist/gguf/parser/parseGguf.d.ts +8 -0
- package/dist/gguf/parser/parseGguf.js +63 -0
- package/dist/gguf/parser/parseGguf.js.map +1 -0
- package/dist/gguf/readGgufFileInfo.d.ts +33 -0
- package/dist/gguf/readGgufFileInfo.js +66 -0
- package/dist/gguf/readGgufFileInfo.js.map +1 -0
- package/dist/gguf/types/GgufFileInfoTypes.d.ts +84 -0
- package/dist/gguf/types/GgufFileInfoTypes.js +18 -0
- package/dist/gguf/types/GgufFileInfoTypes.js.map +1 -0
- package/dist/gguf/types/GgufMetadataTypes.d.ts +334 -0
- package/dist/gguf/types/GgufMetadataTypes.js +86 -0
- package/dist/gguf/types/GgufMetadataTypes.js.map +1 -0
- package/dist/gguf/types/GgufTensorInfoTypes.d.ts +37 -0
- package/dist/gguf/types/GgufTensorInfoTypes.js +33 -0
- package/dist/gguf/types/GgufTensorInfoTypes.js.map +1 -0
- package/dist/gguf/utils/GgufReadOffset.d.ts +6 -0
- package/dist/gguf/utils/GgufReadOffset.js +18 -0
- package/dist/gguf/utils/GgufReadOffset.js.map +1 -0
- package/dist/gguf/utils/convertMetadataKeyValueRecordToNestedObject.d.ts +5 -0
- package/dist/gguf/utils/convertMetadataKeyValueRecordToNestedObject.js +38 -0
- package/dist/gguf/utils/convertMetadataKeyValueRecordToNestedObject.js.map +1 -0
- package/dist/gguf/utils/getGgufFileTypeName.d.ts +4 -0
- package/dist/gguf/utils/getGgufFileTypeName.js +13 -0
- package/dist/gguf/utils/getGgufFileTypeName.js.map +1 -0
- package/dist/gguf/utils/getGgufMetadataArchitectureData.d.ts +3 -0
- package/dist/gguf/utils/getGgufMetadataArchitectureData.js +4 -0
- package/dist/gguf/utils/getGgufMetadataArchitectureData.js.map +1 -0
- package/dist/gguf/utils/normalizeGgufDownloadUrl.d.ts +1 -0
- package/dist/gguf/utils/normalizeGgufDownloadUrl.js +16 -0
- package/dist/gguf/utils/normalizeGgufDownloadUrl.js.map +1 -0
- package/dist/gguf/utils/resolveBinarySplitGgufPartUrls.d.ts +2 -0
- package/dist/gguf/utils/resolveBinarySplitGgufPartUrls.js +39 -0
- package/dist/gguf/utils/resolveBinarySplitGgufPartUrls.js.map +1 -0
- package/dist/gguf/utils/resolveSplitGgufParts.d.ts +7 -0
- package/dist/gguf/utils/resolveSplitGgufParts.js +55 -0
- package/dist/gguf/utils/resolveSplitGgufParts.js.map +1 -0
- package/dist/index.d.ts +37 -17
- package/dist/index.js +33 -14
- package/dist/index.js.map +1 -1
- package/dist/state.d.ts +4 -0
- package/dist/state.js +14 -0
- package/dist/state.js.map +1 -1
- package/dist/types.d.ts +53 -2
- package/dist/types.js.map +1 -1
- package/dist/utils/DisposeGuard.d.ts +13 -0
- package/dist/utils/DisposeGuard.js +120 -0
- package/dist/utils/DisposeGuard.js.map +1 -0
- package/dist/utils/InsufficientMemoryError.d.ts +3 -0
- package/dist/utils/InsufficientMemoryError.js +6 -0
- package/dist/utils/InsufficientMemoryError.js.map +1 -0
- package/dist/utils/LlamaText.d.ts +50 -25
- package/dist/utils/LlamaText.js +367 -155
- package/dist/utils/LlamaText.js.map +1 -1
- package/dist/utils/StopGenerationDetector.d.ts +1 -1
- package/dist/utils/StopGenerationDetector.js +23 -18
- package/dist/utils/StopGenerationDetector.js.map +1 -1
- package/dist/utils/TokenStreamRegulator.d.ts +8 -4
- package/dist/utils/TokenStreamRegulator.js +78 -8
- package/dist/utils/TokenStreamRegulator.js.map +1 -1
- package/dist/utils/UnsupportedError.d.ts +2 -0
- package/dist/utils/UnsupportedError.js +7 -0
- package/dist/utils/UnsupportedError.js.map +1 -0
- package/dist/utils/cmake.js +38 -20
- package/dist/utils/cmake.js.map +1 -1
- package/dist/utils/createModelDownloader.d.ts +102 -0
- package/dist/utils/createModelDownloader.js +226 -0
- package/dist/utils/createModelDownloader.js.map +1 -0
- package/dist/utils/findBestOption.d.ts +4 -0
- package/dist/utils/findBestOption.js +15 -0
- package/dist/utils/findBestOption.js.map +1 -0
- package/dist/utils/findCharacterRemovalCountToFitChatHistoryInContext.js +18 -8
- package/dist/utils/findCharacterRemovalCountToFitChatHistoryInContext.js.map +1 -1
- package/dist/utils/gbnfJson/getGbnfGrammarForGbnfJsonSchema.d.ts +5 -0
- package/dist/utils/gbnfJson/getGbnfGrammarForGbnfJsonSchema.js +11 -0
- package/dist/utils/gbnfJson/getGbnfGrammarForGbnfJsonSchema.js.map +1 -0
- package/dist/utils/gbnfJson/terminals/GbnfArray.d.ts +3 -1
- package/dist/utils/gbnfJson/terminals/GbnfArray.js +10 -5
- package/dist/utils/gbnfJson/terminals/GbnfArray.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfBoolean.d.ts +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfBoolean.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfBooleanValue.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfGrammar.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfNull.d.ts +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfNull.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfNumber.d.ts +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfNumber.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfNumberValue.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfObjectMap.d.ts +3 -1
- package/dist/utils/gbnfJson/terminals/GbnfObjectMap.js +9 -4
- package/dist/utils/gbnfJson/terminals/GbnfObjectMap.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfOr.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfRepetition.d.ts +9 -0
- package/dist/utils/gbnfJson/terminals/GbnfRepetition.js +37 -0
- package/dist/utils/gbnfJson/terminals/GbnfRepetition.js.map +1 -0
- package/dist/utils/gbnfJson/terminals/GbnfString.d.ts +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfString.js +23 -5
- package/dist/utils/gbnfJson/terminals/GbnfString.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfStringValue.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfVerbatimText.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfWhitespace.d.ts +7 -4
- package/dist/utils/gbnfJson/terminals/GbnfWhitespace.js +37 -9
- package/dist/utils/gbnfJson/terminals/GbnfWhitespace.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/gbnfConsts.d.ts +5 -4
- package/dist/utils/gbnfJson/terminals/gbnfConsts.js +14 -3
- package/dist/utils/gbnfJson/terminals/gbnfConsts.js.map +1 -1
- package/dist/utils/gbnfJson/utils/GbnfJsonScopeState.d.ts +10 -0
- package/dist/utils/gbnfJson/utils/GbnfJsonScopeState.js +15 -0
- package/dist/utils/gbnfJson/utils/GbnfJsonScopeState.js.map +1 -0
- package/dist/utils/gbnfJson/utils/getGbnfJsonTerminalForGbnfJsonSchema.d.ts +2 -1
- package/dist/utils/gbnfJson/utils/getGbnfJsonTerminalForGbnfJsonSchema.js +6 -5
- package/dist/utils/gbnfJson/utils/getGbnfJsonTerminalForGbnfJsonSchema.js.map +1 -1
- package/dist/utils/getBuildDefaults.d.ts +1 -2
- package/dist/utils/getBuildDefaults.js +2 -3
- package/dist/utils/getBuildDefaults.js.map +1 -1
- package/dist/utils/getConsoleLogPrefix.d.ts +1 -0
- package/dist/utils/getConsoleLogPrefix.js +10 -0
- package/dist/utils/getConsoleLogPrefix.js.map +1 -0
- package/dist/utils/getGrammarsFolder.d.ts +2 -1
- package/dist/utils/getGrammarsFolder.js +8 -7
- package/dist/utils/getGrammarsFolder.js.map +1 -1
- package/dist/utils/getModuleVersion.d.ts +1 -0
- package/dist/utils/getModuleVersion.js +13 -0
- package/dist/utils/getModuleVersion.js.map +1 -0
- package/dist/utils/getQueuedTokensBeforeStopTrigger.d.ts +6 -0
- package/dist/utils/getQueuedTokensBeforeStopTrigger.js +22 -0
- package/dist/utils/getQueuedTokensBeforeStopTrigger.js.map +1 -0
- package/dist/utils/getReadableContextSize.d.ts +1 -0
- package/dist/utils/getReadableContextSize.js +7 -0
- package/dist/utils/getReadableContextSize.js.map +1 -0
- package/dist/utils/getTypeScriptTypeStringForGbnfJsonSchema.js +15 -11
- package/dist/utils/getTypeScriptTypeStringForGbnfJsonSchema.js.map +1 -1
- package/dist/utils/gitReleaseBundles.js +73 -5
- package/dist/utils/gitReleaseBundles.js.map +1 -1
- package/dist/utils/hashString.d.ts +1 -0
- package/dist/utils/hashString.js +8 -0
- package/dist/utils/hashString.js.map +1 -0
- package/dist/utils/isLockfileActive.d.ts +4 -0
- package/dist/utils/isLockfileActive.js +12 -0
- package/dist/utils/isLockfileActive.js.map +1 -0
- package/dist/utils/isToken.d.ts +2 -0
- package/dist/utils/isToken.js +4 -0
- package/dist/utils/isToken.js.map +1 -0
- package/dist/utils/isUrl.d.ts +1 -0
- package/dist/utils/isUrl.js +15 -0
- package/dist/utils/isUrl.js.map +1 -0
- package/dist/utils/mergeUnionTypes.d.ts +10 -0
- package/dist/utils/mergeUnionTypes.js +2 -0
- package/dist/utils/mergeUnionTypes.js.map +1 -0
- package/dist/utils/parseModelFileName.d.ts +1 -0
- package/dist/utils/parseModelFileName.js +6 -1
- package/dist/utils/parseModelFileName.js.map +1 -1
- package/dist/utils/parseTextTemplate.d.ts +66 -0
- package/dist/utils/parseTextTemplate.js +116 -0
- package/dist/utils/parseTextTemplate.js.map +1 -0
- package/dist/utils/prettyPrintObject.d.ts +10 -0
- package/dist/utils/prettyPrintObject.js +84 -0
- package/dist/utils/prettyPrintObject.js.map +1 -0
- package/dist/utils/removeNullFields.d.ts +2 -1
- package/dist/utils/removeNullFields.js +8 -0
- package/dist/utils/removeNullFields.js.map +1 -1
- package/dist/utils/resolveGithubRelease.d.ts +2 -0
- package/dist/utils/resolveGithubRelease.js +36 -0
- package/dist/utils/resolveGithubRelease.js.map +1 -0
- package/dist/utils/runtime.d.ts +4 -0
- package/dist/utils/runtime.js +8 -0
- package/dist/utils/runtime.js.map +1 -0
- package/dist/utils/spawnCommand.d.ts +11 -1
- package/dist/utils/spawnCommand.js +56 -6
- package/dist/utils/spawnCommand.js.map +1 -1
- package/dist/utils/tokenizeInput.d.ts +3 -0
- package/dist/utils/tokenizeInput.js +12 -0
- package/dist/utils/tokenizeInput.js.map +1 -0
- package/dist/utils/utilTypes.d.ts +3 -0
- package/dist/utils/utilTypes.js +2 -0
- package/dist/utils/utilTypes.js.map +1 -0
- package/dist/utils/waitForLockfileRelease.d.ts +5 -0
- package/dist/utils/waitForLockfileRelease.js +20 -0
- package/dist/utils/waitForLockfileRelease.js.map +1 -0
- package/dist/utils/withLockfile.d.ts +7 -0
- package/dist/utils/withLockfile.js +44 -0
- package/dist/utils/withLockfile.js.map +1 -0
- package/dist/utils/withOra.d.ts +2 -0
- package/dist/utils/withOra.js +22 -6
- package/dist/utils/withOra.js.map +1 -1
- package/dist/utils/withProgressLog.d.ts +23 -0
- package/dist/utils/withProgressLog.js +211 -0
- package/dist/utils/withProgressLog.js.map +1 -0
- package/dist/utils/withStatusLogs.d.ts +2 -1
- package/dist/utils/withStatusLogs.js +12 -9
- package/dist/utils/withStatusLogs.js.map +1 -1
- package/llama/.clang-format +1 -2
- package/llama/CMakeLists.txt +115 -4
- package/llama/addon.cpp +1318 -99
- package/llama/binariesGithubRelease.json +1 -1
- package/llama/gitRelease.bundle +0 -0
- package/llama/gpuInfo/cuda-gpu-info.cu +120 -0
- package/llama/gpuInfo/cuda-gpu-info.h +10 -0
- package/llama/gpuInfo/metal-gpu-info.h +8 -0
- package/llama/gpuInfo/metal-gpu-info.mm +30 -0
- package/llama/gpuInfo/vulkan-gpu-info.cpp +83 -0
- package/llama/gpuInfo/vulkan-gpu-info.h +9 -0
- package/llama/grammars/README.md +11 -1
- package/llama/grammars/json.gbnf +1 -1
- package/llama/grammars/json_arr.gbnf +1 -1
- package/llama/llama.cpp.info.json +4 -0
- package/llama/toolchains/win32.host-x64.target-arm64.cmake +41 -0
- package/llamaBins/linux-arm64/_nlcBuildMetadata.json +1 -0
- package/llamaBins/linux-arm64/llama-addon.node +0 -0
- package/llamaBins/linux-armv7l/_nlcBuildMetadata.json +1 -0
- package/llamaBins/linux-armv7l/llama-addon.node +0 -0
- package/llamaBins/linux-x64/_nlcBuildMetadata.json +1 -0
- package/llamaBins/linux-x64/llama-addon.node +0 -0
- package/llamaBins/linux-x64-cuda/_nlcBuildMetadata.json +1 -0
- package/llamaBins/linux-x64-cuda/llama-addon.node +0 -0
- package/llamaBins/linux-x64-vulkan/_nlcBuildMetadata.json +1 -0
- package/llamaBins/linux-x64-vulkan/llama-addon.node +0 -0
- package/llamaBins/mac-arm64-metal/_nlcBuildMetadata.json +1 -0
- package/llamaBins/mac-arm64-metal/default.metallib +0 -0
- package/llamaBins/mac-arm64-metal/llama-addon.node +0 -0
- package/llamaBins/mac-x64/_nlcBuildMetadata.json +1 -0
- package/llamaBins/mac-x64/llama-addon.node +0 -0
- package/llamaBins/win-arm64/_nlcBuildMetadata.json +1 -0
- package/llamaBins/win-arm64/llama-addon.exp +0 -0
- package/llamaBins/win-arm64/llama-addon.lib +0 -0
- package/llamaBins/win-arm64/llama-addon.node +0 -0
- package/llamaBins/win-x64/_nlcBuildMetadata.json +1 -0
- package/llamaBins/win-x64/llama-addon.exp +0 -0
- package/llamaBins/win-x64/llama-addon.lib +0 -0
- package/llamaBins/win-x64/llama-addon.node +0 -0
- package/llamaBins/win-x64-cuda/_nlcBuildMetadata.json +1 -0
- package/llamaBins/win-x64-cuda/llama-addon.exp +0 -0
- package/llamaBins/win-x64-cuda/llama-addon.lib +0 -0
- package/llamaBins/win-x64-cuda/llama-addon.node +0 -0
- package/llamaBins/win-x64-vulkan/_nlcBuildMetadata.json +1 -0
- package/llamaBins/win-x64-vulkan/llama-addon.exp +0 -0
- package/llamaBins/win-x64-vulkan/llama-addon.lib +0 -0
- package/llamaBins/win-x64-vulkan/llama-addon.node +0 -0
- package/package.json +61 -34
- package/templates/package.json +10 -0
- package/dist/AbortError.d.ts +0 -2
- package/dist/AbortError.js +0 -7
- package/dist/AbortError.js.map +0 -1
- package/dist/chatWrappers/LlamaChatWrapper.js.map +0 -1
- package/dist/chatWrappers/resolveChatWrapperBasedOnModel.d.ts +0 -13
- package/dist/chatWrappers/resolveChatWrapperBasedOnModel.js +0 -55
- package/dist/chatWrappers/resolveChatWrapperBasedOnModel.js.map +0 -1
- package/dist/llamaEvaluator/LlamaBins.d.ts +0 -18
- package/dist/llamaEvaluator/LlamaBins.js +0 -5
- package/dist/llamaEvaluator/LlamaBins.js.map +0 -1
- package/dist/llamaEvaluator/LlamaChat/LlamaChat.js.map +0 -1
- package/dist/llamaEvaluator/LlamaChat/utils/FunctionCallGrammar.js.map +0 -1
- package/dist/llamaEvaluator/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.js.map +0 -1
- package/dist/llamaEvaluator/LlamaChatSession/LlamaChatSession.js.map +0 -1
- package/dist/llamaEvaluator/LlamaChatSession/utils/defineChatSessionFunction.js.map +0 -1
- package/dist/llamaEvaluator/LlamaContext/LlamaContext.js.map +0 -1
- package/dist/llamaEvaluator/LlamaContext/types.d.ts +0 -86
- package/dist/llamaEvaluator/LlamaContext/types.js.map +0 -1
- package/dist/llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/firstInFirstOutStrategy.js.map +0 -1
- package/dist/llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/maximumParallelismStrategy.js.map +0 -1
- package/dist/llamaEvaluator/LlamaContext/utils/resolveBatchItemsPrioritizingStrategy.d.ts +0 -2
- package/dist/llamaEvaluator/LlamaContext/utils/resolveBatchItemsPrioritizingStrategy.js.map +0 -1
- package/dist/llamaEvaluator/LlamaGrammar.js.map +0 -1
- package/dist/llamaEvaluator/LlamaGrammarEvaluationState.js.map +0 -1
- package/dist/llamaEvaluator/LlamaJsonSchemaGrammar.js.map +0 -1
- package/dist/llamaEvaluator/LlamaModel.d.ts +0 -119
- package/dist/llamaEvaluator/LlamaModel.js +0 -322
- package/dist/llamaEvaluator/LlamaModel.js.map +0 -1
- package/dist/utils/binariesGithubRelease.js.map +0 -1
- package/dist/utils/clearLlamaBuild.d.ts +0 -1
- package/dist/utils/clearLlamaBuild.js +0 -12
- package/dist/utils/clearLlamaBuild.js.map +0 -1
- package/dist/utils/cloneLlamaCppRepo.d.ts +0 -2
- package/dist/utils/cloneLlamaCppRepo.js +0 -102
- package/dist/utils/cloneLlamaCppRepo.js.map +0 -1
- package/dist/utils/compileLLamaCpp.d.ts +0 -8
- package/dist/utils/compileLLamaCpp.js +0 -132
- package/dist/utils/compileLLamaCpp.js.map +0 -1
- package/dist/utils/getBin.js +0 -78
- package/dist/utils/getBin.js.map +0 -1
- package/dist/utils/getGbnfGrammarForGbnfJsonSchema.d.ts +0 -2
- package/dist/utils/getGbnfGrammarForGbnfJsonSchema.js +0 -9
- package/dist/utils/getGbnfGrammarForGbnfJsonSchema.js.map +0 -1
- package/dist/utils/getReleaseInfo.d.ts +0 -7
- package/dist/utils/getReleaseInfo.js +0 -30
- package/dist/utils/getReleaseInfo.js.map +0 -1
- package/dist/utils/parseModelTypeDescription.d.ts +0 -6
- package/dist/utils/parseModelTypeDescription.js +0 -9
- package/dist/utils/parseModelTypeDescription.js.map +0 -1
- package/dist/utils/resolveChatWrapper.d.ts +0 -4
- package/dist/utils/resolveChatWrapper.js +0 -16
- package/dist/utils/resolveChatWrapper.js.map +0 -1
- package/dist/utils/usedBinFlag.d.ts +0 -6
- package/dist/utils/usedBinFlag.js +0 -15
- package/dist/utils/usedBinFlag.js.map +0 -1
- package/llama/usedBin.json +0 -3
- package/llamaBins/mac-arm64/llama-addon.node +0 -0
- /package/dist/{utils → bindings/utils}/binariesGithubRelease.d.ts +0 -0
- /package/dist/{llamaEvaluator → evaluator}/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.d.ts +0 -0
- /package/dist/{llamaEvaluator → evaluator}/LlamaContext/types.js +0 -0
- /package/dist/{llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies → evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies}/firstInFirstOutStrategy.d.ts +0 -0
- /package/dist/{llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies → evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies}/firstInFirstOutStrategy.js +0 -0
- /package/dist/{llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies → evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies}/maximumParallelismStrategy.d.ts +0 -0
- /package/dist/{llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies → evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies}/maximumParallelismStrategy.js +0 -0
- /package/dist/{llamaEvaluator → evaluator}/LlamaGrammarEvaluationState.d.ts +0 -0
|
@@ -1,10 +1,13 @@
|
|
|
1
1
|
import { DisposeAggregator, DisposedError, EventRelay } from "lifecycle-utils";
|
|
2
|
-
import { resolveChatWrapper } from "../../utils/resolveChatWrapper.js";
|
|
3
2
|
import { removeNullFields } from "../../utils/removeNullFields.js";
|
|
4
3
|
import { LlamaGrammarEvaluationState } from "../LlamaGrammarEvaluationState.js";
|
|
5
|
-
import { AbortError } from "../../AbortError.js";
|
|
6
4
|
import { StopGenerationDetector } from "../../utils/StopGenerationDetector.js";
|
|
7
5
|
import { TokenStreamRegulator } from "../../utils/TokenStreamRegulator.js";
|
|
6
|
+
import { UNKNOWN_UNICODE_CHAR } from "../../consts.js";
|
|
7
|
+
import { getQueuedTokensBeforeStopTrigger } from "../../utils/getQueuedTokensBeforeStopTrigger.js";
|
|
8
|
+
import { resolveChatWrapper } from "../../chatWrappers/utils/resolveChatWrapper.js";
|
|
9
|
+
import { GeneralChatWrapper } from "../../chatWrappers/GeneralChatWrapper.js";
|
|
10
|
+
import { getConsoleLogPrefix } from "../../utils/getConsoleLogPrefix.js";
|
|
8
11
|
import { eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy } from "./utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.js";
|
|
9
12
|
import { FunctionCallGrammar, LlamaFunctionCallValidationError } from "./utils/FunctionCallGrammar.js";
|
|
10
13
|
const defaultContextShiftOptions = {
|
|
@@ -12,7 +15,6 @@ const defaultContextShiftOptions = {
|
|
|
12
15
|
strategy: "eraseFirstResponseAndKeepFirstSystem",
|
|
13
16
|
lastEvaluationMetadata: null
|
|
14
17
|
};
|
|
15
|
-
const UNKNOWN_UNICODE_CHAR = "\ufffd";
|
|
16
18
|
export class LlamaChat {
|
|
17
19
|
/** @internal */ _chatWrapper;
|
|
18
20
|
/** @internal */ _disposeAggregator = new DisposeAggregator();
|
|
@@ -30,7 +32,14 @@ export class LlamaChat {
|
|
|
30
32
|
this.dispose();
|
|
31
33
|
}));
|
|
32
34
|
this._disposeAggregator.add(this.onDispose.dispatchEvent);
|
|
33
|
-
this._chatWrapper =
|
|
35
|
+
this._chatWrapper = chatWrapper === "auto"
|
|
36
|
+
? (resolveChatWrapper({
|
|
37
|
+
bosString: contextSequence.model.tokens.bosString,
|
|
38
|
+
filename: contextSequence.model.filename,
|
|
39
|
+
fileInfo: contextSequence.model.fileInfo,
|
|
40
|
+
tokenizer: contextSequence.model.tokenizer
|
|
41
|
+
}) ?? new GeneralChatWrapper())
|
|
42
|
+
: chatWrapper;
|
|
34
43
|
}
|
|
35
44
|
dispose({ disposeSequence = this._autoDisposeSequence } = {}) {
|
|
36
45
|
if (this._sequence == null)
|
|
@@ -63,12 +72,12 @@ export class LlamaChat {
|
|
|
63
72
|
get model() {
|
|
64
73
|
return this.sequence.model;
|
|
65
74
|
}
|
|
66
|
-
async generateResponse(history, { onToken, signal, maxTokens, temperature, topK, topP, grammar, trimWhitespaceSuffix = false, repeatPenalty = {}, evaluationPriority = 5, functions, documentFunctionParams, contextShift = defaultContextShiftOptions, lastEvaluationContextWindow: { history: lastEvaluationContextWindowHistory, minimumOverlapPercentageToPreventContextShift = 0.5 } = {} } = {}) {
|
|
75
|
+
async generateResponse(history, { onToken, signal, stopOnAbortSignal = false, maxTokens, temperature, minP, topK, topP, grammar, trimWhitespaceSuffix = false, repeatPenalty = {}, tokenBias, evaluationPriority = 5, functions, documentFunctionParams, contextShift = defaultContextShiftOptions, customStopTriggers, lastEvaluationContextWindow: { history: lastEvaluationContextWindowHistory, minimumOverlapPercentageToPreventContextShift = 0.5 } = {} } = {}) {
|
|
67
76
|
const functionsEnabled = (functions != null && Object.keys(functions).length > 0);
|
|
68
77
|
if (grammar != null && functionsEnabled)
|
|
69
78
|
throw new Error("Using both grammar and functions is not supported yet");
|
|
70
79
|
if (signal?.aborted)
|
|
71
|
-
throw
|
|
80
|
+
throw signal.reason;
|
|
72
81
|
if (this._sequence == null)
|
|
73
82
|
throw new DisposedError();
|
|
74
83
|
let resolvedHistory = this._sequence.isLoadedToMemory
|
|
@@ -81,7 +90,6 @@ export class LlamaChat {
|
|
|
81
90
|
});
|
|
82
91
|
const model = this._sequence.model;
|
|
83
92
|
const context = this._sequence.context;
|
|
84
|
-
const eosToken = model.tokens.eos;
|
|
85
93
|
const resolvedContextShift = {
|
|
86
94
|
...defaultContextShiftOptions,
|
|
87
95
|
...removeNullFields(contextShift)
|
|
@@ -99,7 +107,7 @@ export class LlamaChat {
|
|
|
99
107
|
? new LlamaGrammarEvaluationState({ grammar })
|
|
100
108
|
: undefined;
|
|
101
109
|
let functionsGrammar = functionsEnabled
|
|
102
|
-
? new FunctionCallGrammar(functions, this._chatWrapper, false)
|
|
110
|
+
? new FunctionCallGrammar(model._llama, functions, this._chatWrapper, false)
|
|
103
111
|
: undefined;
|
|
104
112
|
let functionsEvaluationState = (functionsEnabled && functionsGrammar != null)
|
|
105
113
|
? new LlamaGrammarEvaluationState({
|
|
@@ -108,6 +116,7 @@ export class LlamaChat {
|
|
|
108
116
|
: undefined;
|
|
109
117
|
const streamRegulator = new TokenStreamRegulator();
|
|
110
118
|
const stopGenerationDetector = new StopGenerationDetector();
|
|
119
|
+
const customStopGenerationTriggersDetector = new StopGenerationDetector();
|
|
111
120
|
const functionSyntaxStartDetector = new StopGenerationDetector();
|
|
112
121
|
const functionSyntaxEndDetector = new StopGenerationDetector();
|
|
113
122
|
const disengageInitiallyEngagedFunctionMode = new StopGenerationDetector();
|
|
@@ -121,8 +130,8 @@ export class LlamaChat {
|
|
|
121
130
|
let lastContextWindowHistory = resolvedHistory;
|
|
122
131
|
let lastHistoryCompressionMetadata = resolvedContextShift.lastEvaluationMetadata;
|
|
123
132
|
const ensureNotAborted = () => {
|
|
124
|
-
if (signal?.aborted)
|
|
125
|
-
throw
|
|
133
|
+
if (signal?.aborted && (!stopOnAbortSignal || res.length === 0))
|
|
134
|
+
throw signal.reason;
|
|
126
135
|
if (this._sequence == null)
|
|
127
136
|
throw new DisposedError();
|
|
128
137
|
};
|
|
@@ -132,7 +141,7 @@ export class LlamaChat {
|
|
|
132
141
|
let punishTokens = res.slice(-repeatPenaltyLastTokens);
|
|
133
142
|
if (punishTokensFilter != null)
|
|
134
143
|
punishTokens = punishTokensFilter(punishTokens);
|
|
135
|
-
if (!penalizeNewLine) {
|
|
144
|
+
if (penalizeNewLine == null || !penalizeNewLine) {
|
|
136
145
|
const nlToken = model.tokens.nl;
|
|
137
146
|
if (nlToken != null)
|
|
138
147
|
punishTokens = punishTokens.filter(token => token !== nlToken);
|
|
@@ -173,7 +182,7 @@ export class LlamaChat {
|
|
|
173
182
|
ignoredStartTextTokens = mostExhaustiveTriggeredStop.stopTrigger
|
|
174
183
|
.map((stopTrigger) => {
|
|
175
184
|
if (typeof stopTrigger === "string")
|
|
176
|
-
return model.tokenize(stopTrigger);
|
|
185
|
+
return model.tokenize(stopTrigger, false, "trimLeadingSpace");
|
|
177
186
|
else
|
|
178
187
|
return [stopTrigger];
|
|
179
188
|
})
|
|
@@ -181,7 +190,7 @@ export class LlamaChat {
|
|
|
181
190
|
const newPendingTokens = mostExhaustiveTriggeredStop.remainingGenerations
|
|
182
191
|
.map((generation) => {
|
|
183
192
|
if (typeof generation === "string")
|
|
184
|
-
return model.tokenize(generation);
|
|
193
|
+
return model.tokenize(generation, false, "trimLeadingSpace");
|
|
185
194
|
else
|
|
186
195
|
return generation;
|
|
187
196
|
})
|
|
@@ -192,8 +201,11 @@ export class LlamaChat {
|
|
|
192
201
|
}
|
|
193
202
|
}
|
|
194
203
|
};
|
|
204
|
+
if (customStopTriggers != null)
|
|
205
|
+
StopGenerationDetector.resolveStopTriggers(customStopTriggers, model.tokenizer)
|
|
206
|
+
.map((stopTrigger) => customStopGenerationTriggersDetector.addStopTrigger(stopTrigger));
|
|
195
207
|
if (grammar != null)
|
|
196
|
-
StopGenerationDetector.resolveStopTriggers(grammar.stopGenerationTriggers, model.
|
|
208
|
+
StopGenerationDetector.resolveStopTriggers(grammar.stopGenerationTriggers, model.tokenizer)
|
|
197
209
|
.map((stopTrigger) => stopGenerationDetector.addStopTrigger(stopTrigger));
|
|
198
210
|
if (functions != null && Object.keys(functions).length > 0)
|
|
199
211
|
functionSyntaxStartDetector.addStopTrigger([this._chatWrapper.settings.functions.call.prefix]);
|
|
@@ -206,7 +218,7 @@ export class LlamaChat {
|
|
|
206
218
|
resolvedHistory: getResolvedHistoryWithCurrentModelResponse(),
|
|
207
219
|
resolvedContextShift,
|
|
208
220
|
lastHistoryCompressionMetadata,
|
|
209
|
-
pendingTokensCount: pendingTokens.length + queuedChunkTokens.length,
|
|
221
|
+
pendingTokensCount: ignoredStartTextTokens.length + pendingTokens.length + queuedChunkTokens.length,
|
|
210
222
|
isFirstEvaluation,
|
|
211
223
|
chatWrapper: this._chatWrapper,
|
|
212
224
|
lastEvaluationContextWindowHistory,
|
|
@@ -218,15 +230,15 @@ export class LlamaChat {
|
|
|
218
230
|
});
|
|
219
231
|
ensureNotAborted();
|
|
220
232
|
if (generatedTokens === 0) {
|
|
221
|
-
StopGenerationDetector.resolveStopTriggers(ignoreStartText, model.
|
|
233
|
+
StopGenerationDetector.resolveStopTriggers(ignoreStartText, model.tokenizer)
|
|
222
234
|
.map((stopTrigger) => ignoreStartTextDetector.addStopTrigger(stopTrigger));
|
|
223
235
|
if (functionsEnabled) {
|
|
224
236
|
initiallyEngagedFunctionMode = functionCallInitiallyEngaged;
|
|
225
|
-
StopGenerationDetector.resolveStopTriggers(disengageInitiallyEngagedFunctionCall, model.
|
|
237
|
+
StopGenerationDetector.resolveStopTriggers(disengageInitiallyEngagedFunctionCall, model.tokenizer)
|
|
226
238
|
.map((stopTrigger) => disengageInitiallyEngagedFunctionMode.addStopTrigger(stopTrigger));
|
|
227
239
|
if (initiallyEngagedFunctionMode) {
|
|
228
240
|
inFunctionEvaluationMode = true;
|
|
229
|
-
functionsGrammar = new FunctionCallGrammar(functions, this._chatWrapper, true);
|
|
241
|
+
functionsGrammar = new FunctionCallGrammar(model._llama, functions, this._chatWrapper, true);
|
|
230
242
|
functionsEvaluationState = new LlamaGrammarEvaluationState({
|
|
231
243
|
grammar: functionsGrammar
|
|
232
244
|
});
|
|
@@ -239,10 +251,10 @@ export class LlamaChat {
|
|
|
239
251
|
lastContextWindowHistory = contextWindowHistory;
|
|
240
252
|
const contextWindowLastModelResponse = getLastTextModelResponseFromChatHistory(contextWindowHistory);
|
|
241
253
|
const contextWindowsRes = [];
|
|
242
|
-
StopGenerationDetector.resolveStopTriggers(stopGenerationTriggers, model.
|
|
254
|
+
StopGenerationDetector.resolveStopTriggers(stopGenerationTriggers, model.tokenizer)
|
|
243
255
|
.map((stopTrigger) => stopGenerationDetector.addStopTrigger(stopTrigger));
|
|
244
256
|
if (functionsGrammar != null)
|
|
245
|
-
StopGenerationDetector.resolveStopTriggers(functionsGrammar.stopGenerationTriggers, model.
|
|
257
|
+
StopGenerationDetector.resolveStopTriggers(functionsGrammar.stopGenerationTriggers, model.tokenizer)
|
|
246
258
|
.map((stopTrigger) => functionSyntaxEndDetector.addStopTrigger(stopTrigger));
|
|
247
259
|
let { firstDifferentIndex } = this._sequence.compareContextTokens(tokens);
|
|
248
260
|
// we need to decode at least one token to generate a response
|
|
@@ -257,7 +269,7 @@ export class LlamaChat {
|
|
|
257
269
|
ensureNotAborted();
|
|
258
270
|
}
|
|
259
271
|
const evaluationIterator = this._sequence.evaluate(tokens, removeNullFields({
|
|
260
|
-
temperature, topK, topP,
|
|
272
|
+
temperature, minP, topK, topP,
|
|
261
273
|
grammarEvaluationState: () => {
|
|
262
274
|
if (inFunctionEvaluationMode)
|
|
263
275
|
return functionsEvaluationState;
|
|
@@ -269,194 +281,279 @@ export class LlamaChat {
|
|
|
269
281
|
frequencyPenalty,
|
|
270
282
|
presencePenalty
|
|
271
283
|
},
|
|
284
|
+
tokenBias,
|
|
272
285
|
evaluationPriority,
|
|
273
|
-
|
|
286
|
+
yieldEogToken: true
|
|
274
287
|
}));
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
locksToReleaseOnValidGeneration.
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
288
|
+
try {
|
|
289
|
+
let currentIteration = await evaluationIterator.next();
|
|
290
|
+
while (currentIteration.done !== true) {
|
|
291
|
+
const token = currentIteration.value;
|
|
292
|
+
let replacementToken = undefined;
|
|
293
|
+
ensureNotAborted();
|
|
294
|
+
generatedTokens++;
|
|
295
|
+
const tokens = [token];
|
|
296
|
+
const text = model.detokenize([token]);
|
|
297
|
+
const queuedTokenRelease = streamRegulator.addChunk({ tokens, text });
|
|
298
|
+
if (initiallyEngagedFunctionMode)
|
|
299
|
+
disengageInitiallyEngagedFunctionMode.recordGeneration({ text, tokens, startNewChecks: generatedTokens === 1 });
|
|
300
|
+
if (text === UNKNOWN_UNICODE_CHAR || ((grammar?.trimWhitespaceSuffix || trimWhitespaceSuffix) && text.trim() === "")) {
|
|
301
|
+
locksToReleaseOnValidGeneration.push(queuedTokenRelease.createTextIndexLock(0));
|
|
302
|
+
}
|
|
303
|
+
else {
|
|
304
|
+
while (locksToReleaseOnValidGeneration.length > 0)
|
|
305
|
+
locksToReleaseOnValidGeneration.shift().dispose();
|
|
306
|
+
}
|
|
307
|
+
functionSyntaxStartDetector.recordGeneration({ text, tokens, queuedTokenRelease });
|
|
308
|
+
if (initiallyEngagedFunctionMode && disengageInitiallyEngagedFunctionMode.hasTriggeredStops) {
|
|
309
|
+
initiallyEngagedFunctionMode = false;
|
|
310
|
+
let shouldStopFunctionEvaluationMode = !functionSyntaxStartDetector.hasTriggeredStops;
|
|
311
|
+
if (!shouldStopFunctionEvaluationMode && functionsEnabled && functionsGrammar != null) {
|
|
312
|
+
const functionCallText = model.detokenize([...functionCallTokens, ...tokens]);
|
|
313
|
+
try {
|
|
314
|
+
const functionName = functionsGrammar.parseFunctionNameFromPartialCall(functionCallText, {
|
|
315
|
+
enableInternalBuiltinFunctions: true,
|
|
316
|
+
initialFunctionCallEngaged: true
|
|
317
|
+
});
|
|
318
|
+
const internalBuiltinFunctions = this._chatWrapper.getInternalBuiltinFunctions({ initialFunctionCallEngaged: true });
|
|
319
|
+
if (internalBuiltinFunctions[functionName] != null) {
|
|
320
|
+
shouldStopFunctionEvaluationMode = true;
|
|
321
|
+
}
|
|
322
|
+
}
|
|
323
|
+
catch (err) {
|
|
324
|
+
if (!(err instanceof LlamaFunctionCallValidationError))
|
|
325
|
+
throw err;
|
|
304
326
|
}
|
|
305
327
|
}
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
328
|
+
if (shouldStopFunctionEvaluationMode) {
|
|
329
|
+
inFunctionEvaluationMode = false;
|
|
330
|
+
functionsGrammar = new FunctionCallGrammar(model._llama, functions, this._chatWrapper, false);
|
|
331
|
+
functionsEvaluationState = new LlamaGrammarEvaluationState({
|
|
332
|
+
grammar: functionsGrammar
|
|
333
|
+
});
|
|
334
|
+
functionCallTokens.length = 0;
|
|
335
|
+
while (functionCallTokenSyntaxLocks.length > 0)
|
|
336
|
+
functionCallTokenSyntaxLocks.shift().dispose();
|
|
337
|
+
functionSyntaxStartDetector.clearInProgressStops();
|
|
338
|
+
functionSyntaxStartDetector.clearTriggeredStops();
|
|
339
|
+
functionSyntaxEndDetector.clearInProgressStops();
|
|
340
|
+
functionSyntaxEndDetector.clearTriggeredStops();
|
|
309
341
|
}
|
|
310
342
|
}
|
|
311
|
-
if (
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
functionSyntaxStartDetector.
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
343
|
+
if (!inFunctionEvaluationMode && functionsEnabled && functionsGrammar != null &&
|
|
344
|
+
functionSyntaxStartDetector.hasTriggeredStops && functionsEvaluationState != null) {
|
|
345
|
+
inFunctionEvaluationMode = true;
|
|
346
|
+
functionCallTokenSyntaxLocks.push(queuedTokenRelease.createTextIndexLock(0));
|
|
347
|
+
stopGenerationDetector.clearTriggeredStops();
|
|
348
|
+
stopGenerationDetector.clearInProgressStops();
|
|
349
|
+
customStopGenerationTriggersDetector.clearTriggeredStops();
|
|
350
|
+
customStopGenerationTriggersDetector.clearInProgressStops();
|
|
351
|
+
pendingTokens.push(...streamRegulator.popFreeChunkTokens());
|
|
352
|
+
const triggeredStops = functionSyntaxStartDetector.getTriggeredStops();
|
|
353
|
+
const partiallyFreeTokens = streamRegulator.getPartiallyFreeChunk(model.tokenizer);
|
|
354
|
+
const queuedTokensBeforeStopTrigger = getQueuedTokensBeforeStopTrigger(triggeredStops, partiallyFreeTokens, model.tokenizer);
|
|
355
|
+
pendingTokens.push(...queuedTokensBeforeStopTrigger);
|
|
356
|
+
const [firstRemainingGenerationAfterStop] = triggeredStops
|
|
357
|
+
.map((stopTrigger) => stopTrigger.remainingGenerations)
|
|
358
|
+
.filter((remainingGenerations) => remainingGenerations.length > 0)
|
|
359
|
+
.flat(1);
|
|
360
|
+
const remainingTextAfterStop = (firstRemainingGenerationAfterStop == null || firstRemainingGenerationAfterStop.length === 0)
|
|
361
|
+
? ""
|
|
362
|
+
: typeof firstRemainingGenerationAfterStop === "string"
|
|
363
|
+
? firstRemainingGenerationAfterStop
|
|
364
|
+
: model.detokenize(firstRemainingGenerationAfterStop);
|
|
365
|
+
functionCallTokens.push(...model.tokenize(this._chatWrapper.settings.functions.call.prefix, false, "trimLeadingSpace"));
|
|
366
|
+
for (const functionCallToken of functionCallTokens)
|
|
367
|
+
context._acceptTokenOnGrammarEvaluationState(functionsEvaluationState, functionCallToken);
|
|
368
|
+
// these tokens have to be verified that they match the function calling syntax grammar before they can be accepted,
|
|
369
|
+
// or the context state should be modified to not include the incompatible tokens
|
|
370
|
+
const remainingTextTokens = model.tokenize(remainingTextAfterStop, false, "trimLeadingSpace");
|
|
371
|
+
let unfitTokens = [];
|
|
372
|
+
for (let i = 0; i < remainingTextTokens.length; i++) {
|
|
373
|
+
const remainingToken = remainingTextTokens[i];
|
|
374
|
+
const canBeNextToken = context._canBeNextTokenForGrammarEvaluationState(functionsEvaluationState, remainingToken);
|
|
375
|
+
if (!canBeNextToken) {
|
|
376
|
+
unfitTokens = remainingTextTokens.slice(i);
|
|
377
|
+
break;
|
|
378
|
+
}
|
|
379
|
+
context._acceptTokenOnGrammarEvaluationState(functionsEvaluationState, remainingToken);
|
|
380
|
+
functionCallTokens.push(remainingToken);
|
|
381
|
+
}
|
|
382
|
+
if (unfitTokens.length > 0) {
|
|
383
|
+
const unfitTokensText = model.detokenize(unfitTokens); // the current token text must end with it
|
|
384
|
+
const currentTokenText = queuedTokenRelease.text;
|
|
385
|
+
let replacementTokens;
|
|
386
|
+
if (!currentTokenText.endsWith(unfitTokensText)) {
|
|
387
|
+
console.warn(getConsoleLogPrefix() + "The current token text does not end with the unfit function call syntax tokens text");
|
|
388
|
+
replacementTokens = remainingTextTokens.slice(0, -unfitTokens.length);
|
|
389
|
+
}
|
|
390
|
+
else {
|
|
391
|
+
const newCurrentTokensText = currentTokenText.slice(0, -unfitTokensText.length);
|
|
392
|
+
replacementTokens = model.tokenize(newCurrentTokensText, false, "trimLeadingSpace");
|
|
393
|
+
}
|
|
394
|
+
if (replacementTokens.length > 0) {
|
|
395
|
+
replacementToken = replacementTokens[0];
|
|
396
|
+
queuedTokenRelease.modifyTokensAndText(replacementTokens, model.detokenize([replacementToken]));
|
|
397
|
+
}
|
|
398
|
+
}
|
|
324
399
|
}
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
functionCallTokenSyntaxLocks.push(queuedTokenRelease.createTextIndexLock(0));
|
|
330
|
-
stopGenerationDetector.clearTriggeredStops();
|
|
331
|
-
stopGenerationDetector.clearInProgressStops();
|
|
332
|
-
pendingTokens.push(...streamRegulator.popFreeChunkTokens());
|
|
333
|
-
const triggeredStops = functionSyntaxStartDetector.getTriggeredStops();
|
|
334
|
-
const partiallyFreeTokens = streamRegulator.getPartiallyFreeChunk();
|
|
335
|
-
const queuedTokensBeforeStopTrigger = getQueuedTokensBeforeStopTrigger(triggeredStops, partiallyFreeTokens, model.tokenize);
|
|
336
|
-
pendingTokens.push(...queuedTokensBeforeStopTrigger);
|
|
337
|
-
const [firstRemainingGenerationAfterStop] = triggeredStops
|
|
338
|
-
.map((stopTrigger) => stopTrigger.remainingGenerations)
|
|
339
|
-
.filter((remainingGenerations) => remainingGenerations.length > 0)
|
|
340
|
-
.flat(1);
|
|
341
|
-
const remainingTextAfterStop = (firstRemainingGenerationAfterStop == null || firstRemainingGenerationAfterStop.length === 0)
|
|
342
|
-
? ""
|
|
343
|
-
: typeof firstRemainingGenerationAfterStop === "string"
|
|
344
|
-
? firstRemainingGenerationAfterStop
|
|
345
|
-
: model.detokenize(firstRemainingGenerationAfterStop);
|
|
346
|
-
functionCallTokens.push(...model.tokenize(this._chatWrapper.settings.functions.call.prefix + remainingTextAfterStop));
|
|
347
|
-
for (const functionCallToken of functionCallTokens)
|
|
348
|
-
context._acceptTokenOnGrammarEvaluationState(functionsEvaluationState, functionCallToken);
|
|
349
|
-
}
|
|
350
|
-
else if (inFunctionEvaluationMode) {
|
|
351
|
-
functionCallTokens.push(...tokens);
|
|
352
|
-
functionCallTokenSyntaxLocks.push(queuedTokenRelease.createTextIndexLock(0));
|
|
353
|
-
functionSyntaxEndDetector.recordGeneration({ text, tokens, queuedTokenRelease });
|
|
354
|
-
}
|
|
355
|
-
if (inFunctionEvaluationMode && functionSyntaxEndDetector.hasTriggeredStops && functionsGrammar != null) {
|
|
356
|
-
const functionCallText = model.detokenize(functionCallTokens);
|
|
357
|
-
const functionCall = functionsGrammar.parseFunctionCall(functionCallText);
|
|
358
|
-
let modelResponse = model.detokenize(res);
|
|
359
|
-
let contextWindowModelResponse = model.detokenize(contextWindowsRes);
|
|
360
|
-
if (grammar?.trimWhitespaceSuffix || trimWhitespaceSuffix) {
|
|
361
|
-
modelResponse = modelResponse.trimEnd();
|
|
362
|
-
contextWindowModelResponse = contextWindowModelResponse.trimEnd();
|
|
400
|
+
else if (inFunctionEvaluationMode) {
|
|
401
|
+
functionCallTokens.push(...tokens);
|
|
402
|
+
functionCallTokenSyntaxLocks.push(queuedTokenRelease.createTextIndexLock(0));
|
|
403
|
+
functionSyntaxEndDetector.recordGeneration({ text, tokens, queuedTokenRelease });
|
|
363
404
|
}
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
functionCall: functionCall,
|
|
373
|
-
metadata: {
|
|
374
|
-
stopReason: "functionCall"
|
|
405
|
+
if (inFunctionEvaluationMode && functionSyntaxEndDetector.hasTriggeredStops && functionsGrammar != null) {
|
|
406
|
+
const functionCallText = model.detokenize(functionCallTokens);
|
|
407
|
+
const functionCall = functionsGrammar.parseFunctionCall(functionCallText);
|
|
408
|
+
let modelResponse = model.detokenize(res);
|
|
409
|
+
let contextWindowModelResponse = model.detokenize(contextWindowsRes);
|
|
410
|
+
if (grammar?.trimWhitespaceSuffix || trimWhitespaceSuffix) {
|
|
411
|
+
modelResponse = modelResponse.trimEnd();
|
|
412
|
+
contextWindowModelResponse = contextWindowModelResponse.trimEnd();
|
|
375
413
|
}
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
.filter((remainingGenerations) => remainingGenerations.length > 0)
|
|
390
|
-
.flat(1);
|
|
391
|
-
removeFoundStartIgnoreTextsFromPendingTokens();
|
|
392
|
-
if (pendingTokens.length > 0)
|
|
393
|
-
onToken?.(pendingTokens.slice());
|
|
394
|
-
res.push(...pendingTokens);
|
|
395
|
-
contextWindowsRes.push(...pendingTokens);
|
|
396
|
-
pendingTokens.length = 0;
|
|
397
|
-
let modelResponse = model.detokenize(res);
|
|
398
|
-
let contextWindowModelResponse = model.detokenize(contextWindowsRes);
|
|
399
|
-
if (grammar?.trimWhitespaceSuffix || trimWhitespaceSuffix) {
|
|
400
|
-
modelResponse = modelResponse.trimEnd();
|
|
401
|
-
contextWindowModelResponse = contextWindowModelResponse.trimEnd();
|
|
414
|
+
return {
|
|
415
|
+
response: modelResponse,
|
|
416
|
+
lastEvaluation: {
|
|
417
|
+
contextWindow: setLastModelTextResponseInChatHistory(lastContextWindowHistory, contextWindowLastModelResponse + contextWindowModelResponse),
|
|
418
|
+
cleanHistory: setLastModelTextResponseInChatHistory(resolvedHistory, lastModelResponse + modelResponse),
|
|
419
|
+
contextShiftMetadata: lastHistoryCompressionMetadata
|
|
420
|
+
},
|
|
421
|
+
// prevent infinite TS type instantiation
|
|
422
|
+
functionCall: functionCall,
|
|
423
|
+
metadata: {
|
|
424
|
+
stopReason: "functionCall"
|
|
425
|
+
}
|
|
426
|
+
};
|
|
402
427
|
}
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
contextShiftMetadata: lastHistoryCompressionMetadata
|
|
409
|
-
},
|
|
410
|
-
metadata: {
|
|
411
|
-
remainingGenerationAfterStop: firstRemainingGenerationAfterStop,
|
|
412
|
-
stopReason: token === eosToken
|
|
413
|
-
? "eosToken"
|
|
414
|
-
: "stopGenerationTrigger"
|
|
415
|
-
}
|
|
416
|
-
};
|
|
417
|
-
}
|
|
418
|
-
const maxTokensTriggered = maxTokens != null && maxTokens > 0 && generatedTokens >= maxTokens;
|
|
419
|
-
if (res.length === 0) {
|
|
420
|
-
ignoreStartTextDetector.clearInProgressStops();
|
|
421
|
-
ignoreStartTextDetector.clearTriggeredStops();
|
|
422
|
-
ignoreStartTextDetector.recordGeneration({
|
|
423
|
-
text: model.detokenize(pendingTokens),
|
|
424
|
-
tokens: pendingTokens
|
|
425
|
-
});
|
|
426
|
-
}
|
|
427
|
-
if (pendingTokens.length > 0 && (maxTokensTriggered || !ignoreStartTextDetector.hasInProgressStops)) {
|
|
428
|
+
if (!inFunctionEvaluationMode) {
|
|
429
|
+
stopGenerationDetector.recordGeneration({ text, tokens, queuedTokenRelease });
|
|
430
|
+
customStopGenerationTriggersDetector.recordGeneration({ text, tokens, queuedTokenRelease });
|
|
431
|
+
}
|
|
432
|
+
pendingTokens.push(...streamRegulator.popFreeChunkTokens());
|
|
428
433
|
removeFoundStartIgnoreTextsFromPendingTokens();
|
|
429
|
-
if (
|
|
430
|
-
|
|
434
|
+
if (stopGenerationDetector.hasTriggeredStops || customStopGenerationTriggersDetector.hasTriggeredStops ||
|
|
435
|
+
model.isEogToken(token)) {
|
|
436
|
+
stopGenerationDetector.clearInProgressStops();
|
|
437
|
+
customStopGenerationTriggersDetector.clearInProgressStops();
|
|
438
|
+
pendingTokens.push(...streamRegulator.popFreeChunkTokens());
|
|
439
|
+
const triggeredStops = stopGenerationDetector.hasTriggeredStops
|
|
440
|
+
? stopGenerationDetector.getTriggeredStops()
|
|
441
|
+
: customStopGenerationTriggersDetector.getTriggeredStops();
|
|
442
|
+
const partiallyFreeTokens = streamRegulator.getPartiallyFreeChunk(model.tokenizer);
|
|
443
|
+
const queuedTokensBeforeStopTrigger = getQueuedTokensBeforeStopTrigger(triggeredStops, partiallyFreeTokens, model.tokenizer);
|
|
444
|
+
pendingTokens.push(...queuedTokensBeforeStopTrigger);
|
|
445
|
+
const [firstRemainingGenerationAfterStop] = triggeredStops
|
|
446
|
+
.map((stopTrigger) => stopTrigger.remainingGenerations)
|
|
447
|
+
.filter((remainingGenerations) => remainingGenerations.length > 0)
|
|
448
|
+
.flat(1);
|
|
449
|
+
removeFoundStartIgnoreTextsFromPendingTokens();
|
|
450
|
+
if (pendingTokens.length > 0)
|
|
451
|
+
onToken?.(pendingTokens.slice());
|
|
431
452
|
res.push(...pendingTokens);
|
|
432
453
|
contextWindowsRes.push(...pendingTokens);
|
|
433
454
|
pendingTokens.length = 0;
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
contextWindowModelResponse = contextWindowModelResponse.trimEnd();
|
|
442
|
-
}
|
|
443
|
-
return {
|
|
444
|
-
response: modelResponse,
|
|
445
|
-
lastEvaluation: {
|
|
455
|
+
let modelResponse = model.detokenize(res);
|
|
456
|
+
let contextWindowModelResponse = model.detokenize(contextWindowsRes);
|
|
457
|
+
if (grammar?.trimWhitespaceSuffix || trimWhitespaceSuffix) {
|
|
458
|
+
modelResponse = modelResponse.trimEnd();
|
|
459
|
+
contextWindowModelResponse = contextWindowModelResponse.trimEnd();
|
|
460
|
+
}
|
|
461
|
+
const lastEvaluation = {
|
|
446
462
|
contextWindow: setLastModelTextResponseInChatHistory(lastContextWindowHistory, contextWindowLastModelResponse + contextWindowModelResponse),
|
|
447
463
|
cleanHistory: setLastModelTextResponseInChatHistory(resolvedHistory, lastModelResponse + modelResponse),
|
|
448
464
|
contextShiftMetadata: lastHistoryCompressionMetadata
|
|
449
|
-
}
|
|
450
|
-
|
|
451
|
-
|
|
465
|
+
};
|
|
466
|
+
const isEogToken = model.isEogToken(token);
|
|
467
|
+
if (isEogToken || stopGenerationDetector.hasTriggeredStops) {
|
|
468
|
+
return {
|
|
469
|
+
response: modelResponse,
|
|
470
|
+
lastEvaluation,
|
|
471
|
+
metadata: {
|
|
472
|
+
remainingGenerationAfterStop: firstRemainingGenerationAfterStop,
|
|
473
|
+
stopReason: isEogToken
|
|
474
|
+
? "eogToken"
|
|
475
|
+
: "stopGenerationTrigger"
|
|
476
|
+
}
|
|
477
|
+
};
|
|
452
478
|
}
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
479
|
+
return {
|
|
480
|
+
response: modelResponse,
|
|
481
|
+
lastEvaluation,
|
|
482
|
+
metadata: {
|
|
483
|
+
remainingGenerationAfterStop: firstRemainingGenerationAfterStop,
|
|
484
|
+
stopReason: "customStopTrigger",
|
|
485
|
+
customStopTrigger: triggeredStops[0].stopTrigger
|
|
486
|
+
}
|
|
487
|
+
};
|
|
488
|
+
}
|
|
489
|
+
const maxTokensTriggered = maxTokens != null && maxTokens > 0 && generatedTokens >= maxTokens;
|
|
490
|
+
if (res.length === 0) {
|
|
491
|
+
ignoreStartTextDetector.clearInProgressStops();
|
|
492
|
+
ignoreStartTextDetector.clearTriggeredStops();
|
|
493
|
+
ignoreStartTextDetector.recordGeneration({
|
|
494
|
+
text: model.detokenize(pendingTokens),
|
|
495
|
+
tokens: pendingTokens
|
|
496
|
+
});
|
|
497
|
+
}
|
|
498
|
+
if (pendingTokens.length > 0 && (maxTokensTriggered || !ignoreStartTextDetector.hasInProgressStops)) {
|
|
499
|
+
removeFoundStartIgnoreTextsFromPendingTokens();
|
|
500
|
+
if (pendingTokens.length > 0) {
|
|
501
|
+
onToken?.(pendingTokens.slice());
|
|
502
|
+
res.push(...pendingTokens);
|
|
503
|
+
contextWindowsRes.push(...pendingTokens);
|
|
504
|
+
pendingTokens.length = 0;
|
|
505
|
+
}
|
|
506
|
+
}
|
|
507
|
+
if (maxTokensTriggered) {
|
|
508
|
+
let modelResponse = model.detokenize(res);
|
|
509
|
+
let contextWindowModelResponse = model.detokenize(contextWindowsRes);
|
|
510
|
+
if (grammar?.trimWhitespaceSuffix || trimWhitespaceSuffix) {
|
|
511
|
+
modelResponse = modelResponse.trimEnd();
|
|
512
|
+
contextWindowModelResponse = contextWindowModelResponse.trimEnd();
|
|
513
|
+
}
|
|
514
|
+
return {
|
|
515
|
+
response: modelResponse,
|
|
516
|
+
lastEvaluation: {
|
|
517
|
+
contextWindow: setLastModelTextResponseInChatHistory(lastContextWindowHistory, contextWindowLastModelResponse + contextWindowModelResponse),
|
|
518
|
+
cleanHistory: setLastModelTextResponseInChatHistory(resolvedHistory, lastModelResponse + modelResponse),
|
|
519
|
+
contextShiftMetadata: lastHistoryCompressionMetadata
|
|
520
|
+
},
|
|
521
|
+
metadata: {
|
|
522
|
+
stopReason: "maxTokens"
|
|
523
|
+
}
|
|
524
|
+
};
|
|
525
|
+
}
|
|
526
|
+
if (this._sequence.nextTokenIndex >= context.contextSize - 1) {
|
|
527
|
+
shouldContextShift = true;
|
|
528
|
+
break;
|
|
529
|
+
}
|
|
530
|
+
if (signal?.aborted && stopOnAbortSignal) {
|
|
531
|
+
if (res.length === 0)
|
|
532
|
+
throw signal.reason;
|
|
533
|
+
let modelResponse = model.detokenize(res);
|
|
534
|
+
let contextWindowModelResponse = model.detokenize(contextWindowsRes);
|
|
535
|
+
if (grammar?.trimWhitespaceSuffix || trimWhitespaceSuffix) {
|
|
536
|
+
modelResponse = modelResponse.trimEnd();
|
|
537
|
+
contextWindowModelResponse = contextWindowModelResponse.trimEnd();
|
|
538
|
+
}
|
|
539
|
+
return {
|
|
540
|
+
response: modelResponse,
|
|
541
|
+
lastEvaluation: {
|
|
542
|
+
contextWindow: setLastModelTextResponseInChatHistory(lastContextWindowHistory, contextWindowLastModelResponse + contextWindowModelResponse),
|
|
543
|
+
cleanHistory: setLastModelTextResponseInChatHistory(resolvedHistory, lastModelResponse + modelResponse),
|
|
544
|
+
contextShiftMetadata: lastHistoryCompressionMetadata
|
|
545
|
+
},
|
|
546
|
+
metadata: {
|
|
547
|
+
stopReason: "abort"
|
|
548
|
+
}
|
|
549
|
+
};
|
|
550
|
+
}
|
|
551
|
+
currentIteration = await evaluationIterator.next(replacementToken);
|
|
458
552
|
}
|
|
459
553
|
}
|
|
554
|
+
finally {
|
|
555
|
+
await evaluationIterator.return();
|
|
556
|
+
}
|
|
460
557
|
isFirstEvaluation = false;
|
|
461
558
|
if (shouldContextShift)
|
|
462
559
|
continue;
|
|
@@ -530,33 +627,13 @@ async function compressHistoryToFitContextSize({ history, contextShiftSize, cont
|
|
|
530
627
|
lastShiftMetadata: contextShiftLastEvaluationMetadata
|
|
531
628
|
});
|
|
532
629
|
if (!checkIfHistoryFitsContext(chatHistory))
|
|
533
|
-
throw new Error("The default context shift strategy did not return a history that fits the context size"
|
|
630
|
+
throw new Error("The default context shift strategy did not return a history that fits the context size. " +
|
|
631
|
+
"This may happen due to the system prompt being too long");
|
|
534
632
|
return {
|
|
535
633
|
compressedHistory: chatHistory,
|
|
536
634
|
metadata
|
|
537
635
|
};
|
|
538
636
|
}
|
|
539
|
-
function getQueuedTokensBeforeStopTrigger(triggeredStops, partiallyFreeTokens, tokenizer) {
|
|
540
|
-
if (partiallyFreeTokens.tokens.length === 0 && partiallyFreeTokens.text.length === 0)
|
|
541
|
-
return [];
|
|
542
|
-
else if (partiallyFreeTokens.tokens.length !== 0 && partiallyFreeTokens.text.length === 0)
|
|
543
|
-
return partiallyFreeTokens.tokens;
|
|
544
|
-
else if (partiallyFreeTokens.tokens.length === 0 && partiallyFreeTokens.text.length !== 0)
|
|
545
|
-
return tokenizer(partiallyFreeTokens.text);
|
|
546
|
-
const triggerThatStartsWithStringIndex = triggeredStops.findIndex((trigger) => trigger.stopTrigger.length > 0 && typeof trigger.stopTrigger[0] === "string");
|
|
547
|
-
const triggerThatStartsWithTokenIndex = triggeredStops.findIndex((trigger) => trigger.stopTrigger.length > 0 && typeof trigger.stopTrigger[0] !== "string");
|
|
548
|
-
if (triggerThatStartsWithTokenIndex > 0 && triggerThatStartsWithStringIndex < 0)
|
|
549
|
-
return partiallyFreeTokens.tokens;
|
|
550
|
-
else if (triggerThatStartsWithStringIndex > 0 && triggerThatStartsWithTokenIndex < 0)
|
|
551
|
-
return tokenizer(partiallyFreeTokens.text);
|
|
552
|
-
const stringTokens = tokenizer(partiallyFreeTokens.text);
|
|
553
|
-
if (stringTokens.length === partiallyFreeTokens.tokens.length &&
|
|
554
|
-
stringTokens.every((value, index) => value === partiallyFreeTokens.tokens[index]))
|
|
555
|
-
return stringTokens;
|
|
556
|
-
else if (triggerThatStartsWithStringIndex < triggerThatStartsWithTokenIndex)
|
|
557
|
-
return stringTokens;
|
|
558
|
-
return partiallyFreeTokens.tokens;
|
|
559
|
-
}
|
|
560
637
|
function getLastTextModelResponseFromChatHistory(chatHistory) {
|
|
561
638
|
if (chatHistory.length === 0 || chatHistory[chatHistory.length - 1].type !== "model")
|
|
562
639
|
return "";
|
|
@@ -604,7 +681,7 @@ async function getContextWindow({ resolvedHistory, resolvedContextShift, lastHis
|
|
|
604
681
|
availableFunctions: functions,
|
|
605
682
|
documentFunctionParams
|
|
606
683
|
});
|
|
607
|
-
const tokens = contextText.tokenize(model.
|
|
684
|
+
const tokens = contextText.tokenize(model.tokenizer);
|
|
608
685
|
if (tokens.length + pendingTokensCount + minFreeContextTokens < context.contextSize) {
|
|
609
686
|
const { firstDifferentIndex } = sequence.compareContextTokens(tokens);
|
|
610
687
|
const existingEvaluationPercentage = firstDifferentIndex / tokens.length;
|
|
@@ -630,11 +707,11 @@ async function getContextWindow({ resolvedHistory, resolvedContextShift, lastHis
|
|
|
630
707
|
: resolvedContextShift.size;
|
|
631
708
|
const { compressedHistory, metadata } = await compressHistoryToFitContextSize({
|
|
632
709
|
history: resolvedHistory,
|
|
633
|
-
contextShiftSize: Math.max(contextShiftSize,
|
|
710
|
+
contextShiftSize: Math.max(minFreeContextTokens, Math.min(contextShiftSize, context.contextSize - pendingTokensCount)) + pendingTokensCount,
|
|
634
711
|
contextShiftStrategy: resolvedContextShift.strategy,
|
|
635
712
|
contextShiftLastEvaluationMetadata: resolvedContextShift.lastEvaluationMetadata,
|
|
636
713
|
contextSize: context.contextSize,
|
|
637
|
-
tokenizer: model.
|
|
714
|
+
tokenizer: model.tokenizer,
|
|
638
715
|
chatWrapper: chatWrapper,
|
|
639
716
|
functions,
|
|
640
717
|
documentFunctionParams
|
|
@@ -646,7 +723,7 @@ async function getContextWindow({ resolvedHistory, resolvedContextShift, lastHis
|
|
|
646
723
|
return {
|
|
647
724
|
history: compressedHistory,
|
|
648
725
|
stopGenerationTriggers,
|
|
649
|
-
tokens: contextText.tokenize(model.
|
|
726
|
+
tokens: contextText.tokenize(model.tokenizer),
|
|
650
727
|
newResolvedHistory: resolvedHistory,
|
|
651
728
|
newHistoryCompressionMetadata: metadata,
|
|
652
729
|
ignoreStartText: ignoreStartText ?? [],
|
|
@@ -659,7 +736,7 @@ async function getContextWindow({ resolvedHistory, resolvedContextShift, lastHis
|
|
|
659
736
|
availableFunctions: functions,
|
|
660
737
|
documentFunctionParams
|
|
661
738
|
});
|
|
662
|
-
const tokens = contextText.tokenize(model.
|
|
739
|
+
const tokens = contextText.tokenize(model.tokenizer);
|
|
663
740
|
if (tokens.length + pendingTokensCount + minFreeContextTokens < context.contextSize)
|
|
664
741
|
return {
|
|
665
742
|
history: resolvedHistory,
|
|
@@ -672,16 +749,16 @@ async function getContextWindow({ resolvedHistory, resolvedContextShift, lastHis
|
|
|
672
749
|
disengageInitiallyEngagedFunctionCall: functionCall?.disengageInitiallyEngaged ?? []
|
|
673
750
|
};
|
|
674
751
|
}
|
|
675
|
-
const contextShiftSize = resolvedContextShift.size instanceof Function
|
|
752
|
+
const contextShiftSize = Math.min(context.contextSize, Math.max(1, Math.floor(resolvedContextShift.size instanceof Function
|
|
676
753
|
? await resolvedContextShift.size(sequence)
|
|
677
|
-
: resolvedContextShift.size;
|
|
754
|
+
: resolvedContextShift.size)));
|
|
678
755
|
const { compressedHistory, metadata } = await compressHistoryToFitContextSize({
|
|
679
756
|
history: resolvedHistory,
|
|
680
|
-
contextShiftSize: Math.max(contextShiftSize,
|
|
757
|
+
contextShiftSize: Math.max(minFreeContextTokens, Math.min(contextShiftSize, context.contextSize - pendingTokensCount)) + pendingTokensCount,
|
|
681
758
|
contextShiftStrategy: resolvedContextShift.strategy,
|
|
682
759
|
contextShiftLastEvaluationMetadata: resolvedContextShift.lastEvaluationMetadata,
|
|
683
760
|
contextSize: context.contextSize,
|
|
684
|
-
tokenizer: model.
|
|
761
|
+
tokenizer: model.tokenizer,
|
|
685
762
|
chatWrapper: chatWrapper,
|
|
686
763
|
functions,
|
|
687
764
|
documentFunctionParams
|
|
@@ -693,7 +770,7 @@ async function getContextWindow({ resolvedHistory, resolvedContextShift, lastHis
|
|
|
693
770
|
return {
|
|
694
771
|
history: compressedHistory,
|
|
695
772
|
stopGenerationTriggers,
|
|
696
|
-
tokens: contextText.tokenize(model.
|
|
773
|
+
tokens: contextText.tokenize(model.tokenizer),
|
|
697
774
|
newResolvedHistory: resolvedHistory,
|
|
698
775
|
newHistoryCompressionMetadata: metadata,
|
|
699
776
|
ignoreStartText: ignoreStartText ?? [],
|