@realtimex/node-llama-cpp 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +108 -0
- package/dist/ChatWrapper.d.ts +27 -0
- package/dist/ChatWrapper.js +233 -0
- package/dist/ChatWrapper.js.map +1 -0
- package/dist/apiDocsIndex.d.ts +1 -0
- package/dist/apiDocsIndex.js +7 -0
- package/dist/apiDocsIndex.js.map +1 -0
- package/dist/bindings/AddonTypes.d.ts +203 -0
- package/dist/bindings/AddonTypes.js +2 -0
- package/dist/bindings/AddonTypes.js.map +1 -0
- package/dist/bindings/Llama.d.ts +104 -0
- package/dist/bindings/Llama.js +570 -0
- package/dist/bindings/Llama.js.map +1 -0
- package/dist/bindings/consts.d.ts +2 -0
- package/dist/bindings/consts.js +13 -0
- package/dist/bindings/consts.js.map +1 -0
- package/dist/bindings/getLlama.d.ts +297 -0
- package/dist/bindings/getLlama.js +569 -0
- package/dist/bindings/getLlama.js.map +1 -0
- package/dist/bindings/types.d.ts +72 -0
- package/dist/bindings/types.js +105 -0
- package/dist/bindings/types.js.map +1 -0
- package/dist/bindings/utils/MemoryOrchestrator.d.ts +23 -0
- package/dist/bindings/utils/MemoryOrchestrator.js +50 -0
- package/dist/bindings/utils/MemoryOrchestrator.js.map +1 -0
- package/dist/bindings/utils/NoBinaryFoundError.d.ts +2 -0
- package/dist/bindings/utils/NoBinaryFoundError.js +7 -0
- package/dist/bindings/utils/NoBinaryFoundError.js.map +1 -0
- package/dist/bindings/utils/asyncEvery.d.ts +5 -0
- package/dist/bindings/utils/asyncEvery.js +15 -0
- package/dist/bindings/utils/asyncEvery.js.map +1 -0
- package/dist/bindings/utils/asyncSome.d.ts +5 -0
- package/dist/bindings/utils/asyncSome.js +29 -0
- package/dist/bindings/utils/asyncSome.js.map +1 -0
- package/dist/bindings/utils/binariesGithubRelease.d.ts +6 -0
- package/dist/bindings/utils/binariesGithubRelease.js +15 -0
- package/dist/bindings/utils/binariesGithubRelease.js.map +1 -0
- package/dist/bindings/utils/clearAllLocalBuilds.d.ts +1 -0
- package/dist/bindings/utils/clearAllLocalBuilds.js +47 -0
- package/dist/bindings/utils/clearAllLocalBuilds.js.map +1 -0
- package/dist/bindings/utils/cloneLlamaCppRepo.d.ts +11 -0
- package/dist/bindings/utils/cloneLlamaCppRepo.js +166 -0
- package/dist/bindings/utils/cloneLlamaCppRepo.js.map +1 -0
- package/dist/bindings/utils/compileLLamaCpp.d.ts +22 -0
- package/dist/bindings/utils/compileLLamaCpp.js +526 -0
- package/dist/bindings/utils/compileLLamaCpp.js.map +1 -0
- package/dist/bindings/utils/detectAvailableComputeLayers.d.ts +18 -0
- package/dist/bindings/utils/detectAvailableComputeLayers.js +311 -0
- package/dist/bindings/utils/detectAvailableComputeLayers.js.map +1 -0
- package/dist/bindings/utils/detectBuildTools.d.ts +14 -0
- package/dist/bindings/utils/detectBuildTools.js +149 -0
- package/dist/bindings/utils/detectBuildTools.js.map +1 -0
- package/dist/bindings/utils/detectGlibc.d.ts +4 -0
- package/dist/bindings/utils/detectGlibc.js +74 -0
- package/dist/bindings/utils/detectGlibc.js.map +1 -0
- package/dist/bindings/utils/disposeBeforeExit.d.ts +8 -0
- package/dist/bindings/utils/disposeBeforeExit.js +36 -0
- package/dist/bindings/utils/disposeBeforeExit.js.map +1 -0
- package/dist/bindings/utils/getBestComputeLayersAvailable.d.ts +9 -0
- package/dist/bindings/utils/getBestComputeLayersAvailable.js +29 -0
- package/dist/bindings/utils/getBestComputeLayersAvailable.js.map +1 -0
- package/dist/bindings/utils/getBuildFolderNameForBuildOptions.d.ts +6 -0
- package/dist/bindings/utils/getBuildFolderNameForBuildOptions.js +105 -0
- package/dist/bindings/utils/getBuildFolderNameForBuildOptions.js.map +1 -0
- package/dist/bindings/utils/getCanUsePrebuiltBinaries.d.ts +1 -0
- package/dist/bindings/utils/getCanUsePrebuiltBinaries.js +8 -0
- package/dist/bindings/utils/getCanUsePrebuiltBinaries.js.map +1 -0
- package/dist/bindings/utils/getExampleUsageCodeOfGetLlama.d.ts +2 -0
- package/dist/bindings/utils/getExampleUsageCodeOfGetLlama.js +21 -0
- package/dist/bindings/utils/getExampleUsageCodeOfGetLlama.js.map +1 -0
- package/dist/bindings/utils/getGpuTypesToUseForOption.d.ts +12 -0
- package/dist/bindings/utils/getGpuTypesToUseForOption.js +39 -0
- package/dist/bindings/utils/getGpuTypesToUseForOption.js.map +1 -0
- package/dist/bindings/utils/getLinuxDistroInfo.d.ts +9 -0
- package/dist/bindings/utils/getLinuxDistroInfo.js +46 -0
- package/dist/bindings/utils/getLinuxDistroInfo.js.map +1 -0
- package/dist/bindings/utils/getLlamaGpuTypes.d.ts +13 -0
- package/dist/bindings/utils/getLlamaGpuTypes.js +36 -0
- package/dist/bindings/utils/getLlamaGpuTypes.js.map +1 -0
- package/dist/bindings/utils/getLlamaWithoutBackend.d.ts +5 -0
- package/dist/bindings/utils/getLlamaWithoutBackend.js +40 -0
- package/dist/bindings/utils/getLlamaWithoutBackend.js.map +1 -0
- package/dist/bindings/utils/getPlatform.d.ts +2 -0
- package/dist/bindings/utils/getPlatform.js +15 -0
- package/dist/bindings/utils/getPlatform.js.map +1 -0
- package/dist/bindings/utils/getPlatformInfo.d.ts +5 -0
- package/dist/bindings/utils/getPlatformInfo.js +28 -0
- package/dist/bindings/utils/getPlatformInfo.js.map +1 -0
- package/dist/bindings/utils/hasBuildingFromSourceDependenciesInstalled.d.ts +3 -0
- package/dist/bindings/utils/hasBuildingFromSourceDependenciesInstalled.js +27 -0
- package/dist/bindings/utils/hasBuildingFromSourceDependenciesInstalled.js.map +1 -0
- package/dist/bindings/utils/hasFileInPath.d.ts +2 -0
- package/dist/bindings/utils/hasFileInPath.js +34 -0
- package/dist/bindings/utils/hasFileInPath.js.map +1 -0
- package/dist/bindings/utils/lastBuildInfo.d.ts +6 -0
- package/dist/bindings/utils/lastBuildInfo.js +17 -0
- package/dist/bindings/utils/lastBuildInfo.js.map +1 -0
- package/dist/bindings/utils/logBinaryUsageExampleToConsole.d.ts +2 -0
- package/dist/bindings/utils/logBinaryUsageExampleToConsole.js +22 -0
- package/dist/bindings/utils/logBinaryUsageExampleToConsole.js.map +1 -0
- package/dist/bindings/utils/logDistroInstallInstruction.d.ts +14 -0
- package/dist/bindings/utils/logDistroInstallInstruction.js +48 -0
- package/dist/bindings/utils/logDistroInstallInstruction.js.map +1 -0
- package/dist/bindings/utils/resolveActualBindingBinaryPath.d.ts +1 -0
- package/dist/bindings/utils/resolveActualBindingBinaryPath.js +18 -0
- package/dist/bindings/utils/resolveActualBindingBinaryPath.js.map +1 -0
- package/dist/bindings/utils/resolveCustomCmakeOptions.d.ts +1 -0
- package/dist/bindings/utils/resolveCustomCmakeOptions.js +43 -0
- package/dist/bindings/utils/resolveCustomCmakeOptions.js.map +1 -0
- package/dist/bindings/utils/testBindingBinary.d.ts +2 -0
- package/dist/bindings/utils/testBindingBinary.js +269 -0
- package/dist/bindings/utils/testBindingBinary.js.map +1 -0
- package/dist/bindings/utils/testCmakeBinary.d.ts +5 -0
- package/dist/bindings/utils/testCmakeBinary.js +32 -0
- package/dist/bindings/utils/testCmakeBinary.js.map +1 -0
- package/dist/chatWrappers/AlpacaChatWrapper.d.ts +17 -0
- package/dist/chatWrappers/AlpacaChatWrapper.js +33 -0
- package/dist/chatWrappers/AlpacaChatWrapper.js.map +1 -0
- package/dist/chatWrappers/ChatMLChatWrapper.d.ts +6 -0
- package/dist/chatWrappers/ChatMLChatWrapper.js +85 -0
- package/dist/chatWrappers/ChatMLChatWrapper.js.map +1 -0
- package/dist/chatWrappers/DeepSeekChatWrapper.d.ts +37 -0
- package/dist/chatWrappers/DeepSeekChatWrapper.js +294 -0
- package/dist/chatWrappers/DeepSeekChatWrapper.js.map +1 -0
- package/dist/chatWrappers/EmptyChatWrapper.d.ts +4 -0
- package/dist/chatWrappers/EmptyChatWrapper.js +5 -0
- package/dist/chatWrappers/EmptyChatWrapper.js.map +1 -0
- package/dist/chatWrappers/FalconChatWrapper.d.ts +19 -0
- package/dist/chatWrappers/FalconChatWrapper.js +126 -0
- package/dist/chatWrappers/FalconChatWrapper.js.map +1 -0
- package/dist/chatWrappers/FunctionaryChatWrapper.d.ts +17 -0
- package/dist/chatWrappers/FunctionaryChatWrapper.js +622 -0
- package/dist/chatWrappers/FunctionaryChatWrapper.js.map +1 -0
- package/dist/chatWrappers/GemmaChatWrapper.d.ts +7 -0
- package/dist/chatWrappers/GemmaChatWrapper.js +96 -0
- package/dist/chatWrappers/GemmaChatWrapper.js.map +1 -0
- package/dist/chatWrappers/GeneralChatWrapper.d.ts +19 -0
- package/dist/chatWrappers/GeneralChatWrapper.js +140 -0
- package/dist/chatWrappers/GeneralChatWrapper.js.map +1 -0
- package/dist/chatWrappers/HarmonyChatWrapper.d.ts +78 -0
- package/dist/chatWrappers/HarmonyChatWrapper.js +539 -0
- package/dist/chatWrappers/HarmonyChatWrapper.js.map +1 -0
- package/dist/chatWrappers/Llama2ChatWrapper.d.ts +12 -0
- package/dist/chatWrappers/Llama2ChatWrapper.js +95 -0
- package/dist/chatWrappers/Llama2ChatWrapper.js.map +1 -0
- package/dist/chatWrappers/Llama3ChatWrapper.d.ts +16 -0
- package/dist/chatWrappers/Llama3ChatWrapper.js +173 -0
- package/dist/chatWrappers/Llama3ChatWrapper.js.map +1 -0
- package/dist/chatWrappers/Llama3_1ChatWrapper.d.ts +32 -0
- package/dist/chatWrappers/Llama3_1ChatWrapper.js +290 -0
- package/dist/chatWrappers/Llama3_1ChatWrapper.js.map +1 -0
- package/dist/chatWrappers/Llama3_2LightweightChatWrapper.d.ts +35 -0
- package/dist/chatWrappers/Llama3_2LightweightChatWrapper.js +264 -0
- package/dist/chatWrappers/Llama3_2LightweightChatWrapper.js.map +1 -0
- package/dist/chatWrappers/MistralChatWrapper.d.ts +16 -0
- package/dist/chatWrappers/MistralChatWrapper.js +180 -0
- package/dist/chatWrappers/MistralChatWrapper.js.map +1 -0
- package/dist/chatWrappers/QwenChatWrapper.d.ts +36 -0
- package/dist/chatWrappers/QwenChatWrapper.js +344 -0
- package/dist/chatWrappers/QwenChatWrapper.js.map +1 -0
- package/dist/chatWrappers/SeedChatWrapper.d.ts +25 -0
- package/dist/chatWrappers/SeedChatWrapper.js +183 -0
- package/dist/chatWrappers/SeedChatWrapper.js.map +1 -0
- package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.d.ts +138 -0
- package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.js +665 -0
- package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.js.map +1 -0
- package/dist/chatWrappers/generic/TemplateChatWrapper.d.ts +76 -0
- package/dist/chatWrappers/generic/TemplateChatWrapper.js +212 -0
- package/dist/chatWrappers/generic/TemplateChatWrapper.js.map +1 -0
- package/dist/chatWrappers/generic/utils/UniqueIdGenerator.d.ts +7 -0
- package/dist/chatWrappers/generic/utils/UniqueIdGenerator.js +30 -0
- package/dist/chatWrappers/generic/utils/UniqueIdGenerator.js.map +1 -0
- package/dist/chatWrappers/generic/utils/chatHistoryFunctionCallMessageTemplate.d.ts +24 -0
- package/dist/chatWrappers/generic/utils/chatHistoryFunctionCallMessageTemplate.js +45 -0
- package/dist/chatWrappers/generic/utils/chatHistoryFunctionCallMessageTemplate.js.map +1 -0
- package/dist/chatWrappers/generic/utils/extractFunctionCallSettingsFromJinjaTemplate.d.ts +25 -0
- package/dist/chatWrappers/generic/utils/extractFunctionCallSettingsFromJinjaTemplate.js +690 -0
- package/dist/chatWrappers/generic/utils/extractFunctionCallSettingsFromJinjaTemplate.js.map +1 -0
- package/dist/chatWrappers/generic/utils/extractSegmentSettingsFromTokenizerAndChatTemplate.d.ts +2 -0
- package/dist/chatWrappers/generic/utils/extractSegmentSettingsFromTokenizerAndChatTemplate.js +66 -0
- package/dist/chatWrappers/generic/utils/extractSegmentSettingsFromTokenizerAndChatTemplate.js.map +1 -0
- package/dist/chatWrappers/generic/utils/getFirstValidResult.d.ts +6 -0
- package/dist/chatWrappers/generic/utils/getFirstValidResult.js +19 -0
- package/dist/chatWrappers/generic/utils/getFirstValidResult.js.map +1 -0
- package/dist/chatWrappers/generic/utils/squashChatHistoryItems.d.ts +2 -0
- package/dist/chatWrappers/generic/utils/squashChatHistoryItems.js +35 -0
- package/dist/chatWrappers/generic/utils/squashChatHistoryItems.js.map +1 -0
- package/dist/chatWrappers/generic/utils/templateSegmentOptionsToChatWrapperSettings.d.ts +22 -0
- package/dist/chatWrappers/generic/utils/templateSegmentOptionsToChatWrapperSettings.js +28 -0
- package/dist/chatWrappers/generic/utils/templateSegmentOptionsToChatWrapperSettings.js.map +1 -0
- package/dist/chatWrappers/utils/ChatModelFunctionsDocumentationGenerator.d.ts +76 -0
- package/dist/chatWrappers/utils/ChatModelFunctionsDocumentationGenerator.js +177 -0
- package/dist/chatWrappers/utils/ChatModelFunctionsDocumentationGenerator.js.map +1 -0
- package/dist/chatWrappers/utils/chunkChatItems.d.ts +10 -0
- package/dist/chatWrappers/utils/chunkChatItems.js +44 -0
- package/dist/chatWrappers/utils/chunkChatItems.js.map +1 -0
- package/dist/chatWrappers/utils/getModelLinageNames.d.ts +2 -0
- package/dist/chatWrappers/utils/getModelLinageNames.js +18 -0
- package/dist/chatWrappers/utils/getModelLinageNames.js.map +1 -0
- package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.d.ts +4 -0
- package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.js +394 -0
- package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.js.map +1 -0
- package/dist/chatWrappers/utils/isLlama3_2LightweightModel.d.ts +2 -0
- package/dist/chatWrappers/utils/isLlama3_2LightweightModel.js +9 -0
- package/dist/chatWrappers/utils/isLlama3_2LightweightModel.js.map +1 -0
- package/dist/chatWrappers/utils/jsonDumps.d.ts +7 -0
- package/dist/chatWrappers/utils/jsonDumps.js +18 -0
- package/dist/chatWrappers/utils/jsonDumps.js.map +1 -0
- package/dist/chatWrappers/utils/resolveChatWrapper.d.ts +148 -0
- package/dist/chatWrappers/utils/resolveChatWrapper.js +325 -0
- package/dist/chatWrappers/utils/resolveChatWrapper.js.map +1 -0
- package/dist/cli/cli.d.ts +2 -0
- package/dist/cli/cli.js +45 -0
- package/dist/cli/cli.js.map +1 -0
- package/dist/cli/commands/ChatCommand.d.ts +59 -0
- package/dist/cli/commands/ChatCommand.js +856 -0
- package/dist/cli/commands/ChatCommand.js.map +1 -0
- package/dist/cli/commands/CompleteCommand.d.ts +47 -0
- package/dist/cli/commands/CompleteCommand.js +658 -0
- package/dist/cli/commands/CompleteCommand.js.map +1 -0
- package/dist/cli/commands/DebugCommand.d.ts +7 -0
- package/dist/cli/commands/DebugCommand.js +55 -0
- package/dist/cli/commands/DebugCommand.js.map +1 -0
- package/dist/cli/commands/InfillCommand.d.ts +49 -0
- package/dist/cli/commands/InfillCommand.js +693 -0
- package/dist/cli/commands/InfillCommand.js.map +1 -0
- package/dist/cli/commands/InitCommand.d.ts +12 -0
- package/dist/cli/commands/InitCommand.js +230 -0
- package/dist/cli/commands/InitCommand.js.map +1 -0
- package/dist/cli/commands/OnPostInstallCommand.d.ts +4 -0
- package/dist/cli/commands/OnPostInstallCommand.js +62 -0
- package/dist/cli/commands/OnPostInstallCommand.js.map +1 -0
- package/dist/cli/commands/PullCommand.d.ts +13 -0
- package/dist/cli/commands/PullCommand.js +158 -0
- package/dist/cli/commands/PullCommand.js.map +1 -0
- package/dist/cli/commands/inspect/InspectCommand.d.ts +4 -0
- package/dist/cli/commands/inspect/InspectCommand.js +21 -0
- package/dist/cli/commands/inspect/InspectCommand.js.map +1 -0
- package/dist/cli/commands/inspect/commands/InspectEstimateCommand.d.ts +17 -0
- package/dist/cli/commands/inspect/commands/InspectEstimateCommand.js +275 -0
- package/dist/cli/commands/inspect/commands/InspectEstimateCommand.js.map +1 -0
- package/dist/cli/commands/inspect/commands/InspectGgufCommand.d.ts +13 -0
- package/dist/cli/commands/inspect/commands/InspectGgufCommand.js +230 -0
- package/dist/cli/commands/inspect/commands/InspectGgufCommand.js.map +1 -0
- package/dist/cli/commands/inspect/commands/InspectGpuCommand.d.ts +4 -0
- package/dist/cli/commands/inspect/commands/InspectGpuCommand.js +296 -0
- package/dist/cli/commands/inspect/commands/InspectGpuCommand.js.map +1 -0
- package/dist/cli/commands/inspect/commands/InspectMeasureCommand.d.ts +26 -0
- package/dist/cli/commands/inspect/commands/InspectMeasureCommand.js +869 -0
- package/dist/cli/commands/inspect/commands/InspectMeasureCommand.js.map +1 -0
- package/dist/cli/commands/source/SourceCommand.d.ts +4 -0
- package/dist/cli/commands/source/SourceCommand.js +19 -0
- package/dist/cli/commands/source/SourceCommand.js.map +1 -0
- package/dist/cli/commands/source/commands/BuildCommand.d.ts +16 -0
- package/dist/cli/commands/source/commands/BuildCommand.js +148 -0
- package/dist/cli/commands/source/commands/BuildCommand.js.map +1 -0
- package/dist/cli/commands/source/commands/ClearCommand.d.ts +7 -0
- package/dist/cli/commands/source/commands/ClearCommand.js +54 -0
- package/dist/cli/commands/source/commands/ClearCommand.js.map +1 -0
- package/dist/cli/commands/source/commands/DownloadCommand.d.ts +16 -0
- package/dist/cli/commands/source/commands/DownloadCommand.js +219 -0
- package/dist/cli/commands/source/commands/DownloadCommand.js.map +1 -0
- package/dist/cli/projectTemplates.d.ts +7 -0
- package/dist/cli/projectTemplates.js +10 -0
- package/dist/cli/projectTemplates.js.map +1 -0
- package/dist/cli/recommendedModels.d.ts +2 -0
- package/dist/cli/recommendedModels.js +428 -0
- package/dist/cli/recommendedModels.js.map +1 -0
- package/dist/cli/startCreateCli.d.ts +2 -0
- package/dist/cli/startCreateCli.js +26 -0
- package/dist/cli/startCreateCli.js.map +1 -0
- package/dist/cli/utils/ConsoleInteraction.d.ts +22 -0
- package/dist/cli/utils/ConsoleInteraction.js +122 -0
- package/dist/cli/utils/ConsoleInteraction.js.map +1 -0
- package/dist/cli/utils/ConsoleTable.d.ts +24 -0
- package/dist/cli/utils/ConsoleTable.js +90 -0
- package/dist/cli/utils/ConsoleTable.js.map +1 -0
- package/dist/cli/utils/basicChooseFromListConsoleInteraction.d.ts +13 -0
- package/dist/cli/utils/basicChooseFromListConsoleInteraction.js +111 -0
- package/dist/cli/utils/basicChooseFromListConsoleInteraction.js.map +1 -0
- package/dist/cli/utils/consolePromptQuestion.d.ts +6 -0
- package/dist/cli/utils/consolePromptQuestion.js +81 -0
- package/dist/cli/utils/consolePromptQuestion.js.map +1 -0
- package/dist/cli/utils/getReadablePath.d.ts +1 -0
- package/dist/cli/utils/getReadablePath.js +14 -0
- package/dist/cli/utils/getReadablePath.js.map +1 -0
- package/dist/cli/utils/interactivelyAskForModel.d.ts +13 -0
- package/dist/cli/utils/interactivelyAskForModel.js +485 -0
- package/dist/cli/utils/interactivelyAskForModel.js.map +1 -0
- package/dist/cli/utils/isRunningUnderRosetta.d.ts +1 -0
- package/dist/cli/utils/isRunningUnderRosetta.js +20 -0
- package/dist/cli/utils/isRunningUnderRosetta.js.map +1 -0
- package/dist/cli/utils/logUsedGpuTypeOption.d.ts +2 -0
- package/dist/cli/utils/logUsedGpuTypeOption.js +9 -0
- package/dist/cli/utils/logUsedGpuTypeOption.js.map +1 -0
- package/dist/cli/utils/packageJsonConfig.d.ts +6 -0
- package/dist/cli/utils/packageJsonConfig.js +51 -0
- package/dist/cli/utils/packageJsonConfig.js.map +1 -0
- package/dist/cli/utils/packageManager.d.ts +1 -0
- package/dist/cli/utils/packageManager.js +15 -0
- package/dist/cli/utils/packageManager.js.map +1 -0
- package/dist/cli/utils/parseXtcArg.d.ts +5 -0
- package/dist/cli/utils/parseXtcArg.js +16 -0
- package/dist/cli/utils/parseXtcArg.js.map +1 -0
- package/dist/cli/utils/printCommonInfoLines.d.ts +12 -0
- package/dist/cli/utils/printCommonInfoLines.js +163 -0
- package/dist/cli/utils/printCommonInfoLines.js.map +1 -0
- package/dist/cli/utils/printInfoLine.d.ts +12 -0
- package/dist/cli/utils/printInfoLine.js +54 -0
- package/dist/cli/utils/printInfoLine.js.map +1 -0
- package/dist/cli/utils/printModelDestination.d.ts +2 -0
- package/dist/cli/utils/printModelDestination.js +11 -0
- package/dist/cli/utils/printModelDestination.js.map +1 -0
- package/dist/cli/utils/projectTemplates.d.ts +19 -0
- package/dist/cli/utils/projectTemplates.js +47 -0
- package/dist/cli/utils/projectTemplates.js.map +1 -0
- package/dist/cli/utils/renderModelCompatibilityPercentageWithColors.d.ts +6 -0
- package/dist/cli/utils/renderModelCompatibilityPercentageWithColors.js +14 -0
- package/dist/cli/utils/renderModelCompatibilityPercentageWithColors.js.map +1 -0
- package/dist/cli/utils/resolveCommandGgufPath.d.ts +19 -0
- package/dist/cli/utils/resolveCommandGgufPath.js +123 -0
- package/dist/cli/utils/resolveCommandGgufPath.js.map +1 -0
- package/dist/cli/utils/resolveHeaderFlag.d.ts +1 -0
- package/dist/cli/utils/resolveHeaderFlag.js +21 -0
- package/dist/cli/utils/resolveHeaderFlag.js.map +1 -0
- package/dist/cli/utils/resolveModelRecommendationFileOptions.d.ts +14 -0
- package/dist/cli/utils/resolveModelRecommendationFileOptions.js +12 -0
- package/dist/cli/utils/resolveModelRecommendationFileOptions.js.map +1 -0
- package/dist/cli/utils/resolveNpmrcConfig.d.ts +18 -0
- package/dist/cli/utils/resolveNpmrcConfig.js +129 -0
- package/dist/cli/utils/resolveNpmrcConfig.js.map +1 -0
- package/dist/cli/utils/splitAnsiToLines.d.ts +1 -0
- package/dist/cli/utils/splitAnsiToLines.js +32 -0
- package/dist/cli/utils/splitAnsiToLines.js.map +1 -0
- package/dist/cli/utils/toBytes.d.ts +1 -0
- package/dist/cli/utils/toBytes.js +5 -0
- package/dist/cli/utils/toBytes.js.map +1 -0
- package/dist/cli/utils/withCliCommandDescriptionDocsUrl.d.ts +2 -0
- package/dist/cli/utils/withCliCommandDescriptionDocsUrl.js +23 -0
- package/dist/cli/utils/withCliCommandDescriptionDocsUrl.js.map +1 -0
- package/dist/commands.d.ts +6 -0
- package/dist/commands.js +9 -0
- package/dist/commands.js.map +1 -0
- package/dist/config.d.ts +79 -0
- package/dist/config.js +127 -0
- package/dist/config.js.map +1 -0
- package/dist/consts.d.ts +4 -0
- package/dist/consts.js +11 -0
- package/dist/consts.js.map +1 -0
- package/dist/evaluator/LlamaChat/LlamaChat.d.ts +500 -0
- package/dist/evaluator/LlamaChat/LlamaChat.js +2696 -0
- package/dist/evaluator/LlamaChat/LlamaChat.js.map +1 -0
- package/dist/evaluator/LlamaChat/utils/FunctionCallNameGrammar.d.ts +11 -0
- package/dist/evaluator/LlamaChat/utils/FunctionCallNameGrammar.js +55 -0
- package/dist/evaluator/LlamaChat/utils/FunctionCallNameGrammar.js.map +1 -0
- package/dist/evaluator/LlamaChat/utils/FunctionCallParamsGrammar.d.ts +16 -0
- package/dist/evaluator/LlamaChat/utils/FunctionCallParamsGrammar.js +45 -0
- package/dist/evaluator/LlamaChat/utils/FunctionCallParamsGrammar.js.map +1 -0
- package/dist/evaluator/LlamaChat/utils/LlamaFunctionCallValidationError.d.ts +8 -0
- package/dist/evaluator/LlamaChat/utils/LlamaFunctionCallValidationError.js +12 -0
- package/dist/evaluator/LlamaChat/utils/LlamaFunctionCallValidationError.js.map +1 -0
- package/dist/evaluator/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.d.ts +16 -0
- package/dist/evaluator/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.js +260 -0
- package/dist/evaluator/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.js.map +1 -0
- package/dist/evaluator/LlamaChatSession/LlamaChatSession.d.ts +520 -0
- package/dist/evaluator/LlamaChatSession/LlamaChatSession.js +628 -0
- package/dist/evaluator/LlamaChatSession/LlamaChatSession.js.map +1 -0
- package/dist/evaluator/LlamaChatSession/utils/LlamaChatSessionPromptCompletionEngine.d.ts +45 -0
- package/dist/evaluator/LlamaChatSession/utils/LlamaChatSessionPromptCompletionEngine.js +191 -0
- package/dist/evaluator/LlamaChatSession/utils/LlamaChatSessionPromptCompletionEngine.js.map +1 -0
- package/dist/evaluator/LlamaChatSession/utils/defineChatSessionFunction.d.ts +15 -0
- package/dist/evaluator/LlamaChatSession/utils/defineChatSessionFunction.js +16 -0
- package/dist/evaluator/LlamaChatSession/utils/defineChatSessionFunction.js.map +1 -0
- package/dist/evaluator/LlamaCompletion.d.ts +219 -0
- package/dist/evaluator/LlamaCompletion.js +498 -0
- package/dist/evaluator/LlamaCompletion.js.map +1 -0
- package/dist/evaluator/LlamaContext/LlamaContext.d.ts +336 -0
- package/dist/evaluator/LlamaContext/LlamaContext.js +1919 -0
- package/dist/evaluator/LlamaContext/LlamaContext.js.map +1 -0
- package/dist/evaluator/LlamaContext/LlamaContextSequenceCheckpoints.d.ts +27 -0
- package/dist/evaluator/LlamaContext/LlamaContextSequenceCheckpoints.js +130 -0
- package/dist/evaluator/LlamaContext/LlamaContextSequenceCheckpoints.js.map +1 -0
- package/dist/evaluator/LlamaContext/LlamaSampler.d.ts +1 -0
- package/dist/evaluator/LlamaContext/LlamaSampler.js +39 -0
- package/dist/evaluator/LlamaContext/LlamaSampler.js.map +1 -0
- package/dist/evaluator/LlamaContext/TokenPredictor.d.ts +55 -0
- package/dist/evaluator/LlamaContext/TokenPredictor.js +20 -0
- package/dist/evaluator/LlamaContext/TokenPredictor.js.map +1 -0
- package/dist/evaluator/LlamaContext/tokenPredictors/DraftSequenceTokenPredictor.d.ts +56 -0
- package/dist/evaluator/LlamaContext/tokenPredictors/DraftSequenceTokenPredictor.js +266 -0
- package/dist/evaluator/LlamaContext/tokenPredictors/DraftSequenceTokenPredictor.js.map +1 -0
- package/dist/evaluator/LlamaContext/tokenPredictors/InputLookupTokenPredictor.d.ts +58 -0
- package/dist/evaluator/LlamaContext/tokenPredictors/InputLookupTokenPredictor.js +138 -0
- package/dist/evaluator/LlamaContext/tokenPredictors/InputLookupTokenPredictor.js.map +1 -0
- package/dist/evaluator/LlamaContext/types.d.ts +602 -0
- package/dist/evaluator/LlamaContext/types.js +2 -0
- package/dist/evaluator/LlamaContext/types.js.map +1 -0
- package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies/firstInFirstOutStrategy.d.ts +5 -0
- package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies/firstInFirstOutStrategy.js +16 -0
- package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies/firstInFirstOutStrategy.js.map +1 -0
- package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies/maximumParallelismStrategy.d.ts +5 -0
- package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies/maximumParallelismStrategy.js +42 -0
- package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies/maximumParallelismStrategy.js.map +1 -0
- package/dist/evaluator/LlamaContext/utils/padSafeContextSize.d.ts +1 -0
- package/dist/evaluator/LlamaContext/utils/padSafeContextSize.js +18 -0
- package/dist/evaluator/LlamaContext/utils/padSafeContextSize.js.map +1 -0
- package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizationStrategy.d.ts +2 -0
- package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizationStrategy.js +13 -0
- package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizationStrategy.js.map +1 -0
- package/dist/evaluator/LlamaEmbedding.d.ts +21 -0
- package/dist/evaluator/LlamaEmbedding.js +53 -0
- package/dist/evaluator/LlamaEmbedding.js.map +1 -0
- package/dist/evaluator/LlamaEmbeddingContext.d.ts +52 -0
- package/dist/evaluator/LlamaEmbeddingContext.js +86 -0
- package/dist/evaluator/LlamaEmbeddingContext.js.map +1 -0
- package/dist/evaluator/LlamaGrammar.d.ts +39 -0
- package/dist/evaluator/LlamaGrammar.js +72 -0
- package/dist/evaluator/LlamaGrammar.js.map +1 -0
- package/dist/evaluator/LlamaGrammarEvaluationState.d.ts +19 -0
- package/dist/evaluator/LlamaGrammarEvaluationState.js +29 -0
- package/dist/evaluator/LlamaGrammarEvaluationState.js.map +1 -0
- package/dist/evaluator/LlamaJsonSchemaGrammar.d.ts +17 -0
- package/dist/evaluator/LlamaJsonSchemaGrammar.js +35 -0
- package/dist/evaluator/LlamaJsonSchemaGrammar.js.map +1 -0
- package/dist/evaluator/LlamaModel/LlamaModel.d.ts +344 -0
- package/dist/evaluator/LlamaModel/LlamaModel.js +853 -0
- package/dist/evaluator/LlamaModel/LlamaModel.js.map +1 -0
- package/dist/evaluator/LlamaModel/utils/TokenAttributes.d.ts +29 -0
- package/dist/evaluator/LlamaModel/utils/TokenAttributes.js +65 -0
- package/dist/evaluator/LlamaModel/utils/TokenAttributes.js.map +1 -0
- package/dist/evaluator/LlamaRankingContext.d.ts +91 -0
- package/dist/evaluator/LlamaRankingContext.js +178 -0
- package/dist/evaluator/LlamaRankingContext.js.map +1 -0
- package/dist/evaluator/TokenBias.d.ts +37 -0
- package/dist/evaluator/TokenBias.js +68 -0
- package/dist/evaluator/TokenBias.js.map +1 -0
- package/dist/evaluator/TokenMeter.d.ts +45 -0
- package/dist/evaluator/TokenMeter.js +74 -0
- package/dist/evaluator/TokenMeter.js.map +1 -0
- package/dist/evaluator/utils/chunkDocument.d.ts +86 -0
- package/dist/evaluator/utils/chunkDocument.js +212 -0
- package/dist/evaluator/utils/chunkDocument.js.map +1 -0
- package/dist/gguf/consts.d.ts +4 -0
- package/dist/gguf/consts.js +12 -0
- package/dist/gguf/consts.js.map +1 -0
- package/dist/gguf/errors/InvalidGgufMagicError.d.ts +3 -0
- package/dist/gguf/errors/InvalidGgufMagicError.js +6 -0
- package/dist/gguf/errors/InvalidGgufMagicError.js.map +1 -0
- package/dist/gguf/errors/UnsupportedGgufValueTypeError.d.ts +4 -0
- package/dist/gguf/errors/UnsupportedGgufValueTypeError.js +9 -0
- package/dist/gguf/errors/UnsupportedGgufValueTypeError.js.map +1 -0
- package/dist/gguf/fileReaders/GgufFileReader.d.ts +36 -0
- package/dist/gguf/fileReaders/GgufFileReader.js +106 -0
- package/dist/gguf/fileReaders/GgufFileReader.js.map +1 -0
- package/dist/gguf/fileReaders/GgufFsFileReader.d.ts +16 -0
- package/dist/gguf/fileReaders/GgufFsFileReader.js +62 -0
- package/dist/gguf/fileReaders/GgufFsFileReader.js.map +1 -0
- package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.d.ts +28 -0
- package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.js +94 -0
- package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.js.map +1 -0
- package/dist/gguf/insights/GgufInsights.d.ts +78 -0
- package/dist/gguf/insights/GgufInsights.js +854 -0
- package/dist/gguf/insights/GgufInsights.js.map +1 -0
- package/dist/gguf/insights/GgufInsightsConfigurationResolver.d.ts +203 -0
- package/dist/gguf/insights/GgufInsightsConfigurationResolver.js +284 -0
- package/dist/gguf/insights/GgufInsightsConfigurationResolver.js.map +1 -0
- package/dist/gguf/insights/GgufInsightsTokens.d.ts +5 -0
- package/dist/gguf/insights/GgufInsightsTokens.js +40 -0
- package/dist/gguf/insights/GgufInsightsTokens.js.map +1 -0
- package/dist/gguf/insights/utils/getRamUsageFromUnifiedVram.d.ts +5 -0
- package/dist/gguf/insights/utils/getRamUsageFromUnifiedVram.js +7 -0
- package/dist/gguf/insights/utils/getRamUsageFromUnifiedVram.js.map +1 -0
- package/dist/gguf/insights/utils/resolveContextContextSizeOption.d.ts +33 -0
- package/dist/gguf/insights/utils/resolveContextContextSizeOption.js +117 -0
- package/dist/gguf/insights/utils/resolveContextContextSizeOption.js.map +1 -0
- package/dist/gguf/insights/utils/resolveModelGpuLayersOption.d.ts +20 -0
- package/dist/gguf/insights/utils/resolveModelGpuLayersOption.js +251 -0
- package/dist/gguf/insights/utils/resolveModelGpuLayersOption.js.map +1 -0
- package/dist/gguf/insights/utils/scoreLevels.d.ts +5 -0
- package/dist/gguf/insights/utils/scoreLevels.js +16 -0
- package/dist/gguf/insights/utils/scoreLevels.js.map +1 -0
- package/dist/gguf/parser/GgufV2Parser.d.ts +20 -0
- package/dist/gguf/parser/GgufV2Parser.js +184 -0
- package/dist/gguf/parser/GgufV2Parser.js.map +1 -0
- package/dist/gguf/parser/GgufV3Parser.d.ts +3 -0
- package/dist/gguf/parser/GgufV3Parser.js +4 -0
- package/dist/gguf/parser/GgufV3Parser.js.map +1 -0
- package/dist/gguf/parser/parseGguf.d.ts +8 -0
- package/dist/gguf/parser/parseGguf.js +61 -0
- package/dist/gguf/parser/parseGguf.js.map +1 -0
- package/dist/gguf/readGgufFileInfo.d.ts +54 -0
- package/dist/gguf/readGgufFileInfo.js +82 -0
- package/dist/gguf/readGgufFileInfo.js.map +1 -0
- package/dist/gguf/types/GgufFileInfoTypes.d.ts +85 -0
- package/dist/gguf/types/GgufFileInfoTypes.js +18 -0
- package/dist/gguf/types/GgufFileInfoTypes.js.map +1 -0
- package/dist/gguf/types/GgufMetadataTypes.d.ts +480 -0
- package/dist/gguf/types/GgufMetadataTypes.js +194 -0
- package/dist/gguf/types/GgufMetadataTypes.js.map +1 -0
- package/dist/gguf/types/GgufTensorInfoTypes.d.ts +63 -0
- package/dist/gguf/types/GgufTensorInfoTypes.js +54 -0
- package/dist/gguf/types/GgufTensorInfoTypes.js.map +1 -0
- package/dist/gguf/utils/GgufReadOffset.d.ts +6 -0
- package/dist/gguf/utils/GgufReadOffset.js +18 -0
- package/dist/gguf/utils/GgufReadOffset.js.map +1 -0
- package/dist/gguf/utils/convertMetadataKeyValueRecordToNestedObject.d.ts +6 -0
- package/dist/gguf/utils/convertMetadataKeyValueRecordToNestedObject.js +76 -0
- package/dist/gguf/utils/convertMetadataKeyValueRecordToNestedObject.js.map +1 -0
- package/dist/gguf/utils/getGgufFileTypeName.d.ts +4 -0
- package/dist/gguf/utils/getGgufFileTypeName.js +13 -0
- package/dist/gguf/utils/getGgufFileTypeName.js.map +1 -0
- package/dist/gguf/utils/getGgufMetadataArchitectureData.d.ts +3 -0
- package/dist/gguf/utils/getGgufMetadataArchitectureData.js +4 -0
- package/dist/gguf/utils/getGgufMetadataArchitectureData.js.map +1 -0
- package/dist/gguf/utils/getGgufMetadataKeyValue.d.ts +1 -0
- package/dist/gguf/utils/getGgufMetadataKeyValue.js +27 -0
- package/dist/gguf/utils/getGgufMetadataKeyValue.js.map +1 -0
- package/dist/gguf/utils/ggufQuantNames.d.ts +2 -0
- package/dist/gguf/utils/ggufQuantNames.js +42 -0
- package/dist/gguf/utils/ggufQuantNames.js.map +1 -0
- package/dist/gguf/utils/normalizeGgufDownloadUrl.d.ts +2 -0
- package/dist/gguf/utils/normalizeGgufDownloadUrl.js +18 -0
- package/dist/gguf/utils/normalizeGgufDownloadUrl.js.map +1 -0
- package/dist/gguf/utils/resolveBinarySplitGgufPartUrls.d.ts +2 -0
- package/dist/gguf/utils/resolveBinarySplitGgufPartUrls.js +38 -0
- package/dist/gguf/utils/resolveBinarySplitGgufPartUrls.js.map +1 -0
- package/dist/gguf/utils/resolveSplitGgufParts.d.ts +7 -0
- package/dist/gguf/utils/resolveSplitGgufParts.js +64 -0
- package/dist/gguf/utils/resolveSplitGgufParts.js.map +1 -0
- package/dist/index.d.ts +71 -0
- package/dist/index.js +64 -0
- package/dist/index.js.map +1 -0
- package/dist/state.d.ts +6 -0
- package/dist/state.js +22 -0
- package/dist/state.js.map +1 -0
- package/dist/tsconfig.tsbuildinfo +1 -0
- package/dist/types.d.ts +408 -0
- package/dist/types.js +13 -0
- package/dist/types.js.map +1 -0
- package/dist/utils/DisposeGuard.d.ts +13 -0
- package/dist/utils/DisposeGuard.js +120 -0
- package/dist/utils/DisposeGuard.js.map +1 -0
- package/dist/utils/InsufficientMemoryError.d.ts +3 -0
- package/dist/utils/InsufficientMemoryError.js +6 -0
- package/dist/utils/InsufficientMemoryError.js.map +1 -0
- package/dist/utils/LlamaText.d.ts +92 -0
- package/dist/utils/LlamaText.js +527 -0
- package/dist/utils/LlamaText.js.map +1 -0
- package/dist/utils/LruCache.d.ts +12 -0
- package/dist/utils/LruCache.js +44 -0
- package/dist/utils/LruCache.js.map +1 -0
- package/dist/utils/OpenAIFormat.d.ts +177 -0
- package/dist/utils/OpenAIFormat.js +488 -0
- package/dist/utils/OpenAIFormat.js.map +1 -0
- package/dist/utils/OverridesObject.d.ts +7 -0
- package/dist/utils/OverridesObject.js +2 -0
- package/dist/utils/OverridesObject.js.map +1 -0
- package/dist/utils/ReplHistory.d.ts +9 -0
- package/dist/utils/ReplHistory.js +72 -0
- package/dist/utils/ReplHistory.js.map +1 -0
- package/dist/utils/StopGenerationDetector.d.ts +47 -0
- package/dist/utils/StopGenerationDetector.js +291 -0
- package/dist/utils/StopGenerationDetector.js.map +1 -0
- package/dist/utils/ThreadsSplitter.d.ts +32 -0
- package/dist/utils/ThreadsSplitter.js +177 -0
- package/dist/utils/ThreadsSplitter.js.map +1 -0
- package/dist/utils/TokenStreamRegulator.d.ts +38 -0
- package/dist/utils/TokenStreamRegulator.js +200 -0
- package/dist/utils/TokenStreamRegulator.js.map +1 -0
- package/dist/utils/UnsupportedError.d.ts +2 -0
- package/dist/utils/UnsupportedError.js +7 -0
- package/dist/utils/UnsupportedError.js.map +1 -0
- package/dist/utils/appendUserMessageToChatHistory.d.ts +6 -0
- package/dist/utils/appendUserMessageToChatHistory.js +22 -0
- package/dist/utils/appendUserMessageToChatHistory.js.map +1 -0
- package/dist/utils/clearTempFolder.d.ts +1 -0
- package/dist/utils/clearTempFolder.js +16 -0
- package/dist/utils/clearTempFolder.js.map +1 -0
- package/dist/utils/cmake.d.ts +10 -0
- package/dist/utils/cmake.js +146 -0
- package/dist/utils/cmake.js.map +1 -0
- package/dist/utils/compareTokens.d.ts +2 -0
- package/dist/utils/compareTokens.js +4 -0
- package/dist/utils/compareTokens.js.map +1 -0
- package/dist/utils/createModelDownloader.d.ts +262 -0
- package/dist/utils/createModelDownloader.js +486 -0
- package/dist/utils/createModelDownloader.js.map +1 -0
- package/dist/utils/findBestOption.d.ts +4 -0
- package/dist/utils/findBestOption.js +15 -0
- package/dist/utils/findBestOption.js.map +1 -0
- package/dist/utils/findCharacterRemovalCountToFitChatHistoryInContext.d.ts +20 -0
- package/dist/utils/findCharacterRemovalCountToFitChatHistoryInContext.js +85 -0
- package/dist/utils/findCharacterRemovalCountToFitChatHistoryInContext.js.map +1 -0
- package/dist/utils/gbnfJson/GbnfGrammarGenerator.d.ts +19 -0
- package/dist/utils/gbnfJson/GbnfGrammarGenerator.js +60 -0
- package/dist/utils/gbnfJson/GbnfGrammarGenerator.js.map +1 -0
- package/dist/utils/gbnfJson/GbnfTerminal.d.ts +11 -0
- package/dist/utils/gbnfJson/GbnfTerminal.js +54 -0
- package/dist/utils/gbnfJson/GbnfTerminal.js.map +1 -0
- package/dist/utils/gbnfJson/getGbnfGrammarForGbnfJsonSchema.d.ts +5 -0
- package/dist/utils/gbnfJson/getGbnfGrammarForGbnfJsonSchema.js +11 -0
- package/dist/utils/gbnfJson/getGbnfGrammarForGbnfJsonSchema.js.map +1 -0
- package/dist/utils/gbnfJson/terminals/GbnfAnyJson.d.ts +9 -0
- package/dist/utils/gbnfJson/terminals/GbnfAnyJson.js +53 -0
- package/dist/utils/gbnfJson/terminals/GbnfAnyJson.js.map +1 -0
- package/dist/utils/gbnfJson/terminals/GbnfArray.d.ts +18 -0
- package/dist/utils/gbnfJson/terminals/GbnfArray.js +83 -0
- package/dist/utils/gbnfJson/terminals/GbnfArray.js.map +1 -0
- package/dist/utils/gbnfJson/terminals/GbnfBoolean.d.ts +7 -0
- package/dist/utils/gbnfJson/terminals/GbnfBoolean.js +22 -0
- package/dist/utils/gbnfJson/terminals/GbnfBoolean.js.map +1 -0
- package/dist/utils/gbnfJson/terminals/GbnfBooleanValue.d.ts +7 -0
- package/dist/utils/gbnfJson/terminals/GbnfBooleanValue.js +17 -0
- package/dist/utils/gbnfJson/terminals/GbnfBooleanValue.js.map +1 -0
- package/dist/utils/gbnfJson/terminals/GbnfCommaWhitespace.d.ts +11 -0
- package/dist/utils/gbnfJson/terminals/GbnfCommaWhitespace.js +28 -0
- package/dist/utils/gbnfJson/terminals/GbnfCommaWhitespace.js.map +1 -0
- package/dist/utils/gbnfJson/terminals/GbnfFormatString.d.ts +11 -0
- package/dist/utils/gbnfJson/terminals/GbnfFormatString.js +90 -0
- package/dist/utils/gbnfJson/terminals/GbnfFormatString.js.map +1 -0
- package/dist/utils/gbnfJson/terminals/GbnfGrammar.d.ts +9 -0
- package/dist/utils/gbnfJson/terminals/GbnfGrammar.js +23 -0
- package/dist/utils/gbnfJson/terminals/GbnfGrammar.js.map +1 -0
- package/dist/utils/gbnfJson/terminals/GbnfInsideStringChar.d.ts +5 -0
- package/dist/utils/gbnfJson/terminals/GbnfInsideStringChar.js +24 -0
- package/dist/utils/gbnfJson/terminals/GbnfInsideStringChar.js.map +1 -0
- package/dist/utils/gbnfJson/terminals/GbnfNull.d.ts +5 -0
- package/dist/utils/gbnfJson/terminals/GbnfNull.js +11 -0
- package/dist/utils/gbnfJson/terminals/GbnfNull.js.map +1 -0
- package/dist/utils/gbnfJson/terminals/GbnfNumber.d.ts +9 -0
- package/dist/utils/gbnfJson/terminals/GbnfNumber.js +22 -0
- package/dist/utils/gbnfJson/terminals/GbnfNumber.js.map +1 -0
- package/dist/utils/gbnfJson/terminals/GbnfNumberValue.d.ts +9 -0
- package/dist/utils/gbnfJson/terminals/GbnfNumberValue.js +21 -0
- package/dist/utils/gbnfJson/terminals/GbnfNumberValue.js.map +1 -0
- package/dist/utils/gbnfJson/terminals/GbnfObjectMap.d.ts +28 -0
- package/dist/utils/gbnfJson/terminals/GbnfObjectMap.js +88 -0
- package/dist/utils/gbnfJson/terminals/GbnfObjectMap.js.map +1 -0
- package/dist/utils/gbnfJson/terminals/GbnfOr.d.ts +9 -0
- package/dist/utils/gbnfJson/terminals/GbnfOr.js +34 -0
- package/dist/utils/gbnfJson/terminals/GbnfOr.js.map +1 -0
- package/dist/utils/gbnfJson/terminals/GbnfRef.d.ts +15 -0
- package/dist/utils/gbnfJson/terminals/GbnfRef.js +34 -0
- package/dist/utils/gbnfJson/terminals/GbnfRef.js.map +1 -0
- package/dist/utils/gbnfJson/terminals/GbnfRepetition.d.ts +15 -0
- package/dist/utils/gbnfJson/terminals/GbnfRepetition.js +86 -0
- package/dist/utils/gbnfJson/terminals/GbnfRepetition.js.map +1 -0
- package/dist/utils/gbnfJson/terminals/GbnfString.d.ts +12 -0
- package/dist/utils/gbnfJson/terminals/GbnfString.js +43 -0
- package/dist/utils/gbnfJson/terminals/GbnfString.js.map +1 -0
- package/dist/utils/gbnfJson/terminals/GbnfStringValue.d.ts +8 -0
- package/dist/utils/gbnfJson/terminals/GbnfStringValue.js +26 -0
- package/dist/utils/gbnfJson/terminals/GbnfStringValue.js.map +1 -0
- package/dist/utils/gbnfJson/terminals/GbnfVerbatimText.d.ts +6 -0
- package/dist/utils/gbnfJson/terminals/GbnfVerbatimText.js +21 -0
- package/dist/utils/gbnfJson/terminals/GbnfVerbatimText.js.map +1 -0
- package/dist/utils/gbnfJson/terminals/GbnfWhitespace.d.ts +13 -0
- package/dist/utils/gbnfJson/terminals/GbnfWhitespace.js +67 -0
- package/dist/utils/gbnfJson/terminals/GbnfWhitespace.js.map +1 -0
- package/dist/utils/gbnfJson/terminals/gbnfConsts.d.ts +30 -0
- package/dist/utils/gbnfJson/terminals/gbnfConsts.js +72 -0
- package/dist/utils/gbnfJson/terminals/gbnfConsts.js.map +1 -0
- package/dist/utils/gbnfJson/types.d.ts +213 -0
- package/dist/utils/gbnfJson/types.js +30 -0
- package/dist/utils/gbnfJson/types.js.map +1 -0
- package/dist/utils/gbnfJson/utils/GbnfJsonScopeState.d.ts +10 -0
- package/dist/utils/gbnfJson/utils/GbnfJsonScopeState.js +15 -0
- package/dist/utils/gbnfJson/utils/GbnfJsonScopeState.js.map +1 -0
- package/dist/utils/gbnfJson/utils/defsScope.d.ts +7 -0
- package/dist/utils/gbnfJson/utils/defsScope.js +17 -0
- package/dist/utils/gbnfJson/utils/defsScope.js.map +1 -0
- package/dist/utils/gbnfJson/utils/getGbnfJsonTerminalForGbnfJsonSchema.d.ts +5 -0
- package/dist/utils/gbnfJson/utils/getGbnfJsonTerminalForGbnfJsonSchema.js +143 -0
- package/dist/utils/gbnfJson/utils/getGbnfJsonTerminalForGbnfJsonSchema.js.map +1 -0
- package/dist/utils/gbnfJson/utils/getGbnfJsonTerminalForLiteral.d.ts +5 -0
- package/dist/utils/gbnfJson/utils/getGbnfJsonTerminalForLiteral.js +16 -0
- package/dist/utils/gbnfJson/utils/getGbnfJsonTerminalForLiteral.js.map +1 -0
- package/dist/utils/gbnfJson/utils/validateObjectAgainstGbnfSchema.d.ts +8 -0
- package/dist/utils/gbnfJson/utils/validateObjectAgainstGbnfSchema.js +242 -0
- package/dist/utils/gbnfJson/utils/validateObjectAgainstGbnfSchema.js.map +1 -0
- package/dist/utils/getBuildDefaults.d.ts +5 -0
- package/dist/utils/getBuildDefaults.js +9 -0
- package/dist/utils/getBuildDefaults.js.map +1 -0
- package/dist/utils/getChatWrapperSegmentDefinition.d.ts +2 -0
- package/dist/utils/getChatWrapperSegmentDefinition.js +9 -0
- package/dist/utils/getChatWrapperSegmentDefinition.js.map +1 -0
- package/dist/utils/getConsoleLogPrefix.d.ts +1 -0
- package/dist/utils/getConsoleLogPrefix.js +10 -0
- package/dist/utils/getConsoleLogPrefix.js.map +1 -0
- package/dist/utils/getFirstWritableDir.d.ts +8 -0
- package/dist/utils/getFirstWritableDir.js +60 -0
- package/dist/utils/getFirstWritableDir.js.map +1 -0
- package/dist/utils/getGrammarsFolder.d.ts +2 -0
- package/dist/utils/getGrammarsFolder.js +19 -0
- package/dist/utils/getGrammarsFolder.js.map +1 -0
- package/dist/utils/getLlamaClasses.d.ts +9 -0
- package/dist/utils/getLlamaClasses.js +14 -0
- package/dist/utils/getLlamaClasses.js.map +1 -0
- package/dist/utils/getModuleVersion.d.ts +1 -0
- package/dist/utils/getModuleVersion.js +13 -0
- package/dist/utils/getModuleVersion.js.map +1 -0
- package/dist/utils/getQueuedTokensBeforeStopTrigger.d.ts +6 -0
- package/dist/utils/getQueuedTokensBeforeStopTrigger.js +22 -0
- package/dist/utils/getQueuedTokensBeforeStopTrigger.js.map +1 -0
- package/dist/utils/getReadableContextSize.d.ts +1 -0
- package/dist/utils/getReadableContextSize.js +7 -0
- package/dist/utils/getReadableContextSize.js.map +1 -0
- package/dist/utils/getTempDir.d.ts +10 -0
- package/dist/utils/getTempDir.js +121 -0
- package/dist/utils/getTempDir.js.map +1 -0
- package/dist/utils/getTypeScriptTypeStringForGbnfJsonSchema.d.ts +2 -0
- package/dist/utils/getTypeScriptTypeStringForGbnfJsonSchema.js +205 -0
- package/dist/utils/getTypeScriptTypeStringForGbnfJsonSchema.js.map +1 -0
- package/dist/utils/gitReleaseBundles.d.ts +2 -0
- package/dist/utils/gitReleaseBundles.js +132 -0
- package/dist/utils/gitReleaseBundles.js.map +1 -0
- package/dist/utils/hashString.d.ts +1 -0
- package/dist/utils/hashString.js +8 -0
- package/dist/utils/hashString.js.map +1 -0
- package/dist/utils/includesText.d.ts +1 -0
- package/dist/utils/includesText.js +12 -0
- package/dist/utils/includesText.js.map +1 -0
- package/dist/utils/isLockfileActive.d.ts +4 -0
- package/dist/utils/isLockfileActive.js +12 -0
- package/dist/utils/isLockfileActive.js.map +1 -0
- package/dist/utils/isToken.d.ts +2 -0
- package/dist/utils/isToken.js +4 -0
- package/dist/utils/isToken.js.map +1 -0
- package/dist/utils/isUrl.d.ts +1 -0
- package/dist/utils/isUrl.js +15 -0
- package/dist/utils/isUrl.js.map +1 -0
- package/dist/utils/mergeUnionTypes.d.ts +10 -0
- package/dist/utils/mergeUnionTypes.js +2 -0
- package/dist/utils/mergeUnionTypes.js.map +1 -0
- package/dist/utils/modelDownloadEndpoints.d.ts +13 -0
- package/dist/utils/modelDownloadEndpoints.js +27 -0
- package/dist/utils/modelDownloadEndpoints.js.map +1 -0
- package/dist/utils/modelFileAccessTokens.d.ts +5 -0
- package/dist/utils/modelFileAccessTokens.js +41 -0
- package/dist/utils/modelFileAccessTokens.js.map +1 -0
- package/dist/utils/optionsMatrix.d.ts +58 -0
- package/dist/utils/optionsMatrix.js +97 -0
- package/dist/utils/optionsMatrix.js.map +1 -0
- package/dist/utils/parseModelFileName.d.ts +15 -0
- package/dist/utils/parseModelFileName.js +132 -0
- package/dist/utils/parseModelFileName.js.map +1 -0
- package/dist/utils/parseModelUri.d.ts +40 -0
- package/dist/utils/parseModelUri.js +346 -0
- package/dist/utils/parseModelUri.js.map +1 -0
- package/dist/utils/parseTextTemplate.d.ts +66 -0
- package/dist/utils/parseTextTemplate.js +116 -0
- package/dist/utils/parseTextTemplate.js.map +1 -0
- package/dist/utils/prettyPrintObject.d.ts +10 -0
- package/dist/utils/prettyPrintObject.js +84 -0
- package/dist/utils/prettyPrintObject.js.map +1 -0
- package/dist/utils/pushAll.d.ts +6 -0
- package/dist/utils/pushAll.js +11 -0
- package/dist/utils/pushAll.js.map +1 -0
- package/dist/utils/removeNullFields.d.ts +2 -0
- package/dist/utils/removeNullFields.js +17 -0
- package/dist/utils/removeNullFields.js.map +1 -0
- package/dist/utils/resolveGithubRelease.d.ts +2 -0
- package/dist/utils/resolveGithubRelease.js +77 -0
- package/dist/utils/resolveGithubRelease.js.map +1 -0
- package/dist/utils/resolveLastTokens.d.ts +2 -0
- package/dist/utils/resolveLastTokens.js +12 -0
- package/dist/utils/resolveLastTokens.js.map +1 -0
- package/dist/utils/resolveModelDestination.d.ts +16 -0
- package/dist/utils/resolveModelDestination.js +54 -0
- package/dist/utils/resolveModelDestination.js.map +1 -0
- package/dist/utils/resolveModelFile.d.ts +142 -0
- package/dist/utils/resolveModelFile.js +201 -0
- package/dist/utils/resolveModelFile.js.map +1 -0
- package/dist/utils/runtime.d.ts +4 -0
- package/dist/utils/runtime.js +8 -0
- package/dist/utils/runtime.js.map +1 -0
- package/dist/utils/safeEventCallback.d.ts +6 -0
- package/dist/utils/safeEventCallback.js +29 -0
- package/dist/utils/safeEventCallback.js.map +1 -0
- package/dist/utils/signalSleep.d.ts +1 -0
- package/dist/utils/signalSleep.js +20 -0
- package/dist/utils/signalSleep.js.map +1 -0
- package/dist/utils/spawnCommand.d.ts +11 -0
- package/dist/utils/spawnCommand.js +89 -0
- package/dist/utils/spawnCommand.js.map +1 -0
- package/dist/utils/tokenizeInput.d.ts +3 -0
- package/dist/utils/tokenizeInput.js +14 -0
- package/dist/utils/tokenizeInput.js.map +1 -0
- package/dist/utils/tokenizerUtils.d.ts +12 -0
- package/dist/utils/tokenizerUtils.js +32 -0
- package/dist/utils/tokenizerUtils.js.map +1 -0
- package/dist/utils/transformPromisable.d.ts +54 -0
- package/dist/utils/transformPromisable.js +95 -0
- package/dist/utils/transformPromisable.js.map +1 -0
- package/dist/utils/truncateTextAndRoundToWords.d.ts +11 -0
- package/dist/utils/truncateTextAndRoundToWords.js +110 -0
- package/dist/utils/truncateTextAndRoundToWords.js.map +1 -0
- package/dist/utils/utilTypes.d.ts +18 -0
- package/dist/utils/utilTypes.js +2 -0
- package/dist/utils/utilTypes.js.map +1 -0
- package/dist/utils/waitForLockfileRelease.d.ts +5 -0
- package/dist/utils/waitForLockfileRelease.js +19 -0
- package/dist/utils/waitForLockfileRelease.js.map +1 -0
- package/dist/utils/withLockfile.d.ts +7 -0
- package/dist/utils/withLockfile.js +44 -0
- package/dist/utils/withLockfile.js.map +1 -0
- package/dist/utils/withOra.d.ts +7 -0
- package/dist/utils/withOra.js +37 -0
- package/dist/utils/withOra.js.map +1 -0
- package/dist/utils/withProgressLog.d.ts +22 -0
- package/dist/utils/withProgressLog.js +211 -0
- package/dist/utils/withProgressLog.js.map +1 -0
- package/dist/utils/withStatusLogs.d.ts +6 -0
- package/dist/utils/withStatusLogs.js +25 -0
- package/dist/utils/withStatusLogs.js.map +1 -0
- package/dist/utils/wrapAbortSignal.d.ts +1 -0
- package/dist/utils/wrapAbortSignal.js +14 -0
- package/dist/utils/wrapAbortSignal.js.map +1 -0
- package/llama/.clang-format +46 -0
- package/llama/CMakeLists.txt +141 -0
- package/llama/addon/AddonContext.cpp +1181 -0
- package/llama/addon/AddonContext.h +85 -0
- package/llama/addon/AddonGrammar.cpp +92 -0
- package/llama/addon/AddonGrammar.h +22 -0
- package/llama/addon/AddonGrammarEvaluationState.cpp +36 -0
- package/llama/addon/AddonGrammarEvaluationState.h +17 -0
- package/llama/addon/AddonModel.cpp +691 -0
- package/llama/addon/AddonModel.h +64 -0
- package/llama/addon/AddonModelData.cpp +25 -0
- package/llama/addon/AddonModelData.h +15 -0
- package/llama/addon/AddonModelLora.cpp +103 -0
- package/llama/addon/AddonModelLora.h +28 -0
- package/llama/addon/AddonSampler.cpp +669 -0
- package/llama/addon/AddonSampler.h +75 -0
- package/llama/addon/RingBuffer.h +109 -0
- package/llama/addon/addon.cpp +330 -0
- package/llama/addon/addonGlobals.cpp +22 -0
- package/llama/addon/addonGlobals.h +12 -0
- package/llama/addon/globals/addonLog.cpp +143 -0
- package/llama/addon/globals/addonLog.h +24 -0
- package/llama/addon/globals/addonProgress.cpp +15 -0
- package/llama/addon/globals/addonProgress.h +15 -0
- package/llama/addon/globals/getGpuInfo.cpp +146 -0
- package/llama/addon/globals/getGpuInfo.h +11 -0
- package/llama/addon/globals/getMemoryInfo.cpp +63 -0
- package/llama/addon/globals/getMemoryInfo.h +4 -0
- package/llama/addon/globals/getSwapInfo.cpp +69 -0
- package/llama/addon/globals/getSwapInfo.h +4 -0
- package/llama/binariesGithubRelease.json +3 -0
- package/llama/cmake/addVariantSuffix.cmake +21 -0
- package/llama/cmake/win32.ensureNinjaPath.cmake +68 -0
- package/llama/cmake/win32.ensureNodeLib.cmake +34 -0
- package/llama/cmake/win32.llvmApplyGnuModeAdaptations.cmake +12 -0
- package/llama/cmake/win32.llvmEnsureCmakeAr.cmake +37 -0
- package/llama/cmake/win32.llvmUseGnuModeCompilers.cmake +87 -0
- package/llama/cmake/win32.programFilesPaths.cmake +35 -0
- package/llama/gpuInfo/vulkan-gpu-info.cpp +207 -0
- package/llama/gpuInfo/vulkan-gpu-info.h +9 -0
- package/llama/package.json +5 -0
- package/llama/profiles/llvm.win32.host-arm64.target-arm64.cmake +14 -0
- package/llama/profiles/llvm.win32.host-x64.target-arm64.cmake +14 -0
- package/llama/profiles/llvm.win32.host-x64.target-x64.cmake +14 -0
- package/llama/toolchains/darwin.host-x64.target-arm64.cmake +8 -0
- package/llama/toolchains/linux.host-arm64.target-x64.cmake +5 -0
- package/llama/toolchains/linux.host-x64.target-arm64.cmake +5 -0
- package/llama/toolchains/linux.host-x64.target-arm71.cmake +5 -0
- package/llama/toolchains/llvm.win32.host-x64.target-x64.cmake +20 -0
- package/llama/toolchains/win32.host-arm64.target-arm64.cmake +21 -0
- package/llama/toolchains/win32.host-x64.target-arm64.cmake +21 -0
- package/llama/xpack/package.json +10 -0
- package/package.json +241 -0
- package/templates/README.md +6 -0
- package/templates/package.json +10 -0
- package/templates/packed/electron-typescript-react.json +1 -0
- package/templates/packed/node-typescript.json +1 -0
|
@@ -0,0 +1,602 @@
|
|
|
1
|
+
import type { PickOptions } from "../../utils/utilTypes.js";
|
|
2
|
+
import type { GgmlType } from "../../gguf/types/GgufTensorInfoTypes.js";
|
|
3
|
+
import type { LlamaGrammarEvaluationState } from "../LlamaGrammarEvaluationState.js";
|
|
4
|
+
import type { TokenBias } from "../TokenBias.js";
|
|
5
|
+
import type { Token } from "../../types.js";
|
|
6
|
+
import type { LlamaContextSequence } from "./LlamaContext.js";
|
|
7
|
+
export type LlamaContextOptions = {
|
|
8
|
+
/**
|
|
9
|
+
* The number of sequences for the context.
|
|
10
|
+
* Each sequence is a different "text generation process" that can run in parallel to other sequences in the same context.
|
|
11
|
+
* Although a single context has multiple sequences, the sequences are separate from each other and do not share data with each other.
|
|
12
|
+
* This is beneficial for performance, as multiple sequences can be evaluated in parallel (on the same batch).
|
|
13
|
+
*
|
|
14
|
+
* Each sequence increases the memory usage of the context.
|
|
15
|
+
*
|
|
16
|
+
* Defaults to `1`.
|
|
17
|
+
*/
|
|
18
|
+
sequences?: number;
|
|
19
|
+
/**
|
|
20
|
+
* The number of tokens the model can see at once.
|
|
21
|
+
* - **`"auto"`** - adapt to the current VRAM state and attempt to set the context size as high as possible up to the size
|
|
22
|
+
* the model was trained on.
|
|
23
|
+
* - **`number`** - set the context size to a specific number of tokens.
|
|
24
|
+
* If there's not enough VRAM, an error will be thrown.
|
|
25
|
+
* Use with caution.
|
|
26
|
+
* - **`{min?: number, max?: number}`** - adapt to the current VRAM state and attempt to set the context size as high as possible
|
|
27
|
+
* up to the size the model was trained on, but at least `min` and at most `max`.
|
|
28
|
+
*
|
|
29
|
+
* The actual context size may be slightly larger than your request (by up to 256) due to the implementation in `llama.cpp` that
|
|
30
|
+
* aligns the context size to multiples of 256 for performance reasons.
|
|
31
|
+
* To check the actual context size that gets created, use the `.contextSize` property
|
|
32
|
+
* of the created context instance or any of its sequences.
|
|
33
|
+
*
|
|
34
|
+
* Defaults to `"auto"`.
|
|
35
|
+
*/
|
|
36
|
+
contextSize?: "auto" | number | {
|
|
37
|
+
min?: number;
|
|
38
|
+
max?: number;
|
|
39
|
+
};
|
|
40
|
+
/**
|
|
41
|
+
* The number of tokens that can be processed at once by the GPU.
|
|
42
|
+
*
|
|
43
|
+
* Defaults to `512` or `contextSize` if `contextSize` is less than `512`.
|
|
44
|
+
*/
|
|
45
|
+
batchSize?: number;
|
|
46
|
+
/**
|
|
47
|
+
* Flash attention is an optimization in the attention mechanism that makes inference faster, more efficient and uses less memory.
|
|
48
|
+
*
|
|
49
|
+
* The support for flash attention is currently experimental and may not always work as expected.
|
|
50
|
+
* Use with caution.
|
|
51
|
+
*
|
|
52
|
+
* This option will be ignored if flash attention is not supported by the model.
|
|
53
|
+
*
|
|
54
|
+
* Defaults to `false` (inherited from the model option `defaultContextFlashAttention`).
|
|
55
|
+
*
|
|
56
|
+
* Upon flash attention exiting the experimental status, the default value will become `true`
|
|
57
|
+
* (the inherited value from the model option `defaultContextFlashAttention` will become `true`).
|
|
58
|
+
*/
|
|
59
|
+
flashAttention?: boolean;
|
|
60
|
+
/**
|
|
61
|
+
* The number of threads to use to evaluate tokens.
|
|
62
|
+
* Set to `0` to use the maximum number of threads supported by the current machine hardware.
|
|
63
|
+
*
|
|
64
|
+
* This value is considered as a hint, and the actual number of threads used may be lower when other evaluations are running.
|
|
65
|
+
* To ensure the minimum number of threads you want to use are always used,
|
|
66
|
+
* set this to an object with a `min` property (see the `min` property description for more details).
|
|
67
|
+
*
|
|
68
|
+
* If `maxThreads` from the Llama instance is set to `0`, this value will always be the actual number of threads used.
|
|
69
|
+
*
|
|
70
|
+
* If `maxThreads` from the Llama instance is set to `0`, defaults to the `.cpuMathCores` value from the Llama instance,
|
|
71
|
+
* otherwise defaults to `maxThreads` from the Llama instance (see the `maxThreads` option of `getLlama` method for more details).
|
|
72
|
+
*/
|
|
73
|
+
threads?: number | {
|
|
74
|
+
/**
|
|
75
|
+
* The ideal number of threads to use for evaluations.
|
|
76
|
+
*
|
|
77
|
+
* If other evaluations are running, the actual number of threads may be lower than this value.
|
|
78
|
+
*
|
|
79
|
+
* If `maxThreads` from the Llama instance is set to `0`, this value will always be the actual number of threads used.
|
|
80
|
+
*
|
|
81
|
+
* If `maxThreads` from the Llama instance is set to `0`, defaults to the `.cpuMathCores` value from the Llama instance,
|
|
82
|
+
* otherwise defaults to `maxThreads` from the Llama instance (see the `maxThreads` option of `getLlama` method for more details).
|
|
83
|
+
*/
|
|
84
|
+
ideal?: number;
|
|
85
|
+
/**
|
|
86
|
+
* Ensure evaluations always use at least this number of threads.
|
|
87
|
+
*
|
|
88
|
+
* Use with caution, since setting this value too high can lead to the context waiting too much time
|
|
89
|
+
* to reserve this number of threads before the evaluation can start.
|
|
90
|
+
*/
|
|
91
|
+
min?: number;
|
|
92
|
+
};
|
|
93
|
+
/**
|
|
94
|
+
* Control the parallel sequences processing behavior.
|
|
95
|
+
*
|
|
96
|
+
* See {@link BatchingOptions} for more information.
|
|
97
|
+
*/
|
|
98
|
+
batching?: BatchingOptions;
|
|
99
|
+
/**
|
|
100
|
+
* The type of the key for the KV cache tensors used in this context.
|
|
101
|
+
*
|
|
102
|
+
* Set to `"currentQuant"` to use the same type as the current quantization of the model weights tensors.
|
|
103
|
+
*
|
|
104
|
+
* Defaults to `F16` (inherited from the model option `defaultContextKvCacheKeyType`).
|
|
105
|
+
* @deprecated - this option is experimental and highly unstable.
|
|
106
|
+
* Only use with a hard-coded model and on specific hardware where you have verified that the type passed to this option works correctly.
|
|
107
|
+
* Avoid allowing end users to configure this option, as it's highly unstable.
|
|
108
|
+
* @experimental - this option is experimental and highly unstable.
|
|
109
|
+
* It may not work as intended or even crash the process.
|
|
110
|
+
* Use with caution.
|
|
111
|
+
* This option may change or get removed in the future without a breaking change version.
|
|
112
|
+
*/
|
|
113
|
+
experimentalKvCacheKeyType?: "currentQuant" | keyof typeof GgmlType | GgmlType;
|
|
114
|
+
/**
|
|
115
|
+
* The type of the value for the KV cache tensors used in this context.
|
|
116
|
+
*
|
|
117
|
+
* Set to `"currentQuant"` to use the same type as the current quantization of the model weights tensors.
|
|
118
|
+
*
|
|
119
|
+
* Defaults to `F16` (inherited from the model option `defaultContextKvCacheValueType`).
|
|
120
|
+
* @deprecated - this option is experimental and highly unstable.
|
|
121
|
+
* Only use with a hard-coded model and on specific hardware where you have verified that the type passed to this option works correctly.
|
|
122
|
+
* Avoid allowing end users to configure this option, as it's highly unstable.
|
|
123
|
+
* @experimental - this option is experimental and highly unstable.
|
|
124
|
+
* It may not work as intended or even crash the process.
|
|
125
|
+
* Use with caution.
|
|
126
|
+
* This option may change or get removed in the future without a breaking change version.
|
|
127
|
+
*/
|
|
128
|
+
experimentalKvCacheValueType?: "currentQuant" | keyof typeof GgmlType | GgmlType;
|
|
129
|
+
/**
|
|
130
|
+
* When using SWA (Sliding Window Attention) on a supported model,
|
|
131
|
+
* extend the sliding window size to the current context size (meaning practically disabling SWA).
|
|
132
|
+
*
|
|
133
|
+
* Enabling this option will consume more memory on models that support SWA (Sliding Window Attention),
|
|
134
|
+
* but will allow reusing the evaluation cache of any prefix length of the context sequence state
|
|
135
|
+
* (instead of just the size of the sliding window when SWA is used).
|
|
136
|
+
*
|
|
137
|
+
* This option has no effect on models that do not support SWA (Sliding Window Attention).
|
|
138
|
+
*
|
|
139
|
+
* > **Note:** you can check the SWA size using `model.fileInsights.swaSize`.
|
|
140
|
+
*
|
|
141
|
+
* Defaults to `false` (inherited from the model option `defaultContextSwaFullCache`).
|
|
142
|
+
*/
|
|
143
|
+
swaFullCache?: boolean;
|
|
144
|
+
/**
|
|
145
|
+
* Load the provided LoRA adapters onto the context.
|
|
146
|
+
* LoRA adapters are used to modify the weights of a pretrained model to adapt to new tasks or domains
|
|
147
|
+
* without the need for extensive retraining from scratch.
|
|
148
|
+
*
|
|
149
|
+
* If a string is provided, it will be treated as a path to a single LoRA adapter file.
|
|
150
|
+
*
|
|
151
|
+
* The adapters will be released from memory once the model (not just the context) is disposed.
|
|
152
|
+
*/
|
|
153
|
+
lora?: string | {
|
|
154
|
+
adapters: Array<{
|
|
155
|
+
filePath: string;
|
|
156
|
+
/**
|
|
157
|
+
* Defaults to `1`
|
|
158
|
+
*/
|
|
159
|
+
scale?: number;
|
|
160
|
+
}>;
|
|
161
|
+
/**
|
|
162
|
+
* Called with the LoRA adapters load percentage when the LoRA adapters are being loaded.
|
|
163
|
+
* @param loadProgress - a number between 0 (exclusive) and 1 (inclusive).
|
|
164
|
+
*/
|
|
165
|
+
onLoadProgress?(loadProgress: number): void;
|
|
166
|
+
};
|
|
167
|
+
/** An abort signal to abort the context creation */
|
|
168
|
+
createSignal?: AbortSignal;
|
|
169
|
+
/**
|
|
170
|
+
* Ignore insufficient memory errors and continue with the context creation.
|
|
171
|
+
* Can cause the process to crash if there's not enough VRAM for the new context.
|
|
172
|
+
*
|
|
173
|
+
* Defaults to `false`.
|
|
174
|
+
*/
|
|
175
|
+
ignoreMemorySafetyChecks?: boolean;
|
|
176
|
+
/**
|
|
177
|
+
* On failed context creation, retry the creation with a smaller context size.
|
|
178
|
+
*
|
|
179
|
+
* Only works if `contextSize` is set to `"auto"`, left as default or set to an object with `min` and/or `max` properties.
|
|
180
|
+
*
|
|
181
|
+
* Set this option to `false` to disable.
|
|
182
|
+
*/
|
|
183
|
+
failedCreationRemedy?: false | {
|
|
184
|
+
/**
|
|
185
|
+
* The number of times to retry creating the context.
|
|
186
|
+
*
|
|
187
|
+
* Defaults to `6`.
|
|
188
|
+
*/
|
|
189
|
+
retries?: number;
|
|
190
|
+
/**
|
|
191
|
+
* The percentage to decrease the context size by on each retry.
|
|
192
|
+
* Should be a number between `0` and `1`.
|
|
193
|
+
*
|
|
194
|
+
* If a function is provided, it will be called with the current context size and should return the new context size.
|
|
195
|
+
*
|
|
196
|
+
* Defaults to `0.16`.
|
|
197
|
+
*/
|
|
198
|
+
autoContextSizeShrink?: number | ((contextSize: number) => number);
|
|
199
|
+
};
|
|
200
|
+
/**
|
|
201
|
+
* Track the inference performance of the context, so using `.printTimings()` will work.
|
|
202
|
+
*
|
|
203
|
+
* Defaults to `false`.
|
|
204
|
+
*/
|
|
205
|
+
performanceTracking?: boolean;
|
|
206
|
+
};
|
|
207
|
+
export type LlamaContextSequenceRepeatPenalty = {
|
|
208
|
+
/** Tokens to lower the prediction probability of, so they are less likely to be the next predicted token */
|
|
209
|
+
punishTokens: Token[] | (() => Token[]);
|
|
210
|
+
/**
|
|
211
|
+
* The maximum number of tokens that will be provided in the `punishTokens` array.
|
|
212
|
+
*
|
|
213
|
+
* This is used as a hint for a performance optimization for avoiding frequent memory deallocation and reallocation.
|
|
214
|
+
*
|
|
215
|
+
* Don't set this value too high, as it can allocate too much memory.
|
|
216
|
+
*
|
|
217
|
+
* Defaults to `64`.
|
|
218
|
+
*/
|
|
219
|
+
maxPunishTokens?: number;
|
|
220
|
+
/**
|
|
221
|
+
* The relative amount to lower the probability of the tokens in `punishTokens` by.
|
|
222
|
+
*
|
|
223
|
+
* Defaults to `1.1`.
|
|
224
|
+
* Set to `1` to disable.
|
|
225
|
+
*/
|
|
226
|
+
penalty?: number;
|
|
227
|
+
/**
|
|
228
|
+
* If a token appears n times in the `punishTokens` array, lower its probability by `n * frequencyPenalty`.
|
|
229
|
+
*
|
|
230
|
+
* Disabled by default (`0`).
|
|
231
|
+
* Set to a value between `0` and `1` to enable.
|
|
232
|
+
*/
|
|
233
|
+
frequencyPenalty?: number;
|
|
234
|
+
/**
|
|
235
|
+
* Lower the probability of all the tokens in the `punishTokens` array by `presencePenalty`.
|
|
236
|
+
*
|
|
237
|
+
* Disabled by default (`0`).
|
|
238
|
+
* Set to a value between `0` and `1` to enable.
|
|
239
|
+
*/
|
|
240
|
+
presencePenalty?: number;
|
|
241
|
+
};
|
|
242
|
+
export type LlamaContextSequenceDryRepeatPenalty = {
|
|
243
|
+
/**
|
|
244
|
+
* A number between `0` and `1` representing the strength of the DRY (Don't Repeat Yourself) effect.
|
|
245
|
+
*
|
|
246
|
+
* Setting this to `0` will disable the DRY penalty completely.
|
|
247
|
+
*
|
|
248
|
+
* The recommended value is `0.8`.
|
|
249
|
+
*/
|
|
250
|
+
strength: number;
|
|
251
|
+
/**
|
|
252
|
+
* The base value for the exponential penalty calculation.
|
|
253
|
+
*
|
|
254
|
+
* A higher value will lead to more aggressive penalization of repetitions.
|
|
255
|
+
*
|
|
256
|
+
* Defaults to `1.75`.
|
|
257
|
+
*/
|
|
258
|
+
base?: number;
|
|
259
|
+
/**
|
|
260
|
+
* The maximum sequence length (in tokens) that will be allowed to be repeated without being penalized.
|
|
261
|
+
*
|
|
262
|
+
* Repetitions shorter than or equal to this length will not be penalized,
|
|
263
|
+
* allowing for natural repetitions of short phrases and common words.
|
|
264
|
+
*
|
|
265
|
+
* Defaults to `2`.
|
|
266
|
+
*/
|
|
267
|
+
allowedLength?: number;
|
|
268
|
+
/**
|
|
269
|
+
* Number of recent tokens generated by the model to consider for sequence repetition matching.
|
|
270
|
+
*
|
|
271
|
+
* When set to `null`, the entire context sequence history will be considered for repetition matching.
|
|
272
|
+
* Setting to `0` will disable DRY (Don't Repeat Yourself) penalty.
|
|
273
|
+
*
|
|
274
|
+
* Defaults to `null`.
|
|
275
|
+
*/
|
|
276
|
+
lastTokens?: number | null;
|
|
277
|
+
/**
|
|
278
|
+
* Text sequences that will be considered as breakers for the repeated sequences.
|
|
279
|
+
* These will never be penalized for being repeated, and are used to mark the boundaries of the repeated sequences.
|
|
280
|
+
*
|
|
281
|
+
* For example, setting this to `["\n", "*"]` will allow the model to make as many lists as it wants,
|
|
282
|
+
* without being penalized for repeating the list item marker (like `*`).
|
|
283
|
+
*
|
|
284
|
+
* Defaults to `["\n", ":", '"', "*"]`.
|
|
285
|
+
*/
|
|
286
|
+
sequenceBreakers?: string[];
|
|
287
|
+
};
|
|
288
|
+
export type BatchingOptions = {
|
|
289
|
+
/**
|
|
290
|
+
* The strategy used to dispatch items to be processed when there are items pending to be processed.
|
|
291
|
+
* - **`"nextCycle"`** - dispatch the items on the next event loop cycle.
|
|
292
|
+
* You can provide a custom function to define a custom dispatch schedule.
|
|
293
|
+
*
|
|
294
|
+
* Defaults to `"nextCycle"`.
|
|
295
|
+
*/
|
|
296
|
+
dispatchSchedule?: "nextCycle" | CustomBatchingDispatchSchedule;
|
|
297
|
+
/**
|
|
298
|
+
* The strategy used to prioritize pending items to be processed.
|
|
299
|
+
* - **`"maximumParallelism"`** - process as many different sequences in parallel as possible.
|
|
300
|
+
* - **`"firstInFirstOut"`** - process items with a higher evaluation priority first; items with the same priority are processed in the order they were added.
|
|
301
|
+
* - **Custom prioritization function** - a custom function that prioritizes the items to be processed.
|
|
302
|
+
* See the {@link CustomBatchingPrioritizationStrategy} type for more information.
|
|
303
|
+
*
|
|
304
|
+
* Defaults to `"maximumParallelism"`.
|
|
305
|
+
*/
|
|
306
|
+
itemPrioritizationStrategy?: "maximumParallelism" | "firstInFirstOut" | CustomBatchingPrioritizationStrategy;
|
|
307
|
+
};
|
|
308
|
+
/**
|
|
309
|
+
* A function that schedules the dispatch of the batch items.
|
|
310
|
+
* Call the `dispatch` function to dispatch the items.
|
|
311
|
+
*/
|
|
312
|
+
export type CustomBatchingDispatchSchedule = (dispatch: () => void) => void;
|
|
313
|
+
/**
|
|
314
|
+
* A function that prioritizes the batch items to be processed.
|
|
315
|
+
* The function receives an array of `items` and the `size` of how many tokens can be processed in this batch.
|
|
316
|
+
*
|
|
317
|
+
* The function should return an array of prioritized items,
|
|
318
|
+
* where the sum of `processAmount` of all the items is less than or equal to the given `size` that the function received,
|
|
319
|
+
* and where the `item` of each prioritized item is the same reference to an original item in the `items` array.
|
|
320
|
+
*/
|
|
321
|
+
export type CustomBatchingPrioritizationStrategy = (options: {
|
|
322
|
+
items: readonly BatchItem[];
|
|
323
|
+
size: number;
|
|
324
|
+
}) => PrioritizedBatchItem[];
|
|
325
|
+
export type ContextShiftOptions = {
|
|
326
|
+
size?: number | ((sequence: LlamaContextSequence) => number | Promise<number>);
|
|
327
|
+
strategy?: "eraseBeginning" | ((options: {
|
|
328
|
+
sequence: LlamaContextSequence;
|
|
329
|
+
size: number;
|
|
330
|
+
}) => ContextTokensDeleteRange[] | Promise<ContextTokensDeleteRange[]>);
|
|
331
|
+
};
|
|
332
|
+
export type ContextTokensDeleteRange = {
|
|
333
|
+
start: number;
|
|
334
|
+
end: number;
|
|
335
|
+
};
|
|
336
|
+
export type SequenceEvaluateOptions = {
|
|
337
|
+
temperature?: number;
|
|
338
|
+
minP?: number;
|
|
339
|
+
topK?: number;
|
|
340
|
+
topP?: number;
|
|
341
|
+
/**
|
|
342
|
+
* Used to control the randomness of the generated text.
|
|
343
|
+
*
|
|
344
|
+
* Change the seed to get different results.
|
|
345
|
+
*
|
|
346
|
+
* Defaults to the current epoch time.
|
|
347
|
+
*
|
|
348
|
+
* Only relevant when using `temperature`.
|
|
349
|
+
*/
|
|
350
|
+
seed?: number;
|
|
351
|
+
/**
|
|
352
|
+
* Exclude Top Choices (XTC) removes the top tokens from consideration and avoids more obvious and repetitive generations.
|
|
353
|
+
* Using it leads to more creative responses, but also to increased hallucinations.
|
|
354
|
+
*
|
|
355
|
+
* The `probability` value controls the chance that the top tokens will be removed in the next token generation step.
|
|
356
|
+
* The `threshold` value controls the minimum probability of a token for it to be removed.
|
|
357
|
+
*
|
|
358
|
+
* Start with `{probability: 0.5, threshold: 0.1}` and adjust from there.
|
|
359
|
+
*
|
|
360
|
+
* Disabled by default.
|
|
361
|
+
*/
|
|
362
|
+
xtc?: {
|
|
363
|
+
/**
|
|
364
|
+
* A number between `0` and `1` representing the probability of applying Exclude Top Choices (XTC) at each token generation step.
|
|
365
|
+
*/
|
|
366
|
+
probability: number;
|
|
367
|
+
/**
|
|
368
|
+
* A number between `0` and `1` representing the minimum probability
|
|
369
|
+
* of a token for it to be removed when applying Exclude Top Choices (XTC).
|
|
370
|
+
*/
|
|
371
|
+
threshold: number;
|
|
372
|
+
};
|
|
373
|
+
grammarEvaluationState?: LlamaGrammarEvaluationState | (() => LlamaGrammarEvaluationState | undefined);
|
|
374
|
+
repeatPenalty?: LlamaContextSequenceRepeatPenalty;
|
|
375
|
+
/**
|
|
376
|
+
* DRY (Don't Repeat Yourself) penalty is a technique to reduce repetitions in the generated text
|
|
377
|
+
* by penalizing tokens based on recent token usage patterns.
|
|
378
|
+
*
|
|
379
|
+
* With the right choice of parameters, it makes it impossible for the model to
|
|
380
|
+
* repeat itself verbatim with the same tokens in the same order (the model can still repeat itself by
|
|
381
|
+
* using different tokens or by paraphrasing, but that is far less of an issue than a broken-record looping).
|
|
382
|
+
*
|
|
383
|
+
* Disabled by default.
|
|
384
|
+
*/
|
|
385
|
+
dryRepeatPenalty?: LlamaContextSequenceDryRepeatPenalty;
|
|
386
|
+
/**
|
|
387
|
+
* Adjust the probability of tokens being generated.
|
|
388
|
+
* Can be used to bias the model to generate tokens that you want it to lean towards,
|
|
389
|
+
* or to avoid generating tokens that you want it to avoid.
|
|
390
|
+
*/
|
|
391
|
+
tokenBias?: TokenBias | (() => TokenBias);
|
|
392
|
+
/**
|
|
393
|
+
* When a lot of tokens are queued for the next batch, more than the configured `batchSize`, the tokens for each sequence will be
|
|
394
|
+
* evaluated based on the strategy chosen for the context.
|
|
395
|
+
* By default, the `"maximumParallelism"` strategy is used, which will try to evaluate as many sequences in parallel as possible,
|
|
396
|
+
* but at some point, it'll have to choose which sequences to evaluate more tokens of, so it'll prioritize the sequences with the
|
|
397
|
+
* highest evaluation priority.
|
|
398
|
+
* Also, a custom strategy can be used to prioritize the sequences differently, but generally, the higher the evaluation priority
|
|
399
|
+
* is, the more likely and more tokens will be evaluated for that sequence in the next queued batch.
|
|
400
|
+
*/
|
|
401
|
+
evaluationPriority?: EvaluationPriority;
|
|
402
|
+
/**
|
|
403
|
+
* Override the sequence context shift options for this evaluation
|
|
404
|
+
*
|
|
405
|
+
* See {@link ContextShiftOptions} for more information.
|
|
406
|
+
*/
|
|
407
|
+
contextShift?: ContextShiftOptions;
|
|
408
|
+
/**
|
|
409
|
+
* Yield an EOG (End Of Generation) token (like EOS and EOT) when it's generated.
|
|
410
|
+
* When `false` the generation will stop when an EOG token is generated and the token won't be yielded.
|
|
411
|
+
* Defaults to `false`.
|
|
412
|
+
*/
|
|
413
|
+
yieldEogToken?: boolean;
|
|
414
|
+
};
|
|
415
|
+
export type SequenceEvaluateMetadataOptions = {
|
|
416
|
+
/**
|
|
417
|
+
* Get the confidence (probability) of the selected token.
|
|
418
|
+
*
|
|
419
|
+
* Same as `probabilities.get(token)` from the output.
|
|
420
|
+
*
|
|
421
|
+
* If you need only this value, you can skip getting the full probabilities list to improve performance.
|
|
422
|
+
*
|
|
423
|
+
* This value might be slightly different when evaluated on different GPUs and configurations.
|
|
424
|
+
*/
|
|
425
|
+
readonly confidence?: boolean;
|
|
426
|
+
/**
|
|
427
|
+
* Get the full probabilities list of tokens from the vocabulary to be the next token, after applying the given options.
|
|
428
|
+
*
|
|
429
|
+
* Only enable when needed, as it impacts the performance.
|
|
430
|
+
*
|
|
431
|
+
* Defaults to `false`.
|
|
432
|
+
*/
|
|
433
|
+
readonly probabilities?: boolean;
|
|
434
|
+
};
|
|
435
|
+
export type SequenceEvaluateOutput<Options extends {
|
|
436
|
+
readonly confidence?: boolean;
|
|
437
|
+
readonly probabilities?: boolean;
|
|
438
|
+
} = {
|
|
439
|
+
readonly confidence: true;
|
|
440
|
+
readonly probabilities: true;
|
|
441
|
+
}> = PickOptions<{
|
|
442
|
+
/**
|
|
443
|
+
* The next token generated by the model and selected using the given options (such as temperature).
|
|
444
|
+
*/
|
|
445
|
+
token: Token;
|
|
446
|
+
/**
|
|
447
|
+
* The confidence (probability) of the selected token.
|
|
448
|
+
*
|
|
449
|
+
* Same as `probabilities.get(token)`.
|
|
450
|
+
*
|
|
451
|
+
* If you need only this value, you can skip getting the full probabilities list to improve performance.
|
|
452
|
+
*
|
|
453
|
+
* This value might be slightly different when evaluated on different GPUs and configurations.
|
|
454
|
+
*/
|
|
455
|
+
confidence: number;
|
|
456
|
+
/**
|
|
457
|
+
* The probabilities of the tokens from the vocabulary to be the next token.
|
|
458
|
+
*
|
|
459
|
+
* A probability is a number from `0` to `1`.
|
|
460
|
+
*
|
|
461
|
+
* The probabilities might be slightly different when evaluated on different GPUs and configurations.
|
|
462
|
+
*
|
|
463
|
+
* The map is sorted by the probability of the tokens from the highest to the lowest,
|
|
464
|
+
* and is reflected in the order of the entries when iterating over the map.
|
|
465
|
+
* Use `.entries().next().value` to get the top probability pair
|
|
466
|
+
* ([learn more](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Map/entries)).
|
|
467
|
+
*/
|
|
468
|
+
probabilities: Map<Token, number>;
|
|
469
|
+
}, Options & {
|
|
470
|
+
token: true;
|
|
471
|
+
}>;
|
|
472
|
+
export type ControlledEvaluateInputItem = Token | [
|
|
473
|
+
token: Token,
|
|
474
|
+
options: {
|
|
475
|
+
generateNext?: {
|
|
476
|
+
/**
|
|
477
|
+
* Get the full probabilities list of tokens from the vocabulary to be the next token, after applying the given options.
|
|
478
|
+
*
|
|
479
|
+
* Only enable when needed, as it impacts the performance.
|
|
480
|
+
*
|
|
481
|
+
* Defaults to `false`.
|
|
482
|
+
*/
|
|
483
|
+
probabilities?: boolean;
|
|
484
|
+
/**
|
|
485
|
+
* Get the confidence (probability) of the selected token.
|
|
486
|
+
*
|
|
487
|
+
* Same as `next.probabilities.get(next.token)` from the output.
|
|
488
|
+
*
|
|
489
|
+
* If you need only this value, you can skip getting the full probabilities list to improve performance.
|
|
490
|
+
*
|
|
491
|
+
* This value might be slightly different when evaluated on different GPUs and configurations.
|
|
492
|
+
*/
|
|
493
|
+
confidence?: boolean;
|
|
494
|
+
/**
|
|
495
|
+
* Generate the next token with the provided options using sampling.
|
|
496
|
+
*
|
|
497
|
+
* Setting this to `true` will generate probabilities for the next token and sample it.
|
|
498
|
+
*/
|
|
499
|
+
token?: boolean;
|
|
500
|
+
options?: {
|
|
501
|
+
temperature?: number;
|
|
502
|
+
minP?: number;
|
|
503
|
+
topK?: number;
|
|
504
|
+
topP?: number;
|
|
505
|
+
/**
|
|
506
|
+
* Used to control the randomness of the generated text.
|
|
507
|
+
*
|
|
508
|
+
* Change the seed to get different results.
|
|
509
|
+
*
|
|
510
|
+
* Defaults to the current epoch time.
|
|
511
|
+
*
|
|
512
|
+
* Only relevant when using `temperature`.
|
|
513
|
+
*/
|
|
514
|
+
seed?: number;
|
|
515
|
+
/**
|
|
516
|
+
* Exclude Top Choices (XTC) removes the top tokens from consideration and avoids more obvious and repetitive generations.
|
|
517
|
+
* Using it leads to more creative responses, but also to increased hallucinations.
|
|
518
|
+
*
|
|
519
|
+
* The `probability` value controls the chance that the top tokens will be removed in the next token generation step.
|
|
520
|
+
* The `threshold` value controls the minimum probability of a token for it to be removed.
|
|
521
|
+
*
|
|
522
|
+
* Start with `{probability: 0.5, threshold: 0.1}` and adjust from there.
|
|
523
|
+
*
|
|
524
|
+
* Disabled by default.
|
|
525
|
+
*/
|
|
526
|
+
xtc?: {
|
|
527
|
+
/**
|
|
528
|
+
* A number between `0` and `1` representing the probability
|
|
529
|
+
* of applying Exclude Top Choices (XTC) at each token generation step.
|
|
530
|
+
*/
|
|
531
|
+
probability: number;
|
|
532
|
+
/**
|
|
533
|
+
* A number between `0` and `1` representing the minimum probability
|
|
534
|
+
* of a token for it to be removed when applying Exclude Top Choices (XTC).
|
|
535
|
+
*/
|
|
536
|
+
threshold: number;
|
|
537
|
+
};
|
|
538
|
+
repeatPenalty?: LlamaContextSequenceRepeatPenalty;
|
|
539
|
+
/**
|
|
540
|
+
* DRY (Don't Repeat Yourself) penalty is a technique to reduce repetitions in the generated text
|
|
541
|
+
* by penalizing tokens based on recent token usage patterns.
|
|
542
|
+
*
|
|
543
|
+
* With the right choice of parameters, it makes it impossible for the model to
|
|
544
|
+
* repeat itself verbatim with the same tokens in the same order (the model can still repeat itself by
|
|
545
|
+
* using different tokens or by paraphrasing, but that is far less of an issue than a broken-record looping).
|
|
546
|
+
*
|
|
547
|
+
* Disabled by default.
|
|
548
|
+
*/
|
|
549
|
+
dryRepeatPenalty?: LlamaContextSequenceDryRepeatPenalty;
|
|
550
|
+
/**
|
|
551
|
+
* Adjust the probability of tokens being generated.
|
|
552
|
+
* Can be used to bias the model to generate tokens that you want it to lean towards,
|
|
553
|
+
* or to avoid generating tokens that you want it to avoid.
|
|
554
|
+
*/
|
|
555
|
+
tokenBias?: TokenBias | (() => TokenBias);
|
|
556
|
+
};
|
|
557
|
+
};
|
|
558
|
+
}
|
|
559
|
+
];
|
|
560
|
+
export type ControlledEvaluateIndexOutput = {
|
|
561
|
+
next: {
|
|
562
|
+
token?: Token | null;
|
|
563
|
+
/**
|
|
564
|
+
* The confidence (probability) of the selected token (the `token` field in this object).
|
|
565
|
+
*
|
|
566
|
+
* Same as `next.probabilities.get(next.token)`.
|
|
567
|
+
*
|
|
568
|
+
* If you need only this value, you can skip getting the full probabilities list to improve performance.
|
|
569
|
+
*
|
|
570
|
+
* This value might be slightly different when evaluated on different GPUs and configurations.
|
|
571
|
+
*/
|
|
572
|
+
confidence?: number;
|
|
573
|
+
/**
|
|
574
|
+
* The probabilities of the tokens from the vocabulary to be the next token.
|
|
575
|
+
*
|
|
576
|
+
* A probability is a number from `0` to `1`.
|
|
577
|
+
*
|
|
578
|
+
* The probabilities might be slightly different when evaluated on different GPUs and configurations.
|
|
579
|
+
*
|
|
580
|
+
* The map is sorted by the probability of the tokens from the highest to the lowest,
|
|
581
|
+
* and is reflected in the order of the entries when iterating over the map.
|
|
582
|
+
* Use `.entries().next().value` to get the top probability pair
|
|
583
|
+
* ([learn more](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Map/entries)).
|
|
584
|
+
*/
|
|
585
|
+
probabilities?: Map<Token, number>;
|
|
586
|
+
};
|
|
587
|
+
};
|
|
588
|
+
/**
|
|
589
|
+
* 1 - low
|
|
590
|
+
*
|
|
591
|
+
* 5 - high
|
|
592
|
+
*/
|
|
593
|
+
export type EvaluationPriority = 1 | 2 | 3 | 4 | 5;
|
|
594
|
+
export type BatchItem = {
|
|
595
|
+
readonly tokens: readonly Token[];
|
|
596
|
+
readonly logits: readonly (true | undefined)[];
|
|
597
|
+
readonly evaluationPriority: EvaluationPriority;
|
|
598
|
+
};
|
|
599
|
+
export type PrioritizedBatchItem = {
|
|
600
|
+
item: BatchItem;
|
|
601
|
+
processAmount: number;
|
|
602
|
+
};
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.js","sourceRoot":"","sources":["../../../src/evaluator/LlamaContext/types.ts"],"names":[],"mappings":""}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
/**
 * Batch items prioritization strategy that hands out the available token budget to the
 * pending items, one item at a time, until the budget is used up.
 *
 * Items are visited in descending `evaluationPriority` order. The sort is performed on a
 * copy, so the given `items` array is never mutated.
 *
 * @param options
 * @param options.items - the pending batch items; each item exposes `tokens` and `evaluationPriority`.
 * @param options.size - how many tokens can be processed in this batch.
 * A negative or `NaN` value is treated as `0`, so a negative or `NaN` `processAmount` is never produced.
 * @returns an array of `{item, processAmount}` entries, where each `item` is the same reference
 * as in `items` and the sum of all `processAmount` values does not exceed the token budget.
 */
export function firstInFirstOutStrategy({ items, size }) {
    const res = [];
    const sortedItems = items
        .slice()
        .sort((a, b) => b.evaluationPriority - a.evaluationPriority);
    // Clamp the budget: a negative size would otherwise yield a negative `processAmount`,
    // and a `NaN` size would never hit the `=== 0` exit condition below.
    let leftFreeTokens = size > 0 ? size : 0;
    for (const item of sortedItems) {
        // An item gets all of its tokens processed, or whatever is left of the budget
        const processAmount = Math.min(item.tokens.length, leftFreeTokens);
        res.push({ item, processAmount });
        leftFreeTokens -= processAmount;
        // Budget exhausted - the remaining items wait for the next batch
        if (leftFreeTokens === 0)
            break;
    }
    return res;
}
|
|
16
|
+
//# sourceMappingURL=firstInFirstOutStrategy.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"firstInFirstOutStrategy.js","sourceRoot":"","sources":["../../../../../src/evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies/firstInFirstOutStrategy.ts"],"names":[],"mappings":"AAEA,MAAM,UAAU,uBAAuB,CAAC,EAAC,KAAK,EAAE,IAAI,EAA8C;IAC9F,MAAM,GAAG,GAA2B,EAAE,CAAC;IAEvC,MAAM,WAAW,GAAG,KAAK;SACpB,KAAK,EAAE;SACP,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,kBAAkB,GAAG,CAAC,CAAC,kBAAkB,CAAC,CAAC;IAEjE,IAAI,cAAc,GAAG,IAAI,CAAC;IAC1B,KAAK,MAAM,IAAI,IAAI,WAAW,EAAE,CAAC;QAC7B,MAAM,aAAa,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,MAAM,CAAC,MAAM,EAAE,cAAc,CAAC,CAAC;QACnE,GAAG,CAAC,IAAI,CAAC,EAAC,IAAI,EAAE,aAAa,EAAC,CAAC,CAAC;QAChC,cAAc,IAAI,aAAa,CAAC;QAEhC,IAAI,cAAc,KAAK,CAAC;YACpB,MAAM;IACd,CAAC;IAED,OAAO,GAAG,CAAC;AACf,CAAC"}
|