node-llama-cpp 3.0.0-beta.43 → 3.0.0-beta.45
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +33 -21
- package/bins/_linux-arm64.moved.txt +1 -0
- package/bins/_linux-armv7l.moved.txt +1 -0
- package/bins/_linux-x64-vulkan.moved.txt +1 -0
- package/bins/_linux-x64.moved.txt +1 -0
- package/bins/_mac-arm64-metal.moved.txt +1 -0
- package/bins/_mac-x64.moved.txt +1 -0
- package/bins/_win-arm64.moved.txt +1 -0
- package/bins/_win-x64-vulkan.moved.txt +1 -0
- package/bins/_win-x64.moved.txt +1 -0
- package/dist/ChatWrapper.d.ts +11 -1
- package/dist/ChatWrapper.js +1 -1
- package/dist/ChatWrapper.js.map +1 -1
- package/dist/bindings/AddonTypes.d.ts +30 -19
- package/dist/bindings/Llama.d.ts +9 -0
- package/dist/bindings/Llama.js +33 -6
- package/dist/bindings/Llama.js.map +1 -1
- package/dist/bindings/consts.d.ts +1 -1
- package/dist/bindings/consts.js +2 -0
- package/dist/bindings/consts.js.map +1 -1
- package/dist/bindings/getLlama.d.ts +33 -5
- package/dist/bindings/getLlama.js +14 -3
- package/dist/bindings/getLlama.js.map +1 -1
- package/dist/bindings/types.d.ts +2 -2
- package/dist/bindings/types.js +2 -0
- package/dist/bindings/types.js.map +1 -1
- package/dist/bindings/utils/cloneLlamaCppRepo.js.map +1 -1
- package/dist/bindings/utils/compileLLamaCpp.d.ts +0 -1
- package/dist/bindings/utils/compileLLamaCpp.js +45 -7
- package/dist/bindings/utils/compileLLamaCpp.js.map +1 -1
- package/dist/bindings/utils/getBestComputeLayersAvailable.d.ts +0 -1
- package/dist/bindings/utils/getBuildFolderNameForBuildOptions.js +2 -2
- package/dist/bindings/utils/getBuildFolderNameForBuildOptions.js.map +1 -1
- package/dist/bindings/utils/getGpuTypesToUseForOption.d.ts +0 -1
- package/dist/bindings/utils/testCmakeBinary.d.ts +0 -1
- package/dist/chatWrappers/AlpacaChatWrapper.js +4 -3
- package/dist/chatWrappers/AlpacaChatWrapper.js.map +1 -1
- package/dist/chatWrappers/ChatMLChatWrapper.js +1 -1
- package/dist/chatWrappers/ChatMLChatWrapper.js.map +1 -1
- package/dist/chatWrappers/FalconChatWrapper.js +5 -4
- package/dist/chatWrappers/FalconChatWrapper.js.map +1 -1
- package/dist/chatWrappers/FunctionaryChatWrapper.d.ts +2 -2
- package/dist/chatWrappers/FunctionaryChatWrapper.js +200 -12
- package/dist/chatWrappers/FunctionaryChatWrapper.js.map +1 -1
- package/dist/chatWrappers/GemmaChatWrapper.js +1 -1
- package/dist/chatWrappers/GemmaChatWrapper.js.map +1 -1
- package/dist/chatWrappers/GeneralChatWrapper.js +5 -4
- package/dist/chatWrappers/GeneralChatWrapper.js.map +1 -1
- package/dist/chatWrappers/Llama2ChatWrapper.js +5 -6
- package/dist/chatWrappers/Llama2ChatWrapper.js.map +1 -1
- package/dist/chatWrappers/Llama3ChatWrapper.js +1 -1
- package/dist/chatWrappers/Llama3ChatWrapper.js.map +1 -1
- package/dist/chatWrappers/Llama3_1ChatWrapper.d.ts +13 -9
- package/dist/chatWrappers/Llama3_1ChatWrapper.js +92 -38
- package/dist/chatWrappers/Llama3_1ChatWrapper.js.map +1 -1
- package/dist/chatWrappers/MistralChatWrapper.d.ts +15 -0
- package/dist/chatWrappers/MistralChatWrapper.js +169 -0
- package/dist/chatWrappers/MistralChatWrapper.js.map +1 -0
- package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.d.ts +25 -1
- package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.js +50 -12
- package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.js.map +1 -1
- package/dist/chatWrappers/generic/TemplateChatWrapper.d.ts +22 -16
- package/dist/chatWrappers/generic/TemplateChatWrapper.js +28 -24
- package/dist/chatWrappers/generic/TemplateChatWrapper.js.map +1 -1
- package/dist/chatWrappers/generic/utils/chatHistoryFunctionCallMessageTemplate.d.ts +1 -1
- package/dist/chatWrappers/utils/chunkChatItems.d.ts +10 -0
- package/dist/chatWrappers/utils/chunkChatItems.js +44 -0
- package/dist/chatWrappers/utils/chunkChatItems.js.map +1 -0
- package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.js +37 -26
- package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.js.map +1 -1
- package/dist/chatWrappers/utils/jsonDumps.d.ts +1 -1
- package/dist/chatWrappers/utils/jsonDumps.js +2 -2
- package/dist/chatWrappers/utils/jsonDumps.js.map +1 -1
- package/dist/chatWrappers/utils/resolveChatWrapper.d.ts +30 -6
- package/dist/chatWrappers/utils/resolveChatWrapper.js +71 -25
- package/dist/chatWrappers/utils/resolveChatWrapper.js.map +1 -1
- package/dist/cli/cli.js +2 -6
- package/dist/cli/cli.js.map +1 -1
- package/dist/cli/commands/ChatCommand.d.ts +2 -1
- package/dist/cli/commands/ChatCommand.js +83 -53
- package/dist/cli/commands/ChatCommand.js.map +1 -1
- package/dist/cli/commands/CompleteCommand.d.ts +2 -1
- package/dist/cli/commands/CompleteCommand.js +58 -30
- package/dist/cli/commands/CompleteCommand.js.map +1 -1
- package/dist/cli/commands/DebugCommand.js +1 -1
- package/dist/cli/commands/DebugCommand.js.map +1 -1
- package/dist/cli/commands/InfillCommand.d.ts +2 -1
- package/dist/cli/commands/InfillCommand.js +58 -30
- package/dist/cli/commands/InfillCommand.js.map +1 -1
- package/dist/cli/commands/InitCommand.js +1 -1
- package/dist/cli/commands/PullCommand.d.ts +2 -1
- package/dist/cli/commands/PullCommand.js +85 -44
- package/dist/cli/commands/PullCommand.js.map +1 -1
- package/dist/cli/commands/inspect/InspectCommand.js +5 -3
- package/dist/cli/commands/inspect/InspectCommand.js.map +1 -1
- package/dist/cli/commands/inspect/commands/InspectEstimateCommand.d.ts +12 -0
- package/dist/cli/commands/inspect/commands/InspectEstimateCommand.js +225 -0
- package/dist/cli/commands/inspect/commands/InspectEstimateCommand.js.map +1 -0
- package/dist/cli/commands/inspect/commands/InspectGgufCommand.js +17 -4
- package/dist/cli/commands/inspect/commands/InspectGgufCommand.js.map +1 -1
- package/dist/cli/commands/inspect/commands/InspectGpuCommand.js +31 -9
- package/dist/cli/commands/inspect/commands/InspectGpuCommand.js.map +1 -1
- package/dist/cli/commands/inspect/commands/InspectMeasureCommand.js +7 -4
- package/dist/cli/commands/inspect/commands/InspectMeasureCommand.js.map +1 -1
- package/dist/cli/commands/source/SourceCommand.d.ts +4 -0
- package/dist/cli/commands/source/SourceCommand.js +19 -0
- package/dist/cli/commands/source/SourceCommand.js.map +1 -0
- package/dist/cli/commands/{BuildCommand.d.ts → source/commands/BuildCommand.d.ts} +1 -2
- package/dist/cli/commands/{BuildCommand.js → source/commands/BuildCommand.js} +21 -19
- package/dist/cli/commands/source/commands/BuildCommand.js.map +1 -0
- package/dist/cli/commands/{ClearCommand.js → source/commands/ClearCommand.js} +6 -6
- package/dist/cli/commands/source/commands/ClearCommand.js.map +1 -0
- package/dist/cli/commands/{DownloadCommand.d.ts → source/commands/DownloadCommand.d.ts} +1 -2
- package/dist/cli/commands/{DownloadCommand.js → source/commands/DownloadCommand.js} +26 -22
- package/dist/cli/commands/source/commands/DownloadCommand.js.map +1 -0
- package/dist/cli/recommendedModels.js +192 -23
- package/dist/cli/recommendedModels.js.map +1 -1
- package/dist/cli/utils/ConsoleInteraction.d.ts +0 -1
- package/dist/cli/utils/ConsoleTable.js.map +1 -1
- package/dist/cli/utils/basicChooseFromListConsoleInteraction.js.map +1 -1
- package/dist/cli/utils/interactivelyAskForModel.js +6 -17
- package/dist/cli/utils/interactivelyAskForModel.js.map +1 -1
- package/dist/cli/utils/printCommonInfoLines.js +3 -0
- package/dist/cli/utils/printCommonInfoLines.js.map +1 -1
- package/dist/cli/utils/renderModelCompatibilityPercentageWithColors.d.ts +6 -0
- package/dist/cli/utils/renderModelCompatibilityPercentageWithColors.js +14 -0
- package/dist/cli/utils/renderModelCompatibilityPercentageWithColors.js.map +1 -0
- package/dist/cli/utils/resolveModelRecommendationFileOptions.d.ts +1 -1
- package/dist/cli/utils/withCliCommandDescriptionDocsUrl.js.map +1 -1
- package/dist/commands.d.ts +3 -3
- package/dist/commands.js +3 -3
- package/dist/commands.js.map +1 -1
- package/dist/config.d.ts +7 -3
- package/dist/config.js +10 -6
- package/dist/config.js.map +1 -1
- package/dist/evaluator/LlamaChat/LlamaChat.d.ts +17 -2
- package/dist/evaluator/LlamaChat/LlamaChat.js +24 -12
- package/dist/evaluator/LlamaChat/LlamaChat.js.map +1 -1
- package/dist/evaluator/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.js +3 -1
- package/dist/evaluator/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.js.map +1 -1
- package/dist/evaluator/LlamaChatSession/LlamaChatSession.d.ts +21 -13
- package/dist/evaluator/LlamaChatSession/LlamaChatSession.js +15 -14
- package/dist/evaluator/LlamaChatSession/LlamaChatSession.js.map +1 -1
- package/dist/evaluator/LlamaChatSession/utils/LlamaChatSessionPromptCompletionEngine.d.ts +1 -0
- package/dist/evaluator/LlamaChatSession/utils/LlamaChatSessionPromptCompletionEngine.js.map +1 -1
- package/dist/evaluator/LlamaChatSession/utils/defineChatSessionFunction.d.ts +3 -0
- package/dist/evaluator/LlamaChatSession/utils/defineChatSessionFunction.js +3 -0
- package/dist/evaluator/LlamaChatSession/utils/defineChatSessionFunction.js.map +1 -1
- package/dist/evaluator/LlamaCompletion.d.ts +18 -4
- package/dist/evaluator/LlamaCompletion.js +51 -22
- package/dist/evaluator/LlamaCompletion.js.map +1 -1
- package/dist/evaluator/LlamaContext/LlamaContext.d.ts +21 -0
- package/dist/evaluator/LlamaContext/LlamaContext.js +256 -133
- package/dist/evaluator/LlamaContext/LlamaContext.js.map +1 -1
- package/dist/evaluator/LlamaContext/LlamaSampler.d.ts +1 -0
- package/dist/evaluator/LlamaContext/LlamaSampler.js +31 -0
- package/dist/evaluator/LlamaContext/LlamaSampler.js.map +1 -0
- package/dist/evaluator/LlamaContext/types.d.ts +71 -9
- package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies/maximumParallelismStrategy.js.map +1 -1
- package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizationStrategy.js +1 -1
- package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizationStrategy.js.map +1 -1
- package/dist/evaluator/LlamaEmbedding.d.ts +21 -0
- package/dist/evaluator/LlamaEmbedding.js +53 -0
- package/dist/evaluator/LlamaEmbedding.js.map +1 -0
- package/dist/evaluator/LlamaEmbeddingContext.d.ts +1 -5
- package/dist/evaluator/LlamaEmbeddingContext.js +6 -8
- package/dist/evaluator/LlamaEmbeddingContext.js.map +1 -1
- package/dist/evaluator/LlamaGrammar.d.ts +9 -10
- package/dist/evaluator/LlamaGrammar.js +10 -5
- package/dist/evaluator/LlamaGrammar.js.map +1 -1
- package/dist/evaluator/LlamaGrammarEvaluationState.d.ts +7 -3
- package/dist/evaluator/LlamaGrammarEvaluationState.js +8 -4
- package/dist/evaluator/LlamaGrammarEvaluationState.js.map +1 -1
- package/dist/evaluator/LlamaJsonSchemaGrammar.d.ts +3 -0
- package/dist/evaluator/LlamaJsonSchemaGrammar.js +3 -0
- package/dist/evaluator/LlamaJsonSchemaGrammar.js.map +1 -1
- package/dist/evaluator/LlamaModel/LlamaModel.d.ts +28 -15
- package/dist/evaluator/LlamaModel/LlamaModel.js +66 -51
- package/dist/evaluator/LlamaModel/LlamaModel.js.map +1 -1
- package/dist/evaluator/LlamaModel/utils/TokenAttributes.d.ts +10 -10
- package/dist/evaluator/LlamaModel/utils/TokenAttributes.js +10 -10
- package/dist/evaluator/LlamaModel/utils/TokenAttributes.js.map +1 -1
- package/dist/evaluator/TokenBias.d.ts +20 -8
- package/dist/evaluator/TokenBias.js +44 -12
- package/dist/evaluator/TokenBias.js.map +1 -1
- package/dist/evaluator/TokenMeter.d.ts +3 -12
- package/dist/evaluator/TokenMeter.js +4 -16
- package/dist/evaluator/TokenMeter.js.map +1 -1
- package/dist/gguf/fileReaders/GgufFileReader.d.ts +0 -1
- package/dist/gguf/fileReaders/GgufFileReader.js.map +1 -1
- package/dist/gguf/fileReaders/GgufFsFileReader.d.ts +0 -2
- package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.d.ts +5 -3
- package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.js +26 -13
- package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.js.map +1 -1
- package/dist/gguf/insights/GgufInsightsConfigurationResolver.d.ts +57 -1
- package/dist/gguf/insights/GgufInsightsConfigurationResolver.js +86 -4
- package/dist/gguf/insights/GgufInsightsConfigurationResolver.js.map +1 -1
- package/dist/gguf/insights/utils/scoreLevels.js.map +1 -1
- package/dist/gguf/readGgufFileInfo.d.ts +18 -6
- package/dist/gguf/readGgufFileInfo.js +8 -3
- package/dist/gguf/readGgufFileInfo.js.map +1 -1
- package/dist/gguf/types/GgufMetadataTypes.d.ts +18 -2
- package/dist/gguf/types/GgufMetadataTypes.js +16 -1
- package/dist/gguf/types/GgufMetadataTypes.js.map +1 -1
- package/dist/gguf/utils/convertMetadataKeyValueRecordToNestedObject.js +2 -0
- package/dist/gguf/utils/convertMetadataKeyValueRecordToNestedObject.js.map +1 -1
- package/dist/gguf/utils/getGgufFileTypeName.d.ts +1 -1
- package/dist/gguf/utils/resolveBinarySplitGgufPartUrls.js +1 -1
- package/dist/gguf/utils/resolveBinarySplitGgufPartUrls.js.map +1 -1
- package/dist/index.d.ts +8 -4
- package/dist/index.js +5 -3
- package/dist/index.js.map +1 -1
- package/dist/tsconfig.tsbuildinfo +1 -0
- package/dist/types.d.ts +1 -0
- package/dist/types.js.map +1 -1
- package/dist/utils/LlamaText.d.ts +3 -0
- package/dist/utils/LlamaText.js +7 -4
- package/dist/utils/LlamaText.js.map +1 -1
- package/dist/utils/LruCache.d.ts +2 -2
- package/dist/utils/LruCache.js.map +1 -1
- package/dist/utils/OverridesObject.d.ts +7 -0
- package/dist/utils/OverridesObject.js +2 -0
- package/dist/utils/OverridesObject.js.map +1 -0
- package/dist/utils/StopGenerationDetector.js.map +1 -1
- package/dist/utils/ThreadsSplitter.d.ts +26 -0
- package/dist/utils/ThreadsSplitter.js +164 -0
- package/dist/utils/ThreadsSplitter.js.map +1 -0
- package/dist/utils/TokenStreamRegulator.js.map +1 -1
- package/dist/utils/appendUserMessageToChatHistory.d.ts +4 -0
- package/dist/utils/appendUserMessageToChatHistory.js +4 -0
- package/dist/utils/appendUserMessageToChatHistory.js.map +1 -1
- package/dist/utils/compareTokens.d.ts +1 -1
- package/dist/utils/compareTokens.js.map +1 -1
- package/dist/utils/createModelDownloader.d.ts +94 -6
- package/dist/utils/createModelDownloader.js +174 -46
- package/dist/utils/createModelDownloader.js.map +1 -1
- package/dist/utils/gbnfJson/terminals/GbnfOr.js.map +1 -1
- package/dist/utils/gbnfJson/utils/validateObjectAgainstGbnfSchema.js +1 -1
- package/dist/utils/gbnfJson/utils/validateObjectAgainstGbnfSchema.js.map +1 -1
- package/dist/utils/getGrammarsFolder.js +1 -1
- package/dist/utils/getGrammarsFolder.js.map +1 -1
- package/dist/utils/gitReleaseBundles.js.map +1 -1
- package/dist/utils/modelFileAccesTokens.d.ts +4 -0
- package/dist/utils/modelFileAccesTokens.js +40 -0
- package/dist/utils/modelFileAccesTokens.js.map +1 -0
- package/dist/utils/parseModelFileName.js.map +1 -1
- package/dist/utils/parseTextTemplate.js.map +1 -1
- package/dist/utils/resolveGithubRelease.d.ts +1 -1
- package/dist/utils/resolveLastTokens.js.map +1 -1
- package/dist/utils/spawnCommand.d.ts +0 -1
- package/dist/utils/truncateTextAndRoundToWords.js +3 -1
- package/dist/utils/truncateTextAndRoundToWords.js.map +1 -1
- package/dist/utils/withOra.js +1 -1
- package/dist/utils/withOra.js.map +1 -1
- package/dist/utils/withProgressLog.d.ts +0 -1
- package/dist/utils/wrapAbortSignal.d.ts +0 -1
- package/llama/CMakeLists.txt +20 -12
- package/llama/addon/AddonContext.cpp +69 -202
- package/llama/addon/AddonContext.h +4 -5
- package/llama/addon/AddonGrammar.cpp +8 -11
- package/llama/addon/AddonGrammar.h +4 -3
- package/llama/addon/AddonGrammarEvaluationState.cpp +9 -10
- package/llama/addon/AddonGrammarEvaluationState.h +3 -1
- package/llama/addon/AddonModel.cpp +6 -5
- package/llama/addon/AddonSampler.cpp +513 -0
- package/llama/addon/AddonSampler.h +65 -0
- package/llama/addon/RingBuffer.h +109 -0
- package/llama/addon/addon.cpp +7 -0
- package/llama/addon/globals/addonLog.cpp +2 -1
- package/llama/binariesGithubRelease.json +1 -1
- package/llama/gitRelease.bundle +0 -0
- package/llama/grammars/README.md +1 -1
- package/llama/llama.cpp.info.json +1 -1
- package/package.json +71 -46
- package/templates/packed/electron-typescript-react.json +1 -1
- package/templates/packed/node-typescript.json +1 -1
- package/bins/linux-arm64/_nlcBuildMetadata.json +0 -1
- package/bins/linux-arm64/libggml.so +0 -0
- package/bins/linux-arm64/libllama.so +0 -0
- package/bins/linux-arm64/llama-addon.node +0 -0
- package/bins/linux-armv7l/_nlcBuildMetadata.json +0 -1
- package/bins/linux-armv7l/libggml.so +0 -0
- package/bins/linux-armv7l/libllama.so +0 -0
- package/bins/linux-armv7l/llama-addon.node +0 -0
- package/bins/linux-x64/_nlcBuildMetadata.json +0 -1
- package/bins/linux-x64/libggml.so +0 -0
- package/bins/linux-x64/libllama.so +0 -0
- package/bins/linux-x64/llama-addon.node +0 -0
- package/bins/linux-x64-vulkan/_nlcBuildMetadata.json +0 -1
- package/bins/linux-x64-vulkan/libggml.so +0 -0
- package/bins/linux-x64-vulkan/libllama.so +0 -0
- package/bins/linux-x64-vulkan/llama-addon.node +0 -0
- package/bins/linux-x64-vulkan/vulkan-shaders-gen +0 -0
- package/bins/mac-arm64-metal/_nlcBuildMetadata.json +0 -1
- package/bins/mac-arm64-metal/ggml-common.h +0 -1833
- package/bins/mac-arm64-metal/ggml-metal.metal +0 -6168
- package/bins/mac-arm64-metal/libggml.dylib +0 -0
- package/bins/mac-arm64-metal/libllama.dylib +0 -0
- package/bins/mac-arm64-metal/llama-addon.node +0 -0
- package/bins/mac-x64/_nlcBuildMetadata.json +0 -1
- package/bins/mac-x64/libggml.dylib +0 -0
- package/bins/mac-x64/libllama.dylib +0 -0
- package/bins/mac-x64/llama-addon.node +0 -0
- package/bins/win-arm64/_nlcBuildMetadata.json +0 -1
- package/bins/win-arm64/ggml.dll +0 -0
- package/bins/win-arm64/llama-addon.exp +0 -0
- package/bins/win-arm64/llama-addon.lib +0 -0
- package/bins/win-arm64/llama-addon.node +0 -0
- package/bins/win-arm64/llama.dll +0 -0
- package/bins/win-x64/_nlcBuildMetadata.json +0 -1
- package/bins/win-x64/ggml.dll +0 -0
- package/bins/win-x64/llama-addon.exp +0 -0
- package/bins/win-x64/llama-addon.lib +0 -0
- package/bins/win-x64/llama-addon.node +0 -0
- package/bins/win-x64/llama.dll +0 -0
- package/bins/win-x64-vulkan/_nlcBuildMetadata.json +0 -1
- package/bins/win-x64-vulkan/ggml.dll +0 -0
- package/bins/win-x64-vulkan/llama-addon.exp +0 -0
- package/bins/win-x64-vulkan/llama-addon.lib +0 -0
- package/bins/win-x64-vulkan/llama-addon.node +0 -0
- package/bins/win-x64-vulkan/llama.dll +0 -0
- package/bins/win-x64-vulkan/vulkan-shaders-gen.exe +0 -0
- package/dist/cli/commands/BuildCommand.js.map +0 -1
- package/dist/cli/commands/ClearCommand.js.map +0 -1
- package/dist/cli/commands/DownloadCommand.js.map +0 -1
- package/dist/utils/DeepPartialObject.d.ts +0 -3
- package/dist/utils/DeepPartialObject.js +0 -2
- package/dist/utils/DeepPartialObject.js.map +0 -1
- /package/dist/cli/commands/{ClearCommand.d.ts → source/commands/ClearCommand.d.ts} +0 -0
package/llama/CMakeLists.txt
CHANGED
|
@@ -29,6 +29,10 @@ include_directories("gpuInfo")
|
|
|
29
29
|
include_directories("llama.cpp")
|
|
30
30
|
include_directories("./llama.cpp/common")
|
|
31
31
|
|
|
32
|
+
unset(GPU_INFO_HEADERS)
|
|
33
|
+
unset(GPU_INFO_SOURCES)
|
|
34
|
+
unset(GPU_INFO_EXTRA_LIBS)
|
|
35
|
+
|
|
32
36
|
if (GGML_CUDA)
|
|
33
37
|
cmake_minimum_required(VERSION 3.17)
|
|
34
38
|
|
|
@@ -38,18 +42,18 @@ if (GGML_CUDA)
|
|
|
38
42
|
|
|
39
43
|
enable_language(CUDA)
|
|
40
44
|
|
|
41
|
-
|
|
42
|
-
|
|
45
|
+
list(APPEND GPU_INFO_HEADERS gpuInfo/cuda-gpu-info.h)
|
|
46
|
+
list(APPEND GPU_INFO_SOURCES gpuInfo/cuda-gpu-info.cu)
|
|
43
47
|
|
|
44
48
|
add_compile_definitions(GPU_INFO_USE_CUDA)
|
|
45
49
|
|
|
46
50
|
if (GGML_STATIC)
|
|
47
|
-
|
|
51
|
+
list(APPEND GPU_INFO_EXTRA_LIBS CUDA::cudart_static)
|
|
48
52
|
else()
|
|
49
|
-
|
|
53
|
+
list(APPEND GPU_INFO_EXTRA_LIBS CUDA::cudart)
|
|
50
54
|
endif()
|
|
51
55
|
|
|
52
|
-
|
|
56
|
+
list(APPEND GPU_INFO_EXTRA_LIBS CUDA::cuda_driver)
|
|
53
57
|
|
|
54
58
|
if (NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
|
|
55
59
|
# copied from llama.cpp/CMakeLists.txt under "if (NOT DEFINED CMAKE_CUDA_ARCHITECTURES)"
|
|
@@ -73,12 +77,12 @@ if (GGML_VULKAN OR GGML_KOMPUTE)
|
|
|
73
77
|
message(STATUS "Using Vulkan for GPU info because Kompute is enabled")
|
|
74
78
|
endif()
|
|
75
79
|
|
|
76
|
-
|
|
77
|
-
|
|
80
|
+
list(APPEND GPU_INFO_HEADERS gpuInfo/vulkan-gpu-info.h)
|
|
81
|
+
list(APPEND GPU_INFO_SOURCES gpuInfo/vulkan-gpu-info.cpp)
|
|
78
82
|
|
|
79
83
|
add_compile_definitions(GPU_INFO_USE_VULKAN)
|
|
80
84
|
|
|
81
|
-
|
|
85
|
+
list(APPEND GPU_INFO_EXTRA_LIBS Vulkan::Vulkan)
|
|
82
86
|
else()
|
|
83
87
|
message(FATAL_ERROR "Vulkan was not found")
|
|
84
88
|
endif()
|
|
@@ -105,7 +109,7 @@ if (GGML_HIPBLAS)
|
|
|
105
109
|
set_source_files_properties(gpuInfo/cuda-gpu-info.cu PROPERTIES LANGUAGE CXX)
|
|
106
110
|
target_link_libraries(gpu-info-rocm PRIVATE hip::device PUBLIC hip::host roc::rocblas roc::hipblas)
|
|
107
111
|
|
|
108
|
-
|
|
112
|
+
list(APPEND GPU_INFO_EXTRA_LIBS gpu-info-rocm)
|
|
109
113
|
else()
|
|
110
114
|
message(FATAL_ERROR "hipBLAS or HIP was not found. Try setting CMAKE_PREFIX_PATH=/opt/rocm")
|
|
111
115
|
endif()
|
|
@@ -117,18 +121,22 @@ if (GGML_METAL)
|
|
|
117
121
|
find_library(METALKIT_FRAMEWORK MetalKit REQUIRED)
|
|
118
122
|
|
|
119
123
|
message(STATUS "Using Metal for GPU info")
|
|
120
|
-
|
|
121
|
-
|
|
124
|
+
list(APPEND GPU_INFO_HEADERS gpuInfo/metal-gpu-info.h)
|
|
125
|
+
list(APPEND GPU_INFO_SOURCES gpuInfo/metal-gpu-info.mm)
|
|
122
126
|
|
|
123
127
|
add_compile_definitions(GPU_INFO_USE_METAL)
|
|
124
128
|
|
|
125
|
-
|
|
129
|
+
list(APPEND GPU_INFO_EXTRA_LIBS
|
|
126
130
|
${FOUNDATION_LIBRARY}
|
|
127
131
|
${METAL_FRAMEWORK}
|
|
128
132
|
${METALKIT_FRAMEWORK}
|
|
129
133
|
)
|
|
130
134
|
endif()
|
|
131
135
|
|
|
136
|
+
list(REMOVE_DUPLICATES GPU_INFO_HEADERS)
|
|
137
|
+
list(REMOVE_DUPLICATES GPU_INFO_SOURCES)
|
|
138
|
+
list(REMOVE_DUPLICATES GPU_INFO_EXTRA_LIBS)
|
|
139
|
+
|
|
132
140
|
file(GLOB SOURCE_FILES "addon/*.cpp" "addon/**/*.cpp" ${GPU_INFO_SOURCES})
|
|
133
141
|
|
|
134
142
|
if(APPLE)
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
#include <thread>
|
|
2
2
|
#include <algorithm>
|
|
3
|
-
#include "common.h"
|
|
3
|
+
#include "common/common.h"
|
|
4
|
+
#include "llama-grammar.h"
|
|
4
5
|
#include "llama.h"
|
|
5
6
|
|
|
6
7
|
#include "addonGlobals.h"
|
|
@@ -188,21 +189,10 @@ class AddonContextUnloadContextWorker : public Napi::AsyncWorker {
|
|
|
188
189
|
class AddonContextSampleTokenWorker : public Napi::AsyncWorker {
|
|
189
190
|
public:
|
|
190
191
|
AddonContext* ctx;
|
|
191
|
-
|
|
192
|
+
AddonSampler* sampler;
|
|
192
193
|
int32_t batchLogitIndex;
|
|
193
|
-
bool use_grammar = false;
|
|
194
194
|
llama_token result;
|
|
195
|
-
|
|
196
|
-
float min_p = 0;
|
|
197
|
-
int32_t top_k = 40;
|
|
198
|
-
float top_p = 0.95f;
|
|
199
|
-
float repeat_penalty = 1.10f; // 1.0 = disabled
|
|
200
|
-
float repeat_penalty_presence_penalty = 0.00f; // 0.0 = disabled
|
|
201
|
-
float repeat_penalty_frequency_penalty = 0.00f; // 0.0 = disabled
|
|
202
|
-
std::vector<llama_token> repeat_penalty_tokens;
|
|
203
|
-
std::unordered_map<llama_token, float> tokenBiases;
|
|
204
|
-
bool useTokenBiases = false;
|
|
205
|
-
bool use_repeat_penalty = false;
|
|
195
|
+
bool no_output = false;
|
|
206
196
|
|
|
207
197
|
AddonContextSampleTokenWorker(const Napi::CallbackInfo& info, AddonContext* ctx)
|
|
208
198
|
: Napi::AsyncWorker(info.Env(), "AddonContextSampleTokenWorker"),
|
|
@@ -211,77 +201,12 @@ class AddonContextSampleTokenWorker : public Napi::AsyncWorker {
|
|
|
211
201
|
ctx->Ref();
|
|
212
202
|
|
|
213
203
|
batchLogitIndex = info[0].As<Napi::Number>().Int32Value();
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
Napi::Object options = info[1].As<Napi::Object>();
|
|
217
|
-
|
|
218
|
-
if (options.Has("temperature")) {
|
|
219
|
-
temperature = options.Get("temperature").As<Napi::Number>().FloatValue();
|
|
220
|
-
}
|
|
221
|
-
|
|
222
|
-
if (options.Has("minP")) {
|
|
223
|
-
min_p = options.Get("minP").As<Napi::Number>().FloatValue();
|
|
224
|
-
}
|
|
225
|
-
|
|
226
|
-
if (options.Has("topK")) {
|
|
227
|
-
top_k = options.Get("topK").As<Napi::Number>().Int32Value();
|
|
228
|
-
}
|
|
229
|
-
|
|
230
|
-
if (options.Has("topP")) {
|
|
231
|
-
top_p = options.Get("topP").As<Napi::Number>().FloatValue();
|
|
232
|
-
}
|
|
233
|
-
|
|
234
|
-
if (options.Has("repeatPenalty")) {
|
|
235
|
-
repeat_penalty = options.Get("repeatPenalty").As<Napi::Number>().FloatValue();
|
|
236
|
-
}
|
|
237
|
-
|
|
238
|
-
if (options.Has("repeatPenaltyTokens")) {
|
|
239
|
-
Napi::Uint32Array repeat_penalty_tokens_uint32_array = options.Get("repeatPenaltyTokens").As<Napi::Uint32Array>();
|
|
240
|
-
|
|
241
|
-
repeat_penalty_tokens.reserve(repeat_penalty_tokens_uint32_array.ElementLength());
|
|
242
|
-
for (size_t i = 0; i < repeat_penalty_tokens_uint32_array.ElementLength(); i++) {
|
|
243
|
-
repeat_penalty_tokens.push_back(static_cast<llama_token>(repeat_penalty_tokens_uint32_array[i]));
|
|
244
|
-
}
|
|
245
|
-
|
|
246
|
-
use_repeat_penalty = true;
|
|
247
|
-
}
|
|
248
|
-
|
|
249
|
-
if (options.Has("tokenBiasKeys") && options.Has("tokenBiasValues")) {
|
|
250
|
-
Napi::Uint32Array tokenBiasKeys = options.Get("tokenBiasKeys").As<Napi::Uint32Array>();
|
|
251
|
-
Napi::Float32Array tokenBiasValues = options.Get("tokenBiasValues").As<Napi::Float32Array>();
|
|
252
|
-
|
|
253
|
-
if (tokenBiasKeys.ElementLength() == tokenBiasValues.ElementLength()) {
|
|
254
|
-
for (size_t i = 0; i < tokenBiasKeys.ElementLength(); i++) {
|
|
255
|
-
tokenBiases[static_cast<llama_token>(tokenBiasKeys[i])] = tokenBiasValues[i];
|
|
256
|
-
}
|
|
257
|
-
|
|
258
|
-
useTokenBiases = true;
|
|
259
|
-
}
|
|
260
|
-
}
|
|
261
|
-
|
|
262
|
-
if (options.Has("repeatPenaltyPresencePenalty")) {
|
|
263
|
-
repeat_penalty_presence_penalty = options.Get("repeatPenaltyPresencePenalty").As<Napi::Number>().FloatValue();
|
|
264
|
-
}
|
|
265
|
-
|
|
266
|
-
if (options.Has("repeatPenaltyFrequencyPenalty")) {
|
|
267
|
-
repeat_penalty_frequency_penalty = options.Get("repeatPenaltyFrequencyPenalty").As<Napi::Number>().FloatValue();
|
|
268
|
-
}
|
|
269
|
-
|
|
270
|
-
if (options.Has("grammarEvaluationState")) {
|
|
271
|
-
grammar_evaluation_state =
|
|
272
|
-
Napi::ObjectWrap<AddonGrammarEvaluationState>::Unwrap(options.Get("grammarEvaluationState").As<Napi::Object>());
|
|
273
|
-
grammar_evaluation_state->Ref();
|
|
274
|
-
use_grammar = true;
|
|
275
|
-
}
|
|
276
|
-
}
|
|
204
|
+
sampler = Napi::ObjectWrap<AddonSampler>::Unwrap(info[1].As<Napi::Object>());
|
|
205
|
+
sampler->Ref();
|
|
277
206
|
}
|
|
278
207
|
~AddonContextSampleTokenWorker() {
|
|
279
208
|
ctx->Unref();
|
|
280
|
-
|
|
281
|
-
if (use_grammar) {
|
|
282
|
-
grammar_evaluation_state->Unref();
|
|
283
|
-
use_grammar = false;
|
|
284
|
-
}
|
|
209
|
+
sampler->Unref();
|
|
285
210
|
}
|
|
286
211
|
|
|
287
212
|
Napi::Promise GetPromise() {
|
|
@@ -302,93 +227,46 @@ class AddonContextSampleTokenWorker : public Napi::AsyncWorker {
|
|
|
302
227
|
}
|
|
303
228
|
|
|
304
229
|
void SampleToken() {
|
|
305
|
-
llama_token new_token_id = 0;
|
|
306
|
-
|
|
307
|
-
// Select the best prediction.
|
|
308
230
|
if (llama_get_logits(ctx->ctx) == nullptr) {
|
|
309
231
|
SetError("This model does not support token generation");
|
|
310
232
|
return;
|
|
311
233
|
}
|
|
312
234
|
|
|
313
|
-
|
|
314
|
-
auto n_vocab = llama_n_vocab(ctx->model->model);
|
|
235
|
+
sampler->rebuildChainIfNeeded();
|
|
315
236
|
|
|
316
|
-
|
|
317
|
-
|
|
237
|
+
const auto * logits = llama_get_logits_ith(ctx->ctx, batchLogitIndex);
|
|
238
|
+
const int n_vocab = llama_n_vocab(ctx->model->model);
|
|
318
239
|
|
|
240
|
+
auto & candidates = sampler->tokenCandidates;
|
|
319
241
|
for (llama_token token_id = 0; token_id < n_vocab; token_id++) {
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
if (useTokenBiases) {
|
|
323
|
-
bool hasTokenBias = tokenBiases.find(token_id) != tokenBiases.end();
|
|
324
|
-
if (hasTokenBias) {
|
|
325
|
-
auto logitBias = tokenBiases.at(token_id);
|
|
326
|
-
if (logitBias == -INFINITY || logitBias < -INFINITY) {
|
|
327
|
-
if (!llama_token_is_eog(ctx->model->model, token_id)) {
|
|
328
|
-
logit = -INFINITY;
|
|
329
|
-
}
|
|
330
|
-
} else {
|
|
331
|
-
logit += logitBias;
|
|
332
|
-
}
|
|
333
|
-
}
|
|
334
|
-
}
|
|
335
|
-
|
|
336
|
-
candidates.emplace_back(llama_token_data { token_id, logit, 0.0f });
|
|
242
|
+
candidates[token_id] = llama_token_data{token_id, logits[token_id], 0.0f};;
|
|
337
243
|
}
|
|
338
244
|
|
|
339
|
-
llama_token_data_array
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
repeat_penalty_tokens.data(),
|
|
346
|
-
repeat_penalty_tokens.size(),
|
|
347
|
-
repeat_penalty,
|
|
348
|
-
repeat_penalty_frequency_penalty,
|
|
349
|
-
repeat_penalty_presence_penalty
|
|
350
|
-
);
|
|
351
|
-
}
|
|
352
|
-
|
|
353
|
-
if (use_grammar && (grammar_evaluation_state)->grammar != nullptr) {
|
|
354
|
-
llama_grammar_sample((grammar_evaluation_state)->grammar, ctx->ctx, &candidates_p);
|
|
355
|
-
|
|
356
|
-
if ((candidates_p.size == 0 || candidates_p.data[0].logit == -INFINITY) && useTokenBiases) {
|
|
357
|
-
// logit biases caused grammar sampling to fail, so sampling again without logit biases
|
|
358
|
-
useTokenBiases = false;
|
|
359
|
-
SampleToken();
|
|
360
|
-
return;
|
|
361
|
-
}
|
|
362
|
-
}
|
|
245
|
+
llama_token_data_array cur_p = {
|
|
246
|
+
/* .data = */ candidates.data(),
|
|
247
|
+
/* .size = */ candidates.size(),
|
|
248
|
+
/* .selected = */ -1,
|
|
249
|
+
/* .sorted = */ false,
|
|
250
|
+
};
|
|
363
251
|
|
|
364
|
-
|
|
365
|
-
new_token_id = llama_sample_token_greedy(ctx->ctx, &candidates_p);
|
|
366
|
-
} else {
|
|
367
|
-
const int32_t resolved_top_k =
|
|
368
|
-
top_k <= 0 ? llama_n_vocab(ctx->model->model) : std::min(top_k, llama_n_vocab(ctx->model->model));
|
|
369
|
-
const int32_t n_probs = 0; // Number of probabilities to keep - 0 = disabled
|
|
370
|
-
const float tfs_z = 1.00f; // Tail free sampling - 1.0 = disabled
|
|
371
|
-
const float typical_p = 1.00f; // Typical probability - 1.0 = disabled
|
|
372
|
-
const float resolved_top_p = top_p; // Top p sampling - 1.0 = disabled
|
|
373
|
-
|
|
374
|
-
// Temperature sampling
|
|
375
|
-
size_t min_keep = std::max(1, n_probs);
|
|
376
|
-
llama_sample_top_k(ctx->ctx, &candidates_p, resolved_top_k, min_keep);
|
|
377
|
-
llama_sample_tail_free(ctx->ctx, &candidates_p, tfs_z, min_keep);
|
|
378
|
-
llama_sample_typical(ctx->ctx, &candidates_p, typical_p, min_keep);
|
|
379
|
-
llama_sample_top_p(ctx->ctx, &candidates_p, resolved_top_p, min_keep);
|
|
380
|
-
llama_sample_min_p(ctx->ctx, &candidates_p, min_p, min_keep);
|
|
381
|
-
llama_sample_temp(ctx->ctx, &candidates_p, temperature);
|
|
382
|
-
new_token_id = llama_sample_token(ctx->ctx, &candidates_p);
|
|
383
|
-
}
|
|
252
|
+
llama_sampler_apply(sampler->chain, &cur_p);
|
|
384
253
|
|
|
385
|
-
if (!
|
|
386
|
-
|
|
254
|
+
if (!(cur_p.selected >= 0 && cur_p.selected < (int32_t)cur_p.size)) {
|
|
255
|
+
no_output = true;
|
|
256
|
+
return;
|
|
387
257
|
}
|
|
388
258
|
|
|
259
|
+
auto new_token_id = cur_p.data[cur_p.selected].id;
|
|
260
|
+
sampler->acceptToken(new_token_id);
|
|
389
261
|
result = new_token_id;
|
|
390
262
|
}
|
|
391
263
|
void OnOK() {
|
|
264
|
+
if (no_output) {
|
|
265
|
+
Napi::Number resultValue = Napi::Number::New(Env(), -1);
|
|
266
|
+
deferred.Resolve(resultValue);
|
|
267
|
+
return;
|
|
268
|
+
}
|
|
269
|
+
|
|
392
270
|
Napi::Number resultValue = Napi::Number::New(Env(), static_cast<uint32_t>(result));
|
|
393
271
|
deferred.Resolve(resultValue);
|
|
394
272
|
}
|
|
@@ -402,20 +280,14 @@ AddonContext::AddonContext(const Napi::CallbackInfo& info) : Napi::ObjectWrap<Ad
|
|
|
402
280
|
model->Ref();
|
|
403
281
|
|
|
404
282
|
context_params = llama_context_default_params();
|
|
405
|
-
context_params.seed = -1;
|
|
406
283
|
context_params.n_ctx = 4096;
|
|
407
|
-
context_params.n_threads =
|
|
284
|
+
context_params.n_threads = std::max(cpu_get_num_math(), 1);
|
|
408
285
|
context_params.n_threads_batch = context_params.n_threads;
|
|
286
|
+
context_params.no_perf = true;
|
|
409
287
|
|
|
410
288
|
if (info.Length() > 1 && info[1].IsObject()) {
|
|
411
289
|
Napi::Object options = info[1].As<Napi::Object>();
|
|
412
290
|
|
|
413
|
-
if (options.Has("noSeed")) {
|
|
414
|
-
context_params.seed = time(NULL);
|
|
415
|
-
} else if (options.Has("seed")) {
|
|
416
|
-
context_params.seed = options.Get("seed").As<Napi::Number>().Uint32Value();
|
|
417
|
-
}
|
|
418
|
-
|
|
419
291
|
if (options.Has("contextSize")) {
|
|
420
292
|
context_params.n_ctx = options.Get("contextSize").As<Napi::Number>().Uint32Value();
|
|
421
293
|
}
|
|
@@ -438,12 +310,16 @@ AddonContext::AddonContext(const Napi::CallbackInfo& info) : Napi::ObjectWrap<Ad
|
|
|
438
310
|
}
|
|
439
311
|
|
|
440
312
|
if (options.Has("threads")) {
|
|
441
|
-
const auto n_threads = options.Get("threads").As<Napi::Number>().
|
|
442
|
-
const auto resolved_n_threads = n_threads == 0 ? std::thread::hardware_concurrency() : n_threads;
|
|
313
|
+
const auto n_threads = options.Get("threads").As<Napi::Number>().Int32Value();
|
|
314
|
+
const auto resolved_n_threads = n_threads == 0 ? std::max((int32_t)std::thread::hardware_concurrency(), context_params.n_threads) : n_threads;
|
|
443
315
|
|
|
444
316
|
context_params.n_threads = resolved_n_threads;
|
|
445
317
|
context_params.n_threads_batch = resolved_n_threads;
|
|
446
318
|
}
|
|
319
|
+
|
|
320
|
+
if (options.Has("performanceTracking")) {
|
|
321
|
+
context_params.no_perf = !(options.Get("performanceTracking").As<Napi::Boolean>().Value());
|
|
322
|
+
}
|
|
447
323
|
}
|
|
448
324
|
}
|
|
449
325
|
AddonContext::~AddonContext() {
|
|
@@ -641,42 +517,6 @@ Napi::Value AddonContext::SampleToken(const Napi::CallbackInfo& info) {
|
|
|
641
517
|
return worker->GetPromise();
|
|
642
518
|
}
|
|
643
519
|
|
|
644
|
-
Napi::Value AddonContext::AcceptGrammarEvaluationStateToken(const Napi::CallbackInfo& info) {
|
|
645
|
-
AddonGrammarEvaluationState* grammar_evaluation_state =
|
|
646
|
-
Napi::ObjectWrap<AddonGrammarEvaluationState>::Unwrap(info[0].As<Napi::Object>());
|
|
647
|
-
llama_token tokenId = info[1].As<Napi::Number>().Int32Value();
|
|
648
|
-
|
|
649
|
-
if ((grammar_evaluation_state)->grammar != nullptr) {
|
|
650
|
-
llama_grammar_accept_token((grammar_evaluation_state)->grammar, ctx, tokenId);
|
|
651
|
-
}
|
|
652
|
-
|
|
653
|
-
return info.Env().Undefined();
|
|
654
|
-
}
|
|
655
|
-
|
|
656
|
-
Napi::Value AddonContext::CanBeNextTokenForGrammarEvaluationState(const Napi::CallbackInfo& info) {
|
|
657
|
-
AddonGrammarEvaluationState* grammar_evaluation_state =
|
|
658
|
-
Napi::ObjectWrap<AddonGrammarEvaluationState>::Unwrap(info[0].As<Napi::Object>());
|
|
659
|
-
llama_token tokenId = info[1].As<Napi::Number>().Int32Value();
|
|
660
|
-
|
|
661
|
-
if ((grammar_evaluation_state)->grammar != nullptr) {
|
|
662
|
-
std::vector<llama_token_data> candidates;
|
|
663
|
-
candidates.reserve(1);
|
|
664
|
-
candidates.emplace_back(llama_token_data { tokenId, 1, 0.0f });
|
|
665
|
-
|
|
666
|
-
llama_token_data_array candidates_p = { candidates.data(), candidates.size(), false };
|
|
667
|
-
|
|
668
|
-
llama_grammar_sample((grammar_evaluation_state)->grammar, ctx, &candidates_p);
|
|
669
|
-
|
|
670
|
-
if (candidates_p.size == 0 || candidates_p.data[0].logit == -INFINITY) {
|
|
671
|
-
return Napi::Boolean::New(info.Env(), false);
|
|
672
|
-
}
|
|
673
|
-
|
|
674
|
-
return Napi::Boolean::New(info.Env(), true);
|
|
675
|
-
}
|
|
676
|
-
|
|
677
|
-
return Napi::Boolean::New(info.Env(), false);
|
|
678
|
-
}
|
|
679
|
-
|
|
680
520
|
Napi::Value AddonContext::GetEmbedding(const Napi::CallbackInfo& info) {
|
|
681
521
|
if (disposed) {
|
|
682
522
|
Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
|
|
@@ -718,9 +558,36 @@ Napi::Value AddonContext::GetStateSize(const Napi::CallbackInfo& info) {
|
|
|
718
558
|
return Napi::Number::From(info.Env(), llama_state_get_size(ctx));
|
|
719
559
|
}
|
|
720
560
|
|
|
561
|
+
Napi::Value AddonContext::GetThreads(const Napi::CallbackInfo& info) {
|
|
562
|
+
if (disposed) {
|
|
563
|
+
Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
|
|
564
|
+
return info.Env().Undefined();
|
|
565
|
+
}
|
|
566
|
+
|
|
567
|
+
return Napi::Number::From(info.Env(), llama_n_threads(ctx));
|
|
568
|
+
}
|
|
569
|
+
|
|
570
|
+
Napi::Value AddonContext::SetThreads(const Napi::CallbackInfo& info) {
|
|
571
|
+
if (disposed) {
|
|
572
|
+
Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
|
|
573
|
+
return info.Env().Undefined();
|
|
574
|
+
}
|
|
575
|
+
|
|
576
|
+
const auto threads = info[0].As<Napi::Number>().Int32Value();
|
|
577
|
+
const auto resolvedThreads = threads == 0
|
|
578
|
+
? std::max((int32_t)std::thread::hardware_concurrency(), std::max(cpu_get_num_math(), 1))
|
|
579
|
+
: threads;
|
|
580
|
+
|
|
581
|
+
if (llama_n_threads(ctx) != resolvedThreads) {
|
|
582
|
+
llama_set_n_threads(ctx, resolvedThreads, resolvedThreads);
|
|
583
|
+
}
|
|
584
|
+
|
|
585
|
+
return info.Env().Undefined();
|
|
586
|
+
}
|
|
587
|
+
|
|
721
588
|
Napi::Value AddonContext::PrintTimings(const Napi::CallbackInfo& info) {
|
|
722
|
-
|
|
723
|
-
|
|
589
|
+
llama_perf_context_print(ctx);
|
|
590
|
+
llama_perf_context_reset(ctx);
|
|
724
591
|
return info.Env().Undefined();
|
|
725
592
|
}
|
|
726
593
|
|
|
@@ -749,10 +616,10 @@ void AddonContext::init(Napi::Object exports) {
|
|
|
749
616
|
InstanceMethod("shiftSequenceTokenCells", &AddonContext::ShiftSequenceTokenCells),
|
|
750
617
|
InstanceMethod("decodeBatch", &AddonContext::DecodeBatch),
|
|
751
618
|
InstanceMethod("sampleToken", &AddonContext::SampleToken),
|
|
752
|
-
InstanceMethod("acceptGrammarEvaluationStateToken", &AddonContext::AcceptGrammarEvaluationStateToken),
|
|
753
|
-
InstanceMethod("canBeNextTokenForGrammarEvaluationState", &AddonContext::CanBeNextTokenForGrammarEvaluationState),
|
|
754
619
|
InstanceMethod("getEmbedding", &AddonContext::GetEmbedding),
|
|
755
620
|
InstanceMethod("getStateSize", &AddonContext::GetStateSize),
|
|
621
|
+
InstanceMethod("getThreads", &AddonContext::GetThreads),
|
|
622
|
+
InstanceMethod("setThreads", &AddonContext::SetThreads),
|
|
756
623
|
InstanceMethod("printTimings", &AddonContext::PrintTimings),
|
|
757
624
|
InstanceMethod("setLora", &AddonContext::SetLora),
|
|
758
625
|
InstanceMethod("dispose", &AddonContext::Dispose),
|
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
#include "llama.h"
|
|
3
3
|
#include "napi.h"
|
|
4
4
|
#include "addonGlobals.h"
|
|
5
|
+
#include "AddonSampler.h"
|
|
5
6
|
|
|
6
7
|
class AddonContext : public Napi::ObjectWrap<AddonContext> {
|
|
7
8
|
public:
|
|
@@ -38,16 +39,14 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
|
|
|
38
39
|
Napi::Value DecodeBatch(const Napi::CallbackInfo& info);
|
|
39
40
|
Napi::Value SampleToken(const Napi::CallbackInfo& info);
|
|
40
41
|
|
|
41
|
-
Napi::Value AcceptGrammarEvaluationStateToken(const Napi::CallbackInfo& info);
|
|
42
|
-
|
|
43
|
-
Napi::Value CanBeNextTokenForGrammarEvaluationState(const Napi::CallbackInfo& info);
|
|
44
|
-
|
|
45
42
|
Napi::Value GetEmbedding(const Napi::CallbackInfo& info);
|
|
46
43
|
Napi::Value GetStateSize(const Napi::CallbackInfo& info);
|
|
44
|
+
Napi::Value GetThreads(const Napi::CallbackInfo& info);
|
|
45
|
+
Napi::Value SetThreads(const Napi::CallbackInfo& info);
|
|
47
46
|
|
|
48
47
|
Napi::Value PrintTimings(const Napi::CallbackInfo& info);
|
|
49
48
|
|
|
50
49
|
Napi::Value SetLora(const Napi::CallbackInfo& info);
|
|
51
50
|
|
|
52
51
|
static void init(Napi::Object exports);
|
|
53
|
-
};
|
|
52
|
+
};
|
|
@@ -2,9 +2,7 @@
|
|
|
2
2
|
#include "AddonGrammar.h"
|
|
3
3
|
|
|
4
4
|
AddonGrammar::AddonGrammar(const Napi::CallbackInfo& info) : Napi::ObjectWrap<AddonGrammar>(info) {
|
|
5
|
-
|
|
6
|
-
std::string grammarCode = info[0].As<Napi::String>().Utf8Value();
|
|
7
|
-
bool should_print_grammar = false;
|
|
5
|
+
grammarCode = info[0].As<Napi::String>().Utf8Value();
|
|
8
6
|
|
|
9
7
|
if (info.Length() > 1 && info[1].IsObject()) {
|
|
10
8
|
Napi::Object options = info[1].As<Napi::Object>();
|
|
@@ -14,21 +12,20 @@ AddonGrammar::AddonGrammar(const Napi::CallbackInfo& info) : Napi::ObjectWrap<Ad
|
|
|
14
12
|
hasAddonExportsRef = true;
|
|
15
13
|
}
|
|
16
14
|
|
|
17
|
-
if (options.Has("
|
|
18
|
-
|
|
15
|
+
if (options.Has("rootRuleName")) {
|
|
16
|
+
rootRuleName = options.Get("rootRuleName").As<Napi::String>().Utf8Value();
|
|
19
17
|
}
|
|
20
18
|
}
|
|
21
19
|
|
|
22
|
-
parsed_grammar =
|
|
23
|
-
|
|
24
|
-
|
|
20
|
+
auto parsed_grammar = llama_grammar_init_impl(nullptr, grammarCode.c_str(), rootRuleName.c_str());
|
|
21
|
+
|
|
22
|
+
// will be empty if there are parse errors
|
|
23
|
+
if (parsed_grammar == nullptr) {
|
|
25
24
|
Napi::Error::New(info.Env(), "Failed to parse grammar").ThrowAsJavaScriptException();
|
|
26
25
|
return;
|
|
27
26
|
}
|
|
28
27
|
|
|
29
|
-
|
|
30
|
-
grammar_parser::print_grammar(stderr, parsed_grammar);
|
|
31
|
-
}
|
|
28
|
+
llama_grammar_free_impl(parsed_grammar);
|
|
32
29
|
}
|
|
33
30
|
AddonGrammar::~AddonGrammar() {
|
|
34
31
|
if (hasAddonExportsRef) {
|
|
@@ -1,13 +1,14 @@
|
|
|
1
1
|
#pragma once
|
|
2
2
|
#include "llama.h"
|
|
3
|
-
#include "common.h"
|
|
4
|
-
#include "
|
|
3
|
+
#include "common/common.h"
|
|
4
|
+
#include "llama-grammar.h"
|
|
5
5
|
#include "napi.h"
|
|
6
6
|
#include "addonGlobals.h"
|
|
7
7
|
|
|
8
8
|
class AddonGrammar : public Napi::ObjectWrap<AddonGrammar> {
|
|
9
9
|
public:
|
|
10
|
-
|
|
10
|
+
std::string grammarCode = "";
|
|
11
|
+
std::string rootRuleName = "root";
|
|
11
12
|
Napi::Reference<Napi::Object> addonExportsRef;
|
|
12
13
|
bool hasAddonExportsRef = false;
|
|
13
14
|
|
|
@@ -1,26 +1,25 @@
|
|
|
1
1
|
#include <sstream>
|
|
2
2
|
#include "addonGlobals.h"
|
|
3
|
-
#include "common.h"
|
|
3
|
+
#include "common/common.h"
|
|
4
4
|
#include "llama.h"
|
|
5
5
|
#include "AddonGrammarEvaluationState.h"
|
|
6
6
|
#include "AddonGrammar.h"
|
|
7
7
|
|
|
8
8
|
AddonGrammarEvaluationState::AddonGrammarEvaluationState(const Napi::CallbackInfo& info) : Napi::ObjectWrap<AddonGrammarEvaluationState>(info) {
|
|
9
|
-
|
|
9
|
+
model = Napi::ObjectWrap<AddonModel>::Unwrap(info[0].As<Napi::Object>());
|
|
10
|
+
model->Ref();
|
|
11
|
+
|
|
12
|
+
grammarDef = Napi::ObjectWrap<AddonGrammar>::Unwrap(info[1].As<Napi::Object>());
|
|
10
13
|
grammarDef->Ref();
|
|
11
14
|
|
|
12
|
-
|
|
13
|
-
grammar = llama_grammar_init(grammar_rules.data(), grammar_rules.size(), grammarDef->parsed_grammar.symbol_ids.at("root"));
|
|
15
|
+
sampler = llama_sampler_init_grammar(model->model, grammarDef->grammarCode.c_str(), grammarDef->rootRuleName.c_str());
|
|
14
16
|
}
|
|
15
17
|
AddonGrammarEvaluationState::~AddonGrammarEvaluationState() {
|
|
18
|
+
llama_sampler_free(sampler);
|
|
16
19
|
grammarDef->Unref();
|
|
17
|
-
|
|
18
|
-
if (grammar != nullptr) {
|
|
19
|
-
llama_grammar_free(grammar);
|
|
20
|
-
grammar = nullptr;
|
|
21
|
-
}
|
|
20
|
+
model->Unref();
|
|
22
21
|
}
|
|
23
22
|
|
|
24
23
|
void AddonGrammarEvaluationState::init(Napi::Object exports) {
|
|
25
24
|
exports.Set("AddonGrammarEvaluationState", DefineClass(exports.Env(), "AddonGrammarEvaluationState", {}));
|
|
26
|
-
}
|
|
25
|
+
}
|
|
@@ -2,11 +2,13 @@
|
|
|
2
2
|
#include "llama.h"
|
|
3
3
|
#include "napi.h"
|
|
4
4
|
#include "addonGlobals.h"
|
|
5
|
+
#include "AddonModel.h"
|
|
5
6
|
|
|
6
7
|
class AddonGrammarEvaluationState : public Napi::ObjectWrap<AddonGrammarEvaluationState> {
|
|
7
8
|
public:
|
|
9
|
+
AddonModel* model;
|
|
8
10
|
AddonGrammar* grammarDef;
|
|
9
|
-
|
|
11
|
+
llama_sampler * sampler = nullptr;
|
|
10
12
|
|
|
11
13
|
AddonGrammarEvaluationState(const Napi::CallbackInfo& info);
|
|
12
14
|
~AddonGrammarEvaluationState();
|
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
#include <sstream>
|
|
2
2
|
#include "addonGlobals.h"
|
|
3
3
|
#include "globals/addonLog.h"
|
|
4
|
-
#include "
|
|
4
|
+
#include "globals/addonProgress.h"
|
|
5
|
+
#include "common/common.h"
|
|
5
6
|
#include "llama.h"
|
|
6
7
|
#include "AddonModel.h"
|
|
7
8
|
#include "AddonModelData.h"
|
|
@@ -538,7 +539,7 @@ Napi::Value AddonModel::PrefixToken(const Napi::CallbackInfo& info) {
|
|
|
538
539
|
return info.Env().Undefined();
|
|
539
540
|
}
|
|
540
541
|
|
|
541
|
-
return
|
|
542
|
+
return getNapiToken(info, model, llama_token_prefix(model));
|
|
542
543
|
}
|
|
543
544
|
Napi::Value AddonModel::MiddleToken(const Napi::CallbackInfo& info) {
|
|
544
545
|
if (disposed) {
|
|
@@ -546,7 +547,7 @@ Napi::Value AddonModel::MiddleToken(const Napi::CallbackInfo& info) {
|
|
|
546
547
|
return info.Env().Undefined();
|
|
547
548
|
}
|
|
548
549
|
|
|
549
|
-
return
|
|
550
|
+
return getNapiToken(info, model, llama_token_middle(model));
|
|
550
551
|
}
|
|
551
552
|
Napi::Value AddonModel::SuffixToken(const Napi::CallbackInfo& info) {
|
|
552
553
|
if (disposed) {
|
|
@@ -554,7 +555,7 @@ Napi::Value AddonModel::SuffixToken(const Napi::CallbackInfo& info) {
|
|
|
554
555
|
return info.Env().Undefined();
|
|
555
556
|
}
|
|
556
557
|
|
|
557
|
-
return
|
|
558
|
+
return getNapiToken(info, model, llama_token_suffix(model));
|
|
558
559
|
}
|
|
559
560
|
Napi::Value AddonModel::EotToken(const Napi::CallbackInfo& info) {
|
|
560
561
|
if (disposed) {
|
|
@@ -562,7 +563,7 @@ Napi::Value AddonModel::EotToken(const Napi::CallbackInfo& info) {
|
|
|
562
563
|
return info.Env().Undefined();
|
|
563
564
|
}
|
|
564
565
|
|
|
565
|
-
return
|
|
566
|
+
return getNapiToken(info, model, llama_token_eot(model));
|
|
566
567
|
}
|
|
567
568
|
Napi::Value AddonModel::GetTokenString(const Napi::CallbackInfo& info) {
|
|
568
569
|
if (disposed) {
|