node-llama-cpp 3.6.0 → 3.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/ChatWrapper.d.ts +3 -5
- package/dist/ChatWrapper.js +20 -13
- package/dist/ChatWrapper.js.map +1 -1
- package/dist/bindings/AddonTypes.d.ts +3 -1
- package/dist/bindings/Llama.js +2 -0
- package/dist/bindings/Llama.js.map +1 -1
- package/dist/bindings/getLlama.d.ts +30 -1
- package/dist/bindings/getLlama.js +32 -8
- package/dist/bindings/getLlama.js.map +1 -1
- package/dist/bindings/utils/compileLLamaCpp.js +7 -0
- package/dist/bindings/utils/compileLLamaCpp.js.map +1 -1
- package/dist/bindings/utils/getLlamaGpuTypes.d.ts +13 -0
- package/dist/bindings/utils/getLlamaGpuTypes.js +30 -0
- package/dist/bindings/utils/getLlamaGpuTypes.js.map +1 -0
- package/dist/bindings/utils/testBindingBinary.js +26 -2
- package/dist/bindings/utils/testBindingBinary.js.map +1 -1
- package/dist/chatWrappers/AlpacaChatWrapper.js.map +1 -1
- package/dist/chatWrappers/DeepSeekChatWrapper.js +6 -6
- package/dist/chatWrappers/DeepSeekChatWrapper.js.map +1 -1
- package/dist/chatWrappers/FalconChatWrapper.js.map +1 -1
- package/dist/chatWrappers/FunctionaryChatWrapper.js +1 -1
- package/dist/chatWrappers/FunctionaryChatWrapper.js.map +1 -1
- package/dist/chatWrappers/GeneralChatWrapper.js.map +1 -1
- package/dist/chatWrappers/Llama2ChatWrapper.js.map +1 -1
- package/dist/chatWrappers/Llama3_1ChatWrapper.js +19 -9
- package/dist/chatWrappers/Llama3_1ChatWrapper.js.map +1 -1
- package/dist/chatWrappers/Llama3_2LightweightChatWrapper.js +21 -10
- package/dist/chatWrappers/Llama3_2LightweightChatWrapper.js.map +1 -1
- package/dist/chatWrappers/MistralChatWrapper.d.ts +2 -1
- package/dist/chatWrappers/MistralChatWrapper.js +39 -28
- package/dist/chatWrappers/MistralChatWrapper.js.map +1 -1
- package/dist/chatWrappers/QwenChatWrapper.d.ts +28 -0
- package/dist/chatWrappers/QwenChatWrapper.js +201 -0
- package/dist/chatWrappers/QwenChatWrapper.js.map +1 -0
- package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.d.ts +32 -3
- package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.js +337 -126
- package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.js.map +1 -1
- package/dist/chatWrappers/generic/utils/UniqueIdGenerator.d.ts +7 -0
- package/dist/chatWrappers/generic/utils/UniqueIdGenerator.js +30 -0
- package/dist/chatWrappers/generic/utils/UniqueIdGenerator.js.map +1 -0
- package/dist/chatWrappers/generic/utils/chatHistoryFunctionCallMessageTemplate.d.ts +5 -4
- package/dist/chatWrappers/generic/utils/extractFunctionCallSettingsFromJinjaTemplate.d.ts +19 -0
- package/dist/chatWrappers/generic/utils/extractFunctionCallSettingsFromJinjaTemplate.js +446 -0
- package/dist/chatWrappers/generic/utils/extractFunctionCallSettingsFromJinjaTemplate.js.map +1 -0
- package/dist/chatWrappers/generic/utils/extractSegmentSettingsFromTokenizerAndChatTemplate.d.ts +2 -0
- package/dist/chatWrappers/generic/utils/extractSegmentSettingsFromTokenizerAndChatTemplate.js +38 -0
- package/dist/chatWrappers/generic/utils/extractSegmentSettingsFromTokenizerAndChatTemplate.js.map +1 -0
- package/dist/chatWrappers/generic/utils/getFirstValidResult.d.ts +6 -0
- package/dist/chatWrappers/generic/utils/getFirstValidResult.js +19 -0
- package/dist/chatWrappers/generic/utils/getFirstValidResult.js.map +1 -0
- package/dist/chatWrappers/generic/utils/squashChatHistoryItems.d.ts +2 -0
- package/dist/chatWrappers/generic/utils/squashChatHistoryItems.js +35 -0
- package/dist/chatWrappers/generic/utils/squashChatHistoryItems.js.map +1 -0
- package/dist/chatWrappers/utils/ChatModelFunctionsDocumentationGenerator.d.ts +3 -0
- package/dist/chatWrappers/utils/ChatModelFunctionsDocumentationGenerator.js +25 -0
- package/dist/chatWrappers/utils/ChatModelFunctionsDocumentationGenerator.js.map +1 -1
- package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.js +197 -30
- package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.js.map +1 -1
- package/dist/chatWrappers/utils/resolveChatWrapper.d.ts +5 -3
- package/dist/chatWrappers/utils/resolveChatWrapper.js +11 -5
- package/dist/chatWrappers/utils/resolveChatWrapper.js.map +1 -1
- package/dist/cli/commands/inspect/commands/InspectGpuCommand.js +1 -0
- package/dist/cli/commands/inspect/commands/InspectGpuCommand.js.map +1 -1
- package/dist/cli/recommendedModels.js +13 -4
- package/dist/cli/recommendedModels.js.map +1 -1
- package/dist/config.d.ts +1 -0
- package/dist/config.js +1 -0
- package/dist/config.js.map +1 -1
- package/dist/evaluator/LlamaChat/LlamaChat.d.ts +44 -0
- package/dist/evaluator/LlamaChat/LlamaChat.js +31 -6
- package/dist/evaluator/LlamaChat/LlamaChat.js.map +1 -1
- package/dist/evaluator/LlamaChatSession/LlamaChatSession.d.ts +19 -2
- package/dist/evaluator/LlamaChatSession/LlamaChatSession.js +16 -3
- package/dist/evaluator/LlamaChatSession/LlamaChatSession.js.map +1 -1
- package/dist/evaluator/LlamaChatSession/utils/defineChatSessionFunction.d.ts +1 -8
- package/dist/evaluator/LlamaContext/LlamaContext.d.ts +25 -1
- package/dist/evaluator/LlamaContext/LlamaContext.js +59 -1
- package/dist/evaluator/LlamaContext/LlamaContext.js.map +1 -1
- package/dist/evaluator/LlamaGrammar.js +3 -1
- package/dist/evaluator/LlamaGrammar.js.map +1 -1
- package/dist/evaluator/LlamaModel/LlamaModel.d.ts +2 -0
- package/dist/evaluator/LlamaModel/LlamaModel.js +3 -0
- package/dist/evaluator/LlamaModel/LlamaModel.js.map +1 -1
- package/dist/evaluator/LlamaRankingContext.js +1 -1
- package/dist/evaluator/LlamaRankingContext.js.map +1 -1
- package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.d.ts +5 -2
- package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.js +5 -3
- package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.js.map +1 -1
- package/dist/gguf/insights/GgufInsights.js +24 -10
- package/dist/gguf/insights/GgufInsights.js.map +1 -1
- package/dist/gguf/insights/utils/resolveModelGpuLayersOption.js +57 -13
- package/dist/gguf/insights/utils/resolveModelGpuLayersOption.js.map +1 -1
- package/dist/gguf/parser/GgufV2Parser.js +24 -17
- package/dist/gguf/parser/GgufV2Parser.js.map +1 -1
- package/dist/gguf/readGgufFileInfo.d.ts +11 -2
- package/dist/gguf/readGgufFileInfo.js +6 -5
- package/dist/gguf/readGgufFileInfo.js.map +1 -1
- package/dist/gguf/types/GgufMetadataTypes.d.ts +23 -2
- package/dist/gguf/types/GgufMetadataTypes.js +17 -0
- package/dist/gguf/types/GgufMetadataTypes.js.map +1 -1
- package/dist/gguf/utils/normalizeGgufDownloadUrl.d.ts +2 -1
- package/dist/gguf/utils/normalizeGgufDownloadUrl.js +4 -2
- package/dist/gguf/utils/normalizeGgufDownloadUrl.js.map +1 -1
- package/dist/index.d.ts +5 -3
- package/dist/index.js +3 -1
- package/dist/index.js.map +1 -1
- package/dist/tsconfig.tsbuildinfo +1 -1
- package/dist/types.d.ts +8 -0
- package/dist/types.js.map +1 -1
- package/dist/utils/OpenAIFormat.d.ts +177 -0
- package/dist/utils/OpenAIFormat.js +488 -0
- package/dist/utils/OpenAIFormat.js.map +1 -0
- package/dist/utils/createModelDownloader.d.ts +12 -3
- package/dist/utils/createModelDownloader.js +9 -5
- package/dist/utils/createModelDownloader.js.map +1 -1
- package/dist/utils/modelDownloadEndpoints.d.ts +13 -0
- package/dist/utils/modelDownloadEndpoints.js +27 -0
- package/dist/utils/modelDownloadEndpoints.js.map +1 -0
- package/dist/utils/modelFileAccessTokens.d.ts +5 -0
- package/dist/utils/{modelFileAccesTokens.js → modelFileAccessTokens.js} +4 -4
- package/dist/utils/modelFileAccessTokens.js.map +1 -0
- package/dist/utils/optionsMatrix.d.ts +58 -0
- package/dist/utils/optionsMatrix.js +97 -0
- package/dist/utils/optionsMatrix.js.map +1 -0
- package/dist/utils/parseModelUri.d.ts +5 -2
- package/dist/utils/parseModelUri.js +24 -22
- package/dist/utils/parseModelUri.js.map +1 -1
- package/dist/utils/resolveModelDestination.d.ts +2 -1
- package/dist/utils/resolveModelDestination.js +3 -3
- package/dist/utils/resolveModelDestination.js.map +1 -1
- package/dist/utils/resolveModelFile.d.ts +7 -1
- package/dist/utils/resolveModelFile.js +5 -4
- package/dist/utils/resolveModelFile.js.map +1 -1
- package/dist/utils/transformPromisable.d.ts +14 -0
- package/dist/utils/transformPromisable.js +32 -0
- package/dist/utils/transformPromisable.js.map +1 -1
- package/llama/CMakeLists.txt +12 -1
- package/llama/addon/AddonContext.cpp +150 -10
- package/llama/addon/AddonContext.h +3 -0
- package/llama/addon/AddonSampler.cpp +0 -1
- package/llama/binariesGithubRelease.json +1 -1
- package/llama/gitRelease.bundle +0 -0
- package/llama/gpuInfo/vulkan-gpu-info.cpp +0 -1
- package/llama/grammars/README.md +7 -7
- package/llama/llama.cpp.info.json +1 -1
- package/package.json +54 -52
- package/templates/packed/electron-typescript-react.json +1 -1
- package/templates/packed/node-typescript.json +1 -1
- package/dist/utils/modelFileAccesTokens.d.ts +0 -4
- package/dist/utils/modelFileAccesTokens.js.map +0 -1
|
@@ -75,7 +75,7 @@ import { pushAll } from "./pushAll.js";
|
|
|
75
75
|
* @returns The resolved model file path
|
|
76
76
|
*/
|
|
77
77
|
export async function resolveModelFile(uriOrPath, optionsOrDirectory) {
|
|
78
|
-
const { directory, download = "auto", verify = false, fileName, headers, cli = true, onProgress, deleteTempFileOnCancel = true, parallel = 4, tokens, signal } = typeof optionsOrDirectory === "string"
|
|
78
|
+
const { directory, download = "auto", verify = false, fileName, headers, cli = true, onProgress, deleteTempFileOnCancel = true, parallel = 4, tokens, endpoints, signal } = typeof optionsOrDirectory === "string"
|
|
79
79
|
? { directory: optionsOrDirectory }
|
|
80
80
|
: (optionsOrDirectory ?? {});
|
|
81
81
|
const resolvedDirectory = directory || cliModelsDirectory;
|
|
@@ -83,7 +83,7 @@ export async function resolveModelFile(uriOrPath, optionsOrDirectory) {
|
|
|
83
83
|
let resolvedVerify = verify ?? false;
|
|
84
84
|
if (download === false)
|
|
85
85
|
resolvedVerify = false;
|
|
86
|
-
const resolvedModelDestination = resolveModelDestination(uriOrPath);
|
|
86
|
+
const resolvedModelDestination = resolveModelDestination(uriOrPath, undefined, endpoints);
|
|
87
87
|
if (resolvedModelDestination.type === "file") {
|
|
88
88
|
const resolvedFilePath = path.resolve(resolvedDirectory, uriOrPath);
|
|
89
89
|
if (await fs.pathExists(resolvedFilePath))
|
|
@@ -100,7 +100,7 @@ export async function resolveModelFile(uriOrPath, optionsOrDirectory) {
|
|
|
100
100
|
pushAll(expectedFileNames, resolvedModelDestination.parsedUri.possibleFullFilenames);
|
|
101
101
|
}
|
|
102
102
|
else if (expectedFileNames.length === 0 && resolvedModelDestination.type === "url") {
|
|
103
|
-
const enforcedParsedUrl = resolveModelDestination(uriOrPath, true);
|
|
103
|
+
const enforcedParsedUrl = resolveModelDestination(uriOrPath, true, endpoints);
|
|
104
104
|
if (enforcedParsedUrl != null && enforcedParsedUrl.type === "uri") {
|
|
105
105
|
if (enforcedParsedUrl.parsedUri.type === "resolved")
|
|
106
106
|
expectedFileNames.push(enforcedParsedUrl.parsedUri.fullFilename);
|
|
@@ -140,7 +140,8 @@ export async function resolveModelFile(uriOrPath, optionsOrDirectory) {
|
|
|
140
140
|
fileName: fileName || undefined,
|
|
141
141
|
parallelDownloads: parallel,
|
|
142
142
|
onProgress,
|
|
143
|
-
tokens
|
|
143
|
+
tokens,
|
|
144
|
+
endpoints
|
|
144
145
|
});
|
|
145
146
|
if (foundExpectedFilePath != null && downloader.totalFiles === 1 && await fs.pathExists(downloader.entrypointFilePath)) {
|
|
146
147
|
const fileStats = await fs.stat(foundExpectedFilePath);
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"resolveModelFile.js","sourceRoot":"","sources":["../../src/utils/resolveModelFile.ts"],"names":[],"mappings":"AAAA,OAAO,IAAI,MAAM,MAAM,CAAC;AACxB,OAAO,EAAE,MAAM,UAAU,CAAC;AAC1B,OAAO,KAAK,MAAM,OAAO,CAAC;AAC1B,OAAO,EAAC,kBAAkB,EAAC,MAAM,cAAc,CAAC;AAChD,OAAO,EAAC,eAAe,EAAC,MAAM,iCAAiC,CAAC;AAChE,OAAO,EAAC,qBAAqB,EAAC,MAAM,wCAAwC,CAAC;AAC7E,OAAO,EAAC,uBAAuB,EAAC,MAAM,8BAA8B,CAAC;
|
|
1
|
+
{"version":3,"file":"resolveModelFile.js","sourceRoot":"","sources":["../../src/utils/resolveModelFile.ts"],"names":[],"mappings":"AAAA,OAAO,IAAI,MAAM,MAAM,CAAC;AACxB,OAAO,EAAE,MAAM,UAAU,CAAC;AAC1B,OAAO,KAAK,MAAM,OAAO,CAAC;AAC1B,OAAO,EAAC,kBAAkB,EAAC,MAAM,cAAc,CAAC;AAChD,OAAO,EAAC,eAAe,EAAC,MAAM,iCAAiC,CAAC;AAChE,OAAO,EAAC,qBAAqB,EAAC,MAAM,wCAAwC,CAAC;AAC7E,OAAO,EAAC,uBAAuB,EAAC,MAAM,8BAA8B,CAAC;AAGrE,OAAO,EAAC,qBAAqB,EAAC,MAAM,4BAA4B,CAAC;AACjE,OAAO,EAAC,qBAAqB,EAAC,MAAM,oBAAoB,CAAC;AACzD,OAAO,EAAC,cAAc,EAAC,MAAM,yBAAyB,CAAC;AACvD,OAAO,EAAC,OAAO,EAAC,MAAM,cAAc,CAAC;AAsFrC;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAgEG;AACH,MAAM,CAAC,KAAK,UAAU,gBAAgB,CAClC,SAAiB,EACjB,kBAAqD;IAErD,MAAM,EACF,SAAS,EACT,QAAQ,GAAG,MAAM,EACjB,MAAM,GAAG,KAAK,EACd,QAAQ,EACR,OAAO,EACP,GAAG,GAAG,IAAI,EACV,UAAU,EACV,sBAAsB,GAAG,IAAI,EAC7B,QAAQ,GAAG,CAAC,EACZ,MAAM,EACN,SAAS,EACT,MAAM,EACT,GAAG,OAAO,kBAAkB,KAAK,QAAQ;QACtC,CAAC,CAAC,EAAC,SAAS,EAAE,kBAAkB,EAAC;QACjC,CAAC,CAAC,CAAC,kBAAkB,IAAI,EAAE,CAAC,CAAC;IAEjC,MAAM,iBAAiB,GAAG,SAAS,IAAI,kBAAkB,CAAC;IAC1D,MAAM,WAAW,GAAG,GAAG,IAAI,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC;IAC7C,IAAI,cAAc,GAAG,MAAM,IAAI,KAAK,CAAC;IAErC,IAAI,QAAQ,KAAK,KAAK;QAClB,cAAc,GAAG,KAAK,CAAC;IAE3B,MAAM,wBAAwB,GAAG,uBAAuB,CAAC,SAAS,EAAE,SAAS,EAAE,SAAS,CAAC,CAAC;IAE1F,IAAI,wBAAwB,CAAC,IAAI,KAAK,MAAM,EAAE,CAAC;QAC3C,MAAM,gBAAgB,GAAG,IAAI,CAAC,OAAO,CAAC,iBAAiB,EAAE,SAAS,CAAC,CAAC;QAEpE,IAAI,MAAM,EAAE,CAAC,UAAU,CAAC,gBAAgB,CAAC;YACrC,OAAO,gBAAgB,CAAC;QAE5B,MAAM,IAAI,KAAK,CAAC,2BAA2B,gBAAgB,GAAG,CAAC,CAAC;IACpE,CAAC;IAED,MAAM,iBAAiB,GAAa,QAAQ,IAAI,IAAI;QAChD,CAAC,CAAC,CAAC,QAAQ,CAAC;QACZ,CAAC,CAAC,EAAE,CAAC;IAET,IAAI,iBAAiB,CAAC,MAAM,KAAK,CAAC,IAAI,wBAAwB,CAAC,IAAI,KAAK,KAAK,EAAE,CAAC;QAC5E,IAAI,wBAAwB,CAAC,SAAS,CAAC,IAAI,KAAK,UAAU;YACtD,iBAAiB,CAAC,IAAI,CAAC,wBAAwB,CAAC,SAAS,CAAC,YAAY,CAAC,CAAC;;YAExE,OAAO,CAAC,iBAAiB,EAAE,wBAAwB,CAAC,SAAS,CAAC,qBAAqB,CAAC,CAAC;IAC7F,CAAC;SAAM,IAAI,iBAAiB,CAAC,MAAM,KAAK,CAAC,IAAI,wBAAwB,CAAC,IAAI,KAAK,KAAK,EAAE,CAAC
;QACnF,MAAM,iBAAiB,GAAG,uBAAuB,CAAC,SAAS,EAAE,IAAI,EAAE,SAAS,CAAC,CAAC;QAC9E,IAAI,iBAAiB,IAAI,IAAI,IAAI,iBAAiB,CAAC,IAAI,KAAK,KAAK,EAAE,CAAC;YAChE,IAAI,iBAAiB,CAAC,SAAS,CAAC,IAAI,KAAK,UAAU;gBAC/C,iBAAiB,CAAC,IAAI,CAAC,iBAAiB,CAAC,SAAS,CAAC,YAAY,CAAC,CAAC;;gBAEjE,OAAO,CAAC,iBAAiB,EAAE,iBAAiB,CAAC,SAAS,CAAC,qBAAqB,CAAC,CAAC;QACtF,CAAC;IACL,CAAC;IAED,MAAM,qBAAqB,GAAG,MAAM,4BAA4B,CAAC,iBAAiB,EAAE,iBAAiB,CAAC,CAAC;IAEvG,IAAI,qBAAqB,IAAI,IAAI,IAAI,CAAC,cAAc,EAAE,CAAC;QACnD,MAAM,YAAY,GAAG,qBAAqB,CAAC,qBAAqB,CAAC,CAAC;QAClE,IAAI,YAAY,CAAC,MAAM,KAAK,CAAC,IAAI,YAAY,CAAC,CAAC,CAAC,KAAK,qBAAqB;YACtE,OAAO,qBAAqB,CAAC;QAEjC,MAAM,aAAa,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC,YAAY,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,EAAE,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QACzF,IAAI,YAAY,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC1B,IAAI,aAAa,CAAC,KAAK,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC,MAAM,CAAC;gBACvC,OAAO,YAAY,CAAC,CAAC,CAAE,CAAC;iBACvB,IAAI,QAAQ,KAAK,KAAK;gBACvB,MAAM,IAAI,KAAK,CAAC,0CAA0C,YAAY,CAAC,CAAC,CAAC,qCAAqC,CAAC,CAAC;QACxH,CAAC;IACL,CAAC;IAED,IAAI,QAAQ,KAAK,KAAK,EAAE,CAAC;QACrB,IAAI,iBAAiB,CAAC,MAAM,KAAK,CAAC;YAC9B,MAAM,IAAI,KAAK,CAAC,2BAA2B,IAAI,CAAC,IAAI,CAAC,iBAAiB,EAAE,iBAAiB,CAAC,CAAC,CAAE,CAAC,4BAA4B,CAAC,CAAC;QAEhI,MAAM,IAAI,KAAK,CAAC,4BAA4B,SAAS,SAAS,iBAAiB,4BAA4B,CAAC,CAAC;IACjH,CAAC;IAED,IAAI,MAAM,EAAE,OAAO;QACf,MAAM,MAAM,CAAC,MAAM,CAAC;IAExB,MAAM,UAAU,GAAG,MAAM,qBAAqB,CAAC;QAC3C,QAAQ,EAAE,wBAAwB,CAAC,IAAI,KAAK,KAAK;YAC7C,CAAC,CAAC,wBAAwB,CAAC,GAAG;YAC9B,CAAC,CAAC,wBAAwB,CAAC,GAAG;QAClC,OAAO,EAAE,iBAAiB;QAC1B,OAAO;QACP,eAAe,EAAE,WAAW;QAC5B,sBAAsB;QACtB,YAAY,EAAE,IAAI;QAClB,QAAQ,EAAE,QAAQ,IAAI,SAAS;QAC/B,iBAAiB,EAAE,QAAQ;QAC3B,UAAU;QACV,MAAM;QACN,SAAS;KACZ,CAAC,CAAC;IAEH,IAAI,qBAAqB,IAAI,IAAI,IAAI,UAAU,CAAC,UAAU,KAAK,CAAC,IAAI,MAAM,EAAE,CAAC,UAAU,CAAC,UAAU,CAAC,kBAAkB,CAAC,EAAE,CAAC;QACrH,MAAM,SAAS,GAAG,MAAM,EAAE,CAAC,IAAI,CAAC,qBAAqB,CAAC,CAAC;QAEvD,IAAI,UAAU,CAAC,SAAS,KAAK,SAAS,CAAC,IAAI,EAAE,CAAC;YAC1C,MAAM,UAAU,CAAC,MAAM,CAAC,EAAC,cAAc,EAAE,KAAK,EAAC,CAAC,CAAC;YACjD,OAAO,qBAAqB,CAAC;QACjC,C
AAC;IACL,CAAC;IAED,IAAI,WAAW;QACX,OAAO,CAAC,IAAI,CAAC,kBAAkB,KAAK,CAAC,MAAM,CAAC,eAAe,CAAC,iBAAiB,CAAC,CAAC,GAC3E,UAAU,CAAC,gBAAgB,IAAI,IAAI;YAC/B,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,eAAe,UAAU,CAAC,gBAAgB,4BAA4B,CAAC;YACpF,CAAC,CAAC,EACV,EAAE,CAAC,CAAC;IAER,MAAM,UAAU,CAAC,QAAQ,CAAC,EAAC,MAAM,EAAC,CAAC,CAAC;IAEpC,IAAI,WAAW;QACX,OAAO,CAAC,IAAI,CAAC,iBAAiB,KAAK,CAAC,MAAM,CAAC,eAAe,CAAC,UAAU,CAAC,kBAAkB,CAAC,CAAC,EAAE,CAAC,CAAC;IAElG,OAAO,UAAU,CAAC,kBAAkB,CAAC;AACzC,CAAC;AAED,KAAK,UAAU,4BAA4B,CAAC,OAAe,EAAE,SAA2E;IACpI,IAAI,kBAAkB,GAAyB,SAAS,CAAC;IAEzD,IAAI,CAAC,CAAC,MAAM,EAAE,CAAC,UAAU,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,CAAC,MAAM,EAAE,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,WAAW,EAAE;QAC1E,OAAO,SAAS,CAAC;IAErB,KAAK,MAAM,gBAAgB,IAAI,SAAS,EAAE,CAAC;QACvC,IAAI,gBAAgB,CAAC,QAAQ,CAAC,qBAAqB,CAAC,EAAE,CAAC;YACnD,MAAM,CAAC,SAAS,EAAE,GAAG,SAAS,CAAC,GAAG,gBAAgB,CAAC,KAAK,CAAC,qBAAqB,CAAC,CAAC;YAChF,MAAM,iBAAiB,GAAG,SAAS,IAAI,EAAE,CAAC;YAC1C,MAAM,iBAAiB,GAAG,SAAS,CAAC,IAAI,CAAC,qBAAqB,CAAC,CAAC;YAEhE,IAAI,kBAAkB,IAAI,IAAI;gBAC1B,kBAAkB,GAAG,CAAC,MAAM,EAAE,CAAC,OAAO,CAAC,OAAO,EAAE,EAAC,aAAa,EAAE,IAAI,EAAC,CAAC,CAAC;qBAClE,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,MAAM,EAAE,CAAC;qBAC/B,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YAElC,KAAK,MAAM,iBAAiB,IAAI,kBAAkB,EAAE,CAAC;gBACjD,IAAI,iBAAiB,CAAC,UAAU,CAAC,iBAAiB,CAAC,IAAI,iBAAiB,CAAC,QAAQ,CAAC,iBAAiB,CAAC,EAAE,CAAC;oBACnG,MAAM,UAAU,GAAG,iBAAiB,CAAC,KAAK,CAAC,iBAAiB,CAAC,MAAM,EAAE,CAAC,iBAAiB,CAAC,MAAM,CAAC,CAAC;oBAChG,IAAI,cAAc,CAAC,UAAU,CAAC;wBAC1B,OAAO,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,iBAAiB,CAAC,CAAC;gBACrD,CAAC;YACL,CAAC;YAED,SAAS;QACb,CAAC;QAED,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,gBAAgB,CAAC,CAAC;QACtD,IAAI,MAAM,EAAE,CAAC,UAAU,CAAC,QAAQ,CAAC;YAC7B,OAAO,QAAQ,CAAC;IACxB,CAAC;IAED,OAAO,SAAS,CAAC;AACrB,CAAC"}
|
|
@@ -37,4 +37,18 @@ export declare function promisableLoop<R>({ condition, callback, afterthought, r
|
|
|
37
37
|
/** The value to return when the loop is done */
|
|
38
38
|
returnValue: () => Promisable<R>;
|
|
39
39
|
}): Promisable<R>;
|
|
40
|
+
/**
|
|
41
|
+
* Calls the given getters in order, and when a promise is encountered, waits for it to resolve before continuing.
|
|
42
|
+
* The result is transformed using the transformer function.
|
|
43
|
+
*
|
|
44
|
+
* This is used as a performance optimization to avoid adding many microtasks to the event loop,
|
|
45
|
+
* which makes reading from buffers significantly faster.
|
|
46
|
+
* @param getters - An array of functions that return values or promises
|
|
47
|
+
* @param transformer - The transformer function that takes the promisable values and transforms them into the result of this function
|
|
48
|
+
*/
|
|
49
|
+
export declare function transformPromisablesInOrder<const T extends (() => Promisable<any>)[], const R = {
|
|
50
|
+
readonly [Index in keyof T]: Awaited<ReturnType<T[Index]>>;
|
|
51
|
+
}>(getters: T, transformer?: (values: {
|
|
52
|
+
readonly [Index in keyof T]: Awaited<ReturnType<T[Index]>>;
|
|
53
|
+
}) => Promisable<R>): Promisable<R>;
|
|
40
54
|
export type Promisable<T> = T | Promise<T>;
|
|
@@ -60,4 +60,36 @@ export function promisableLoop({ condition, callback, afterthought = () => void
|
|
|
60
60
|
}
|
|
61
61
|
return iterate();
|
|
62
62
|
}
|
|
63
|
+
/**
|
|
64
|
+
* Calls the given getters in order, and when a promise is encountered, waits for it to resolve before continuing.
|
|
65
|
+
* The result is transformed using the transformer function.
|
|
66
|
+
*
|
|
67
|
+
* This is used as a performance optimization to avoid adding many microtasks to the event loop,
|
|
68
|
+
* which makes reading from buffers significantly faster.
|
|
69
|
+
* @param getters - An array of functions that return values or promises
|
|
70
|
+
* @param transformer - The transformer function that takes the promisable values and transforms them into the result of this function
|
|
71
|
+
*/
|
|
72
|
+
export function transformPromisablesInOrder(getters, transformer = ((values) => values)) {
|
|
73
|
+
let i = 0;
|
|
74
|
+
const res = [];
|
|
75
|
+
let skipPushingValue = true;
|
|
76
|
+
function iterate(currentValue) {
|
|
77
|
+
if (skipPushingValue)
|
|
78
|
+
skipPushingValue = false;
|
|
79
|
+
else
|
|
80
|
+
res.push(currentValue);
|
|
81
|
+
while (i < getters.length) {
|
|
82
|
+
const getter = getters[i];
|
|
83
|
+
if (getter == null)
|
|
84
|
+
break;
|
|
85
|
+
i++;
|
|
86
|
+
const value = getter();
|
|
87
|
+
if (value instanceof Promise)
|
|
88
|
+
return value.then(iterate);
|
|
89
|
+
res.push(value);
|
|
90
|
+
}
|
|
91
|
+
return transformer(res);
|
|
92
|
+
}
|
|
93
|
+
return iterate(undefined);
|
|
94
|
+
}
|
|
63
95
|
//# sourceMappingURL=transformPromisable.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"transformPromisable.js","sourceRoot":"","sources":["../../src/utils/transformPromisable.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AACH,MAAM,UAAU,mBAAmB,CAAO,KAAoB,EAAE,WAAwC;IACpG,IAAI,KAAK,YAAY,OAAO;QACxB,OAAO,KAAK,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;IAEnC,OAAO,WAAW,CAAC,KAAK,CAAC,CAAC;AAC9B,CAAC;AAED;;;;;;;GAOG;AACH,MAAM,UAAU,oBAAoB,CAChC,MAA0D,EAC1D,WAA8E;IAE9E,IAAI,MAAM,CAAC,IAAI,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,YAAY,OAAO,CAAC;QAChD,OAAO,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;IAEjD,OAAO,WAAW,CAAC,MAAM,CAAC,CAAC;AAC/B,CAAC;AAED;;;;;GAKG;AACH,MAAM,UAAU,cAAc,CAAI,EAC9B,SAAS,EACT,QAAQ,EACR,YAAY,GAAG,GAAG,EAAE,CAAC,KAAK,CAAC,EAC3B,WAAW,EAad;IACG,SAAS,OAAO;QACZ,OAAO,IAAI,EAAE,CAAC;YACV,MAAM,cAAc,GAAG,SAAS,EAAE,CAAC;YAEnC,IAAI,cAAc,YAAY,OAAO;gBACjC,OAAO,cAAc;qBAChB,IAAI,CAAC,CAAC,cAAc,EAAiB,EAAE;oBACpC,IAAI,cAAc,EAAE,CAAC;wBACjB,MAAM,KAAK,GAAG,QAAQ,EAAE,CAAC;wBACzB,IAAI,KAAK,YAAY,OAAO;4BACxB,OAAO,KAAK,CAAC,IAAI,CAAC,GAAG,EAAE,CAAC,mBAAmB,CAAC,YAAY,EAAE,EAAE,OAAO,CAAC,CAAC,CAAC;wBAE1E,OAAO,mBAAmB,CAAC,YAAY,EAAE,EAAE,OAAO,CAAC,CAAC;oBACxD,CAAC;oBAED,OAAO,WAAW,EAAE,CAAC;gBACzB,CAAC,CAAC,CAAC;YAEX,IAAI,cAAc,EAAE,CAAC;gBACjB,MAAM,KAAK,GAAG,QAAQ,EAAE,CAAC;gBACzB,IAAI,KAAK,YAAY,OAAO;oBACxB,OAAO,KAAK,CAAC,IAAI,CAAC,GAAG,EAAE,CAAC,mBAAmB,CAAC,YAAY,EAAE,EAAE,OAAO,CAAC,CAAC,CAAC;gBAE1E,MAAM,iBAAiB,GAAG,YAAY,EAAE,CAAC;gBACzC,IAAI,iBAAiB,YAAY,OAAO;oBACpC,OAAO,iBAAiB,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;gBAE3C,SAAS;YACb,CAAC;YAED,OAAO,WAAW,EAAE,CAAC;QACzB,CAAC;IACL,CAAC;IAED,OAAO,OAAO,EAAE,CAAC;AACrB,CAAC"}
|
|
1
|
+
{"version":3,"file":"transformPromisable.js","sourceRoot":"","sources":["../../src/utils/transformPromisable.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AACH,MAAM,UAAU,mBAAmB,CAAO,KAAoB,EAAE,WAAwC;IACpG,IAAI,KAAK,YAAY,OAAO;QACxB,OAAO,KAAK,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;IAEnC,OAAO,WAAW,CAAC,KAAK,CAAC,CAAC;AAC9B,CAAC;AAED;;;;;;;GAOG;AACH,MAAM,UAAU,oBAAoB,CAChC,MAA0D,EAC1D,WAA8E;IAE9E,IAAI,MAAM,CAAC,IAAI,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,YAAY,OAAO,CAAC;QAChD,OAAO,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;IAEjD,OAAO,WAAW,CAAC,MAAM,CAAC,CAAC;AAC/B,CAAC;AAED;;;;;GAKG;AACH,MAAM,UAAU,cAAc,CAAI,EAC9B,SAAS,EACT,QAAQ,EACR,YAAY,GAAG,GAAG,EAAE,CAAC,KAAK,CAAC,EAC3B,WAAW,EAad;IACG,SAAS,OAAO;QACZ,OAAO,IAAI,EAAE,CAAC;YACV,MAAM,cAAc,GAAG,SAAS,EAAE,CAAC;YAEnC,IAAI,cAAc,YAAY,OAAO;gBACjC,OAAO,cAAc;qBAChB,IAAI,CAAC,CAAC,cAAc,EAAiB,EAAE;oBACpC,IAAI,cAAc,EAAE,CAAC;wBACjB,MAAM,KAAK,GAAG,QAAQ,EAAE,CAAC;wBACzB,IAAI,KAAK,YAAY,OAAO;4BACxB,OAAO,KAAK,CAAC,IAAI,CAAC,GAAG,EAAE,CAAC,mBAAmB,CAAC,YAAY,EAAE,EAAE,OAAO,CAAC,CAAC,CAAC;wBAE1E,OAAO,mBAAmB,CAAC,YAAY,EAAE,EAAE,OAAO,CAAC,CAAC;oBACxD,CAAC;oBAED,OAAO,WAAW,EAAE,CAAC;gBACzB,CAAC,CAAC,CAAC;YAEX,IAAI,cAAc,EAAE,CAAC;gBACjB,MAAM,KAAK,GAAG,QAAQ,EAAE,CAAC;gBACzB,IAAI,KAAK,YAAY,OAAO;oBACxB,OAAO,KAAK,CAAC,IAAI,CAAC,GAAG,EAAE,CAAC,mBAAmB,CAAC,YAAY,EAAE,EAAE,OAAO,CAAC,CAAC,CAAC;gBAE1E,MAAM,iBAAiB,GAAG,YAAY,EAAE,CAAC;gBACzC,IAAI,iBAAiB,YAAY,OAAO;oBACpC,OAAO,iBAAiB,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;gBAE3C,SAAS;YACb,CAAC;YAED,OAAO,WAAW,EAAE,CAAC;QACzB,CAAC;IACL,CAAC;IAED,OAAO,OAAO,EAAE,CAAC;AACrB,CAAC;AAED;;;;;;;;GAQG;AACH,MAAM,UAAU,2BAA2B,CAIvC,OAAU,EACV,cAEqB,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC,MAAW,CAAC;IAE9C,IAAI,CAAC,GAAG,CAAC,CAAC;IACV,MAAM,GAAG,GAAU,EAAE,CAAC;IACtB,IAAI,gBAAgB,GAAG,IAAI,CAAC;IAE5B,SAAS,OAAO,CAAC,YAAiB;QAC9B,IAAI,gBAAgB;YAChB,gBAAgB,GAAG,KAAK,CAAC;;YAEzB,GAAG,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;QAE3B,OAAO,CAAC,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC;YACxB,MAAM,MAAM,GAAG,OAAO,CAAC,CAAC,CAAC,CAAC;YAC1B,IAAI,MAAM,IAAI,IAAI;gBACd,MAAM;YAEV,CAAC,EAA
E,CAAC;YAEJ,MAAM,KAAK,GAAG,MAAM,EAAE,CAAC;YACvB,IAAI,KAAK,YAAY,OAAO;gBACxB,OAAO,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;YAE/B,GAAG,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QACpB,CAAC;QAED,OAAO,WAAW,CAAC,GAA2D,CAAC,CAAC;IACpF,CAAC;IAED,OAAO,OAAO,CAAC,SAAS,CAAC,CAAC;AAC9B,CAAC"}
|
package/llama/CMakeLists.txt
CHANGED
|
@@ -1,9 +1,17 @@
|
|
|
1
|
-
cmake_minimum_required(VERSION 3.
|
|
1
|
+
cmake_minimum_required(VERSION 3.19)
|
|
2
2
|
|
|
3
3
|
if (NLC_CURRENT_PLATFORM STREQUAL "win-x64" OR NLC_CURRENT_PLATFORM STREQUAL "win-arm64")
|
|
4
4
|
set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON)
|
|
5
5
|
endif()
|
|
6
6
|
|
|
7
|
+
if (NLC_CURRENT_PLATFORM STREQUAL "win-x64")
|
|
8
|
+
if (CMAKE_BUILD_TYPE STREQUAL "Debug")
|
|
9
|
+
set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreadedDebugDLL" CACHE STRING "" FORCE)
|
|
10
|
+
else()
|
|
11
|
+
set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreadedDLL" CACHE STRING "" FORCE)
|
|
12
|
+
endif()
|
|
13
|
+
endif()
|
|
14
|
+
|
|
7
15
|
if (NLC_TARGET_PLATFORM STREQUAL "win-arm64" AND (CMAKE_GENERATOR STREQUAL "Ninja" OR CMAKE_GENERATOR STREQUAL "Ninja Multi-Config") AND NOT MINGW)
|
|
8
16
|
if(NLC_CURRENT_PLATFORM STREQUAL "win-x64")
|
|
9
17
|
include("./profiles/llvm.win32.host-x64.target-arm64.cmake")
|
|
@@ -70,6 +78,9 @@ add_subdirectory("llama.cpp")
|
|
|
70
78
|
include_directories("llama.cpp")
|
|
71
79
|
include_directories("./llama.cpp/common")
|
|
72
80
|
|
|
81
|
+
# This is needed to use methods in "llama-grammar.h" and "unicode.h"
|
|
82
|
+
target_include_directories(llama PUBLIC "./llama.cpp/src")
|
|
83
|
+
|
|
73
84
|
unset(GPU_INFO_HEADERS)
|
|
74
85
|
unset(GPU_INFO_SOURCES)
|
|
75
86
|
unset(GPU_INFO_EXTRA_LIBS)
|
|
@@ -2,7 +2,6 @@
|
|
|
2
2
|
#include <algorithm>
|
|
3
3
|
#include <cmath>
|
|
4
4
|
#include "common/common.h"
|
|
5
|
-
#include "llama-grammar.h"
|
|
6
5
|
#include "llama.h"
|
|
7
6
|
|
|
8
7
|
#include "addonGlobals.h"
|
|
@@ -583,7 +582,7 @@ Napi::Value AddonContext::DisposeSequence(const Napi::CallbackInfo& info) {
|
|
|
583
582
|
|
|
584
583
|
int32_t sequenceId = info[0].As<Napi::Number>().Int32Value();
|
|
585
584
|
|
|
586
|
-
bool result =
|
|
585
|
+
bool result = llama_kv_self_seq_rm(ctx, sequenceId, -1, -1);
|
|
587
586
|
|
|
588
587
|
if (!result) {
|
|
589
588
|
Napi::Error::New(info.Env(), "Failed to dispose sequence").ThrowAsJavaScriptException();
|
|
@@ -602,7 +601,7 @@ Napi::Value AddonContext::RemoveTokenCellsFromSequence(const Napi::CallbackInfo&
|
|
|
602
601
|
int32_t startPos = info[1].As<Napi::Number>().Int32Value();
|
|
603
602
|
int32_t endPos = info[2].As<Napi::Number>().Int32Value();
|
|
604
603
|
|
|
605
|
-
bool result =
|
|
604
|
+
bool result = llama_kv_self_seq_rm(ctx, sequenceId, startPos, endPos);
|
|
606
605
|
|
|
607
606
|
return Napi::Boolean::New(info.Env(), result);
|
|
608
607
|
}
|
|
@@ -617,7 +616,7 @@ Napi::Value AddonContext::ShiftSequenceTokenCells(const Napi::CallbackInfo& info
|
|
|
617
616
|
int32_t endPos = info[2].As<Napi::Number>().Int32Value();
|
|
618
617
|
int32_t shiftDelta = info[3].As<Napi::Number>().Int32Value();
|
|
619
618
|
|
|
620
|
-
|
|
619
|
+
llama_kv_self_seq_add(ctx, sequenceId, startPos, endPos, shiftDelta);
|
|
621
620
|
|
|
622
621
|
return info.Env().Undefined();
|
|
623
622
|
}
|
|
@@ -639,6 +638,7 @@ Napi::Value AddonContext::GetEmbedding(const Napi::CallbackInfo& info) {
|
|
|
639
638
|
}
|
|
640
639
|
|
|
641
640
|
int32_t inputTokensLength = info[0].As<Napi::Number>().Int32Value();
|
|
641
|
+
int32_t maxVectorSize = (info.Length() > 1 && info[1].IsNumber()) ? info[1].As<Napi::Number>().Int32Value() : 0;
|
|
642
642
|
|
|
643
643
|
if (inputTokensLength <= 0) {
|
|
644
644
|
Napi::Error::New(info.Env(), "Invalid input tokens length").ThrowAsJavaScriptException();
|
|
@@ -650,15 +650,16 @@ Napi::Value AddonContext::GetEmbedding(const Napi::CallbackInfo& info) {
|
|
|
650
650
|
const auto* embeddings = pooling_type == LLAMA_POOLING_TYPE_NONE ? NULL : llama_get_embeddings_seq(ctx, 0);
|
|
651
651
|
if (embeddings == NULL) {
|
|
652
652
|
embeddings = llama_get_embeddings_ith(ctx, inputTokensLength - 1);
|
|
653
|
+
}
|
|
653
654
|
|
|
654
|
-
|
|
655
|
-
|
|
656
|
-
|
|
657
|
-
}
|
|
655
|
+
if (embeddings == NULL) {
|
|
656
|
+
Napi::Error::New(info.Env(), std::string("Failed to get embeddings for token ") + std::to_string(inputTokensLength - 1)).ThrowAsJavaScriptException();
|
|
657
|
+
return info.Env().Undefined();
|
|
658
658
|
}
|
|
659
659
|
|
|
660
|
-
|
|
661
|
-
|
|
660
|
+
size_t resultSize = maxVectorSize == 0 ? n_embd : std::min(n_embd, maxVectorSize);
|
|
661
|
+
Napi::Float64Array result = Napi::Float64Array::New(info.Env(), resultSize);
|
|
662
|
+
for (size_t i = 0; i < resultSize; i++) {
|
|
662
663
|
result[i] = embeddings[i];
|
|
663
664
|
}
|
|
664
665
|
|
|
@@ -701,6 +702,143 @@ Napi::Value AddonContext::SetThreads(const Napi::CallbackInfo& info) {
|
|
|
701
702
|
return info.Env().Undefined();
|
|
702
703
|
}
|
|
703
704
|
|
|
705
|
+
class AddonContextSaveSequenceStateToFileWorker : public Napi::AsyncWorker {
|
|
706
|
+
public:
|
|
707
|
+
AddonContext* context;
|
|
708
|
+
std::string filepath;
|
|
709
|
+
llama_seq_id sequenceId;
|
|
710
|
+
std::vector<llama_token> tokens;
|
|
711
|
+
size_t savedFileSize = 0;
|
|
712
|
+
|
|
713
|
+
AddonContextSaveSequenceStateToFileWorker(const Napi::CallbackInfo& info, AddonContext* context)
|
|
714
|
+
: Napi::AsyncWorker(info.Env(), "AddonContextSaveSequenceStateToFileWorker"),
|
|
715
|
+
context(context),
|
|
716
|
+
deferred(Napi::Promise::Deferred::New(info.Env())) {
|
|
717
|
+
context->Ref();
|
|
718
|
+
|
|
719
|
+
filepath = info[0].As<Napi::String>().Utf8Value();
|
|
720
|
+
sequenceId = info[1].As<Napi::Number>().Int32Value();
|
|
721
|
+
Napi::Uint32Array inputTokens = info[2].As<Napi::Uint32Array>();
|
|
722
|
+
|
|
723
|
+
tokens.resize(inputTokens.ElementLength());
|
|
724
|
+
for (size_t i = 0; i < tokens.size(); i++) {
|
|
725
|
+
tokens[i] = inputTokens[i];
|
|
726
|
+
}
|
|
727
|
+
}
|
|
728
|
+
~AddonContextSaveSequenceStateToFileWorker() {
|
|
729
|
+
context->Unref();
|
|
730
|
+
}
|
|
731
|
+
|
|
732
|
+
Napi::Promise GetPromise() {
|
|
733
|
+
return deferred.Promise();
|
|
734
|
+
}
|
|
735
|
+
|
|
736
|
+
protected:
|
|
737
|
+
Napi::Promise::Deferred deferred;
|
|
738
|
+
|
|
739
|
+
void Execute() {
|
|
740
|
+
try {
|
|
741
|
+
savedFileSize = llama_state_seq_save_file(context->ctx, filepath.c_str(), sequenceId, tokens.data(), tokens.size());
|
|
742
|
+
if (savedFileSize == 0) {
|
|
743
|
+
SetError("Failed to save state to file");
|
|
744
|
+
return;
|
|
745
|
+
}
|
|
746
|
+
} catch (const std::exception& e) {
|
|
747
|
+
SetError(e.what());
|
|
748
|
+
} catch(...) {
|
|
749
|
+
SetError("Unknown error when calling \"llama_state_seq_save_file\"");
|
|
750
|
+
}
|
|
751
|
+
}
|
|
752
|
+
void OnOK() {
|
|
753
|
+
deferred.Resolve(Napi::Number::New(Env(), savedFileSize));
|
|
754
|
+
}
|
|
755
|
+
void OnError(const Napi::Error& err) {
|
|
756
|
+
deferred.Reject(err.Value());
|
|
757
|
+
}
|
|
758
|
+
};
|
|
759
|
+
Napi::Value AddonContext::SaveSequenceStateToFile(const Napi::CallbackInfo& info) {
|
|
760
|
+
if (disposed) {
|
|
761
|
+
Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
|
|
762
|
+
return info.Env().Undefined();
|
|
763
|
+
}
|
|
764
|
+
|
|
765
|
+
AddonContextSaveSequenceStateToFileWorker* worker = new AddonContextSaveSequenceStateToFileWorker(info, this);
|
|
766
|
+
worker->Queue();
|
|
767
|
+
return worker->GetPromise();
|
|
768
|
+
}
|
|
769
|
+
|
|
770
|
+
class AddonContextLoadSequenceStateFromFileWorker : public Napi::AsyncWorker {
|
|
771
|
+
public:
|
|
772
|
+
AddonContext* context;
|
|
773
|
+
std::string filepath;
|
|
774
|
+
llama_seq_id sequenceId;
|
|
775
|
+
size_t maxContextSize;
|
|
776
|
+
std::vector<llama_token> tokens;
|
|
777
|
+
|
|
778
|
+
AddonContextLoadSequenceStateFromFileWorker(const Napi::CallbackInfo& info, AddonContext* context)
|
|
779
|
+
: Napi::AsyncWorker(info.Env(), "AddonContextLoadSequenceStateFromFileWorker"),
|
|
780
|
+
context(context),
|
|
781
|
+
deferred(Napi::Promise::Deferred::New(info.Env())) {
|
|
782
|
+
context->Ref();
|
|
783
|
+
|
|
784
|
+
filepath = info[0].As<Napi::String>().Utf8Value();
|
|
785
|
+
sequenceId = info[1].As<Napi::Number>().Int32Value();
|
|
786
|
+
maxContextSize = info[2].As<Napi::Number>().Uint32Value();
|
|
787
|
+
|
|
788
|
+
tokens.resize(maxContextSize);
|
|
789
|
+
}
|
|
790
|
+
~AddonContextLoadSequenceStateFromFileWorker() {
|
|
791
|
+
context->Unref();
|
|
792
|
+
}
|
|
793
|
+
|
|
794
|
+
Napi::Promise GetPromise() {
|
|
795
|
+
return deferred.Promise();
|
|
796
|
+
}
|
|
797
|
+
|
|
798
|
+
protected:
|
|
799
|
+
Napi::Promise::Deferred deferred;
|
|
800
|
+
|
|
801
|
+
void Execute() {
|
|
802
|
+
try {
|
|
803
|
+
size_t tokenCount = 0;
|
|
804
|
+
const size_t fileSize = llama_state_seq_load_file(context->ctx, filepath.c_str(), sequenceId, tokens.data(), tokens.size(), &tokenCount);
|
|
805
|
+
if (fileSize == 0) {
|
|
806
|
+
SetError("Failed to load state from file. Current context sequence size may be smaller that the state of the file");
|
|
807
|
+
return;
|
|
808
|
+
}
|
|
809
|
+
|
|
810
|
+
tokens.resize(tokenCount);
|
|
811
|
+
} catch (const std::exception& e) {
|
|
812
|
+
SetError(e.what());
|
|
813
|
+
} catch(...) {
|
|
814
|
+
SetError("Unknown error when calling \"llama_state_seq_load_file\"");
|
|
815
|
+
}
|
|
816
|
+
}
|
|
817
|
+
void OnOK() {
|
|
818
|
+
size_t tokenCount = tokens.size();
|
|
819
|
+
Napi::Uint32Array result = Napi::Uint32Array::New(Env(), tokenCount);
|
|
820
|
+
|
|
821
|
+
for (size_t i = 0; i < tokenCount; i++) {
|
|
822
|
+
result[i] = tokens[i];
|
|
823
|
+
}
|
|
824
|
+
|
|
825
|
+
deferred.Resolve(result);
|
|
826
|
+
}
|
|
827
|
+
void OnError(const Napi::Error& err) {
|
|
828
|
+
deferred.Reject(err.Value());
|
|
829
|
+
}
|
|
830
|
+
};
|
|
831
|
+
/**
 * Starts an asynchronous load of a sequence's state from a file.
 *
 * The arguments in `info` are forwarded untouched to
 * AddonContextLoadSequenceStateFromFileWorker, which parses them.
 *
 * Returns the worker's promise. If the context is already disposed, a JS error
 * ("Context is disposed") is thrown and `undefined` is returned instead.
 */
Napi::Value AddonContext::LoadSequenceStateFromFile(const Napi::CallbackInfo& info) {
    Napi::Env env = info.Env();

    if (!disposed) {
        // The worker is heap-allocated and handed to Queue(); it is not deleted here.
        auto* loadWorker = new AddonContextLoadSequenceStateFromFileWorker(info, this);
        loadWorker->Queue();
        return loadWorker->GetPromise();
    }

    Napi::Error::New(env, "Context is disposed").ThrowAsJavaScriptException();
    return env.Undefined();
}
|
|
841
|
+
|
|
704
842
|
Napi::Value AddonContext::PrintTimings(const Napi::CallbackInfo& info) {
|
|
705
843
|
llama_perf_context_print(ctx);
|
|
706
844
|
llama_perf_context_reset(ctx);
|
|
@@ -796,6 +934,8 @@ void AddonContext::init(Napi::Object exports) {
|
|
|
796
934
|
InstanceMethod("setThreads", &AddonContext::SetThreads),
|
|
797
935
|
InstanceMethod("printTimings", &AddonContext::PrintTimings),
|
|
798
936
|
InstanceMethod("ensureDraftContextIsCompatibleForSpeculative", &AddonContext::EnsureDraftContextIsCompatibleForSpeculative),
|
|
937
|
+
InstanceMethod("saveSequenceStateToFile", &AddonContext::SaveSequenceStateToFile),
|
|
938
|
+
InstanceMethod("loadSequenceStateFromFile", &AddonContext::LoadSequenceStateFromFile),
|
|
799
939
|
InstanceMethod("setLora", &AddonContext::SetLora),
|
|
800
940
|
InstanceMethod("dispose", &AddonContext::Dispose),
|
|
801
941
|
}
|
|
@@ -44,6 +44,9 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
|
|
|
44
44
|
Napi::Value GetThreads(const Napi::CallbackInfo& info);
|
|
45
45
|
Napi::Value SetThreads(const Napi::CallbackInfo& info);
|
|
46
46
|
|
|
47
|
+
Napi::Value SaveSequenceStateToFile(const Napi::CallbackInfo& info);
|
|
48
|
+
Napi::Value LoadSequenceStateFromFile(const Napi::CallbackInfo& info);
|
|
49
|
+
|
|
47
50
|
Napi::Value PrintTimings(const Napi::CallbackInfo& info);
|
|
48
51
|
Napi::Value EnsureDraftContextIsCompatibleForSpeculative(const Napi::CallbackInfo& info);
|
|
49
52
|
|
package/llama/gitRelease.bundle
CHANGED
|
Binary file
|
|
@@ -66,7 +66,6 @@ static bool enumerateVulkanDevices(size_t* total, size_t* used, size_t* unifiedM
|
|
|
66
66
|
features2.pNext = &vk11Features;
|
|
67
67
|
|
|
68
68
|
vkGetPhysicalDeviceFeatures2(physicalDevice, &features2);
|
|
69
|
-
VkPhysicalDeviceFeatures2 device_features2;
|
|
70
69
|
|
|
71
70
|
if (!vk11Features.storageBuffer16BitAccess) {
|
|
72
71
|
*checkSupported = false;
|
package/llama/grammars/README.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# GBNF Guide
|
|
2
2
|
|
|
3
|
-
GBNF (GGML BNF) is a format for defining [formal grammars](https://en.wikipedia.org/wiki/Formal_grammar) to constrain model outputs in `llama.cpp`. For example, you can use it to force the model to generate valid JSON, or speak only in emojis. GBNF grammars are supported in various ways in `examples/main` and `examples/server`.
|
|
3
|
+
GBNF (GGML BNF) is a format for defining [formal grammars](https://en.wikipedia.org/wiki/Formal_grammar) to constrain model outputs in `llama.cpp`. For example, you can use it to force the model to generate valid JSON, or speak only in emojis. GBNF grammars are supported in various ways in `tools/main` and `tools/server`.
|
|
4
4
|
|
|
5
5
|
## Background
|
|
6
6
|
|
|
@@ -110,21 +110,21 @@ While semantically correct, the syntax `x? x? x?.... x?` (with N repetitions) ma
|
|
|
110
110
|
|
|
111
111
|
You can use GBNF grammars:
|
|
112
112
|
|
|
113
|
-
- In [llama-server](../examples/server)'s completion endpoints, passed as the `grammar` body field
|
|
114
|
-
- In [llama-cli](../examples/main), passed as the `--grammar` & `--grammar-file` flags
|
|
115
|
-
- With [llama-gbnf-validator](../examples/gbnf-validator), to test them against strings.
|
|
113
|
+
- In [llama-server](../tools/server)'s completion endpoints, passed as the `grammar` body field
|
|
114
|
+
- In [llama-cli](../tools/main), passed as the `--grammar` & `--grammar-file` flags
|
|
115
|
+
- With [test-gbnf-validator](../tests/test-gbnf-validator.cpp), to test them against strings.
|
|
116
116
|
|
|
117
117
|
## JSON Schemas → GBNF
|
|
118
118
|
|
|
119
119
|
`llama.cpp` supports converting a subset of https://json-schema.org/ to GBNF grammars:
|
|
120
120
|
|
|
121
|
-
- In [llama-server](../examples/server):
|
|
121
|
+
- In [llama-server](../tools/server):
|
|
122
122
|
- For any completion endpoints, passed as the `json_schema` body field
|
|
123
123
|
- For the `/chat/completions` endpoint, passed inside the `response_format` body field (e.g. `{"type", "json_object", "schema": {"items": {}}}` or `{ type: "json_schema", json_schema: {"schema": ...} }`)
|
|
124
|
-
- In [llama-cli](../examples/main), passed as the `--json` / `-j` flag
|
|
124
|
+
- In [llama-cli](../tools/main), passed as the `--json` / `-j` flag
|
|
125
125
|
- To convert to a grammar ahead of time:
|
|
126
126
|
- in CLI, with [examples/json_schema_to_grammar.py](../examples/json_schema_to_grammar.py)
|
|
127
|
-
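- in JavaScript with [json-schema-to-grammar.mjs](../examples/server/public_legacy/json-schema-to-grammar.mjs) (this is used by the [server](../examples/server)'s Web UI)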
|
|
127
|
+
- in JavaScript with [json-schema-to-grammar.mjs](../tools/server/public_legacy/json-schema-to-grammar.mjs) (this is used by the [server](../tools/server)'s Web UI)
|
|
128
128
|
|
|
129
129
|
Take a look at [tests](../tests/test-json-schema-to-grammar.cpp) to see which features are likely supported (you'll also find usage examples in https://github.com/ggml-org/llama.cpp/pull/5978, https://github.com/ggml-org/llama.cpp/pull/6659 & https://github.com/ggml-org/llama.cpp/pull/6555).
|
|
130
130
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "node-llama-cpp",
|
|
3
|
-
"version": "3.6.0",
|
|
3
|
+
"version": "3.8.0",
|
|
4
4
|
"description": "Run AI models locally on your machine with node.js bindings for llama.cpp. Enforce a JSON schema on the model output on the generation level",
|
|
5
5
|
"main": "./dist/index.js",
|
|
6
6
|
"type": "module",
|
|
@@ -39,10 +39,10 @@
|
|
|
39
39
|
}
|
|
40
40
|
},
|
|
41
41
|
"engines": {
|
|
42
|
-
"node": ">=
|
|
42
|
+
"node": ">=20.0.0"
|
|
43
43
|
},
|
|
44
44
|
"scripts": {
|
|
45
|
-
"prepare": "
|
|
45
|
+
"prepare": "node --experimental-require-module -e \"process.env.CI !== 'true' && console.log(require('husky').default())\"",
|
|
46
46
|
"postinstall": "node ./dist/cli/cli.js postinstall",
|
|
47
47
|
"postversion": "vite-node scripts/postVersion.ts",
|
|
48
48
|
"prebuild": "rimraf ./dist ./tsconfig.tsbuildinfo",
|
|
@@ -111,6 +111,8 @@
|
|
|
111
111
|
"catai",
|
|
112
112
|
"mistral",
|
|
113
113
|
"deepseek",
|
|
114
|
+
"qwen",
|
|
115
|
+
"qwq",
|
|
114
116
|
"typescript",
|
|
115
117
|
"lora",
|
|
116
118
|
"batching",
|
|
@@ -131,78 +133,78 @@
|
|
|
131
133
|
},
|
|
132
134
|
"homepage": "https://node-llama-cpp.withcat.ai",
|
|
133
135
|
"devDependencies": {
|
|
134
|
-
"@commitlint/cli": "^19.
|
|
135
|
-
"@commitlint/config-conventional": "^19.
|
|
136
|
-
"@eslint/compat": "^1.2.
|
|
137
|
-
"@fontsource/inter": "^5.
|
|
138
|
-
"@nolebase/vitepress-plugin-git-changelog": "^2.
|
|
139
|
-
"@nolebase/vitepress-plugin-og-image": "^2.
|
|
136
|
+
"@commitlint/cli": "^19.8.1",
|
|
137
|
+
"@commitlint/config-conventional": "^19.8.1",
|
|
138
|
+
"@eslint/compat": "^1.2.9",
|
|
139
|
+
"@fontsource/inter": "^5.2.5",
|
|
140
|
+
"@nolebase/vitepress-plugin-git-changelog": "^2.17.0",
|
|
141
|
+
"@nolebase/vitepress-plugin-og-image": "^2.17.0",
|
|
140
142
|
"@resvg/resvg-js": "^2.6.2",
|
|
141
|
-
"@semantic-release/exec": "^
|
|
142
|
-
"@semantic-release/github": "11.0.
|
|
143
|
+
"@semantic-release/exec": "^7.1.0",
|
|
144
|
+
"@semantic-release/github": "11.0.2",
|
|
143
145
|
"@semantic-release/npm": "12.0.1",
|
|
144
|
-
"@shikijs/vitepress-twoslash": "^
|
|
145
|
-
"@stylistic/eslint-plugin": "^
|
|
146
|
+
"@shikijs/vitepress-twoslash": "^3.4.0",
|
|
147
|
+
"@stylistic/eslint-plugin": "^4.2.0",
|
|
146
148
|
"@types/async-retry": "^1.4.9",
|
|
147
149
|
"@types/bytes": "^3.1.5",
|
|
148
150
|
"@types/cross-spawn": "^6.0.6",
|
|
149
151
|
"@types/fs-extra": "^11.0.4",
|
|
150
|
-
"@types/node": "^22.
|
|
152
|
+
"@types/node": "^22.15.17",
|
|
151
153
|
"@types/proper-lockfile": "^4.1.4",
|
|
152
|
-
"@types/semver": "^7.
|
|
154
|
+
"@types/semver": "^7.7.0",
|
|
153
155
|
"@types/validate-npm-package-name": "^4.0.2",
|
|
154
156
|
"@types/which": "^3.0.4",
|
|
155
157
|
"@types/yargs": "^17.0.33",
|
|
156
|
-
"@vitest/coverage-v8": "^3.
|
|
157
|
-
"@vitest/ui": "^3.
|
|
158
|
-
"electron": "^
|
|
159
|
-
"eslint": "^9.
|
|
160
|
-
"eslint-import-resolver-typescript": "^3.
|
|
158
|
+
"@vitest/coverage-v8": "^3.1.3",
|
|
159
|
+
"@vitest/ui": "^3.1.3",
|
|
160
|
+
"electron": "^36.2.0",
|
|
161
|
+
"eslint": "^9.26.0",
|
|
162
|
+
"eslint-import-resolver-typescript": "^4.3.4",
|
|
161
163
|
"eslint-plugin-import": "^2.31.0",
|
|
162
|
-
"eslint-plugin-jsdoc": "^50.6.
|
|
163
|
-
"eslint-plugin-n": "^17.
|
|
164
|
-
"feed": "^
|
|
164
|
+
"eslint-plugin-jsdoc": "^50.6.14",
|
|
165
|
+
"eslint-plugin-n": "^17.18.0",
|
|
166
|
+
"feed": "^5.0.0",
|
|
165
167
|
"husky": "^9.1.7",
|
|
166
168
|
"rehype": "^13.0.2",
|
|
167
169
|
"rimraf": "^6.0.1",
|
|
168
|
-
"semantic-release": "^24.2.
|
|
169
|
-
"sharp": "^0.
|
|
170
|
+
"semantic-release": "^24.2.3",
|
|
171
|
+
"sharp": "^0.34.1",
|
|
170
172
|
"tslib": "^2.8.1",
|
|
171
|
-
"typedoc": "^0.
|
|
172
|
-
"typedoc-plugin-markdown": "^4.
|
|
173
|
-
"typedoc-plugin-mdn-links": "^
|
|
173
|
+
"typedoc": "^0.28.4",
|
|
174
|
+
"typedoc-plugin-markdown": "^4.6.3",
|
|
175
|
+
"typedoc-plugin-mdn-links": "^5.0.2",
|
|
174
176
|
"typedoc-vitepress-theme": "^1.1.2",
|
|
175
|
-
"typescript": "^5.
|
|
176
|
-
"typescript-eslint": "^8.
|
|
177
|
-
"vite-node": "^3.
|
|
177
|
+
"typescript": "^5.8.3",
|
|
178
|
+
"typescript-eslint": "^8.32.0",
|
|
179
|
+
"vite-node": "^3.1.3",
|
|
178
180
|
"vitepress": "^1.6.3",
|
|
179
|
-
"vitest": "^3.
|
|
180
|
-
"zx": "^8.
|
|
181
|
+
"vitest": "^3.1.3",
|
|
182
|
+
"zx": "^8.5.4"
|
|
181
183
|
},
|
|
182
184
|
"dependencies": {
|
|
183
|
-
"@huggingface/jinja": "^0.
|
|
185
|
+
"@huggingface/jinja": "^0.5.0",
|
|
184
186
|
"async-retry": "^1.3.3",
|
|
185
187
|
"bytes": "^3.1.2",
|
|
186
188
|
"chalk": "^5.4.1",
|
|
187
189
|
"chmodrp": "^1.0.2",
|
|
188
|
-
"cmake-js": "^7.3.
|
|
190
|
+
"cmake-js": "^7.3.1",
|
|
189
191
|
"cross-env": "^7.0.3",
|
|
190
192
|
"cross-spawn": "^7.0.6",
|
|
191
193
|
"env-var": "^7.5.0",
|
|
192
194
|
"filenamify": "^6.0.0",
|
|
193
195
|
"fs-extra": "^11.3.0",
|
|
194
|
-
"ignore": "^7.0.
|
|
196
|
+
"ignore": "^7.0.4",
|
|
195
197
|
"ipull": "^3.9.2",
|
|
196
198
|
"is-unicode-supported": "^2.1.0",
|
|
197
199
|
"lifecycle-utils": "^2.0.0",
|
|
198
200
|
"log-symbols": "^7.0.0",
|
|
199
|
-
"nanoid": "^5.
|
|
200
|
-
"node-addon-api": "^8.3.
|
|
201
|
-
"octokit": "^4.1.
|
|
202
|
-
"ora": "^8.
|
|
201
|
+
"nanoid": "^5.1.5",
|
|
202
|
+
"node-addon-api": "^8.3.1",
|
|
203
|
+
"octokit": "^4.1.3",
|
|
204
|
+
"ora": "^8.2.0",
|
|
203
205
|
"pretty-ms": "^9.2.0",
|
|
204
206
|
"proper-lockfile": "^4.1.2",
|
|
205
|
-
"semver": "^7.7.
|
|
207
|
+
"semver": "^7.7.1",
|
|
206
208
|
"simple-git": "^3.27.0",
|
|
207
209
|
"slice-ansi": "^7.1.0",
|
|
208
210
|
"stdout-update": "^4.0.1",
|
|
@@ -220,16 +222,16 @@
|
|
|
220
222
|
}
|
|
221
223
|
},
|
|
222
224
|
"optionalDependencies": {
|
|
223
|
-
"@node-llama-cpp/linux-arm64": "3.6.0",
|
|
224
|
-
"@node-llama-cpp/linux-armv7l": "3.6.0",
|
|
225
|
-
"@node-llama-cpp/linux-x64": "3.6.0",
|
|
226
|
-
"@node-llama-cpp/linux-x64-cuda": "3.6.0",
|
|
227
|
-
"@node-llama-cpp/linux-x64-vulkan": "3.6.0",
|
|
228
|
-
"@node-llama-cpp/mac-arm64-metal": "3.6.0",
|
|
229
|
-
"@node-llama-cpp/mac-x64": "3.6.0",
|
|
230
|
-
"@node-llama-cpp/win-arm64": "3.6.0",
|
|
231
|
-
"@node-llama-cpp/win-x64": "3.6.0",
|
|
232
|
-
"@node-llama-cpp/win-x64-cuda": "3.6.0",
|
|
233
|
-
"@node-llama-cpp/win-x64-vulkan": "3.6.0"
|
|
225
|
+
"@node-llama-cpp/linux-arm64": "3.8.0",
|
|
226
|
+
"@node-llama-cpp/linux-armv7l": "3.8.0",
|
|
227
|
+
"@node-llama-cpp/linux-x64": "3.8.0",
|
|
228
|
+
"@node-llama-cpp/linux-x64-cuda": "3.8.0",
|
|
229
|
+
"@node-llama-cpp/linux-x64-vulkan": "3.8.0",
|
|
230
|
+
"@node-llama-cpp/mac-arm64-metal": "3.8.0",
|
|
231
|
+
"@node-llama-cpp/mac-x64": "3.8.0",
|
|
232
|
+
"@node-llama-cpp/win-arm64": "3.8.0",
|
|
233
|
+
"@node-llama-cpp/win-x64": "3.8.0",
|
|
234
|
+
"@node-llama-cpp/win-x64-cuda": "3.8.0",
|
|
235
|
+
"@node-llama-cpp/win-x64-vulkan": "3.8.0"
|
|
234
236
|
}
|
|
235
237
|
}
|