node-llama-cpp 3.0.0-beta.36 → 3.0.0-beta.38
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bins/linux-arm64/_nlcBuildMetadata.json +1 -1
- package/bins/linux-arm64/libllama.so +0 -0
- package/bins/linux-arm64/llama-addon.node +0 -0
- package/bins/linux-armv7l/_nlcBuildMetadata.json +1 -1
- package/bins/linux-armv7l/libllama.so +0 -0
- package/bins/linux-armv7l/llama-addon.node +0 -0
- package/bins/linux-x64/_nlcBuildMetadata.json +1 -1
- package/bins/linux-x64/libllama.so +0 -0
- package/bins/linux-x64/llama-addon.node +0 -0
- package/bins/linux-x64-vulkan/_nlcBuildMetadata.json +1 -1
- package/bins/linux-x64-vulkan/libllama.so +0 -0
- package/bins/linux-x64-vulkan/llama-addon.node +0 -0
- package/bins/mac-arm64-metal/_nlcBuildMetadata.json +1 -1
- package/bins/mac-arm64-metal/ggml-common.h +7 -7
- package/bins/mac-arm64-metal/ggml-metal.metal +0 -1
- package/bins/mac-arm64-metal/libggml.dylib +0 -0
- package/bins/mac-arm64-metal/libllama.dylib +0 -0
- package/bins/mac-arm64-metal/llama-addon.node +0 -0
- package/bins/mac-x64/_nlcBuildMetadata.json +1 -1
- package/bins/mac-x64/libllama.dylib +0 -0
- package/bins/mac-x64/llama-addon.node +0 -0
- package/bins/win-arm64/_nlcBuildMetadata.json +1 -1
- package/bins/win-arm64/ggml.dll +0 -0
- package/bins/win-arm64/llama-addon.node +0 -0
- package/bins/win-arm64/llama.dll +0 -0
- package/bins/win-x64/_nlcBuildMetadata.json +1 -1
- package/bins/win-x64/ggml.dll +0 -0
- package/bins/win-x64/llama-addon.node +0 -0
- package/bins/win-x64/llama.dll +0 -0
- package/bins/win-x64-vulkan/_nlcBuildMetadata.json +1 -1
- package/bins/win-x64-vulkan/ggml.dll +0 -0
- package/bins/win-x64-vulkan/llama-addon.node +0 -0
- package/bins/win-x64-vulkan/llama.dll +0 -0
- package/dist/bindings/AddonTypes.d.ts +1 -0
- package/dist/bindings/Llama.d.ts +2 -2
- package/dist/bindings/Llama.js +4 -1
- package/dist/bindings/Llama.js.map +1 -1
- package/dist/bindings/getLlama.d.ts +5 -2
- package/dist/bindings/getLlama.js +2 -0
- package/dist/bindings/getLlama.js.map +1 -1
- package/dist/bindings/types.d.ts +1 -0
- package/dist/bindings/types.js.map +1 -1
- package/dist/bindings/utils/getGpuTypesToUseForOption.d.ts +2 -1
- package/dist/bindings/utils/getGpuTypesToUseForOption.js +13 -4
- package/dist/bindings/utils/getGpuTypesToUseForOption.js.map +1 -1
- package/dist/chatWrappers/Llama3ChatWrapper.js +2 -2
- package/dist/chatWrappers/Llama3ChatWrapper.js.map +1 -1
- package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.d.ts +9 -0
- package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.js.map +1 -1
- package/dist/cli/commands/ChatCommand.d.ts +1 -0
- package/dist/cli/commands/ChatCommand.js +15 -6
- package/dist/cli/commands/ChatCommand.js.map +1 -1
- package/dist/cli/commands/CompleteCommand.d.ts +1 -0
- package/dist/cli/commands/CompleteCommand.js +13 -4
- package/dist/cli/commands/CompleteCommand.js.map +1 -1
- package/dist/cli/commands/InfillCommand.d.ts +1 -0
- package/dist/cli/commands/InfillCommand.js +13 -4
- package/dist/cli/commands/InfillCommand.js.map +1 -1
- package/dist/cli/commands/inspect/commands/InspectMeasureCommand.d.ts +1 -0
- package/dist/cli/commands/inspect/commands/InspectMeasureCommand.js +19 -6
- package/dist/cli/commands/inspect/commands/InspectMeasureCommand.js.map +1 -1
- package/dist/cli/utils/interactivelyAskForModel.d.ts +2 -1
- package/dist/cli/utils/interactivelyAskForModel.js +19 -9
- package/dist/cli/utils/interactivelyAskForModel.js.map +1 -1
- package/dist/cli/utils/printCommonInfoLines.js +4 -0
- package/dist/cli/utils/printCommonInfoLines.js.map +1 -1
- package/dist/cli/utils/resolveCommandGgufPath.d.ts +2 -1
- package/dist/cli/utils/resolveCommandGgufPath.js +3 -2
- package/dist/cli/utils/resolveCommandGgufPath.js.map +1 -1
- package/dist/evaluator/LlamaContext/LlamaContext.d.ts +1 -0
- package/dist/evaluator/LlamaContext/LlamaContext.js +15 -4
- package/dist/evaluator/LlamaContext/LlamaContext.js.map +1 -1
- package/dist/evaluator/LlamaContext/types.d.ts +14 -0
- package/dist/evaluator/LlamaModel/LlamaModel.d.ts +22 -0
- package/dist/evaluator/LlamaModel/LlamaModel.js +49 -4
- package/dist/evaluator/LlamaModel/LlamaModel.js.map +1 -1
- package/dist/gguf/insights/GgufInsights.d.ts +3 -1
- package/dist/gguf/insights/GgufInsights.js +18 -2
- package/dist/gguf/insights/GgufInsights.js.map +1 -1
- package/dist/gguf/insights/GgufInsightsConfigurationResolver.d.ts +6 -3
- package/dist/gguf/insights/GgufInsightsConfigurationResolver.js +8 -5
- package/dist/gguf/insights/GgufInsightsConfigurationResolver.js.map +1 -1
- package/dist/gguf/insights/utils/resolveContextContextSizeOption.d.ts +2 -1
- package/dist/gguf/insights/utils/resolveContextContextSizeOption.js +3 -1
- package/dist/gguf/insights/utils/resolveContextContextSizeOption.js.map +1 -1
- package/dist/gguf/insights/utils/resolveModelGpuLayersOption.d.ts +2 -1
- package/dist/gguf/insights/utils/resolveModelGpuLayersOption.js +16 -10
- package/dist/gguf/insights/utils/resolveModelGpuLayersOption.js.map +1 -1
- package/dist/gguf/types/GgufMetadataTypes.d.ts +15 -10
- package/dist/gguf/types/GgufMetadataTypes.js +15 -2
- package/dist/gguf/types/GgufMetadataTypes.js.map +1 -1
- package/dist/index.d.ts +3 -3
- package/dist/index.js.map +1 -1
- package/llama/addon.cpp +13 -26
- package/llama/binariesGithubRelease.json +1 -1
- package/llama/gitRelease.bundle +0 -0
- package/llama/llama.cpp.info.json +1 -1
- package/package.json +3 -3
|
@@ -4,7 +4,7 @@ import { getDefaultContextBatchSize, getDefaultModelContextSize } from "../../..
|
|
|
4
4
|
import { minAllowedContextSizeInCalculations } from "../../../config.js";
|
|
5
5
|
import { scoreLevels } from "./scoreLevels.js";
|
|
6
6
|
const fitContextExtraMemoryPaddingPercentage = 0.5;
|
|
7
|
-
export async function resolveModelGpuLayersOption(gpuLayers, { ggufInsights, ignoreMemorySafetyChecks = false, getVramState, llamaVramPaddingSize, llamaGpu, llamaSupportsGpuOffloading }) {
|
|
7
|
+
export async function resolveModelGpuLayersOption(gpuLayers, { ggufInsights, ignoreMemorySafetyChecks = false, getVramState, llamaVramPaddingSize, llamaGpu, llamaSupportsGpuOffloading, defaultContextFlashAttention }) {
|
|
8
8
|
if (gpuLayers == null)
|
|
9
9
|
gpuLayers = "auto";
|
|
10
10
|
if (!llamaSupportsGpuOffloading)
|
|
@@ -19,7 +19,8 @@ export async function resolveModelGpuLayersOption(gpuLayers, { ggufInsights, ign
|
|
|
19
19
|
const maxLayersRequirements = getVramRequiredForGpuLayers({
|
|
20
20
|
gpuLayers: resolvedGpuLayers,
|
|
21
21
|
ggufInsights,
|
|
22
|
-
currentVram: vramState.free
|
|
22
|
+
currentVram: vramState.free,
|
|
23
|
+
defaultContextFlashAttention
|
|
23
24
|
});
|
|
24
25
|
if (maxLayersRequirements == null)
|
|
25
26
|
throw new InsufficientMemoryError("Not enough VRAM to fit the model with the specified settings");
|
|
@@ -48,7 +49,8 @@ export async function resolveModelGpuLayersOption(gpuLayers, { ggufInsights, ign
|
|
|
48
49
|
: undefined,
|
|
49
50
|
maxGpuLayers: typeof gpuLayers === "object"
|
|
50
51
|
? gpuLayers.max
|
|
51
|
-
: undefined
|
|
52
|
+
: undefined,
|
|
53
|
+
defaultContextFlashAttention
|
|
52
54
|
});
|
|
53
55
|
const hasGpuLayersRequirements = typeof gpuLayers === "object" &&
|
|
54
56
|
(gpuLayers.min != null || gpuLayers.max != null || gpuLayers.fitContext?.contextSize != null);
|
|
@@ -58,7 +60,7 @@ export async function resolveModelGpuLayersOption(gpuLayers, { ggufInsights, ign
|
|
|
58
60
|
}
|
|
59
61
|
throw new Error(`Invalid gpuLayers value: ${gpuLayers}`);
|
|
60
62
|
}
|
|
61
|
-
function getBestGpuLayersForFreeVram({ ggufInsights, freeVram, fitContext, minGpuLayers, maxGpuLayers }) {
|
|
63
|
+
function getBestGpuLayersForFreeVram({ ggufInsights, freeVram, fitContext, minGpuLayers, maxGpuLayers, defaultContextFlashAttention }) {
|
|
62
64
|
return findBestOption({
|
|
63
65
|
*generator() {
|
|
64
66
|
const minLayers = Math.floor(Math.max(0, minGpuLayers ?? 0));
|
|
@@ -74,7 +76,8 @@ function getBestGpuLayersForFreeVram({ ggufInsights, freeVram, fitContext, minGp
|
|
|
74
76
|
gpuLayers: option.gpuLayers,
|
|
75
77
|
ggufInsights,
|
|
76
78
|
currentVram: freeVram,
|
|
77
|
-
fitContext
|
|
79
|
+
fitContext,
|
|
80
|
+
defaultContextFlashAttention
|
|
78
81
|
});
|
|
79
82
|
if (layersRequirements == null)
|
|
80
83
|
return null;
|
|
@@ -121,7 +124,7 @@ function scoreGpuLayersAndContextCombination({ gpuLayers, contextSize }, { total
|
|
|
121
124
|
}
|
|
122
125
|
return scoreGpuLayers() + scoreContextSize();
|
|
123
126
|
}
|
|
124
|
-
function getVramRequiredForGpuLayers({ gpuLayers, ggufInsights, currentVram, fitContext }) {
|
|
127
|
+
function getVramRequiredForGpuLayers({ gpuLayers, ggufInsights, currentVram, fitContext, defaultContextFlashAttention = false }) {
|
|
125
128
|
const modelVram = ggufInsights.estimateModelResourceRequirements({ gpuLayers }).gpuVram;
|
|
126
129
|
if (modelVram > currentVram)
|
|
127
130
|
return null;
|
|
@@ -131,7 +134,8 @@ function getVramRequiredForGpuLayers({ gpuLayers, ggufInsights, currentVram, fit
|
|
|
131
134
|
batchSize: getDefaultContextBatchSize({ contextSize: fitContext.contextSize, sequences: 1 }),
|
|
132
135
|
modelGpuLayers: gpuLayers,
|
|
133
136
|
sequences: 1,
|
|
134
|
-
isEmbeddingContext: fitContext.embeddingContext ?? false
|
|
137
|
+
isEmbeddingContext: fitContext.embeddingContext ?? false,
|
|
138
|
+
flashAttention: defaultContextFlashAttention
|
|
135
139
|
}).gpuVram;
|
|
136
140
|
const totalVram = modelVram + contextVram;
|
|
137
141
|
if (totalVram > currentVram)
|
|
@@ -146,7 +150,8 @@ function getVramRequiredForGpuLayers({ gpuLayers, ggufInsights, currentVram, fit
|
|
|
146
150
|
gpuLayers,
|
|
147
151
|
ggufInsights,
|
|
148
152
|
vram: currentVram - modelVram,
|
|
149
|
-
isEmbeddingContext: fitContext?.embeddingContext ?? false
|
|
153
|
+
isEmbeddingContext: fitContext?.embeddingContext ?? false,
|
|
154
|
+
flashAttention: defaultContextFlashAttention
|
|
150
155
|
});
|
|
151
156
|
if (maxContext == null || modelVram + maxContext.vram > currentVram)
|
|
152
157
|
return null;
|
|
@@ -156,7 +161,7 @@ function getVramRequiredForGpuLayers({ gpuLayers, ggufInsights, currentVram, fit
|
|
|
156
161
|
totalVram: modelVram + maxContext.vram
|
|
157
162
|
};
|
|
158
163
|
}
|
|
159
|
-
function findMaxPossibleContextSizeForVram({ gpuLayers, ggufInsights, vram, isEmbeddingContext }) {
|
|
164
|
+
function findMaxPossibleContextSizeForVram({ gpuLayers, ggufInsights, vram, isEmbeddingContext, flashAttention }) {
|
|
160
165
|
const maxContextSize = getDefaultModelContextSize({ trainContextSize: ggufInsights.trainContextSize });
|
|
161
166
|
for (let contextSize = maxContextSize; contextSize >= minAllowedContextSizeInCalculations; contextSize--) {
|
|
162
167
|
const contextVram = ggufInsights.estimateContextResourceRequirements({
|
|
@@ -164,7 +169,8 @@ function findMaxPossibleContextSizeForVram({ gpuLayers, ggufInsights, vram, isEm
|
|
|
164
169
|
batchSize: getDefaultContextBatchSize({ contextSize, sequences: 1 }),
|
|
165
170
|
modelGpuLayers: gpuLayers,
|
|
166
171
|
sequences: 1,
|
|
167
|
-
isEmbeddingContext
|
|
172
|
+
isEmbeddingContext,
|
|
173
|
+
flashAttention
|
|
168
174
|
}).gpuVram;
|
|
169
175
|
if (contextVram <= vram)
|
|
170
176
|
return {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"resolveModelGpuLayersOption.js","sourceRoot":"","sources":["../../../../src/gguf/insights/utils/resolveModelGpuLayersOption.ts"],"names":[],"mappings":"AAEA,OAAO,EAAC,uBAAuB,EAAC,MAAM,2CAA2C,CAAC;AAClF,OAAO,EAAC,cAAc,EAAC,MAAM,kCAAkC,CAAC;AAChE,OAAO,EAAC,0BAA0B,EAAE,0BAA0B,EAAC,MAAM,iDAAiD,CAAC;AACvH,OAAO,EAAC,mCAAmC,EAAC,MAAM,oBAAoB,CAAC;AACvE,OAAO,EAAC,WAAW,EAAC,MAAM,kBAAkB,CAAC;AAG7C,MAAM,sCAAsC,GAAG,GAAG,CAAC;AAEnD,MAAM,CAAC,KAAK,UAAU,2BAA2B,CAAC,SAAyC,EAAE,EACzF,YAAY,EAAE,wBAAwB,GAAG,KAAK,EAAE,YAAY,EAAE,oBAAoB,EAClF,QAAQ,EAAE,0BAA0B,
|
|
1
|
+
{"version":3,"file":"resolveModelGpuLayersOption.js","sourceRoot":"","sources":["../../../../src/gguf/insights/utils/resolveModelGpuLayersOption.ts"],"names":[],"mappings":"AAEA,OAAO,EAAC,uBAAuB,EAAC,MAAM,2CAA2C,CAAC;AAClF,OAAO,EAAC,cAAc,EAAC,MAAM,kCAAkC,CAAC;AAChE,OAAO,EAAC,0BAA0B,EAAE,0BAA0B,EAAC,MAAM,iDAAiD,CAAC;AACvH,OAAO,EAAC,mCAAmC,EAAC,MAAM,oBAAoB,CAAC;AACvE,OAAO,EAAC,WAAW,EAAC,MAAM,kBAAkB,CAAC;AAG7C,MAAM,sCAAsC,GAAG,GAAG,CAAC;AAEnD,MAAM,CAAC,KAAK,UAAU,2BAA2B,CAAC,SAAyC,EAAE,EACzF,YAAY,EAAE,wBAAwB,GAAG,KAAK,EAAE,YAAY,EAAE,oBAAoB,EAClF,QAAQ,EAAE,0BAA0B,EAAE,4BAA4B,EAKrE;IACG,IAAI,SAAS,IAAI,IAAI;QACjB,SAAS,GAAG,MAAM,CAAC;IAEvB,IAAI,CAAC,0BAA0B;QAC3B,OAAO,CAAC,CAAC;IAEb,IAAI,SAAS,KAAK,KAAK,IAAI,OAAO,SAAS,KAAK,QAAQ,EAAE,CAAC;QACvD,MAAM,iBAAiB,GAAG,OAAO,SAAS,KAAK,QAAQ;YACnD,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,YAAY,CAAC,WAAW,EAAE,SAAS,CAAC,CAAC;YAC5D,CAAC,CAAC,YAAY,CAAC,WAAW,CAAC;QAE/B,IAAI,wBAAwB;YACxB,OAAO,iBAAiB,CAAC;QAE7B,MAAM,SAAS,GAAG,MAAM,YAAY,EAAE,CAAC;QACvC,MAAM,qBAAqB,GAAG,2BAA2B,CAAC;YACtD,SAAS,EAAE,iBAAiB;YAC5B,YAAY;YACZ,WAAW,EAAE,SAAS,CAAC,IAAI;YAC3B,4BAA4B;SAC/B,CAAC,CAAC;QAEH,IAAI,qBAAqB,IAAI,IAAI;YAC7B,MAAM,IAAI,uBAAuB,CAAC,8DAA8D,CAAC,CAAC;QAEtG,OAAO,iBAAiB,CAAC;IAC7B,CAAC;SAAM,IAAI,SAAS,KAAK,MAAM,IAAI,OAAO,SAAS,KAAK,QAAQ,EAAE,CAAC;QAC/D,IAAI,QAAQ,KAAK,KAAK;YAClB,OAAO,CAAC,CAAC;QAEb,MAAM,SAAS,GAAG,MAAM,YAAY,EAAE,CAAC;QACvC,IAAI,SAAS,CAAC,KAAK,KAAK,CAAC;YACrB,OAAO,CAAC,CAAC;QAEb,IAAI,QAAQ,GAAG,SAAS,CAAC,IAAI,CAAC;QAC9B,IAAI,OAAO,SAAS,KAAK,QAAQ,IAAI,SAAS,CAAC,UAAU,EAAE,WAAW,IAAI,IAAI,EAAE,CAAC;YAC7E,QAAQ,IAAI,oBAAoB,GAAG,sCAAsC,CAAC;YAE1E,IAAI,QAAQ,GAAG,CAAC;gBACZ,QAAQ,GAAG,CAAC,CAAC;QACrB,CAAC;QAED,MAAM,mBAAmB,GAAG,2BAA2B,CAAC;YACpD,YAAY;YACZ,QAAQ;YACR,UAAU,EAAE,OAAO,SAAS,KAAK,QAAQ;gBACrC,CAAC,CAAC,SAAS,CAAC,UAAU;gBACtB,CAAC,CAAC,SAAS;YACf,YAAY,EAAE,OAAO,SAAS,KAAK,QAAQ;gBACvC,CAAC,CAAC,SAAS,CAAC,GAAG;gBACf,CAAC,CAAC,SAAS;YACf,YAAY,EAAE,OAAO,SAAS,KAAK,QAAQ;gBACvC,CAAC,CAAC,SAAS,CAAC,GAAG;gBACf,CAAC,CAAC,SAAS;YACf,4BAA4B;SAC/B,CAAC,CAAC;QAEH,MAAM,wBAAwB,GAAG,OAAO,SAAS,KAAK,QAAQ;YAC1D,CAAC,SAAS,CAAC,GAAG,IAAI,IAAI,IAAI,SAAS,CAAC,GAAG,IAAI,IAAI,IAAI,SAAS,CAAC,UAAU,EAAE,WAAW,IAAI,IAAI,CAAC,CAAC;QAElG,IAAI,CAAC,wBAAwB,IAAI,mBAAmB,IAAI,IAAI,IAAI,wBAAwB;YACpF,MAAM,IAAI,uBAAuB,CAAC,8DAA8D,CAAC,CAAC;QAEtG,OAAO,mBAAmB,IAAI,CAAC,CAAC;IACpC,CAAC;IAED,MAAM,IAAI,KAAK,CAAC,4BAA4B,SAAS,EAAE,CAAC,CAAC;AAC7D,CAAC;AAED,SAAS,2BAA2B,CAAC,EACjC,YAAY,EACZ,QAAQ,EACR,UAAU,EACV,YAAY,EACZ,YAAY,EACZ,4BAA4B,EAQ/B;IACG,OAAO,cAAc,CAAC;QAClB,CAAC,SAAS;YACN,MAAM,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,YAAY,IAAI,CAAC,CAAC,CAAC,CAAC;YAC7D,MAAM,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,YAAY,CAAC,WAAW,EAAE,YAAY,IAAI,YAAY,CAAC,WAAW,CAAC,CAAC,CAAC;YAE3G,KAAK,IAAI,MAAM,GAAG,SAAS,EAAE,MAAM,IAAI,SAAS,EAAE,MAAM,EAAE,EAAE,CAAC;gBACzD,MAAM;oBACF,SAAS,EAAE,MAAM;iBACpB,CAAC;YACN,CAAC;QACL,CAAC;QACD,KAAK,CAAC,MAAM;YACR,MAAM,kBAAkB,GAAG,2BAA2B,CAAC;gBACnD,SAAS,EAAE,MAAM,CAAC,SAAS;gBAC3B,YAAY;gBACZ,WAAW,EAAE,QAAQ;gBACrB,UAAU;gBACV,4BAA4B;aAC/B,CAAC,CAAC;YAEH,IAAI,kBAAkB,IAAI,IAAI;gBAC1B,OAAO,IAAI,CAAC;YAEhB,OAAO,mCAAmC,CAAC,EAAC,SAAS,EAAE,MAAM,CAAC,SAAS,EAAE,WAAW,EAAE,kBAAkB,CAAC,WAAW,EAAC,EAAE;gBACnH,cAAc,EAAE,YAAY,CAAC,WAAW;gBACxC,gBAAgB,EAAE,0BAA0B,CAAC,EAAC,gBAAgB,EAAE,YAAY,CAAC,gBAAgB,EAAC,CAAC;aAClG,CAAC,CAAC;QACP,CAAC;KACJ,CAAC,EAAE,SAAS,IAAI,IAAI,CAAC;AAC1B,CAAC;AAED,SAAS,mCAAmC,CAAC,EAAC,SAAS,EAAE,WAAW,EAA2C,EAAE,EAC7G,cAAc,EAAE,gBAAgB,EAGnC;IACG,SAAS,cAAc;QACnB,OAAO,WAAW,CAAC,SAAS,EAAE,CAAC;gBAC3B,KAAK,EAAE,CAAC;gBACR,MAAM,EAAE,CAAC;aACZ,EAAE;gBACC,KAAK,EAAE,CAAC;gBACR,MAAM,EAAE,EAAE;aACb,EAAE;gBACC,KAAK,EAAE,cAAc;gBACrB,MAAM,EAAE,EAAE;gBACV,GAAG,EAAE,cAAc;aACtB,CAAC,CAAC,CAAC;IACR,CAAC;IAED,SAAS,gBAAgB;QACrB,MAAM,mBAAmB,GAAG,SAAS,GAAG,cAAc,CAAC;QAEvD,OAAO,WAAW,CAAC,WAAW,EAAE,CAAC;gBAC7B,KAAK,EAAE,CAAC;gBACR,MAAM,EAAE,CAAC;aACZ,EAAE;gBACC,KAAK,EAAE,IAAI;gBACX,MAAM,EAAE,CAAC;aACZ,EAAE;gBACC,KAAK,EAAE,IAAI;gBACX,MAAM,EAAE,mBAAmB,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;aAC5C,EAAE;gBACC,KAAK,EAAE,IAAI;gBACX,MAAM,EAAE,mBAAmB,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE;aAC7C,EAAE;gBACC,KAAK,EAAE,IAAI;gBACX,MAAM,EAAE,mBAAmB,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;gBACzC,GAAG,EAAE,IAAI,CAAC,GAAG,CAAC,gBAAgB,EAAE,KAAK,CAAC;aACzC,CAAC,CAAC,CAAC;IACR,CAAC;IAED,OAAO,cAAc,EAAE,GAAG,gBAAgB,EAAE,CAAC;AACjD,CAAC;AAED,SAAS,2BAA2B,CAAC,EACjC,SAAS,EAAE,YAAY,EAAE,WAAW,EAAE,UAAU,EAAE,4BAA4B,GAAG,KAAK,EAIzF;IACG,MAAM,SAAS,GAAG,YAAY,CAAC,iCAAiC,CAAC,EAAC,SAAS,EAAC,CAAC,CAAC,OAAO,CAAC;IAEtF,IAAI,SAAS,GAAG,WAAW;QACvB,OAAO,IAAI,CAAC;IAEhB,IAAI,UAAU,IAAI,IAAI,IAAI,UAAU,CAAC,WAAW,IAAI,IAAI,EAAE,CAAC;QACvD,MAAM,WAAW,GAAG,YAAY,CAAC,mCAAmC,CAAC;YACjE,WAAW,EAAE,UAAU,CAAC,WAAW;YACnC,SAAS,EAAE,0BAA0B,CAAC,EAAC,WAAW,EAAE,UAAU,CAAC,WAAW,EAAE,SAAS,EAAE,CAAC,EAAC,CAAC;YAC1F,cAAc,EAAE,SAAS;YACzB,SAAS,EAAE,CAAC;YACZ,kBAAkB,EAAE,UAAU,CAAC,gBAAgB,IAAI,KAAK;YACxD,cAAc,EAAE,4BAA4B;SAC/C,CAAC,CAAC,OAAO,CAAC;QAEX,MAAM,SAAS,GAAG,SAAS,GAAG,WAAW,CAAC;QAC1C,IAAI,SAAS,GAAG,WAAW;YACvB,OAAO,IAAI,CAAC;QAEhB,OAAO;YACH,WAAW,EAAE,UAAU,CAAC,WAAW;YACnC,WAAW;YACX,SAAS;SACZ,CAAC;IACN,CAAC;IAED,MAAM,UAAU,GAAG,iCAAiC,CAAC;QACjD,SAAS;QACT,YAAY;QACZ,IAAI,EAAE,WAAW,GAAG,SAAS;QAC7B,kBAAkB,EAAE,UAAU,EAAE,gBAAgB,IAAI,KAAK;QACzD,cAAc,EAAE,4BAA4B;KAC/C,CAAC,CAAC;IAEH,IAAI,UAAU,IAAI,IAAI,IAAI,SAAS,GAAG,UAAU,CAAC,IAAI,GAAG,WAAW;QAC/D,OAAO,IAAI,CAAC;IAEhB,OAAO;QACH,WAAW,EAAE,UAAU,CAAC,WAAW;QACnC,WAAW,EAAE,UAAU,CAAC,IAAI;QAC5B,SAAS,EAAE,SAAS,GAAG,UAAU,CAAC,IAAI;KACzC,CAAC;AACN,CAAC;AAED,SAAS,iCAAiC,CAAC,EAAC,SAAS,EAAE,YAAY,EAAE,IAAI,EAAE,kBAAkB,EAAE,cAAc,EAE5G;IACG,MAAM,cAAc,GAAG,0BAA0B,CAAC,EAAC,gBAAgB,EAAE,YAAY,CAAC,gBAAgB,EAAC,CAAC,CAAC;IAErG,KAAK,IAAI,WAAW,GAAG,cAAc,EAAE,WAAW,IAAI,mCAAmC,EAAE,WAAW,EAAE,EAAE,CAAC;QACvG,MAAM,WAAW,GAAG,YAAY,CAAC,mCAAmC,CAAC;YACjE,WAAW;YACX,SAAS,EAAE,0BAA0B,CAAC,EAAC,WAAW,EAAE,SAAS,EAAE,CAAC,EAAC,CAAC;YAClE,cAAc,EAAE,SAAS;YACzB,SAAS,EAAE,CAAC;YACZ,kBAAkB;YAClB,cAAc;SACjB,CAAC,CAAC,OAAO,CAAC;QAEX,IAAI,WAAW,IAAI,IAAI;YACnB,OAAO;gBACH,WAAW;gBACX,IAAI,EAAE,WAAW;aACpB,CAAC;IACV,CAAC;IAED,OAAO,IAAI,CAAC;AAChB,CAAC"}
|
|
@@ -1,31 +1,44 @@
|
|
|
1
1
|
export declare const enum GgufArchitectureType {
|
|
2
2
|
llama = "llama",
|
|
3
3
|
falcon = "falcon",
|
|
4
|
+
grok = "grok",
|
|
4
5
|
gpt2 = "gpt2",
|
|
5
6
|
gptj = "gptj",
|
|
6
7
|
gptneox = "gptneox",
|
|
7
8
|
mpt = "mpt",
|
|
8
9
|
baichuan = "baichuan",
|
|
9
10
|
starcoder = "starcoder",
|
|
10
|
-
persimmon = "persimmon",
|
|
11
11
|
refact = "refact",
|
|
12
12
|
bert = "bert",
|
|
13
13
|
nomicBert = "nomic-bert",
|
|
14
|
+
jinaBertV2 = "jina-bert-v2",
|
|
14
15
|
bloom = "bloom",
|
|
15
16
|
stablelm = "stablelm",
|
|
16
17
|
qwen = "qwen",
|
|
17
18
|
qwen2 = "qwen2",
|
|
19
|
+
qwen2moe = "qwen2moe",
|
|
18
20
|
phi2 = "phi2",
|
|
21
|
+
phi3 = "phi3",
|
|
19
22
|
plamo = "plamo",
|
|
20
23
|
codeshell = "codeshell",
|
|
21
24
|
orion = "orion",
|
|
22
25
|
internlm2 = "internlm2",
|
|
23
26
|
minicpm = "minicpm",
|
|
24
27
|
gemma = "gemma",
|
|
28
|
+
gemma2 = "gemma2",
|
|
25
29
|
starcoder2 = "starcoder2",
|
|
26
30
|
mamba = "mamba",
|
|
31
|
+
xverse = "xverse",
|
|
27
32
|
commandR = "command-r",
|
|
28
|
-
|
|
33
|
+
dbrx = "dbrx",
|
|
34
|
+
olmo = "olmo",
|
|
35
|
+
openelm = "openelm",
|
|
36
|
+
arctic = "arctic",
|
|
37
|
+
deepseek2 = "deepseek2",
|
|
38
|
+
bitnet = "bitnet",
|
|
39
|
+
t5 = "t5",
|
|
40
|
+
jais = "jais",
|
|
41
|
+
unknown = "(unknown)"
|
|
29
42
|
}
|
|
30
43
|
export type GgufMetadata<A extends GgufArchitectureType = GgufArchitectureType> = {
|
|
31
44
|
readonly general: GgufMetadataGeneral<A>;
|
|
@@ -44,7 +57,6 @@ export type GgufMetadataLlmToType = {
|
|
|
44
57
|
[GgufArchitectureType.bloom]: GgufMetadataBloom;
|
|
45
58
|
[GgufArchitectureType.falcon]: GgufMetadataFalcon;
|
|
46
59
|
[GgufArchitectureType.mamba]: GgufMetadataMamba;
|
|
47
|
-
[GgufArchitectureType.rwkv]: GgufMetadataRWKV;
|
|
48
60
|
};
|
|
49
61
|
export declare enum GgufFileType {
|
|
50
62
|
ALL_F32 = 0,
|
|
@@ -325,11 +337,4 @@ export type GgufMetadataMamba = {
|
|
|
325
337
|
readonly layer_norm_rms_epsilon: number;
|
|
326
338
|
};
|
|
327
339
|
};
|
|
328
|
-
export type GgufMetadataRWKV = {
|
|
329
|
-
readonly architecture_version: 4 | number;
|
|
330
|
-
readonly context_length: number;
|
|
331
|
-
readonly block_count: number;
|
|
332
|
-
readonly embedding_length: number;
|
|
333
|
-
readonly feed_forward_length: number;
|
|
334
|
-
};
|
|
335
340
|
export declare function isGgufMetadataOfArchitectureType<A extends GgufArchitectureType>(metadata: GgufMetadata, type: A): metadata is GgufMetadata<A>;
|
|
@@ -2,31 +2,44 @@ export var GgufArchitectureType;
|
|
|
2
2
|
(function (GgufArchitectureType) {
|
|
3
3
|
GgufArchitectureType["llama"] = "llama";
|
|
4
4
|
GgufArchitectureType["falcon"] = "falcon";
|
|
5
|
+
GgufArchitectureType["grok"] = "grok";
|
|
5
6
|
GgufArchitectureType["gpt2"] = "gpt2";
|
|
6
7
|
GgufArchitectureType["gptj"] = "gptj";
|
|
7
8
|
GgufArchitectureType["gptneox"] = "gptneox";
|
|
8
9
|
GgufArchitectureType["mpt"] = "mpt";
|
|
9
10
|
GgufArchitectureType["baichuan"] = "baichuan";
|
|
10
11
|
GgufArchitectureType["starcoder"] = "starcoder";
|
|
11
|
-
GgufArchitectureType["persimmon"] = "persimmon";
|
|
12
12
|
GgufArchitectureType["refact"] = "refact";
|
|
13
13
|
GgufArchitectureType["bert"] = "bert";
|
|
14
14
|
GgufArchitectureType["nomicBert"] = "nomic-bert";
|
|
15
|
+
GgufArchitectureType["jinaBertV2"] = "jina-bert-v2";
|
|
15
16
|
GgufArchitectureType["bloom"] = "bloom";
|
|
16
17
|
GgufArchitectureType["stablelm"] = "stablelm";
|
|
17
18
|
GgufArchitectureType["qwen"] = "qwen";
|
|
18
19
|
GgufArchitectureType["qwen2"] = "qwen2";
|
|
20
|
+
GgufArchitectureType["qwen2moe"] = "qwen2moe";
|
|
19
21
|
GgufArchitectureType["phi2"] = "phi2";
|
|
22
|
+
GgufArchitectureType["phi3"] = "phi3";
|
|
20
23
|
GgufArchitectureType["plamo"] = "plamo";
|
|
21
24
|
GgufArchitectureType["codeshell"] = "codeshell";
|
|
22
25
|
GgufArchitectureType["orion"] = "orion";
|
|
23
26
|
GgufArchitectureType["internlm2"] = "internlm2";
|
|
24
27
|
GgufArchitectureType["minicpm"] = "minicpm";
|
|
25
28
|
GgufArchitectureType["gemma"] = "gemma";
|
|
29
|
+
GgufArchitectureType["gemma2"] = "gemma2";
|
|
26
30
|
GgufArchitectureType["starcoder2"] = "starcoder2";
|
|
27
31
|
GgufArchitectureType["mamba"] = "mamba";
|
|
32
|
+
GgufArchitectureType["xverse"] = "xverse";
|
|
28
33
|
GgufArchitectureType["commandR"] = "command-r";
|
|
29
|
-
GgufArchitectureType["
|
|
34
|
+
GgufArchitectureType["dbrx"] = "dbrx";
|
|
35
|
+
GgufArchitectureType["olmo"] = "olmo";
|
|
36
|
+
GgufArchitectureType["openelm"] = "openelm";
|
|
37
|
+
GgufArchitectureType["arctic"] = "arctic";
|
|
38
|
+
GgufArchitectureType["deepseek2"] = "deepseek2";
|
|
39
|
+
GgufArchitectureType["bitnet"] = "bitnet";
|
|
40
|
+
GgufArchitectureType["t5"] = "t5";
|
|
41
|
+
GgufArchitectureType["jais"] = "jais";
|
|
42
|
+
GgufArchitectureType["unknown"] = "(unknown)";
|
|
30
43
|
})(GgufArchitectureType || (GgufArchitectureType = {}));
|
|
31
44
|
// source: `enum llama_ftype` in `llama.h` in the `llama.cpp` source code
|
|
32
45
|
export var GgufFileType;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"GgufMetadataTypes.js","sourceRoot":"","sources":["../../../src/gguf/types/GgufMetadataTypes.ts"],"names":[],"mappings":"AAAA,MAAM,CAAN,IAAkB,
|
|
1
|
+
{"version":3,"file":"GgufMetadataTypes.js","sourceRoot":"","sources":["../../../src/gguf/types/GgufMetadataTypes.ts"],"names":[],"mappings":"AAAA,MAAM,CAAN,IAAkB,oBAyCjB;AAzCD,WAAkB,oBAAoB;IAClC,uCAAe,CAAA;IACf,yCAAiB,CAAA;IACjB,qCAAa,CAAA;IACb,qCAAa,CAAA;IACb,qCAAa,CAAA;IACb,2CAAmB,CAAA;IACnB,mCAAW,CAAA;IACX,6CAAqB,CAAA;IACrB,+CAAuB,CAAA;IACvB,yCAAiB,CAAA;IACjB,qCAAa,CAAA;IACb,gDAAwB,CAAA;IACxB,mDAA2B,CAAA;IAC3B,uCAAe,CAAA;IACf,6CAAqB,CAAA;IACrB,qCAAa,CAAA;IACb,uCAAe,CAAA;IACf,6CAAqB,CAAA;IACrB,qCAAa,CAAA;IACb,qCAAa,CAAA;IACb,uCAAe,CAAA;IACf,+CAAuB,CAAA;IACvB,uCAAe,CAAA;IACf,+CAAuB,CAAA;IACvB,2CAAmB,CAAA;IACnB,uCAAe,CAAA;IACf,yCAAiB,CAAA;IACjB,iDAAyB,CAAA;IACzB,uCAAe,CAAA;IACf,yCAAiB,CAAA;IACjB,8CAAsB,CAAA;IACtB,qCAAa,CAAA;IACb,qCAAa,CAAA;IACb,2CAAmB,CAAA;IACnB,yCAAiB,CAAA;IACjB,+CAAuB,CAAA;IACvB,yCAAiB,CAAA;IACjB,iCAAS,CAAA;IACT,qCAAa,CAAA;IACb,6CAAqB,CAAA;AACzB,CAAC,EAzCiB,oBAAoB,KAApB,oBAAoB,QAyCrC;AA8BD,yEAAyE;AACzE,MAAM,CAAN,IAAY,YAgCX;AAhCD,WAAY,YAAY;IACpB,qDAAW,CAAA;IACX,2DAAc,CAAA;IACd,6DAAe,CAAA;IACf,6DAAe,CAAA;IACf,+EAAwB,CAAA;IACxB,6DAAe,CAAA;IACf,6DAAe,CAAA;IACf,6DAAe,CAAA;IACf,6DAAe,CAAA;IACf,6DAAe,CAAA;IACf,8DAAgB,CAAA;IAChB,kEAAkB,CAAA;IAClB,kEAAkB,CAAA;IAClB,kEAAkB,CAAA;IAClB,kEAAkB,CAAA;IAClB,kEAAkB,CAAA;IAClB,kEAAkB,CAAA;IAClB,kEAAkB,CAAA;IAClB,8DAAgB,CAAA;IAChB,oEAAmB,CAAA;IACnB,kEAAkB,CAAA;IAClB,kEAAkB,CAAA;IAClB,kEAAkB,CAAA;IAClB,oEAAmB,CAAA;IACnB,gEAAiB,CAAA;IACjB,kEAAkB,CAAA;IAClB,gEAAiB,CAAA;IACjB,gEAAiB,CAAA;IACjB,gEAAiB,CAAA;IACjB,gEAAiB,CAAA;IACjB,kEAAkB,CAAA;AACtB,CAAC,EAhCW,YAAY,KAAZ,YAAY,QAgCvB;AAyED,MAAM,CAAN,IAAkB,8BAQjB;AARD,WAAkB,8BAA8B;IAC5C,6FAAa,CAAA;IACb,uFAAU,CAAA;IACV,yFAAW,CAAA;IACX,yFAAW,CAAA;IACX,iGAAe,CAAA;IACf,uFAAU,CAAA;IACV,mFAAQ,CAAA;AACZ,CAAC,EARiB,8BAA8B,KAA9B,8BAA8B,QAQ/C;AAiCD,MAAM,CAAN,IAAkB,mCAKjB;AALD,WAAkB,mCAAmC;IACjD,4GAAgB,CAAA;IAChB,6FAAQ,CAAA;IACR,6FAAQ,CAAA;IACR,2FAAO,CAAA;AACX,CAAC,EALiB,mCAAmC,KAAnC,mCAAmC,QAKpD;AA8MD,MAAM,UAAU,gCAAgC,CAC5C,QAAsB,EAAE,IAAO;IAE/B,OAAO,QAAQ,EAAE,OAAO,EAAE,YAAY,KAAK,IAAI,CAAC;AACpD,CAAC"}
|
package/dist/index.d.ts
CHANGED
|
@@ -2,7 +2,7 @@ import { DisposedError } from "lifecycle-utils";
|
|
|
2
2
|
import { Llama } from "./bindings/Llama.js";
|
|
3
3
|
import { getLlama, type LlamaOptions, type LastBuildOptions } from "./bindings/getLlama.js";
|
|
4
4
|
import { NoBinaryFoundError } from "./bindings/utils/NoBinaryFoundError.js";
|
|
5
|
-
import { LlamaLogLevel, LlamaLogLevelGreaterThan, LlamaLogLevelGreaterThanOrEqual, LlamaVocabularyType } from "./bindings/types.js";
|
|
5
|
+
import { type LlamaGpuType, LlamaLogLevel, LlamaLogLevelGreaterThan, LlamaLogLevelGreaterThanOrEqual, LlamaVocabularyType } from "./bindings/types.js";
|
|
6
6
|
import { LlamaModel, LlamaModelInfillTokens, type LlamaModelOptions, LlamaModelTokens } from "./evaluator/LlamaModel/LlamaModel.js";
|
|
7
7
|
import { TokenAttributes } from "./evaluator/LlamaModel/utils/TokenAttributes.js";
|
|
8
8
|
import { LlamaGrammar, type LlamaGrammarOptions } from "./evaluator/LlamaGrammar.js";
|
|
@@ -46,6 +46,6 @@ import { createModelDownloader, ModelDownloader, type ModelDownloaderOptions } f
|
|
|
46
46
|
import { type ChatHistoryItem, type ChatModelFunctionCall, type ChatModelFunctions, type ChatModelResponse, type ChatSessionModelFunction, type ChatSessionModelFunctions, type ChatSystemMessage, type ChatUserMessage, type Token, type Tokenizer, type Detokenizer, isChatModelResponseFunctionCall, type LLamaContextualRepeatPenalty, type ChatWrapperSettings, type ChatWrapperGenerateContextStateOptions, type ChatWrapperGeneratedContextState } from "./types.js";
|
|
47
47
|
import { type GbnfJsonArraySchema, type GbnfJsonBasicSchema, type GbnfJsonConstSchema, type GbnfJsonEnumSchema, type GbnfJsonObjectSchema, type GbnfJsonOneOfSchema, type GbnfJsonSchema, type GbnfJsonSchemaImmutableType, type GbnfJsonSchemaToType } from "./utils/gbnfJson/types.js";
|
|
48
48
|
import { type GgufFileInfo } from "./gguf/types/GgufFileInfoTypes.js";
|
|
49
|
-
import { type GgufMetadata, type GgufMetadataLlmToType, GgufArchitectureType, GgufFileType, GgufMetadataTokenizerTokenType, GgufMetadataArchitecturePoolingType, type GgufMetadataGeneral, type GgufMetadataTokenizer, type GgufMetadataDefaultArchitectureType, type GgufMetadataLlmLLaMA, type GgufMetadataMPT, type GgufMetadataGPTNeoX, type GgufMetadataGPTJ, type GgufMetadataGPT2, type GgufMetadataBloom, type GgufMetadataFalcon, type GgufMetadataMamba,
|
|
49
|
+
import { type GgufMetadata, type GgufMetadataLlmToType, GgufArchitectureType, GgufFileType, GgufMetadataTokenizerTokenType, GgufMetadataArchitecturePoolingType, type GgufMetadataGeneral, type GgufMetadataTokenizer, type GgufMetadataDefaultArchitectureType, type GgufMetadataLlmLLaMA, type GgufMetadataMPT, type GgufMetadataGPTNeoX, type GgufMetadataGPTJ, type GgufMetadataGPT2, type GgufMetadataBloom, type GgufMetadataFalcon, type GgufMetadataMamba, isGgufMetadataOfArchitectureType } from "./gguf/types/GgufMetadataTypes.js";
|
|
50
50
|
import { GgmlType, type GgufTensorInfo } from "./gguf/types/GgufTensorInfoTypes.js";
|
|
51
|
-
export { Llama, getLlama, type LlamaOptions, type LastBuildOptions, LlamaLogLevel, NoBinaryFoundError, LlamaModel, LlamaModelTokens, LlamaModelInfillTokens, TokenAttributes, type LlamaModelOptions, LlamaGrammar, type LlamaGrammarOptions, LlamaJsonSchemaGrammar, LlamaJsonSchemaValidationError, LlamaGrammarEvaluationState, type LlamaGrammarEvaluationStateOptions, LlamaContext, LlamaContextSequence, type LlamaContextOptions, type BatchingOptions, type CustomBatchingDispatchSchedule, type CustomBatchingPrioritizationStrategy, type BatchItem, type PrioritizedBatchItem, type ContextShiftOptions, type ContextTokensDeleteRange, type EvaluationPriority, type LlamaContextSequenceRepeatPenalty, TokenBias, LlamaEmbeddingContext, type LlamaEmbeddingContextOptions, type LlamaEmbedding, LlamaChatSession, defineChatSessionFunction, type LlamaChatSessionOptions, type LlamaChatSessionContextShiftOptions, type LLamaChatPromptOptions, type LLamaChatCompletePromptOptions, type LlamaChatSessionRepeatPenalty, type LLamaChatPreloadPromptOptions, LlamaChat, type LlamaChatOptions, type LLamaChatGenerateResponseOptions, type LLamaChatLoadAndCompleteUserMessageOptions, type LLamaChatContextShiftOptions, type LLamaContextualRepeatPenalty, type LlamaChatResponse, type LlamaChatResponseFunctionCall, type LlamaChatLoadAndCompleteUserResponse, LlamaChatSessionPromptCompletionEngine, type LLamaChatPromptCompletionEngineOptions, LlamaCompletion, type LlamaCompletionOptions, type LlamaCompletionGenerationOptions, type LlamaInfillGenerationOptions, type LlamaCompletionResponse, TokenMeter, type TokenMeterState, UnsupportedError, InsufficientMemoryError, DisposedError, ChatWrapper, type ChatWrapperSettings, type ChatWrapperGenerateContextStateOptions, type ChatWrapperGeneratedContextState, EmptyChatWrapper, Llama3ChatWrapper, Llama2ChatWrapper, GeneralChatWrapper, ChatMLChatWrapper, FalconChatWrapper, AlpacaChatWrapper, FunctionaryChatWrapper, GemmaChatWrapper, TemplateChatWrapper, type TemplateChatWrapperOptions, JinjaTemplateChatWrapper, type JinjaTemplateChatWrapperOptions, type JinjaTemplateChatWrapperOptionsConvertMessageFormat, type ChatHistoryFunctionCallMessageTemplate, resolveChatWrapper, type ResolveChatWrapperOptions, resolvableChatWrapperTypeNames, type ResolvableChatWrapperTypeName, specializedChatWrapperTypeNames, type SpecializedChatWrapperTypeName, templateChatWrapperTypeNames, type TemplateChatWrapperTypeName, ChatModelFunctionsDocumentationGenerator, LlamaText, SpecialTokensText, SpecialToken, isLlamaText, tokenizeText, type LlamaTextValue, type LlamaTextInputValue, type LlamaTextJSON, type LlamaTextJSONValue, type LlamaTextSpecialTokensTextJSON, type LlamaTextSpecialTokenJSON, type BuiltinSpecialTokenValue, appendUserMessageToChatHistory, getModuleVersion, type ChatHistoryItem, type ChatModelFunctionCall, type ChatModelFunctions, type ChatModelResponse, type ChatSessionModelFunction, type ChatSessionModelFunctions, type ChatSystemMessage, type ChatUserMessage, type Token, type Tokenizer, type Detokenizer, isChatModelResponseFunctionCall, type GbnfJsonSchema, type GbnfJsonSchemaToType, type GbnfJsonSchemaImmutableType, type GbnfJsonBasicSchema, type GbnfJsonConstSchema, type GbnfJsonEnumSchema, type GbnfJsonOneOfSchema, type GbnfJsonObjectSchema, type GbnfJsonArraySchema, LlamaVocabularyType, LlamaLogLevelGreaterThan, LlamaLogLevelGreaterThanOrEqual, readGgufFileInfo, type GgufFileInfo, type GgufMetadata, type GgufTensorInfo, type GgufMetadataLlmToType, GgufArchitectureType, GgufFileType, GgufMetadataTokenizerTokenType, GgufMetadataArchitecturePoolingType, type GgufMetadataGeneral, type GgufMetadataTokenizer, type GgufMetadataDefaultArchitectureType, type GgufMetadataLlmLLaMA, type GgufMetadataMPT, type GgufMetadataGPTNeoX, type GgufMetadataGPTJ, type GgufMetadataGPT2, type GgufMetadataBloom, type GgufMetadataFalcon, type GgufMetadataMamba,
|
|
51
|
+
export { Llama, getLlama, type LlamaOptions, type LastBuildOptions, type LlamaGpuType, LlamaLogLevel, NoBinaryFoundError, LlamaModel, LlamaModelTokens, LlamaModelInfillTokens, TokenAttributes, type LlamaModelOptions, LlamaGrammar, type LlamaGrammarOptions, LlamaJsonSchemaGrammar, LlamaJsonSchemaValidationError, LlamaGrammarEvaluationState, type LlamaGrammarEvaluationStateOptions, LlamaContext, LlamaContextSequence, type LlamaContextOptions, type BatchingOptions, type CustomBatchingDispatchSchedule, type CustomBatchingPrioritizationStrategy, type BatchItem, type PrioritizedBatchItem, type ContextShiftOptions, type ContextTokensDeleteRange, type EvaluationPriority, type LlamaContextSequenceRepeatPenalty, TokenBias, LlamaEmbeddingContext, type LlamaEmbeddingContextOptions, type LlamaEmbedding, LlamaChatSession, defineChatSessionFunction, type LlamaChatSessionOptions, type LlamaChatSessionContextShiftOptions, type LLamaChatPromptOptions, type LLamaChatCompletePromptOptions, type LlamaChatSessionRepeatPenalty, type LLamaChatPreloadPromptOptions, LlamaChat, type LlamaChatOptions, type LLamaChatGenerateResponseOptions, type LLamaChatLoadAndCompleteUserMessageOptions, type LLamaChatContextShiftOptions, type LLamaContextualRepeatPenalty, type LlamaChatResponse, type LlamaChatResponseFunctionCall, type LlamaChatLoadAndCompleteUserResponse, LlamaChatSessionPromptCompletionEngine, type LLamaChatPromptCompletionEngineOptions, LlamaCompletion, type LlamaCompletionOptions, type LlamaCompletionGenerationOptions, type LlamaInfillGenerationOptions, type LlamaCompletionResponse, TokenMeter, type TokenMeterState, UnsupportedError, InsufficientMemoryError, DisposedError, ChatWrapper, type ChatWrapperSettings, type ChatWrapperGenerateContextStateOptions, type ChatWrapperGeneratedContextState, EmptyChatWrapper, Llama3ChatWrapper, Llama2ChatWrapper, GeneralChatWrapper, ChatMLChatWrapper, FalconChatWrapper, AlpacaChatWrapper, FunctionaryChatWrapper, GemmaChatWrapper, TemplateChatWrapper, type TemplateChatWrapperOptions, JinjaTemplateChatWrapper, type JinjaTemplateChatWrapperOptions, type JinjaTemplateChatWrapperOptionsConvertMessageFormat, type ChatHistoryFunctionCallMessageTemplate, resolveChatWrapper, type ResolveChatWrapperOptions, resolvableChatWrapperTypeNames, type ResolvableChatWrapperTypeName, specializedChatWrapperTypeNames, type SpecializedChatWrapperTypeName, templateChatWrapperTypeNames, type TemplateChatWrapperTypeName, ChatModelFunctionsDocumentationGenerator, LlamaText, SpecialTokensText, SpecialToken, isLlamaText, tokenizeText, type LlamaTextValue, type LlamaTextInputValue, type LlamaTextJSON, type LlamaTextJSONValue, type LlamaTextSpecialTokensTextJSON, type LlamaTextSpecialTokenJSON, type BuiltinSpecialTokenValue, appendUserMessageToChatHistory, getModuleVersion, type ChatHistoryItem, type ChatModelFunctionCall, type ChatModelFunctions, type ChatModelResponse, type ChatSessionModelFunction, type ChatSessionModelFunctions, type ChatSystemMessage, type ChatUserMessage, type Token, type Tokenizer, type Detokenizer, isChatModelResponseFunctionCall, type GbnfJsonSchema, type GbnfJsonSchemaToType, type GbnfJsonSchemaImmutableType, type GbnfJsonBasicSchema, type GbnfJsonConstSchema, type GbnfJsonEnumSchema, type GbnfJsonOneOfSchema, type GbnfJsonObjectSchema, type GbnfJsonArraySchema, LlamaVocabularyType, LlamaLogLevelGreaterThan, LlamaLogLevelGreaterThanOrEqual, readGgufFileInfo, type GgufFileInfo, type GgufMetadata, type GgufTensorInfo, type GgufMetadataLlmToType, GgufArchitectureType, GgufFileType, GgufMetadataTokenizerTokenType, GgufMetadataArchitecturePoolingType, type GgufMetadataGeneral, type GgufMetadataTokenizer, type GgufMetadataDefaultArchitectureType, type GgufMetadataLlmLLaMA, type GgufMetadataMPT, type GgufMetadataGPTNeoX, type GgufMetadataGPTJ, type GgufMetadataGPT2, type GgufMetadataBloom, type GgufMetadataFalcon, type GgufMetadataMamba, GgmlType, isGgufMetadataOfArchitectureType, GgufInsights, type GgufInsightsResourceRequirements, GgufInsightsConfigurationResolver, createModelDownloader, ModelDownloader, type ModelDownloaderOptions };
|
package/dist/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAC,aAAa,EAAC,MAAM,iBAAiB,CAAC;AAC9C,OAAO,EAAC,KAAK,EAAC,MAAM,qBAAqB,CAAC;AAC1C,OAAO,EAAC,QAAQ,EAA2C,MAAM,wBAAwB,CAAC;AAC1F,OAAO,EAAC,kBAAkB,EAAC,MAAM,wCAAwC,CAAC;AAC1E,OAAO,
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAC,aAAa,EAAC,MAAM,iBAAiB,CAAC;AAC9C,OAAO,EAAC,KAAK,EAAC,MAAM,qBAAqB,CAAC;AAC1C,OAAO,EAAC,QAAQ,EAA2C,MAAM,wBAAwB,CAAC;AAC1F,OAAO,EAAC,kBAAkB,EAAC,MAAM,wCAAwC,CAAC;AAC1E,OAAO,EACgB,aAAa,EAAE,wBAAwB,EAAE,+BAA+B,EAAE,mBAAmB,EACnH,MAAM,qBAAqB,CAAC;AAC7B,OAAO,EAAC,UAAU,EAAE,sBAAsB,EAA0B,gBAAgB,EAAC,MAAM,sCAAsC,CAAC;AAClI,OAAO,EAAC,eAAe,EAAC,MAAM,iDAAiD,CAAC;AAChF,OAAO,EAAC,YAAY,EAA2B,MAAM,6BAA6B,CAAC;AACnF,OAAO,EAAC,sBAAsB,EAAC,MAAM,uCAAuC,CAAC;AAC7E,OAAO,EAAC,8BAA8B,EAAC,MAAM,2DAA2D,CAAC;AACzG,OAAO,EAAC,2BAA2B,EAAqC,MAAM,4CAA4C,CAAC;AAC3H,OAAO,EAAC,YAAY,EAAE,oBAAoB,EAAC,MAAM,0CAA0C,CAAC;AAC5F,OAAO,EAAC,qBAAqB,EAAyD,MAAM,sCAAsC,CAAC;AAMnI,OAAO,EAAC,SAAS,EAAC,MAAM,0BAA0B,CAAC;AACnD,OAAO,EACH,gBAAgB,EAEnB,MAAM,kDAAkD,CAAC;AAC1D,OAAO,EAAC,yBAAyB,EAAC,MAAM,iEAAiE,CAAC;AAC1G,OAAO,EACH,SAAS,EAEZ,MAAM,oCAAoC,CAAC;AAC5C,OAAO,EACH,sCAAsC,EACzC,MAAM,8EAA8E,CAAC;AACtF,OAAO,EACH,eAAe,EAElB,MAAM,gCAAgC,CAAC;AACxC,OAAO,EAAC,UAAU,EAAuB,MAAM,2BAA2B,CAAC;AAC3E,OAAO,EAAC,gBAAgB,EAAC,MAAM,6BAA6B,CAAC;AAC7D,OAAO,EAAC,uBAAuB,EAAC,MAAM,oCAAoC,CAAC;AAC3E,OAAO,EAAC,WAAW,EAAC,MAAM,kBAAkB,CAAC;AAC7C,OAAO,EAAC,gBAAgB,EAAC,MAAM,oCAAoC,CAAC;AACpE,OAAO,EAAC,iBAAiB,EAAC,MAAM,qCAAqC,CAAC;AACtE,OAAO,EAAC,iBAAiB,EAAC,MAAM,qCAAqC,CAAC;AACtE,OAAO,EAAC,kBAAkB,EAAC,MAAM,sCAAsC,CAAC;AACxE,OAAO,EAAC,iBAAiB,EAAC,MAAM,qCAAqC,CAAC;AACtE,OAAO,EAAC,iBAAiB,EAAC,MAAM,qCAAqC,CAAC;AACtE,OAAO,EAAC,iBAAiB,EAAC,MAAM,qCAAqC,CAAC;AACtE,OAAO,EAAC,sBAAsB,EAAC,MAAM,0CAA0C,CAAC;AAChF,OAAO,EAAC,gBAAgB,EAAC,MAAM,oCAAoC,CAAC;AACpE,OAAO,EAAC,mBAAmB,EAAkC,MAAM,+CAA+C,CAAC;AACnH,OAAO,EACH,wBAAwB,EAC3B,MAAM,oDAAoD,CAAC;AAE5D,OAAO,EACH,8BAA8B,EAAsC,+BAA+B,EAC9D,4BAA4B,EAAoC,kBAAkB,EAE1H,MAAM,4CAA4C,CAAC;AACpD,OAAO,EAAC,wCAAwC,EAAC,MAAM,kEAAkE,CAAC;AAC1H,OAAO,EACH,SAAS,EAAE,iBAAiB,EAAE,YAAY,EAAE,WAAW,EAAE,YAAY,EAGxE,MAAM,sBAAsB,CAAC;AAC9B,OAAO,EAAC,8BAA8B,EAAC,MAAM,2CAA2C,CAAC;AACzF,OAAO,EAAC,gBAAgB,EAAC,MAAM,6BAA6B,CAAC;AAC7D,OAAO,EAAC,gBAAgB,EAAC,MAAM,4BAA4B,CAAC;AAC5D,OAAO,EAAC,YAAY,EAAwC,MAAM,iCAAiC,CAAC;AACpG,OAAO,EAAC,iCAAiC,EAAC,MAAM,sDAAsD,CAAC;AACvG,OAAO,EAAC,qBAAqB,EAAE,eAAe,EAA8B,MAAM,kCAAkC,CAAC;AAErH,OAAO,EAG2C,+BAA+B,EAEhF,MAAM,YAAY,CAAC;AAMpB,OAAO,EAC4C,oBAAoB,EAAE,YAAY,EAAE,8BAA8B,EACjH,mCAAmC,EAEsC,gCAAgC,EAC5G,MAAM,mCAAmC,CAAC;AAC3C,OAAO,EAAC,QAAQ,EAAsB,MAAM,qCAAqC,CAAC;AAGlF,OAAO,EACH,KAAK,EACL,QAAQ,EAIR,aAAa,EACb,kBAAkB,EAClB,UAAU,EACV,gBAAgB,EAChB,sBAAsB,EACtB,eAAe,EAEf,YAAY,EAEZ,sBAAsB,EACtB,8BAA8B,EAC9B,2BAA2B,EAE3B,YAAY,EACZ,oBAAoB,EAWpB,SAAS,EACT,qBAAqB,EAGrB,gBAAgB,EAChB,yBAAyB,EAOzB,SAAS,EAST,sCAAsC,EAEtC,eAAe,EAKf,UAAU,EAEV,gBAAgB,EAChB,uBAAuB,EACvB,aAAa,EACb,WAAW,EAIX,gBAAgB,EAChB,iBAAiB,EACjB,iBAAiB,EACjB,kBAAkB,EAClB,iBAAiB,EACjB,iBAAiB,EACjB,iBAAiB,EACjB,sBAAsB,EACtB,gBAAgB,EAChB,mBAAmB,EAEnB,wBAAwB,EAIxB,kBAAkB,EAElB,8BAA8B,EAE9B,+BAA+B,EAE/B,4BAA4B,EAE5B,wCAAwC,EACxC,SAAS,EACT,iBAAiB,EACjB,YAAY,EACZ,WAAW,EACX,YAAY,EAQZ,8BAA8B,EAC9B,gBAAgB,EAYhB,+BAA+B,EAU/B,mBAAmB,EACnB,wBAAwB,EACxB,+BAA+B,EAC/B,gBAAgB,EAKhB,oBAAoB,EACpB,YAAY,EACZ,8BAA8B,EAC9B,mCAAmC,EAYnC,QAAQ,EACR,gCAAgC,EAChC,YAAY,EAEZ,iCAAiC,EACjC,qBAAqB,EACrB,eAAe,EAElB,CAAC"}
|
package/llama/addon.cpp
CHANGED
|
@@ -108,20 +108,6 @@ static void adjustNapiExternalMemorySubtract(Napi::Env env, uint64_t size) {
|
|
|
108
108
|
}
|
|
109
109
|
}
|
|
110
110
|
|
|
111
|
-
std::string addon_model_token_to_piece(const struct llama_model* model, llama_token token, bool specialTokens) {
|
|
112
|
-
std::vector<char> result(8, 0);
|
|
113
|
-
const int n_tokens = llama_token_to_piece(model, token, result.data(), result.size(), specialTokens);
|
|
114
|
-
if (n_tokens < 0) {
|
|
115
|
-
result.resize(-n_tokens);
|
|
116
|
-
int check = llama_token_to_piece(model, token, result.data(), result.size(), specialTokens);
|
|
117
|
-
GGML_ASSERT(check == -n_tokens);
|
|
118
|
-
} else {
|
|
119
|
-
result.resize(n_tokens);
|
|
120
|
-
}
|
|
121
|
-
|
|
122
|
-
return std::string(result.data(), result.size());
|
|
123
|
-
}
|
|
124
|
-
|
|
125
111
|
#ifdef GPU_INFO_USE_CUDA
|
|
126
112
|
void logCudaError(const char* message) {
|
|
127
113
|
addonLlamaCppLogCallback(GGML_LOG_LEVEL_ERROR, (std::string("CUDA error: ") + std::string(message)).c_str(), nullptr);
|
|
@@ -395,21 +381,18 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
|
|
|
395
381
|
? info[1].As<Napi::Boolean>().Value()
|
|
396
382
|
: false;
|
|
397
383
|
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
// Decode each token and accumulate the result.
|
|
402
|
-
for (size_t i = 0; i < tokens.ElementLength(); i++) {
|
|
403
|
-
const std::string piece = addon_model_token_to_piece(model, (llama_token)tokens[i], decodeSpecialTokens);
|
|
384
|
+
std::vector<char> result(8, 0);
|
|
385
|
+
const int n_length = llama_detokenize(model, (llama_token*)tokens.Data(), tokens.ElementLength(), result.data(), result.size(), false, decodeSpecialTokens);
|
|
404
386
|
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
387
|
+
if (n_length < 0) {
|
|
388
|
+
result.resize(-n_length);
|
|
389
|
+
int check = llama_detokenize(model, (llama_token*)tokens.Data(), tokens.ElementLength(), result.data(), result.size(), false, decodeSpecialTokens);
|
|
390
|
+
GGML_ASSERT(check == -n_length);
|
|
391
|
+
} else {
|
|
392
|
+
result.resize(n_length);
|
|
410
393
|
}
|
|
411
394
|
|
|
412
|
-
return Napi::String::New(info.Env(),
|
|
395
|
+
return Napi::String::New(info.Env(), result.data(), result.size());
|
|
413
396
|
}
|
|
414
397
|
|
|
415
398
|
Napi::Value GetTrainContextSize(const Napi::CallbackInfo& info) {
|
|
@@ -987,6 +970,10 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
|
|
|
987
970
|
context_params.embeddings = options.Get("embeddings").As<Napi::Boolean>().Value();
|
|
988
971
|
}
|
|
989
972
|
|
|
973
|
+
if (options.Has("flashAttention")) {
|
|
974
|
+
context_params.flash_attn = options.Get("flashAttention").As<Napi::Boolean>().Value();
|
|
975
|
+
}
|
|
976
|
+
|
|
990
977
|
if (options.Has("threads")) {
|
|
991
978
|
const auto n_threads = options.Get("threads").As<Napi::Number>().Uint32Value();
|
|
992
979
|
const auto resolved_n_threads = n_threads == 0 ? std::thread::hardware_concurrency() : n_threads;
|
package/llama/gitRelease.bundle
CHANGED
|
Binary file
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "node-llama-cpp",
|
|
3
|
-
"version": "3.0.0-beta.
|
|
3
|
+
"version": "3.0.0-beta.38",
|
|
4
4
|
"description": "Run AI models locally on your machine with node.js bindings for llama.cpp. Force a JSON schema on the model output on the generation level",
|
|
5
5
|
"main": "./dist/index.js",
|
|
6
6
|
"type": "module",
|
|
@@ -193,7 +193,7 @@
|
|
|
193
193
|
}
|
|
194
194
|
},
|
|
195
195
|
"optionalDependencies": {
|
|
196
|
-
"@node-llama-cpp/linux-x64-cuda": "3.0.0-beta.
|
|
197
|
-
"@node-llama-cpp/win-x64-cuda": "3.0.0-beta.
|
|
196
|
+
"@node-llama-cpp/linux-x64-cuda": "3.0.0-beta.38",
|
|
197
|
+
"@node-llama-cpp/win-x64-cuda": "3.0.0-beta.38"
|
|
198
198
|
}
|
|
199
199
|
}
|