node-llama-cpp 3.0.0-beta.36 → 3.0.0-beta.37
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bins/linux-arm64/_nlcBuildMetadata.json +1 -1
- package/bins/linux-arm64/libllama.so +0 -0
- package/bins/linux-arm64/llama-addon.node +0 -0
- package/bins/linux-armv7l/_nlcBuildMetadata.json +1 -1
- package/bins/linux-armv7l/libllama.so +0 -0
- package/bins/linux-armv7l/llama-addon.node +0 -0
- package/bins/linux-x64/_nlcBuildMetadata.json +1 -1
- package/bins/linux-x64/libllama.so +0 -0
- package/bins/linux-x64/llama-addon.node +0 -0
- package/bins/linux-x64-vulkan/_nlcBuildMetadata.json +1 -1
- package/bins/linux-x64-vulkan/libllama.so +0 -0
- package/bins/linux-x64-vulkan/llama-addon.node +0 -0
- package/bins/mac-arm64-metal/_nlcBuildMetadata.json +1 -1
- package/bins/mac-arm64-metal/ggml-common.h +7 -7
- package/bins/mac-arm64-metal/ggml-metal.metal +0 -1
- package/bins/mac-arm64-metal/libggml.dylib +0 -0
- package/bins/mac-arm64-metal/libllama.dylib +0 -0
- package/bins/mac-arm64-metal/llama-addon.node +0 -0
- package/bins/mac-x64/_nlcBuildMetadata.json +1 -1
- package/bins/mac-x64/libllama.dylib +0 -0
- package/bins/mac-x64/llama-addon.node +0 -0
- package/bins/win-arm64/_nlcBuildMetadata.json +1 -1
- package/bins/win-arm64/ggml.dll +0 -0
- package/bins/win-arm64/llama-addon.node +0 -0
- package/bins/win-arm64/llama.dll +0 -0
- package/bins/win-x64/_nlcBuildMetadata.json +1 -1
- package/bins/win-x64/ggml.dll +0 -0
- package/bins/win-x64/llama-addon.node +0 -0
- package/bins/win-x64/llama.dll +0 -0
- package/bins/win-x64-vulkan/_nlcBuildMetadata.json +1 -1
- package/bins/win-x64-vulkan/ggml.dll +0 -0
- package/bins/win-x64-vulkan/llama-addon.node +0 -0
- package/bins/win-x64-vulkan/llama.dll +0 -0
- package/dist/bindings/AddonTypes.d.ts +1 -0
- package/dist/bindings/Llama.d.ts +2 -2
- package/dist/bindings/Llama.js +4 -1
- package/dist/bindings/Llama.js.map +1 -1
- package/dist/bindings/getLlama.d.ts +5 -2
- package/dist/bindings/getLlama.js +2 -0
- package/dist/bindings/getLlama.js.map +1 -1
- package/dist/bindings/types.d.ts +1 -0
- package/dist/bindings/types.js.map +1 -1
- package/dist/bindings/utils/getGpuTypesToUseForOption.d.ts +2 -1
- package/dist/bindings/utils/getGpuTypesToUseForOption.js +13 -4
- package/dist/bindings/utils/getGpuTypesToUseForOption.js.map +1 -1
- package/dist/cli/commands/ChatCommand.d.ts +1 -0
- package/dist/cli/commands/ChatCommand.js +12 -5
- package/dist/cli/commands/ChatCommand.js.map +1 -1
- package/dist/cli/commands/CompleteCommand.d.ts +1 -0
- package/dist/cli/commands/CompleteCommand.js +10 -3
- package/dist/cli/commands/CompleteCommand.js.map +1 -1
- package/dist/cli/commands/InfillCommand.d.ts +1 -0
- package/dist/cli/commands/InfillCommand.js +10 -3
- package/dist/cli/commands/InfillCommand.js.map +1 -1
- package/dist/cli/commands/inspect/commands/InspectMeasureCommand.d.ts +1 -0
- package/dist/cli/commands/inspect/commands/InspectMeasureCommand.js +19 -6
- package/dist/cli/commands/inspect/commands/InspectMeasureCommand.js.map +1 -1
- package/dist/cli/utils/printCommonInfoLines.js +4 -0
- package/dist/cli/utils/printCommonInfoLines.js.map +1 -1
- package/dist/evaluator/LlamaContext/LlamaContext.d.ts +1 -0
- package/dist/evaluator/LlamaContext/LlamaContext.js +15 -4
- package/dist/evaluator/LlamaContext/LlamaContext.js.map +1 -1
- package/dist/evaluator/LlamaContext/types.d.ts +14 -0
- package/dist/evaluator/LlamaModel/LlamaModel.d.ts +22 -0
- package/dist/evaluator/LlamaModel/LlamaModel.js +49 -4
- package/dist/evaluator/LlamaModel/LlamaModel.js.map +1 -1
- package/dist/gguf/insights/GgufInsights.d.ts +3 -1
- package/dist/gguf/insights/GgufInsights.js +18 -2
- package/dist/gguf/insights/GgufInsights.js.map +1 -1
- package/dist/gguf/insights/GgufInsightsConfigurationResolver.d.ts +6 -3
- package/dist/gguf/insights/GgufInsightsConfigurationResolver.js +8 -5
- package/dist/gguf/insights/GgufInsightsConfigurationResolver.js.map +1 -1
- package/dist/gguf/insights/utils/resolveContextContextSizeOption.d.ts +2 -1
- package/dist/gguf/insights/utils/resolveContextContextSizeOption.js +3 -1
- package/dist/gguf/insights/utils/resolveContextContextSizeOption.js.map +1 -1
- package/dist/gguf/insights/utils/resolveModelGpuLayersOption.d.ts +2 -1
- package/dist/gguf/insights/utils/resolveModelGpuLayersOption.js +16 -10
- package/dist/gguf/insights/utils/resolveModelGpuLayersOption.js.map +1 -1
- package/dist/gguf/types/GgufMetadataTypes.d.ts +15 -10
- package/dist/gguf/types/GgufMetadataTypes.js +15 -2
- package/dist/gguf/types/GgufMetadataTypes.js.map +1 -1
- package/dist/index.d.ts +3 -3
- package/dist/index.js.map +1 -1
- package/llama/addon.cpp +4 -0
- package/llama/binariesGithubRelease.json +1 -1
- package/llama/gitRelease.bundle +0 -0
- package/llama/llama.cpp.info.json +1 -1
- package/package.json +3 -3
|
@@ -24,7 +24,7 @@ export class GgufInsightsConfigurationResolver {
|
|
|
24
24
|
*
|
|
25
25
|
* `contextSize` defaults to `4096` (if the model train context size is lower than this, the model train context size is used instead).
|
|
26
26
|
*/
|
|
27
|
-
async scoreModelConfigurationCompatibility({ contextSize = Math.min(4096, this._ggufInsights.trainContextSize ?? 4096), embeddingContext = false } = {}, { getVramState = (() => this._ggufInsights._llama._vramOrchestrator.getMemoryState()), getRamState = (async () => ({ total: os.totalmem(), free: os.freemem() })), llamaVramPaddingSize = this._ggufInsights._llama.vramPaddingSize, llamaGpu = this._ggufInsights._llama.gpu, llamaSupportsGpuOffloading = this._ggufInsights._llama.supportsGpuOffloading } = {}) {
|
|
27
|
+
async scoreModelConfigurationCompatibility({ contextSize = Math.min(4096, this._ggufInsights.trainContextSize ?? 4096), embeddingContext = false, flashAttention = false } = {}, { getVramState = (() => this._ggufInsights._llama._vramOrchestrator.getMemoryState()), getRamState = (async () => ({ total: os.totalmem(), free: os.freemem() })), llamaVramPaddingSize = this._ggufInsights._llama.vramPaddingSize, llamaGpu = this._ggufInsights._llama.gpu, llamaSupportsGpuOffloading = this._ggufInsights._llama.supportsGpuOffloading } = {}) {
|
|
28
28
|
const defaultTrainContextSize = 4096;
|
|
29
29
|
const [vramState, ramState] = await Promise.all([
|
|
30
30
|
getVramState(),
|
|
@@ -53,7 +53,8 @@ export class GgufInsightsConfigurationResolver {
|
|
|
53
53
|
const estimatedContextResourceUsage = this._ggufInsights.estimateContextResourceRequirements({
|
|
54
54
|
contextSize: resolvedContextSize,
|
|
55
55
|
isEmbeddingContext: embeddingContext,
|
|
56
|
-
modelGpuLayers: resolvedGpuLayers
|
|
56
|
+
modelGpuLayers: resolvedGpuLayers,
|
|
57
|
+
flashAttention
|
|
57
58
|
});
|
|
58
59
|
const rankPoints = {
|
|
59
60
|
gpuLayers: 60,
|
|
@@ -94,17 +95,18 @@ export class GgufInsightsConfigurationResolver {
|
|
|
94
95
|
}
|
|
95
96
|
};
|
|
96
97
|
}
|
|
97
|
-
async resolveModelGpuLayers(gpuLayers, { ignoreMemorySafetyChecks = false, getVramState = (() => this._ggufInsights._llama._vramOrchestrator.getMemoryState()), llamaVramPaddingSize = this._ggufInsights._llama.vramPaddingSize, llamaGpu = this._ggufInsights._llama.gpu, llamaSupportsGpuOffloading = this._ggufInsights._llama.supportsGpuOffloading } = {}) {
|
|
98
|
+
async resolveModelGpuLayers(gpuLayers, { ignoreMemorySafetyChecks = false, getVramState = (() => this._ggufInsights._llama._vramOrchestrator.getMemoryState()), llamaVramPaddingSize = this._ggufInsights._llama.vramPaddingSize, llamaGpu = this._ggufInsights._llama.gpu, llamaSupportsGpuOffloading = this._ggufInsights._llama.supportsGpuOffloading, defaultContextFlashAttention = false } = {}) {
|
|
98
99
|
return resolveModelGpuLayersOption(gpuLayers, {
|
|
99
100
|
ggufInsights: this._ggufInsights,
|
|
100
101
|
ignoreMemorySafetyChecks,
|
|
101
102
|
getVramState,
|
|
102
103
|
llamaVramPaddingSize,
|
|
103
104
|
llamaGpu,
|
|
104
|
-
llamaSupportsGpuOffloading
|
|
105
|
+
llamaSupportsGpuOffloading,
|
|
106
|
+
defaultContextFlashAttention
|
|
105
107
|
});
|
|
106
108
|
}
|
|
107
|
-
async resolveContextContextSize(contextSize, { modelGpuLayers, batchSize, modelTrainContextSize, getVramState = (() => this._ggufInsights._llama._vramOrchestrator.getMemoryState()), llamaGpu = this._ggufInsights._llama.gpu, ignoreMemorySafetyChecks = false, isEmbeddingContext = false, sequences = getDefaultContextSequences() }) {
|
|
109
|
+
async resolveContextContextSize(contextSize, { modelGpuLayers, batchSize, modelTrainContextSize, flashAttention = false, getVramState = (() => this._ggufInsights._llama._vramOrchestrator.getMemoryState()), llamaGpu = this._ggufInsights._llama.gpu, ignoreMemorySafetyChecks = false, isEmbeddingContext = false, sequences = getDefaultContextSequences() }) {
|
|
108
110
|
return await resolveContextContextSizeOption({
|
|
109
111
|
contextSize,
|
|
110
112
|
batchSize,
|
|
@@ -112,6 +114,7 @@ export class GgufInsightsConfigurationResolver {
|
|
|
112
114
|
modelFileInsights: this._ggufInsights,
|
|
113
115
|
modelGpuLayers,
|
|
114
116
|
modelTrainContextSize,
|
|
117
|
+
flashAttention,
|
|
115
118
|
getVramState,
|
|
116
119
|
llamaGpu,
|
|
117
120
|
ignoreMemorySafetyChecks,
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"GgufInsightsConfigurationResolver.js","sourceRoot":"","sources":["../../../src/gguf/insights/GgufInsightsConfigurationResolver.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,IAAI,CAAC;AAIpB,OAAO,EAAC,0BAA0B,EAAC,MAAM,8CAA8C,CAAC;AACxF,OAAO,EAAC,2BAA2B,EAAC,MAAM,wCAAwC,CAAC;AACnF,OAAO,EAAC,+BAA+B,EAAC,MAAM,4CAA4C,CAAC;AAC3F,OAAO,EAAC,WAAW,EAAC,MAAM,wBAAwB,CAAC;AAInD,MAAM,OAAO,iCAAiC;IAC1C,gBAAgB,CAAkB,aAAa,CAAe;IAE9D,YAAoB,YAA0B;QAC1C,IAAI,CAAC,aAAa,GAAG,YAAY,CAAC;IACtC,CAAC;IAED,IAAW,YAAY;QACnB,OAAO,IAAI,CAAC,aAAa,CAAC;IAC9B,CAAC;IAED;;;;;;;;;;;;OAYG;IACI,KAAK,CAAC,oCAAoC,CAAC,EAC9C,WAAW,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,EAAE,IAAI,CAAC,aAAa,CAAC,gBAAgB,IAAI,IAAI,CAAC,EACzE,gBAAgB,GAAG,KAAK,
|
|
1
|
+
{"version":3,"file":"GgufInsightsConfigurationResolver.js","sourceRoot":"","sources":["../../../src/gguf/insights/GgufInsightsConfigurationResolver.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,IAAI,CAAC;AAIpB,OAAO,EAAC,0BAA0B,EAAC,MAAM,8CAA8C,CAAC;AACxF,OAAO,EAAC,2BAA2B,EAAC,MAAM,wCAAwC,CAAC;AACnF,OAAO,EAAC,+BAA+B,EAAC,MAAM,4CAA4C,CAAC;AAC3F,OAAO,EAAC,WAAW,EAAC,MAAM,wBAAwB,CAAC;AAInD,MAAM,OAAO,iCAAiC;IAC1C,gBAAgB,CAAkB,aAAa,CAAe;IAE9D,YAAoB,YAA0B;QAC1C,IAAI,CAAC,aAAa,GAAG,YAAY,CAAC;IACtC,CAAC;IAED,IAAW,YAAY;QACnB,OAAO,IAAI,CAAC,aAAa,CAAC;IAC9B,CAAC;IAED;;;;;;;;;;;;OAYG;IACI,KAAK,CAAC,oCAAoC,CAAC,EAC9C,WAAW,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,EAAE,IAAI,CAAC,aAAa,CAAC,gBAAgB,IAAI,IAAI,CAAC,EACzE,gBAAgB,GAAG,KAAK,EACxB,cAAc,GAAG,KAAK,KAKtB,EAAE,EAAE,EACJ,YAAY,GAAG,CAAC,GAAG,EAAE,CAAC,IAAI,CAAC,aAAa,CAAC,MAAM,CAAC,iBAAiB,CAAC,cAAc,EAAE,CAAC,EACnF,WAAW,GAAG,CAAC,KAAK,IAAI,EAAE,CAAC,CAAC,EAAC,KAAK,EAAE,EAAE,CAAC,QAAQ,EAAE,EAAE,IAAI,EAAE,EAAE,CAAC,OAAO,EAAE,EAAC,CAAC,CAAC,EACxE,oBAAoB,GAAG,IAAI,CAAC,aAAa,CAAC,MAAM,CAAC,eAAe,EAChE,QAAQ,GAAG,IAAI,CAAC,aAAa,CAAC,MAAM,CAAC,GAAG,EACxC,0BAA0B,GAAG,IAAI,CAAC,aAAa,CAAC,MAAM,CAAC,qBAAqB,KAO5E,EAAE;QAiCF,MAAM,uBAAuB,GAAG,IAAI,CAAC;QAErC,MAAM,CACF,SAAS,EACT,QAAQ,CACX,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC;YAClB,YAAY,EAAE;YACd,WAAW,EAAE;SAChB,CAAC,CAAC;QACH,MAAM,iBAAiB,GAAG,MAAM,IAAI,CAAC,qBAAqB,CAAC,MAAM,EAAE;YAC/D,YAAY,EAAE,KAAK,IAAI,EAAE,CAAC,SAAS;YACnC,oBAAoB;YACpB,QAAQ;YACR,0BAA0B;SAC7B,CAAC,CAAC;QACH,MAAM,SAAS,GAAG,0BAA0B,IAAI,QAAQ,KAAK,KAAK,CAAC;QACnE,MAAM,2BAA2B,GAAG,IAAI,CAAC,aAAa,CAAC,iCAAiC,CAAC;YACrF,SAAS,EAAE,iBAAiB;SAC/B,CAAC,CAAC;QAEH,MAAM,mBAAmB,GAAG,MAAM,IAAI,CAAC,yBAAyB,CAAC,MAAM,EAAE;YACrE,YAAY,EAAE,KAAK,IAAI,EAAE,CAAC,CAAC;gBACvB,KAAK,EAAE,SAAS,CAAC,KAAK;gBACtB,IAAI,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,SAAS,CAAC,IAAI,GAAG,2BAA2B,CAAC,OAAO,CAAC;aAC1E,CAAC;YACF,QAAQ;YACR,kBAAkB,EAAE,gBAAgB;YACpC,cAAc,EAAE,iBAAiB;YACjC,qBAAqB,EAAE,IAAI,CAAC,aAAa,CAAC,gBAAgB,IAAI,uBAAuB;SACxF,CAAC,CAAC;QACH,MAAM,6BAA6B,GAAG,IAAI,CAAC,aAAa,CAAC,mCAAmC,CAAC;YACzF,WAAW,EAAE,mBAAmB;YAChC,kBAAkB,EAAE,gBAAgB;YACpC,cAAc,EAAE,iBAAiB;YACjC,cAAc;SACjB,CAAC,CAAC;QAEH,MAAM,UAAU,GAAG;YACf,SAAS,EAAE,EAAE;YACb,qBAAqB,EAAE,EAAE;YACzB,WAAW,EAAE,EAAE;YACf,iBAAiB,EAAE,EAAE;YACrB,qBAAqB,EAAE,EAAE,EAAE,sDAAsD;YACjF,gBAAgB,EAAE,EAAE;SACd,CAAC;QAEX,MAAM,eAAe,GAAG,UAAU,CAAC,SAAS,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,iBAAiB,GAAG,IAAI,CAAC,aAAa,CAAC,WAAW,CAAC,CAAC;QAC/G,MAAM,2BAA2B,GAAG,UAAU,CAAC,qBAAqB,GAAG,CACnE,iBAAiB,KAAK,IAAI,CAAC,aAAa,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAC/D,CAAC;QACF,MAAM,iBAAiB,GAAG,UAAU,CAAC,WAAW,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,mBAAmB,GAAG,WAAW,CAAC,CAAC;QAClG,MAAM,uBAAuB,GAAG,UAAU,CAAC,iBAAiB,GAAG,CAC3D,2BAA2B,CAAC,MAAM,IAAI,QAAQ,CAAC,IAAI;YAC/C,CAAC,CAAC,CAAC;YACH,CAAC,CAAC,2BAA2B,CAAC,MAAM,IAAI,QAAQ,CAAC,KAAK;gBAClD,CAAC,CAAC,GAAG;gBACL,CAAC,CAAC,CACE,GAAG,GAAG,IAAI,CAAC,GAAG,CACV,GAAG,EACH,GAAG,GAAG,CACF,CAAC,2BAA2B,CAAC,MAAM,GAAG,QAAQ,CAAC,KAAK,CAAC,GAAG,QAAQ,CAAC,KAAK,CACzE,CACJ,CACJ,CACZ,CAAC;QACF,MAAM,sBAAsB,GAAG,EAAE,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,mBAAmB,GAAG,WAAW,CAAC,GAAG,WAAW,CAAC,CAAC;QAE9G,MAAM,kBAAkB,GAAG,SAAS;YAChC,CAAC,CAAC,CACE,CAAC,eAAe,GAAG,2BAA2B,GAAG,iBAAiB,GAAG,uBAAuB,CAAC;gBAC7F,CAAC,UAAU,CAAC,SAAS,GAAG,UAAU,CAAC,qBAAqB,GAAG,UAAU,CAAC,WAAW,GAAG,UAAU,CAAC,iBAAiB,CAAC,CACpH;YACD,CAAC,CAAC,CACE,CAAC,iBAAiB,GAAG,uBAAuB,GAAG,6BAA6B,CAAC,IAAI,CAAC,aAAa,CAAC,SAAS,CAAC,CAAC;gBAC3G,CAAC,UAAU,CAAC,WAAW,GAAG,UAAU,CAAC,iBAAiB,GAAG,UAAU,CAAC,qBAAqB,CAAC,CAAC,CAAC;QACpG,MAAM,UAAU,GAAG,sBAAsB,GAAG,UAAU,CAAC,gBAAgB,CAAC;QAExE,OAAO;YACH,kBAAkB;YAClB,UAAU;YACV,UAAU,EAAE,kBAAkB,GAAG,UAAU;YAE3C,cAAc,EAAE;gBACZ,SAAS,EAAE,iBAAiB;gBAC5B,WAAW,EAAE,mBAAmB;gBAEhC,aAAa,EAAE,2BAA2B,CAAC,MAAM;gBACjD,eAAe,EAAE,6BAA6B,CAAC,MAAM;gBACrD,aAAa,EAAE,2BAA2B,CAAC,MAAM,GAAG,6BAA6B,CAAC,MAAM;gBAExF,cAAc,EAAE,2BAA2B,CAAC,OAAO;gBACnD,gBAAgB,EAAE,6BAA6B,CAAC,OAAO;gBACvD,cAAc,EAAE,2BAA2B,CAAC,OAAO,GAAG,6BAA6B,CAAC,OAAO;aAC9F;SACJ,CAAC;IACN,CAAC;IAEM,KAAK,CAAC,qBAAqB,CAAC,SAAyC,EAAE,EAC1E,wBAAwB,GAAG,KAAK,EAChC,YAAY,GAAG,CAAC,GAAG,EAAE,CAAC,IAAI,CAAC,aAAa,CAAC,MAAM,CAAC,iBAAiB,CAAC,cAAc,EAAE,CAAC,EACnF,oBAAoB,GAAG,IAAI,CAAC,aAAa,CAAC,MAAM,CAAC,eAAe,EAAE,QAAQ,GAAG,IAAI,CAAC,aAAa,CAAC,MAAM,CAAC,GAAG,EAC1G,0BAA0B,GAAG,IAAI,CAAC,aAAa,CAAC,MAAM,CAAC,qBAAqB,EAC5E,4BAA4B,GAAG,KAAK,KAIpC,EAAE;QACF,OAAO,2BAA2B,CAAC,SAAS,EAAE;YAC1C,YAAY,EAAE,IAAI,CAAC,aAAa;YAChC,wBAAwB;YACxB,YAAY;YACZ,oBAAoB;YACpB,QAAQ;YACR,0BAA0B;YAC1B,4BAA4B;SAC/B,CAAC,CAAC;IACP,CAAC;IAEM,KAAK,CAAC,yBAAyB,CAAC,WAA+C,EAAE,EACpF,cAAc,EACd,SAAS,EACT,qBAAqB,EACrB,cAAc,GAAG,KAAK,EACtB,YAAY,GAAG,CAAC,GAAG,EAAE,CAAC,IAAI,CAAC,aAAa,CAAC,MAAM,CAAC,iBAAiB,CAAC,cAAc,EAAE,CAAC,EACnF,QAAQ,GAAG,IAAI,CAAC,aAAa,CAAC,MAAM,CAAC,GAAG,EACxC,wBAAwB,GAAG,KAAK,EAChC,kBAAkB,GAAG,KAAK,EAC1B,SAAS,GAAG,0BAA0B,EAAE,EAW3C;QACG,OAAO,MAAM,+BAA+B,CAAC;YACzC,WAAW;YACX,SAAS;YACT,SAAS;YACT,iBAAiB,EAAE,IAAI,CAAC,aAAa;YACrC,cAAc;YACd,qBAAqB;YACrB,cAAc;YACd,YAAY;YACZ,QAAQ;YACR,wBAAwB;YACxB,kBAAkB;SACrB,CAAC,CAAC;IACP,CAAC;IAED,gBAAgB;IACT,MAAM,CAAC,OAAO,CAAC,YAA0B;QAC5C,OAAO,IAAI,iCAAiC,CAAC,YAAY,CAAC,CAAC;IAC/D,CAAC;CACJ;AAED,SAAS,6BAA6B,CAAC,SAAiB;IACpD,MAAM,IAAI,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC;IAC/B,OAAO,EAAE,GAAG,WAAW,CAAC,SAAS,EAAE,CAAC;YAChC,KAAK,EAAE,IAAI;YACX,GAAG,EAAE,IAAI,GAAG,GAAG;YACf,MAAM,EAAE,EAAE;SACb,EAAE;YACC,KAAK,EAAE,IAAI,GAAG,GAAG;YACjB,GAAG,EAAE,IAAI,GAAG,CAAC;YACb,MAAM,EAAE,EAAE;SACb,EAAE;YACC,KAAK,EAAE,IAAI,GAAG,CAAC;YACf,MAAM,EAAE,CAAC;SACZ,CAAC,CAAC,CAAC;AACR,CAAC"}
|
|
@@ -1,13 +1,14 @@
|
|
|
1
1
|
import { LlamaContextOptions } from "../../../evaluator/LlamaContext/types.js";
|
|
2
2
|
import { GgufInsights } from "../GgufInsights.js";
|
|
3
3
|
import { BuildGpu } from "../../../bindings/types.js";
|
|
4
|
-
export declare function resolveContextContextSizeOption({ contextSize, batchSize, sequences, modelFileInsights, modelGpuLayers, modelTrainContextSize, getVramState, llamaGpu, ignoreMemorySafetyChecks, isEmbeddingContext }: {
|
|
4
|
+
export declare function resolveContextContextSizeOption({ contextSize, batchSize, sequences, modelFileInsights, modelGpuLayers, modelTrainContextSize, flashAttention, getVramState, llamaGpu, ignoreMemorySafetyChecks, isEmbeddingContext }: {
|
|
5
5
|
contextSize?: LlamaContextOptions["contextSize"];
|
|
6
6
|
batchSize?: LlamaContextOptions["batchSize"];
|
|
7
7
|
sequences: number;
|
|
8
8
|
modelFileInsights: GgufInsights;
|
|
9
9
|
modelGpuLayers: number;
|
|
10
10
|
modelTrainContextSize: number;
|
|
11
|
+
flashAttention: boolean;
|
|
11
12
|
getVramState(): Promise<{
|
|
12
13
|
total: number;
|
|
13
14
|
free: number;
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { minAllowedContextSizeInCalculations } from "../../../config.js";
|
|
2
2
|
import { getDefaultContextBatchSize, getDefaultModelContextSize } from "../../../evaluator/LlamaContext/LlamaContext.js";
|
|
3
|
-
export async function resolveContextContextSizeOption({ contextSize, batchSize, sequences, modelFileInsights, modelGpuLayers, modelTrainContextSize, getVramState, llamaGpu, ignoreMemorySafetyChecks = false, isEmbeddingContext = false }) {
|
|
3
|
+
export async function resolveContextContextSizeOption({ contextSize, batchSize, sequences, modelFileInsights, modelGpuLayers, modelTrainContextSize, flashAttention, getVramState, llamaGpu, ignoreMemorySafetyChecks = false, isEmbeddingContext = false }) {
|
|
4
4
|
if (contextSize == null)
|
|
5
5
|
contextSize = "auto";
|
|
6
6
|
if (typeof contextSize === "number") {
|
|
@@ -13,6 +13,7 @@ export async function resolveContextContextSizeOption({ contextSize, batchSize,
|
|
|
13
13
|
batchSize: batchSize ?? getDefaultContextBatchSize({ contextSize: resolvedContextSize, sequences }),
|
|
14
14
|
modelGpuLayers: modelGpuLayers,
|
|
15
15
|
sequences,
|
|
16
|
+
flashAttention,
|
|
16
17
|
isEmbeddingContext
|
|
17
18
|
}).gpuVram;
|
|
18
19
|
if (contextVram > vramState.free)
|
|
@@ -40,6 +41,7 @@ export async function resolveContextContextSizeOption({ contextSize, batchSize,
|
|
|
40
41
|
batchSize: batchSize ?? getDefaultContextBatchSize({ contextSize: testContextSize, sequences }),
|
|
41
42
|
modelGpuLayers: modelGpuLayers,
|
|
42
43
|
sequences,
|
|
44
|
+
flashAttention,
|
|
43
45
|
isEmbeddingContext
|
|
44
46
|
}).gpuVram;
|
|
45
47
|
if (contextVram <= freeVram) {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"resolveContextContextSizeOption.js","sourceRoot":"","sources":["../../../../src/gguf/insights/utils/resolveContextContextSizeOption.ts"],"names":[],"mappings":"AAGA,OAAO,EAAC,mCAAmC,EAAC,MAAM,oBAAoB,CAAC;AACvE,OAAO,EAAC,0BAA0B,EAAE,0BAA0B,EAAC,MAAM,iDAAiD,CAAC;AAEvH,MAAM,CAAC,KAAK,UAAU,+BAA+B,CAAC,EAClD,WAAW,EAAE,SAAS,EAAE,SAAS,EAAE,iBAAiB,EAAE,cAAc,EAAE,qBAAqB,EAAE,YAAY,EAAE,QAAQ,
|
|
1
|
+
{"version":3,"file":"resolveContextContextSizeOption.js","sourceRoot":"","sources":["../../../../src/gguf/insights/utils/resolveContextContextSizeOption.ts"],"names":[],"mappings":"AAGA,OAAO,EAAC,mCAAmC,EAAC,MAAM,oBAAoB,CAAC;AACvE,OAAO,EAAC,0BAA0B,EAAE,0BAA0B,EAAC,MAAM,iDAAiD,CAAC;AAEvH,MAAM,CAAC,KAAK,UAAU,+BAA+B,CAAC,EAClD,WAAW,EAAE,SAAS,EAAE,SAAS,EAAE,iBAAiB,EAAE,cAAc,EAAE,qBAAqB,EAAE,cAAc,EAAE,YAAY,EAAE,QAAQ,EACnI,wBAAwB,GAAG,KAAK,EAAE,kBAAkB,GAAG,KAAK,EAa/D;IACG,IAAI,WAAW,IAAI,IAAI;QACnB,WAAW,GAAG,MAAM,CAAC;IAEzB,IAAI,OAAO,WAAW,KAAK,QAAQ,EAAE,CAAC;QAClC,MAAM,mBAAmB,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,KAAK,CAAC,WAAW,CAAC,CAAC,CAAC;QAEjE,IAAI,wBAAwB;YACxB,OAAO,mBAAmB,CAAC;QAE/B,MAAM,SAAS,GAAG,MAAM,YAAY,EAAE,CAAC;QACvC,MAAM,WAAW,GAAG,iBAAiB,CAAC,mCAAmC,CAAC;YACtE,WAAW,EAAE,mBAAmB;YAChC,SAAS,EAAE,SAAS,IAAI,0BAA0B,CAAC,EAAC,WAAW,EAAE,mBAAmB,EAAE,SAAS,EAAC,CAAC;YACjG,cAAc,EAAE,cAAc;YAC9B,SAAS;YACT,cAAc;YACd,kBAAkB;SACrB,CAAC,CAAC,OAAO,CAAC;QAEX,IAAI,WAAW,GAAG,SAAS,CAAC,IAAI;YAC5B,MAAM,IAAI,KAAK,CAAC,uBAAuB,mBAAmB,GAAG,SAAS,GAAG,CAAC,CAAC,CAAC,CAAC,SAAS,SAAS,YAAY,CAAC,CAAC,CAAC,EAAE,sCAAsC,CAAC,CAAC;QAE5J,OAAO,mBAAmB,CAAC;IAC/B,CAAC;SAAM,IAAI,WAAW,KAAK,MAAM,IAAI,OAAO,WAAW,KAAK,QAAQ,EAAE,CAAC;QACnE,IAAI,QAAQ,KAAK,KAAK;YAClB,OAAO,qBAAqB,CAAC;QAEjC,MAAM,SAAS,GAAG,MAAM,YAAY,EAAE,CAAC;QAEvC,IAAI,SAAS,CAAC,KAAK,KAAK,CAAC;YACrB,OAAO,qBAAqB,CAAC;QAEjC,MAAM,QAAQ,GAAG,SAAS,CAAC,IAAI,CAAC;QAEhC,MAAM,cAAc,GAAG,WAAW,KAAK,MAAM;YACzC,CAAC,CAAC,0BAA0B,CAAC,EAAC,gBAAgB,EAAE,qBAAqB,EAAC,CAAC;YACvE,CAAC,CAAC,IAAI,CAAC,GAAG,CACN,WAAW,CAAC,GAAG,IAAI,0BAA0B,CAAC,EAAC,gBAAgB,EAAE,qBAAqB,EAAC,CAAC,EACxF,0BAA0B,CAAC,EAAC,gBAAgB,EAAE,qBAAqB,EAAC,CAAC,CACxE,CAAC;QAEN,MAAM,cAAc,GAAG,WAAW,KAAK,MAAM;YACzC,CAAC,CAAC,mCAAmC;YACrC,CAAC,CAAC,IAAI,CAAC,GAAG,CACN,WAAW,CAAC,GAAG,IAAI,mCAAmC,EACtD,mCAAmC,CACtC,CAAC;QAEN,IAAI,4BAA4B,GAAkB,IAAI,CAAC;QACvD,IAAI,IAAI,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC,cAAc,GAAG,cAAc,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;QAC3E,KAAK,IAAI,eAAe,GAAG,cAAc,EAAE,eAAe,IAAI,cAAc,IAAI,eAAe,IAAI,cAAc,GAAG,CAAC;YACjH,MAAM,WAAW,GAAG,iBAAiB,CAAC,mCAAmC,CAAC;gBACtE,WAAW,EAAE,eAAe;gBAC5B,SAAS,EAAE,SAAS,IAAI,0BAA0B,CAAC,EAAC,WAAW,EAAE,eAAe,EAAE,SAAS,EAAC,CAAC;gBAC7F,cAAc,EAAE,cAAc;gBAC9B,SAAS;gBACT,cAAc;gBACd,kBAAkB;aACrB,CAAC,CAAC,OAAO,CAAC;YAEX,IAAI,WAAW,IAAI,QAAQ,EAAE,CAAC;gBAC1B,IAAI,4BAA4B,IAAI,IAAI,IAAI,eAAe,GAAG,4BAA4B,EAAE,CAAC;oBACzF,4BAA4B,GAAG,eAAe,CAAC;oBAE/C,IAAI,IAAI,KAAK,CAAC,CAAC;wBACX,MAAM;yBACL,IAAI,IAAI,GAAG,CAAC;wBACb,IAAI,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC,CAAC,CAAC;gBAClD,CAAC;YACL,CAAC;iBAAM,IAAI,IAAI,GAAG,CAAC;gBACf,IAAI,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,KAAK,CAAC,IAAI,GAAG,CAAC,CAAC,CAAC,CAAC;YAE9C,IAAI,eAAe,IAAI,cAAc,IAAI,IAAI,KAAK,CAAC,CAAC;gBAChD,MAAM;YAEV,eAAe,IAAI,IAAI,CAAC;YACxB,IAAI,eAAe,GAAG,cAAc,EAAE,CAAC;gBACnC,eAAe,GAAG,cAAc,CAAC;gBACjC,IAAI,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;YACvD,CAAC;iBAAM,IAAI,eAAe,GAAG,cAAc,EAAE,CAAC;gBAC1C,eAAe,GAAG,cAAc,CAAC;gBACjC,IAAI,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;YACxD,CAAC;QACL,CAAC;QAED,IAAI,4BAA4B,IAAI,IAAI;YACpC,OAAO,4BAA4B,CAAC;QAExC,IAAI,wBAAwB;YACxB,OAAO,cAAc,CAAC;QAE1B,MAAM,IAAI,KAAK,CAAC,8DAA8D,cAAc,GAAG,SAAS,GAAG,CAAC,CAAC,CAAC,CAAC,SAAS,SAAS,YAAY,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;IAC1J,CAAC;IAED,MAAM,IAAI,KAAK,CAAC,0BAA0B,WAAW,GAAG,CAAC,CAAC;AAC9D,CAAC"}
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import { LlamaModelOptions } from "../../../evaluator/LlamaModel/LlamaModel.js";
|
|
2
2
|
import { BuildGpu } from "../../../bindings/types.js";
|
|
3
3
|
import type { GgufInsights } from "../GgufInsights.js";
|
|
4
|
-
export declare function resolveModelGpuLayersOption(gpuLayers: LlamaModelOptions["gpuLayers"], { ggufInsights, ignoreMemorySafetyChecks, getVramState, llamaVramPaddingSize, llamaGpu, llamaSupportsGpuOffloading }: {
|
|
4
|
+
export declare function resolveModelGpuLayersOption(gpuLayers: LlamaModelOptions["gpuLayers"], { ggufInsights, ignoreMemorySafetyChecks, getVramState, llamaVramPaddingSize, llamaGpu, llamaSupportsGpuOffloading, defaultContextFlashAttention }: {
|
|
5
5
|
ggufInsights: GgufInsights;
|
|
6
6
|
ignoreMemorySafetyChecks?: boolean;
|
|
7
7
|
getVramState(): Promise<{
|
|
@@ -11,4 +11,5 @@ export declare function resolveModelGpuLayersOption(gpuLayers: LlamaModelOptions
|
|
|
11
11
|
llamaVramPaddingSize: number;
|
|
12
12
|
llamaGpu: BuildGpu;
|
|
13
13
|
llamaSupportsGpuOffloading: boolean;
|
|
14
|
+
defaultContextFlashAttention: boolean;
|
|
14
15
|
}): Promise<number>;
|
|
@@ -4,7 +4,7 @@ import { getDefaultContextBatchSize, getDefaultModelContextSize } from "../../..
|
|
|
4
4
|
import { minAllowedContextSizeInCalculations } from "../../../config.js";
|
|
5
5
|
import { scoreLevels } from "./scoreLevels.js";
|
|
6
6
|
const fitContextExtraMemoryPaddingPercentage = 0.5;
|
|
7
|
-
export async function resolveModelGpuLayersOption(gpuLayers, { ggufInsights, ignoreMemorySafetyChecks = false, getVramState, llamaVramPaddingSize, llamaGpu, llamaSupportsGpuOffloading }) {
|
|
7
|
+
export async function resolveModelGpuLayersOption(gpuLayers, { ggufInsights, ignoreMemorySafetyChecks = false, getVramState, llamaVramPaddingSize, llamaGpu, llamaSupportsGpuOffloading, defaultContextFlashAttention }) {
|
|
8
8
|
if (gpuLayers == null)
|
|
9
9
|
gpuLayers = "auto";
|
|
10
10
|
if (!llamaSupportsGpuOffloading)
|
|
@@ -19,7 +19,8 @@ export async function resolveModelGpuLayersOption(gpuLayers, { ggufInsights, ign
|
|
|
19
19
|
const maxLayersRequirements = getVramRequiredForGpuLayers({
|
|
20
20
|
gpuLayers: resolvedGpuLayers,
|
|
21
21
|
ggufInsights,
|
|
22
|
-
currentVram: vramState.free
|
|
22
|
+
currentVram: vramState.free,
|
|
23
|
+
defaultContextFlashAttention
|
|
23
24
|
});
|
|
24
25
|
if (maxLayersRequirements == null)
|
|
25
26
|
throw new InsufficientMemoryError("Not enough VRAM to fit the model with the specified settings");
|
|
@@ -48,7 +49,8 @@ export async function resolveModelGpuLayersOption(gpuLayers, { ggufInsights, ign
|
|
|
48
49
|
: undefined,
|
|
49
50
|
maxGpuLayers: typeof gpuLayers === "object"
|
|
50
51
|
? gpuLayers.max
|
|
51
|
-
: undefined
|
|
52
|
+
: undefined,
|
|
53
|
+
defaultContextFlashAttention
|
|
52
54
|
});
|
|
53
55
|
const hasGpuLayersRequirements = typeof gpuLayers === "object" &&
|
|
54
56
|
(gpuLayers.min != null || gpuLayers.max != null || gpuLayers.fitContext?.contextSize != null);
|
|
@@ -58,7 +60,7 @@ export async function resolveModelGpuLayersOption(gpuLayers, { ggufInsights, ign
|
|
|
58
60
|
}
|
|
59
61
|
throw new Error(`Invalid gpuLayers value: ${gpuLayers}`);
|
|
60
62
|
}
|
|
61
|
-
function getBestGpuLayersForFreeVram({ ggufInsights, freeVram, fitContext, minGpuLayers, maxGpuLayers }) {
|
|
63
|
+
function getBestGpuLayersForFreeVram({ ggufInsights, freeVram, fitContext, minGpuLayers, maxGpuLayers, defaultContextFlashAttention }) {
|
|
62
64
|
return findBestOption({
|
|
63
65
|
*generator() {
|
|
64
66
|
const minLayers = Math.floor(Math.max(0, minGpuLayers ?? 0));
|
|
@@ -74,7 +76,8 @@ function getBestGpuLayersForFreeVram({ ggufInsights, freeVram, fitContext, minGp
|
|
|
74
76
|
gpuLayers: option.gpuLayers,
|
|
75
77
|
ggufInsights,
|
|
76
78
|
currentVram: freeVram,
|
|
77
|
-
fitContext
|
|
79
|
+
fitContext,
|
|
80
|
+
defaultContextFlashAttention
|
|
78
81
|
});
|
|
79
82
|
if (layersRequirements == null)
|
|
80
83
|
return null;
|
|
@@ -121,7 +124,7 @@ function scoreGpuLayersAndContextCombination({ gpuLayers, contextSize }, { total
|
|
|
121
124
|
}
|
|
122
125
|
return scoreGpuLayers() + scoreContextSize();
|
|
123
126
|
}
|
|
124
|
-
function getVramRequiredForGpuLayers({ gpuLayers, ggufInsights, currentVram, fitContext }) {
|
|
127
|
+
function getVramRequiredForGpuLayers({ gpuLayers, ggufInsights, currentVram, fitContext, defaultContextFlashAttention = false }) {
|
|
125
128
|
const modelVram = ggufInsights.estimateModelResourceRequirements({ gpuLayers }).gpuVram;
|
|
126
129
|
if (modelVram > currentVram)
|
|
127
130
|
return null;
|
|
@@ -131,7 +134,8 @@ function getVramRequiredForGpuLayers({ gpuLayers, ggufInsights, currentVram, fit
|
|
|
131
134
|
batchSize: getDefaultContextBatchSize({ contextSize: fitContext.contextSize, sequences: 1 }),
|
|
132
135
|
modelGpuLayers: gpuLayers,
|
|
133
136
|
sequences: 1,
|
|
134
|
-
isEmbeddingContext: fitContext.embeddingContext ?? false
|
|
137
|
+
isEmbeddingContext: fitContext.embeddingContext ?? false,
|
|
138
|
+
flashAttention: defaultContextFlashAttention
|
|
135
139
|
}).gpuVram;
|
|
136
140
|
const totalVram = modelVram + contextVram;
|
|
137
141
|
if (totalVram > currentVram)
|
|
@@ -146,7 +150,8 @@ function getVramRequiredForGpuLayers({ gpuLayers, ggufInsights, currentVram, fit
|
|
|
146
150
|
gpuLayers,
|
|
147
151
|
ggufInsights,
|
|
148
152
|
vram: currentVram - modelVram,
|
|
149
|
-
isEmbeddingContext: fitContext?.embeddingContext ?? false
|
|
153
|
+
isEmbeddingContext: fitContext?.embeddingContext ?? false,
|
|
154
|
+
flashAttention: defaultContextFlashAttention
|
|
150
155
|
});
|
|
151
156
|
if (maxContext == null || modelVram + maxContext.vram > currentVram)
|
|
152
157
|
return null;
|
|
@@ -156,7 +161,7 @@ function getVramRequiredForGpuLayers({ gpuLayers, ggufInsights, currentVram, fit
|
|
|
156
161
|
totalVram: modelVram + maxContext.vram
|
|
157
162
|
};
|
|
158
163
|
}
|
|
159
|
-
function findMaxPossibleContextSizeForVram({ gpuLayers, ggufInsights, vram, isEmbeddingContext }) {
|
|
164
|
+
function findMaxPossibleContextSizeForVram({ gpuLayers, ggufInsights, vram, isEmbeddingContext, flashAttention }) {
|
|
160
165
|
const maxContextSize = getDefaultModelContextSize({ trainContextSize: ggufInsights.trainContextSize });
|
|
161
166
|
for (let contextSize = maxContextSize; contextSize >= minAllowedContextSizeInCalculations; contextSize--) {
|
|
162
167
|
const contextVram = ggufInsights.estimateContextResourceRequirements({
|
|
@@ -164,7 +169,8 @@ function findMaxPossibleContextSizeForVram({ gpuLayers, ggufInsights, vram, isEm
|
|
|
164
169
|
batchSize: getDefaultContextBatchSize({ contextSize, sequences: 1 }),
|
|
165
170
|
modelGpuLayers: gpuLayers,
|
|
166
171
|
sequences: 1,
|
|
167
|
-
isEmbeddingContext
|
|
172
|
+
isEmbeddingContext,
|
|
173
|
+
flashAttention
|
|
168
174
|
}).gpuVram;
|
|
169
175
|
if (contextVram <= vram)
|
|
170
176
|
return {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"resolveModelGpuLayersOption.js","sourceRoot":"","sources":["../../../../src/gguf/insights/utils/resolveModelGpuLayersOption.ts"],"names":[],"mappings":"AAEA,OAAO,EAAC,uBAAuB,EAAC,MAAM,2CAA2C,CAAC;AAClF,OAAO,EAAC,cAAc,EAAC,MAAM,kCAAkC,CAAC;AAChE,OAAO,EAAC,0BAA0B,EAAE,0BAA0B,EAAC,MAAM,iDAAiD,CAAC;AACvH,OAAO,EAAC,mCAAmC,EAAC,MAAM,oBAAoB,CAAC;AACvE,OAAO,EAAC,WAAW,EAAC,MAAM,kBAAkB,CAAC;AAG7C,MAAM,sCAAsC,GAAG,GAAG,CAAC;AAEnD,MAAM,CAAC,KAAK,UAAU,2BAA2B,CAAC,SAAyC,EAAE,EACzF,YAAY,EAAE,wBAAwB,GAAG,KAAK,EAAE,YAAY,EAAE,oBAAoB,EAClF,QAAQ,EAAE,0BAA0B,
|
|
1
|
+
{"version":3,"file":"resolveModelGpuLayersOption.js","sourceRoot":"","sources":["../../../../src/gguf/insights/utils/resolveModelGpuLayersOption.ts"],"names":[],"mappings":"AAEA,OAAO,EAAC,uBAAuB,EAAC,MAAM,2CAA2C,CAAC;AAClF,OAAO,EAAC,cAAc,EAAC,MAAM,kCAAkC,CAAC;AAChE,OAAO,EAAC,0BAA0B,EAAE,0BAA0B,EAAC,MAAM,iDAAiD,CAAC;AACvH,OAAO,EAAC,mCAAmC,EAAC,MAAM,oBAAoB,CAAC;AACvE,OAAO,EAAC,WAAW,EAAC,MAAM,kBAAkB,CAAC;AAG7C,MAAM,sCAAsC,GAAG,GAAG,CAAC;AAEnD,MAAM,CAAC,KAAK,UAAU,2BAA2B,CAAC,SAAyC,EAAE,EACzF,YAAY,EAAE,wBAAwB,GAAG,KAAK,EAAE,YAAY,EAAE,oBAAoB,EAClF,QAAQ,EAAE,0BAA0B,EAAE,4BAA4B,EAKrE;IACG,IAAI,SAAS,IAAI,IAAI;QACjB,SAAS,GAAG,MAAM,CAAC;IAEvB,IAAI,CAAC,0BAA0B;QAC3B,OAAO,CAAC,CAAC;IAEb,IAAI,SAAS,KAAK,KAAK,IAAI,OAAO,SAAS,KAAK,QAAQ,EAAE,CAAC;QACvD,MAAM,iBAAiB,GAAG,OAAO,SAAS,KAAK,QAAQ;YACnD,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,YAAY,CAAC,WAAW,EAAE,SAAS,CAAC,CAAC;YAC5D,CAAC,CAAC,YAAY,CAAC,WAAW,CAAC;QAE/B,IAAI,wBAAwB;YACxB,OAAO,iBAAiB,CAAC;QAE7B,MAAM,SAAS,GAAG,MAAM,YAAY,EAAE,CAAC;QACvC,MAAM,qBAAqB,GAAG,2BAA2B,CAAC;YACtD,SAAS,EAAE,iBAAiB;YAC5B,YAAY;YACZ,WAAW,EAAE,SAAS,CAAC,IAAI;YAC3B,4BAA4B;SAC/B,CAAC,CAAC;QAEH,IAAI,qBAAqB,IAAI,IAAI;YAC7B,MAAM,IAAI,uBAAuB,CAAC,8DAA8D,CAAC,CAAC;QAEtG,OAAO,iBAAiB,CAAC;IAC7B,CAAC;SAAM,IAAI,SAAS,KAAK,MAAM,IAAI,OAAO,SAAS,KAAK,QAAQ,EAAE,CAAC;QAC/D,IAAI,QAAQ,KAAK,KAAK;YAClB,OAAO,CAAC,CAAC;QAEb,MAAM,SAAS,GAAG,MAAM,YAAY,EAAE,CAAC;QACvC,IAAI,SAAS,CAAC,KAAK,KAAK,CAAC;YACrB,OAAO,CAAC,CAAC;QAEb,IAAI,QAAQ,GAAG,SAAS,CAAC,IAAI,CAAC;QAC9B,IAAI,OAAO,SAAS,KAAK,QAAQ,IAAI,SAAS,CAAC,UAAU,EAAE,WAAW,IAAI,IAAI,EAAE,CAAC;YAC7E,QAAQ,IAAI,oBAAoB,GAAG,sCAAsC,CAAC;YAE1E,IAAI,QAAQ,GAAG,CAAC;gBACZ,QAAQ,GAAG,CAAC,CAAC;QACrB,CAAC;QAED,MAAM,mBAAmB,GAAG,2BAA2B,CAAC;YACpD,YAAY;YACZ,QAAQ;YACR,UAAU,EAAE,OAAO,SAAS,KAAK,QAAQ;gBACrC,CAAC,CAAC,SAAS,CAAC,UAAU;gBACtB,CAAC,CAAC,SAAS;YACf,YAAY,EAAE,OAAO,SAAS,KAAK,QAAQ;gBACvC,CAAC,CAAC,SAAS,CAAC,GAAG;gBACf,CAAC,CAAC,SAAS;YACf,YAAY,EAAE,OAAO,SAAS,KAAK,QAAQ;gBACvC,CAAC,CAAC,SAAS,CAAC,GAAG;gBACf,CAAC,CAAC,SAAS;YACf,4BAA4B;SAC/B,CAAC,CAAC;QAEH,MAAM,wBAAwB,GAAG,OAAO,SAAS,KAAK,QAAQ;YAC1D,CAAC,SAAS,CAAC,GAAG,IAAI,IAAI,IAAI,SAAS,CAAC,GAAG,IAAI,IAAI,IAAI,SAAS,CAAC,UAAU,EAAE,WAAW,IAAI,IAAI,CAAC,CAAC;QAElG,IAAI,CAAC,wBAAwB,IAAI,mBAAmB,IAAI,IAAI,IAAI,wBAAwB;YACpF,MAAM,IAAI,uBAAuB,CAAC,8DAA8D,CAAC,CAAC;QAEtG,OAAO,mBAAmB,IAAI,CAAC,CAAC;IACpC,CAAC;IAED,MAAM,IAAI,KAAK,CAAC,4BAA4B,SAAS,EAAE,CAAC,CAAC;AAC7D,CAAC;AAED,SAAS,2BAA2B,CAAC,EACjC,YAAY,EACZ,QAAQ,EACR,UAAU,EACV,YAAY,EACZ,YAAY,EACZ,4BAA4B,EAQ/B;IACG,OAAO,cAAc,CAAC;QAClB,CAAC,SAAS;YACN,MAAM,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,YAAY,IAAI,CAAC,CAAC,CAAC,CAAC;YAC7D,MAAM,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,YAAY,CAAC,WAAW,EAAE,YAAY,IAAI,YAAY,CAAC,WAAW,CAAC,CAAC,CAAC;YAE3G,KAAK,IAAI,MAAM,GAAG,SAAS,EAAE,MAAM,IAAI,SAAS,EAAE,MAAM,EAAE,EAAE,CAAC;gBACzD,MAAM;oBACF,SAAS,EAAE,MAAM;iBACpB,CAAC;YACN,CAAC;QACL,CAAC;QACD,KAAK,CAAC,MAAM;YACR,MAAM,kBAAkB,GAAG,2BAA2B,CAAC;gBACnD,SAAS,EAAE,MAAM,CAAC,SAAS;gBAC3B,YAAY;gBACZ,WAAW,EAAE,QAAQ;gBACrB,UAAU;gBACV,4BAA4B;aAC/B,CAAC,CAAC;YAEH,IAAI,kBAAkB,IAAI,IAAI;gBAC1B,OAAO,IAAI,CAAC;YAEhB,OAAO,mCAAmC,CAAC,EAAC,SAAS,EAAE,MAAM,CAAC,SAAS,EAAE,WAAW,EAAE,kBAAkB,CAAC,WAAW,EAAC,EAAE;gBACnH,cAAc,EAAE,YAAY,CAAC,WAAW;gBACxC,gBAAgB,EAAE,0BAA0B,CAAC,EAAC,gBAAgB,EAAE,YAAY,CAAC,gBAAgB,EAAC,CAAC;aAClG,CAAC,CAAC;QACP,CAAC;KACJ,CAAC,EAAE,SAAS,IAAI,IAAI,CAAC;AAC1B,CAAC;AAED,SAAS,mCAAmC,CAAC,EAAC,SAAS,EAAE,WAAW,EAA2C,EAAE,EAC7G,cAAc,EAAE,gBAAgB,EAGnC;IACG,SAAS,cAAc;QACnB,OAAO,WAAW,CAAC,SAAS,EAAE,CAAC;gBAC3B,KAAK,EAAE,CAAC;gBACR,MAAM,EAAE,CAAC;aACZ,EAAE;gBACC,KAAK,EAAE,CAAC;gBACR,MAAM,EAAE,EAAE;aACb,EAAE;gBACC,KAAK,EAAE,cAAc;gBACrB,MAAM,EAAE,EAAE;gBACV,GAAG,EAAE,cAAc;aACtB,CAAC,CAAC,CAAC;IACR,CAAC;IAED,SAAS,gBAAgB;QACrB,MAAM,mBAAmB,GAAG,SAAS,GAAG,cAAc,CAAC;QAEvD,OAAO,WAAW,CAAC,WAAW,EAAE,CAAC;gBAC7B,KAAK,EAAE,CAAC;gBACR,MAAM,EAAE,CAAC;aACZ,EAAE;gBACC,KAAK,EAAE,IAAI;gBACX,MAAM,EAAE,CAAC;aACZ,EAAE;gBACC,KAAK,EAAE,IAAI;gBACX,MAAM,EAAE,mBAAmB,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;aAC5C,EAAE;gBACC,KAAK,EAAE,IAAI;gBACX,MAAM,EAAE,mBAAmB,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE;aAC7C,EAAE;gBACC,KAAK,EAAE,IAAI;gBACX,MAAM,EAAE,mBAAmB,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;gBACzC,GAAG,EAAE,IAAI,CAAC,GAAG,CAAC,gBAAgB,EAAE,KAAK,CAAC;aACzC,CAAC,CAAC,CAAC;IACR,CAAC;IAED,OAAO,cAAc,EAAE,GAAG,gBAAgB,EAAE,CAAC;AACjD,CAAC;AAED,SAAS,2BAA2B,CAAC,EACjC,SAAS,EAAE,YAAY,EAAE,WAAW,EAAE,UAAU,EAAE,4BAA4B,GAAG,KAAK,EAIzF;IACG,MAAM,SAAS,GAAG,YAAY,CAAC,iCAAiC,CAAC,EAAC,SAAS,EAAC,CAAC,CAAC,OAAO,CAAC;IAEtF,IAAI,SAAS,GAAG,WAAW;QACvB,OAAO,IAAI,CAAC;IAEhB,IAAI,UAAU,IAAI,IAAI,IAAI,UAAU,CAAC,WAAW,IAAI,IAAI,EAAE,CAAC;QACvD,MAAM,WAAW,GAAG,YAAY,CAAC,mCAAmC,CAAC;YACjE,WAAW,EAAE,UAAU,CAAC,WAAW;YACnC,SAAS,EAAE,0BAA0B,CAAC,EAAC,WAAW,EAAE,UAAU,CAAC,WAAW,EAAE,SAAS,EAAE,CAAC,EAAC,CAAC;YAC1F,cAAc,EAAE,SAAS;YACzB,SAAS,EAAE,CAAC;YACZ,kBAAkB,EAAE,UAAU,CAAC,gBAAgB,IAAI,KAAK;YACxD,cAAc,EAAE,4BAA4B;SAC/C,CAAC,CAAC,OAAO,CAAC;QAEX,MAAM,SAAS,GAAG,SAAS,GAAG,WAAW,CAAC;QAC1C,IAAI,SAAS,GAAG,WAAW;YACvB,OAAO,IAAI,CAAC;QAEhB,OAAO;YACH,WAAW,EAAE,UAAU,CAAC,WAAW;YACnC,WAAW;YACX,SAAS;SACZ,CAAC;IACN,CAAC;IAED,MAAM,UAAU,GAAG,iCAAiC,CAAC;QACjD,SAAS;QACT,YAAY;QACZ,IAAI,EAAE,WAAW,GAAG,SAAS;QAC7B,kBAAkB,EAAE,UAAU,EAAE,gBAAgB,IAAI,KAAK;QACzD,cAAc,EAAE,4BAA4B;KAC/C,CAAC,CAAC;IAEH,IAAI,UAAU,IAAI,IAAI,IAAI,SAAS,GAAG,UAAU,CAAC,IAAI,GAAG,WAAW;QAC/D,OAAO,IAAI,CAAC;IAEhB,OAAO;QACH,WAAW,EAAE,UAAU,CAAC,WAAW;QACnC,WAAW,EAAE,UAAU,CAAC,IAAI;QAC5B,SAAS,EAAE,SAAS,GAAG,UAAU,CAAC,IAAI;KACzC,CAAC;AACN,CAAC;AAED,SAAS,iCAAiC,CAAC,EAAC,SAAS,EAAE,YAAY,EAAE,IAAI,EAAE,kBAAkB,EAAE,cAAc,EAE5G;IACG,MAAM,cAAc,GAAG,0BAA0B,CAAC,EAAC,gBAAgB,EAAE,YAAY,CAAC,gBAAgB,EAAC,CAAC,CAAC;IAErG,KAAK,IAAI,WAAW,GAAG,cAAc,EAAE,WAAW,IAAI,mCAAmC,EAAE,WAAW,EAAE,EAAE,CAAC;QACvG,MAAM,WAAW,GAAG,YAAY,CAAC,mCAAmC,CAAC;YACjE,WAAW;YACX,SAAS,EAAE,0BAA0B,CAAC,EAAC,WAAW,EAAE,SAAS,EAAE,CAAC,EAAC,CAAC;YAClE,cAAc,EAAE,SAAS;YACzB,SAAS,EAAE,CAAC;YACZ,kBAAkB;YAClB,cAAc;SACjB,CAAC,CAAC,OAAO,CAAC;QAEX,IAAI,WAAW,IAAI,IAAI;YACnB,OAAO;gBACH,WAAW;gBACX,IAAI,EAAE,WAAW;aACpB,CAAC;IACV,CAAC;IAED,OAAO,IAAI,CAAC;AAChB,CAAC"}
|
|
@@ -1,31 +1,44 @@
|
|
|
1
1
|
export declare const enum GgufArchitectureType {
|
|
2
2
|
llama = "llama",
|
|
3
3
|
falcon = "falcon",
|
|
4
|
+
grok = "grok",
|
|
4
5
|
gpt2 = "gpt2",
|
|
5
6
|
gptj = "gptj",
|
|
6
7
|
gptneox = "gptneox",
|
|
7
8
|
mpt = "mpt",
|
|
8
9
|
baichuan = "baichuan",
|
|
9
10
|
starcoder = "starcoder",
|
|
10
|
-
persimmon = "persimmon",
|
|
11
11
|
refact = "refact",
|
|
12
12
|
bert = "bert",
|
|
13
13
|
nomicBert = "nomic-bert",
|
|
14
|
+
jinaBertV2 = "jina-bert-v2",
|
|
14
15
|
bloom = "bloom",
|
|
15
16
|
stablelm = "stablelm",
|
|
16
17
|
qwen = "qwen",
|
|
17
18
|
qwen2 = "qwen2",
|
|
19
|
+
qwen2moe = "qwen2moe",
|
|
18
20
|
phi2 = "phi2",
|
|
21
|
+
phi3 = "phi3",
|
|
19
22
|
plamo = "plamo",
|
|
20
23
|
codeshell = "codeshell",
|
|
21
24
|
orion = "orion",
|
|
22
25
|
internlm2 = "internlm2",
|
|
23
26
|
minicpm = "minicpm",
|
|
24
27
|
gemma = "gemma",
|
|
28
|
+
gemma2 = "gemma2",
|
|
25
29
|
starcoder2 = "starcoder2",
|
|
26
30
|
mamba = "mamba",
|
|
31
|
+
xverse = "xverse",
|
|
27
32
|
commandR = "command-r",
|
|
28
|
-
|
|
33
|
+
dbrx = "dbrx",
|
|
34
|
+
olmo = "olmo",
|
|
35
|
+
openelm = "openelm",
|
|
36
|
+
arctic = "arctic",
|
|
37
|
+
deepseek2 = "deepseek2",
|
|
38
|
+
bitnet = "bitnet",
|
|
39
|
+
t5 = "t5",
|
|
40
|
+
jais = "jais",
|
|
41
|
+
unknown = "(unknown)"
|
|
29
42
|
}
|
|
30
43
|
export type GgufMetadata<A extends GgufArchitectureType = GgufArchitectureType> = {
|
|
31
44
|
readonly general: GgufMetadataGeneral<A>;
|
|
@@ -44,7 +57,6 @@ export type GgufMetadataLlmToType = {
|
|
|
44
57
|
[GgufArchitectureType.bloom]: GgufMetadataBloom;
|
|
45
58
|
[GgufArchitectureType.falcon]: GgufMetadataFalcon;
|
|
46
59
|
[GgufArchitectureType.mamba]: GgufMetadataMamba;
|
|
47
|
-
[GgufArchitectureType.rwkv]: GgufMetadataRWKV;
|
|
48
60
|
};
|
|
49
61
|
export declare enum GgufFileType {
|
|
50
62
|
ALL_F32 = 0,
|
|
@@ -325,11 +337,4 @@ export type GgufMetadataMamba = {
|
|
|
325
337
|
readonly layer_norm_rms_epsilon: number;
|
|
326
338
|
};
|
|
327
339
|
};
|
|
328
|
-
export type GgufMetadataRWKV = {
|
|
329
|
-
readonly architecture_version: 4 | number;
|
|
330
|
-
readonly context_length: number;
|
|
331
|
-
readonly block_count: number;
|
|
332
|
-
readonly embedding_length: number;
|
|
333
|
-
readonly feed_forward_length: number;
|
|
334
|
-
};
|
|
335
340
|
export declare function isGgufMetadataOfArchitectureType<A extends GgufArchitectureType>(metadata: GgufMetadata, type: A): metadata is GgufMetadata<A>;
|
|
@@ -2,31 +2,44 @@ export var GgufArchitectureType;
|
|
|
2
2
|
(function (GgufArchitectureType) {
|
|
3
3
|
GgufArchitectureType["llama"] = "llama";
|
|
4
4
|
GgufArchitectureType["falcon"] = "falcon";
|
|
5
|
+
GgufArchitectureType["grok"] = "grok";
|
|
5
6
|
GgufArchitectureType["gpt2"] = "gpt2";
|
|
6
7
|
GgufArchitectureType["gptj"] = "gptj";
|
|
7
8
|
GgufArchitectureType["gptneox"] = "gptneox";
|
|
8
9
|
GgufArchitectureType["mpt"] = "mpt";
|
|
9
10
|
GgufArchitectureType["baichuan"] = "baichuan";
|
|
10
11
|
GgufArchitectureType["starcoder"] = "starcoder";
|
|
11
|
-
GgufArchitectureType["persimmon"] = "persimmon";
|
|
12
12
|
GgufArchitectureType["refact"] = "refact";
|
|
13
13
|
GgufArchitectureType["bert"] = "bert";
|
|
14
14
|
GgufArchitectureType["nomicBert"] = "nomic-bert";
|
|
15
|
+
GgufArchitectureType["jinaBertV2"] = "jina-bert-v2";
|
|
15
16
|
GgufArchitectureType["bloom"] = "bloom";
|
|
16
17
|
GgufArchitectureType["stablelm"] = "stablelm";
|
|
17
18
|
GgufArchitectureType["qwen"] = "qwen";
|
|
18
19
|
GgufArchitectureType["qwen2"] = "qwen2";
|
|
20
|
+
GgufArchitectureType["qwen2moe"] = "qwen2moe";
|
|
19
21
|
GgufArchitectureType["phi2"] = "phi2";
|
|
22
|
+
GgufArchitectureType["phi3"] = "phi3";
|
|
20
23
|
GgufArchitectureType["plamo"] = "plamo";
|
|
21
24
|
GgufArchitectureType["codeshell"] = "codeshell";
|
|
22
25
|
GgufArchitectureType["orion"] = "orion";
|
|
23
26
|
GgufArchitectureType["internlm2"] = "internlm2";
|
|
24
27
|
GgufArchitectureType["minicpm"] = "minicpm";
|
|
25
28
|
GgufArchitectureType["gemma"] = "gemma";
|
|
29
|
+
GgufArchitectureType["gemma2"] = "gemma2";
|
|
26
30
|
GgufArchitectureType["starcoder2"] = "starcoder2";
|
|
27
31
|
GgufArchitectureType["mamba"] = "mamba";
|
|
32
|
+
GgufArchitectureType["xverse"] = "xverse";
|
|
28
33
|
GgufArchitectureType["commandR"] = "command-r";
|
|
29
|
-
GgufArchitectureType["
|
|
34
|
+
GgufArchitectureType["dbrx"] = "dbrx";
|
|
35
|
+
GgufArchitectureType["olmo"] = "olmo";
|
|
36
|
+
GgufArchitectureType["openelm"] = "openelm";
|
|
37
|
+
GgufArchitectureType["arctic"] = "arctic";
|
|
38
|
+
GgufArchitectureType["deepseek2"] = "deepseek2";
|
|
39
|
+
GgufArchitectureType["bitnet"] = "bitnet";
|
|
40
|
+
GgufArchitectureType["t5"] = "t5";
|
|
41
|
+
GgufArchitectureType["jais"] = "jais";
|
|
42
|
+
GgufArchitectureType["unknown"] = "(unknown)";
|
|
30
43
|
})(GgufArchitectureType || (GgufArchitectureType = {}));
|
|
31
44
|
// source: `enum llama_ftype` in `llama.h` in the `llama.cpp` source code
|
|
32
45
|
export var GgufFileType;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"GgufMetadataTypes.js","sourceRoot":"","sources":["../../../src/gguf/types/GgufMetadataTypes.ts"],"names":[],"mappings":"AAAA,MAAM,CAAN,IAAkB,
|
|
1
|
+
{"version":3,"file":"GgufMetadataTypes.js","sourceRoot":"","sources":["../../../src/gguf/types/GgufMetadataTypes.ts"],"names":[],"mappings":"AAAA,MAAM,CAAN,IAAkB,oBAyCjB;AAzCD,WAAkB,oBAAoB;IAClC,uCAAe,CAAA;IACf,yCAAiB,CAAA;IACjB,qCAAa,CAAA;IACb,qCAAa,CAAA;IACb,qCAAa,CAAA;IACb,2CAAmB,CAAA;IACnB,mCAAW,CAAA;IACX,6CAAqB,CAAA;IACrB,+CAAuB,CAAA;IACvB,yCAAiB,CAAA;IACjB,qCAAa,CAAA;IACb,gDAAwB,CAAA;IACxB,mDAA2B,CAAA;IAC3B,uCAAe,CAAA;IACf,6CAAqB,CAAA;IACrB,qCAAa,CAAA;IACb,uCAAe,CAAA;IACf,6CAAqB,CAAA;IACrB,qCAAa,CAAA;IACb,qCAAa,CAAA;IACb,uCAAe,CAAA;IACf,+CAAuB,CAAA;IACvB,uCAAe,CAAA;IACf,+CAAuB,CAAA;IACvB,2CAAmB,CAAA;IACnB,uCAAe,CAAA;IACf,yCAAiB,CAAA;IACjB,iDAAyB,CAAA;IACzB,uCAAe,CAAA;IACf,yCAAiB,CAAA;IACjB,8CAAsB,CAAA;IACtB,qCAAa,CAAA;IACb,qCAAa,CAAA;IACb,2CAAmB,CAAA;IACnB,yCAAiB,CAAA;IACjB,+CAAuB,CAAA;IACvB,yCAAiB,CAAA;IACjB,iCAAS,CAAA;IACT,qCAAa,CAAA;IACb,6CAAqB,CAAA;AACzB,CAAC,EAzCiB,oBAAoB,KAApB,oBAAoB,QAyCrC;AA8BD,yEAAyE;AACzE,MAAM,CAAN,IAAY,YAgCX;AAhCD,WAAY,YAAY;IACpB,qDAAW,CAAA;IACX,2DAAc,CAAA;IACd,6DAAe,CAAA;IACf,6DAAe,CAAA;IACf,+EAAwB,CAAA;IACxB,6DAAe,CAAA;IACf,6DAAe,CAAA;IACf,6DAAe,CAAA;IACf,6DAAe,CAAA;IACf,6DAAe,CAAA;IACf,8DAAgB,CAAA;IAChB,kEAAkB,CAAA;IAClB,kEAAkB,CAAA;IAClB,kEAAkB,CAAA;IAClB,kEAAkB,CAAA;IAClB,kEAAkB,CAAA;IAClB,kEAAkB,CAAA;IAClB,kEAAkB,CAAA;IAClB,8DAAgB,CAAA;IAChB,oEAAmB,CAAA;IACnB,kEAAkB,CAAA;IAClB,kEAAkB,CAAA;IAClB,kEAAkB,CAAA;IAClB,oEAAmB,CAAA;IACnB,gEAAiB,CAAA;IACjB,kEAAkB,CAAA;IAClB,gEAAiB,CAAA;IACjB,gEAAiB,CAAA;IACjB,gEAAiB,CAAA;IACjB,gEAAiB,CAAA;IACjB,kEAAkB,CAAA;AACtB,CAAC,EAhCW,YAAY,KAAZ,YAAY,QAgCvB;AAyED,MAAM,CAAN,IAAkB,8BAQjB;AARD,WAAkB,8BAA8B;IAC5C,6FAAa,CAAA;IACb,uFAAU,CAAA;IACV,yFAAW,CAAA;IACX,yFAAW,CAAA;IACX,iGAAe,CAAA;IACf,uFAAU,CAAA;IACV,mFAAQ,CAAA;AACZ,CAAC,EARiB,8BAA8B,KAA9B,8BAA8B,QAQ/C;AAiCD,MAAM,CAAN,IAAkB,mCAKjB;AALD,WAAkB,mCAAmC;IACjD,4GAAgB,CAAA;IAChB,6FAAQ,CAAA;IACR,6FAAQ,CAAA;IACR,2FAAO,CAAA;AACX,CAAC,EALiB,mCAAmC,KAAnC,mCAAmC,QAKpD;AA8MD,MAAM,UAAU,gCAAgC,CAC5C,QAAsB,EAAE,IAAO;IAE/B,OAAO,QAAQ,EAAE,OAAO,EAAE,YAAY,KAAK,IAAI,CAAC;AACpD,CAAC"}
|
package/dist/index.d.ts
CHANGED
|
@@ -2,7 +2,7 @@ import { DisposedError } from "lifecycle-utils";
|
|
|
2
2
|
import { Llama } from "./bindings/Llama.js";
|
|
3
3
|
import { getLlama, type LlamaOptions, type LastBuildOptions } from "./bindings/getLlama.js";
|
|
4
4
|
import { NoBinaryFoundError } from "./bindings/utils/NoBinaryFoundError.js";
|
|
5
|
-
import { LlamaLogLevel, LlamaLogLevelGreaterThan, LlamaLogLevelGreaterThanOrEqual, LlamaVocabularyType } from "./bindings/types.js";
|
|
5
|
+
import { type LlamaGpuType, LlamaLogLevel, LlamaLogLevelGreaterThan, LlamaLogLevelGreaterThanOrEqual, LlamaVocabularyType } from "./bindings/types.js";
|
|
6
6
|
import { LlamaModel, LlamaModelInfillTokens, type LlamaModelOptions, LlamaModelTokens } from "./evaluator/LlamaModel/LlamaModel.js";
|
|
7
7
|
import { TokenAttributes } from "./evaluator/LlamaModel/utils/TokenAttributes.js";
|
|
8
8
|
import { LlamaGrammar, type LlamaGrammarOptions } from "./evaluator/LlamaGrammar.js";
|
|
@@ -46,6 +46,6 @@ import { createModelDownloader, ModelDownloader, type ModelDownloaderOptions } f
|
|
|
46
46
|
import { type ChatHistoryItem, type ChatModelFunctionCall, type ChatModelFunctions, type ChatModelResponse, type ChatSessionModelFunction, type ChatSessionModelFunctions, type ChatSystemMessage, type ChatUserMessage, type Token, type Tokenizer, type Detokenizer, isChatModelResponseFunctionCall, type LLamaContextualRepeatPenalty, type ChatWrapperSettings, type ChatWrapperGenerateContextStateOptions, type ChatWrapperGeneratedContextState } from "./types.js";
|
|
47
47
|
import { type GbnfJsonArraySchema, type GbnfJsonBasicSchema, type GbnfJsonConstSchema, type GbnfJsonEnumSchema, type GbnfJsonObjectSchema, type GbnfJsonOneOfSchema, type GbnfJsonSchema, type GbnfJsonSchemaImmutableType, type GbnfJsonSchemaToType } from "./utils/gbnfJson/types.js";
|
|
48
48
|
import { type GgufFileInfo } from "./gguf/types/GgufFileInfoTypes.js";
|
|
49
|
-
import { type GgufMetadata, type GgufMetadataLlmToType, GgufArchitectureType, GgufFileType, GgufMetadataTokenizerTokenType, GgufMetadataArchitecturePoolingType, type GgufMetadataGeneral, type GgufMetadataTokenizer, type GgufMetadataDefaultArchitectureType, type GgufMetadataLlmLLaMA, type GgufMetadataMPT, type GgufMetadataGPTNeoX, type GgufMetadataGPTJ, type GgufMetadataGPT2, type GgufMetadataBloom, type GgufMetadataFalcon, type GgufMetadataMamba,
|
|
49
|
+
import { type GgufMetadata, type GgufMetadataLlmToType, GgufArchitectureType, GgufFileType, GgufMetadataTokenizerTokenType, GgufMetadataArchitecturePoolingType, type GgufMetadataGeneral, type GgufMetadataTokenizer, type GgufMetadataDefaultArchitectureType, type GgufMetadataLlmLLaMA, type GgufMetadataMPT, type GgufMetadataGPTNeoX, type GgufMetadataGPTJ, type GgufMetadataGPT2, type GgufMetadataBloom, type GgufMetadataFalcon, type GgufMetadataMamba, isGgufMetadataOfArchitectureType } from "./gguf/types/GgufMetadataTypes.js";
|
|
50
50
|
import { GgmlType, type GgufTensorInfo } from "./gguf/types/GgufTensorInfoTypes.js";
|
|
51
|
-
export { Llama, getLlama, type LlamaOptions, type LastBuildOptions, LlamaLogLevel, NoBinaryFoundError, LlamaModel, LlamaModelTokens, LlamaModelInfillTokens, TokenAttributes, type LlamaModelOptions, LlamaGrammar, type LlamaGrammarOptions, LlamaJsonSchemaGrammar, LlamaJsonSchemaValidationError, LlamaGrammarEvaluationState, type LlamaGrammarEvaluationStateOptions, LlamaContext, LlamaContextSequence, type LlamaContextOptions, type BatchingOptions, type CustomBatchingDispatchSchedule, type CustomBatchingPrioritizationStrategy, type BatchItem, type PrioritizedBatchItem, type ContextShiftOptions, type ContextTokensDeleteRange, type EvaluationPriority, type LlamaContextSequenceRepeatPenalty, TokenBias, LlamaEmbeddingContext, type LlamaEmbeddingContextOptions, type LlamaEmbedding, LlamaChatSession, defineChatSessionFunction, type LlamaChatSessionOptions, type LlamaChatSessionContextShiftOptions, type LLamaChatPromptOptions, type LLamaChatCompletePromptOptions, type LlamaChatSessionRepeatPenalty, type LLamaChatPreloadPromptOptions, LlamaChat, type LlamaChatOptions, type LLamaChatGenerateResponseOptions, type LLamaChatLoadAndCompleteUserMessageOptions, type LLamaChatContextShiftOptions, type LLamaContextualRepeatPenalty, type LlamaChatResponse, type LlamaChatResponseFunctionCall, type LlamaChatLoadAndCompleteUserResponse, LlamaChatSessionPromptCompletionEngine, type LLamaChatPromptCompletionEngineOptions, LlamaCompletion, type LlamaCompletionOptions, type LlamaCompletionGenerationOptions, type LlamaInfillGenerationOptions, type LlamaCompletionResponse, TokenMeter, type TokenMeterState, UnsupportedError, InsufficientMemoryError, DisposedError, ChatWrapper, type ChatWrapperSettings, type ChatWrapperGenerateContextStateOptions, type ChatWrapperGeneratedContextState, EmptyChatWrapper, Llama3ChatWrapper, Llama2ChatWrapper, GeneralChatWrapper, ChatMLChatWrapper, FalconChatWrapper, AlpacaChatWrapper, FunctionaryChatWrapper, GemmaChatWrapper, TemplateChatWrapper, type TemplateChatWrapperOptions, JinjaTemplateChatWrapper, type JinjaTemplateChatWrapperOptions, type JinjaTemplateChatWrapperOptionsConvertMessageFormat, type ChatHistoryFunctionCallMessageTemplate, resolveChatWrapper, type ResolveChatWrapperOptions, resolvableChatWrapperTypeNames, type ResolvableChatWrapperTypeName, specializedChatWrapperTypeNames, type SpecializedChatWrapperTypeName, templateChatWrapperTypeNames, type TemplateChatWrapperTypeName, ChatModelFunctionsDocumentationGenerator, LlamaText, SpecialTokensText, SpecialToken, isLlamaText, tokenizeText, type LlamaTextValue, type LlamaTextInputValue, type LlamaTextJSON, type LlamaTextJSONValue, type LlamaTextSpecialTokensTextJSON, type LlamaTextSpecialTokenJSON, type BuiltinSpecialTokenValue, appendUserMessageToChatHistory, getModuleVersion, type ChatHistoryItem, type ChatModelFunctionCall, type ChatModelFunctions, type ChatModelResponse, type ChatSessionModelFunction, type ChatSessionModelFunctions, type ChatSystemMessage, type ChatUserMessage, type Token, type Tokenizer, type Detokenizer, isChatModelResponseFunctionCall, type GbnfJsonSchema, type GbnfJsonSchemaToType, type GbnfJsonSchemaImmutableType, type GbnfJsonBasicSchema, type GbnfJsonConstSchema, type GbnfJsonEnumSchema, type GbnfJsonOneOfSchema, type GbnfJsonObjectSchema, type GbnfJsonArraySchema, LlamaVocabularyType, LlamaLogLevelGreaterThan, LlamaLogLevelGreaterThanOrEqual, readGgufFileInfo, type GgufFileInfo, type GgufMetadata, type GgufTensorInfo, type GgufMetadataLlmToType, GgufArchitectureType, GgufFileType, GgufMetadataTokenizerTokenType, GgufMetadataArchitecturePoolingType, type GgufMetadataGeneral, type GgufMetadataTokenizer, type GgufMetadataDefaultArchitectureType, type GgufMetadataLlmLLaMA, type GgufMetadataMPT, type GgufMetadataGPTNeoX, type GgufMetadataGPTJ, type GgufMetadataGPT2, type GgufMetadataBloom, type GgufMetadataFalcon, type GgufMetadataMamba,
|
|
51
|
+
export { Llama, getLlama, type LlamaOptions, type LastBuildOptions, type LlamaGpuType, LlamaLogLevel, NoBinaryFoundError, LlamaModel, LlamaModelTokens, LlamaModelInfillTokens, TokenAttributes, type LlamaModelOptions, LlamaGrammar, type LlamaGrammarOptions, LlamaJsonSchemaGrammar, LlamaJsonSchemaValidationError, LlamaGrammarEvaluationState, type LlamaGrammarEvaluationStateOptions, LlamaContext, LlamaContextSequence, type LlamaContextOptions, type BatchingOptions, type CustomBatchingDispatchSchedule, type CustomBatchingPrioritizationStrategy, type BatchItem, type PrioritizedBatchItem, type ContextShiftOptions, type ContextTokensDeleteRange, type EvaluationPriority, type LlamaContextSequenceRepeatPenalty, TokenBias, LlamaEmbeddingContext, type LlamaEmbeddingContextOptions, type LlamaEmbedding, LlamaChatSession, defineChatSessionFunction, type LlamaChatSessionOptions, type LlamaChatSessionContextShiftOptions, type LLamaChatPromptOptions, type LLamaChatCompletePromptOptions, type LlamaChatSessionRepeatPenalty, type LLamaChatPreloadPromptOptions, LlamaChat, type LlamaChatOptions, type LLamaChatGenerateResponseOptions, type LLamaChatLoadAndCompleteUserMessageOptions, type LLamaChatContextShiftOptions, type LLamaContextualRepeatPenalty, type LlamaChatResponse, type LlamaChatResponseFunctionCall, type LlamaChatLoadAndCompleteUserResponse, LlamaChatSessionPromptCompletionEngine, type LLamaChatPromptCompletionEngineOptions, LlamaCompletion, type LlamaCompletionOptions, type LlamaCompletionGenerationOptions, type LlamaInfillGenerationOptions, type LlamaCompletionResponse, TokenMeter, type TokenMeterState, UnsupportedError, InsufficientMemoryError, DisposedError, ChatWrapper, type ChatWrapperSettings, type ChatWrapperGenerateContextStateOptions, type ChatWrapperGeneratedContextState, EmptyChatWrapper, Llama3ChatWrapper, Llama2ChatWrapper, GeneralChatWrapper, ChatMLChatWrapper, FalconChatWrapper, AlpacaChatWrapper, FunctionaryChatWrapper, GemmaChatWrapper, TemplateChatWrapper, type TemplateChatWrapperOptions, JinjaTemplateChatWrapper, type JinjaTemplateChatWrapperOptions, type JinjaTemplateChatWrapperOptionsConvertMessageFormat, type ChatHistoryFunctionCallMessageTemplate, resolveChatWrapper, type ResolveChatWrapperOptions, resolvableChatWrapperTypeNames, type ResolvableChatWrapperTypeName, specializedChatWrapperTypeNames, type SpecializedChatWrapperTypeName, templateChatWrapperTypeNames, type TemplateChatWrapperTypeName, ChatModelFunctionsDocumentationGenerator, LlamaText, SpecialTokensText, SpecialToken, isLlamaText, tokenizeText, type LlamaTextValue, type LlamaTextInputValue, type LlamaTextJSON, type LlamaTextJSONValue, type LlamaTextSpecialTokensTextJSON, type LlamaTextSpecialTokenJSON, type BuiltinSpecialTokenValue, appendUserMessageToChatHistory, getModuleVersion, type ChatHistoryItem, type ChatModelFunctionCall, type ChatModelFunctions, type ChatModelResponse, type ChatSessionModelFunction, type ChatSessionModelFunctions, type ChatSystemMessage, type ChatUserMessage, type Token, type Tokenizer, type Detokenizer, isChatModelResponseFunctionCall, type GbnfJsonSchema, type GbnfJsonSchemaToType, type GbnfJsonSchemaImmutableType, type GbnfJsonBasicSchema, type GbnfJsonConstSchema, type GbnfJsonEnumSchema, type GbnfJsonOneOfSchema, type GbnfJsonObjectSchema, type GbnfJsonArraySchema, LlamaVocabularyType, LlamaLogLevelGreaterThan, LlamaLogLevelGreaterThanOrEqual, readGgufFileInfo, type GgufFileInfo, type GgufMetadata, type GgufTensorInfo, type GgufMetadataLlmToType, GgufArchitectureType, GgufFileType, GgufMetadataTokenizerTokenType, GgufMetadataArchitecturePoolingType, type GgufMetadataGeneral, type GgufMetadataTokenizer, type GgufMetadataDefaultArchitectureType, type GgufMetadataLlmLLaMA, type GgufMetadataMPT, type GgufMetadataGPTNeoX, type GgufMetadataGPTJ, type GgufMetadataGPT2, type GgufMetadataBloom, type GgufMetadataFalcon, type GgufMetadataMamba, GgmlType, isGgufMetadataOfArchitectureType, GgufInsights, type GgufInsightsResourceRequirements, GgufInsightsConfigurationResolver, createModelDownloader, ModelDownloader, type ModelDownloaderOptions };
|
package/dist/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAC,aAAa,EAAC,MAAM,iBAAiB,CAAC;AAC9C,OAAO,EAAC,KAAK,EAAC,MAAM,qBAAqB,CAAC;AAC1C,OAAO,EAAC,QAAQ,EAA2C,MAAM,wBAAwB,CAAC;AAC1F,OAAO,EAAC,kBAAkB,EAAC,MAAM,wCAAwC,CAAC;AAC1E,OAAO,
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAC,aAAa,EAAC,MAAM,iBAAiB,CAAC;AAC9C,OAAO,EAAC,KAAK,EAAC,MAAM,qBAAqB,CAAC;AAC1C,OAAO,EAAC,QAAQ,EAA2C,MAAM,wBAAwB,CAAC;AAC1F,OAAO,EAAC,kBAAkB,EAAC,MAAM,wCAAwC,CAAC;AAC1E,OAAO,EACgB,aAAa,EAAE,wBAAwB,EAAE,+BAA+B,EAAE,mBAAmB,EACnH,MAAM,qBAAqB,CAAC;AAC7B,OAAO,EAAC,UAAU,EAAE,sBAAsB,EAA0B,gBAAgB,EAAC,MAAM,sCAAsC,CAAC;AAClI,OAAO,EAAC,eAAe,EAAC,MAAM,iDAAiD,CAAC;AAChF,OAAO,EAAC,YAAY,EAA2B,MAAM,6BAA6B,CAAC;AACnF,OAAO,EAAC,sBAAsB,EAAC,MAAM,uCAAuC,CAAC;AAC7E,OAAO,EAAC,8BAA8B,EAAC,MAAM,2DAA2D,CAAC;AACzG,OAAO,EAAC,2BAA2B,EAAqC,MAAM,4CAA4C,CAAC;AAC3H,OAAO,EAAC,YAAY,EAAE,oBAAoB,EAAC,MAAM,0CAA0C,CAAC;AAC5F,OAAO,EAAC,qBAAqB,EAAyD,MAAM,sCAAsC,CAAC;AAMnI,OAAO,EAAC,SAAS,EAAC,MAAM,0BAA0B,CAAC;AACnD,OAAO,EACH,gBAAgB,EAEnB,MAAM,kDAAkD,CAAC;AAC1D,OAAO,EAAC,yBAAyB,EAAC,MAAM,iEAAiE,CAAC;AAC1G,OAAO,EACH,SAAS,EAEZ,MAAM,oCAAoC,CAAC;AAC5C,OAAO,EACH,sCAAsC,EACzC,MAAM,8EAA8E,CAAC;AACtF,OAAO,EACH,eAAe,EAElB,MAAM,gCAAgC,CAAC;AACxC,OAAO,EAAC,UAAU,EAAuB,MAAM,2BAA2B,CAAC;AAC3E,OAAO,EAAC,gBAAgB,EAAC,MAAM,6BAA6B,CAAC;AAC7D,OAAO,EAAC,uBAAuB,EAAC,MAAM,oCAAoC,CAAC;AAC3E,OAAO,EAAC,WAAW,EAAC,MAAM,kBAAkB,CAAC;AAC7C,OAAO,EAAC,gBAAgB,EAAC,MAAM,oCAAoC,CAAC;AACpE,OAAO,EAAC,iBAAiB,EAAC,MAAM,qCAAqC,CAAC;AACtE,OAAO,EAAC,iBAAiB,EAAC,MAAM,qCAAqC,CAAC;AACtE,OAAO,EAAC,kBAAkB,EAAC,MAAM,sCAAsC,CAAC;AACxE,OAAO,EAAC,iBAAiB,EAAC,MAAM,qCAAqC,CAAC;AACtE,OAAO,EAAC,iBAAiB,EAAC,MAAM,qCAAqC,CAAC;AACtE,OAAO,EAAC,iBAAiB,EAAC,MAAM,qCAAqC,CAAC;AACtE,OAAO,EAAC,sBAAsB,EAAC,MAAM,0CAA0C,CAAC;AAChF,OAAO,EAAC,gBAAgB,EAAC,MAAM,oCAAoC,CAAC;AACpE,OAAO,EAAC,mBAAmB,EAAkC,MAAM,+CAA+C,CAAC;AACnH,OAAO,EACH,wBAAwB,EAC3B,MAAM,oDAAoD,CAAC;AAE5D,OAAO,EACH,8BAA8B,EAAsC,+BAA+B,EAC9D,4BAA4B,EAAoC,kBAAkB,EAE1H,MAAM,4CAA4C,CAAC;AACpD,OAAO,EAAC,wCAAwC,EAAC,MAAM,kEAAkE,CAAC;AAC1H,OAAO,EACH,SAAS,EAAE,iBAAiB,EAAE,YAAY,EAAE,WAAW,EAAE,YAAY,EAGxE,MAAM,sBAAsB,CAAC;AAC9B,OAAO,EAAC,8BAA8B,EAAC,MAAM,2CAA2C,CAAC;AACzF,OAAO,EAAC,gBAAgB,EAAC,MAAM,6BAA6B,CAAC;AAC7D,OAAO,EAAC,gBAAgB,EAAC,MAAM,4BAA4B,CAAC;AAC5D,OAAO,EAAC,YAAY,EAAwC,MAAM,iCAAiC,CAAC;AACpG,OAAO,EAAC,iCAAiC,EAAC,MAAM,sDAAsD,CAAC;AACvG,OAAO,EAAC,qBAAqB,EAAE,eAAe,EAA8B,MAAM,kCAAkC,CAAC;AAErH,OAAO,EAG2C,+BAA+B,EAEhF,MAAM,YAAY,CAAC;AAMpB,OAAO,EAC4C,oBAAoB,EAAE,YAAY,EAAE,8BAA8B,EACjH,mCAAmC,EAEsC,gCAAgC,EAC5G,MAAM,mCAAmC,CAAC;AAC3C,OAAO,EAAC,QAAQ,EAAsB,MAAM,qCAAqC,CAAC;AAGlF,OAAO,EACH,KAAK,EACL,QAAQ,EAIR,aAAa,EACb,kBAAkB,EAClB,UAAU,EACV,gBAAgB,EAChB,sBAAsB,EACtB,eAAe,EAEf,YAAY,EAEZ,sBAAsB,EACtB,8BAA8B,EAC9B,2BAA2B,EAE3B,YAAY,EACZ,oBAAoB,EAWpB,SAAS,EACT,qBAAqB,EAGrB,gBAAgB,EAChB,yBAAyB,EAOzB,SAAS,EAST,sCAAsC,EAEtC,eAAe,EAKf,UAAU,EAEV,gBAAgB,EAChB,uBAAuB,EACvB,aAAa,EACb,WAAW,EAIX,gBAAgB,EAChB,iBAAiB,EACjB,iBAAiB,EACjB,kBAAkB,EAClB,iBAAiB,EACjB,iBAAiB,EACjB,iBAAiB,EACjB,sBAAsB,EACtB,gBAAgB,EAChB,mBAAmB,EAEnB,wBAAwB,EAIxB,kBAAkB,EAElB,8BAA8B,EAE9B,+BAA+B,EAE/B,4BAA4B,EAE5B,wCAAwC,EACxC,SAAS,EACT,iBAAiB,EACjB,YAAY,EACZ,WAAW,EACX,YAAY,EAQZ,8BAA8B,EAC9B,gBAAgB,EAYhB,+BAA+B,EAU/B,mBAAmB,EACnB,wBAAwB,EACxB,+BAA+B,EAC/B,gBAAgB,EAKhB,oBAAoB,EACpB,YAAY,EACZ,8BAA8B,EAC9B,mCAAmC,EAYnC,QAAQ,EACR,gCAAgC,EAChC,YAAY,EAEZ,iCAAiC,EACjC,qBAAqB,EACrB,eAAe,EAElB,CAAC"}
|
package/llama/addon.cpp
CHANGED
|
@@ -987,6 +987,10 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
|
|
|
987
987
|
context_params.embeddings = options.Get("embeddings").As<Napi::Boolean>().Value();
|
|
988
988
|
}
|
|
989
989
|
|
|
990
|
+
if (options.Has("flashAttention")) {
|
|
991
|
+
context_params.flash_attn = options.Get("flashAttention").As<Napi::Boolean>().Value();
|
|
992
|
+
}
|
|
993
|
+
|
|
990
994
|
if (options.Has("threads")) {
|
|
991
995
|
const auto n_threads = options.Get("threads").As<Napi::Number>().Uint32Value();
|
|
992
996
|
const auto resolved_n_threads = n_threads == 0 ? std::thread::hardware_concurrency() : n_threads;
|
package/llama/gitRelease.bundle
CHANGED
|
Binary file
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "node-llama-cpp",
|
|
3
|
-
"version": "3.0.0-beta.
|
|
3
|
+
"version": "3.0.0-beta.37",
|
|
4
4
|
"description": "Run AI models locally on your machine with node.js bindings for llama.cpp. Force a JSON schema on the model output on the generation level",
|
|
5
5
|
"main": "./dist/index.js",
|
|
6
6
|
"type": "module",
|
|
@@ -193,7 +193,7 @@
|
|
|
193
193
|
}
|
|
194
194
|
},
|
|
195
195
|
"optionalDependencies": {
|
|
196
|
-
"@node-llama-cpp/linux-x64-cuda": "3.0.0-beta.
|
|
197
|
-
"@node-llama-cpp/win-x64-cuda": "3.0.0-beta.
|
|
196
|
+
"@node-llama-cpp/linux-x64-cuda": "3.0.0-beta.37",
|
|
197
|
+
"@node-llama-cpp/win-x64-cuda": "3.0.0-beta.37"
|
|
198
198
|
}
|
|
199
199
|
}
|