node-llama-cpp 3.8.0 → 3.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/bindings/AddonTypes.d.ts +4 -0
- package/dist/bindings/getLlama.js +1 -0
- package/dist/bindings/getLlama.js.map +1 -1
- package/dist/bindings/utils/getLlamaGpuTypes.js +6 -2
- package/dist/bindings/utils/getLlamaGpuTypes.js.map +1 -1
- package/dist/chatWrappers/generic/utils/extractSegmentSettingsFromTokenizerAndChatTemplate.js +2 -1
- package/dist/chatWrappers/generic/utils/extractSegmentSettingsFromTokenizerAndChatTemplate.js.map +1 -1
- package/dist/cli/commands/ChatCommand.d.ts +2 -0
- package/dist/cli/commands/ChatCommand.js +28 -5
- package/dist/cli/commands/ChatCommand.js.map +1 -1
- package/dist/cli/commands/CompleteCommand.d.ts +1 -0
- package/dist/cli/commands/CompleteCommand.js +13 -3
- package/dist/cli/commands/CompleteCommand.js.map +1 -1
- package/dist/cli/commands/DebugCommand.js +1 -0
- package/dist/cli/commands/DebugCommand.js.map +1 -1
- package/dist/cli/commands/InfillCommand.d.ts +1 -0
- package/dist/cli/commands/InfillCommand.js +13 -3
- package/dist/cli/commands/InfillCommand.js.map +1 -1
- package/dist/cli/commands/inspect/commands/InspectEstimateCommand.d.ts +1 -0
- package/dist/cli/commands/inspect/commands/InspectEstimateCommand.js +9 -2
- package/dist/cli/commands/inspect/commands/InspectEstimateCommand.js.map +1 -1
- package/dist/cli/commands/inspect/commands/InspectMeasureCommand.d.ts +1 -0
- package/dist/cli/commands/inspect/commands/InspectMeasureCommand.js +19 -6
- package/dist/cli/commands/inspect/commands/InspectMeasureCommand.js.map +1 -1
- package/dist/cli/recommendedModels.js +79 -0
- package/dist/cli/recommendedModels.js.map +1 -1
- package/dist/cli/utils/interactivelyAskForModel.d.ts +2 -1
- package/dist/cli/utils/interactivelyAskForModel.js +7 -4
- package/dist/cli/utils/interactivelyAskForModel.js.map +1 -1
- package/dist/cli/utils/resolveCommandGgufPath.d.ts +2 -1
- package/dist/cli/utils/resolveCommandGgufPath.js +2 -1
- package/dist/cli/utils/resolveCommandGgufPath.js.map +1 -1
- package/dist/evaluator/LlamaChat/LlamaChat.d.ts +17 -0
- package/dist/evaluator/LlamaChat/LlamaChat.js +85 -20
- package/dist/evaluator/LlamaChat/LlamaChat.js.map +1 -1
- package/dist/evaluator/LlamaChatSession/LlamaChatSession.d.ts +12 -1
- package/dist/evaluator/LlamaChatSession/LlamaChatSession.js +15 -5
- package/dist/evaluator/LlamaChatSession/LlamaChatSession.js.map +1 -1
- package/dist/evaluator/LlamaContext/LlamaContext.d.ts +22 -4
- package/dist/evaluator/LlamaContext/LlamaContext.js +53 -10
- package/dist/evaluator/LlamaContext/LlamaContext.js.map +1 -1
- package/dist/evaluator/LlamaContext/types.d.ts +15 -0
- package/dist/evaluator/LlamaModel/LlamaModel.d.ts +11 -0
- package/dist/evaluator/LlamaModel/LlamaModel.js +11 -2
- package/dist/evaluator/LlamaModel/LlamaModel.js.map +1 -1
- package/dist/gguf/insights/GgufInsights.d.ts +8 -1
- package/dist/gguf/insights/GgufInsights.js +118 -77
- package/dist/gguf/insights/GgufInsights.js.map +1 -1
- package/dist/gguf/insights/GgufInsightsConfigurationResolver.d.ts +8 -4
- package/dist/gguf/insights/GgufInsightsConfigurationResolver.js +12 -6
- package/dist/gguf/insights/GgufInsightsConfigurationResolver.js.map +1 -1
- package/dist/gguf/insights/utils/resolveContextContextSizeOption.d.ts +2 -1
- package/dist/gguf/insights/utils/resolveContextContextSizeOption.js +4 -1
- package/dist/gguf/insights/utils/resolveContextContextSizeOption.js.map +1 -1
- package/dist/gguf/insights/utils/resolveModelGpuLayersOption.d.ts +2 -1
- package/dist/gguf/insights/utils/resolveModelGpuLayersOption.js +13 -7
- package/dist/gguf/insights/utils/resolveModelGpuLayersOption.js.map +1 -1
- package/dist/gguf/types/GgufMetadataTypes.d.ts +2 -1
- package/dist/gguf/types/GgufMetadataTypes.js +1 -1
- package/dist/gguf/types/GgufMetadataTypes.js.map +1 -1
- package/dist/gguf/types/GgufTensorInfoTypes.d.ts +11 -1
- package/dist/gguf/types/GgufTensorInfoTypes.js +10 -0
- package/dist/gguf/types/GgufTensorInfoTypes.js.map +1 -1
- package/llama/addon/AddonContext.cpp +33 -0
- package/llama/addon/AddonContext.h +2 -0
- package/llama/addon/addon.cpp +14 -0
- package/llama/binariesGithubRelease.json +1 -1
- package/llama/gitRelease.bundle +0 -0
- package/llama/llama.cpp.info.json +1 -1
- package/package.json +14 -13
|
@@ -16,11 +16,12 @@ export declare class GgufInsightsConfigurationResolver {
|
|
|
16
16
|
* @param options
|
|
17
17
|
* @param hardwareOverrides
|
|
18
18
|
*/
|
|
19
|
-
resolveAndScoreConfig({ targetGpuLayers, targetContextSize, embeddingContext, flashAttention, useMmap }?: {
|
|
19
|
+
resolveAndScoreConfig({ targetGpuLayers, targetContextSize, embeddingContext, flashAttention, swaFullCache, useMmap }?: {
|
|
20
20
|
targetGpuLayers?: number | "max";
|
|
21
21
|
targetContextSize?: number;
|
|
22
22
|
embeddingContext?: boolean;
|
|
23
23
|
flashAttention?: boolean;
|
|
24
|
+
swaFullCache?: boolean;
|
|
24
25
|
useMmap?: boolean;
|
|
25
26
|
}, { getVramState, getRamState, getSwapState, llamaVramPaddingSize, llamaGpu, llamaSupportsGpuOffloading }?: {
|
|
26
27
|
getVramState?(): Promise<{
|
|
@@ -88,10 +89,11 @@ export declare class GgufInsightsConfigurationResolver {
|
|
|
88
89
|
*
|
|
89
90
|
* `contextSize` defaults to `4096` (if the model train context size is lower than this, the model train context size is used instead).
|
|
90
91
|
*/
|
|
91
|
-
scoreModelConfigurationCompatibility({ contextSize, embeddingContext, flashAttention, maximumFittedContextSizeMultiplier, maximumUnfitConfigurationResourceMultiplier, forceStrictContextSize, forceGpuLayers, useMmap }?: {
|
|
92
|
+
scoreModelConfigurationCompatibility({ contextSize, embeddingContext, flashAttention, swaFullCache, maximumFittedContextSizeMultiplier, maximumUnfitConfigurationResourceMultiplier, forceStrictContextSize, forceGpuLayers, useMmap }?: {
|
|
92
93
|
contextSize?: number;
|
|
93
94
|
embeddingContext?: boolean;
|
|
94
95
|
flashAttention?: boolean;
|
|
96
|
+
swaFullCache?: boolean;
|
|
95
97
|
maximumFittedContextSizeMultiplier?: number;
|
|
96
98
|
maximumUnfitConfigurationResourceMultiplier?: number;
|
|
97
99
|
/**
|
|
@@ -147,7 +149,7 @@ export declare class GgufInsightsConfigurationResolver {
|
|
|
147
149
|
totalVramUsage: number;
|
|
148
150
|
};
|
|
149
151
|
}>;
|
|
150
|
-
resolveModelGpuLayers(gpuLayers?: LlamaModelOptions["gpuLayers"], { ignoreMemorySafetyChecks, getVramState, llamaVramPaddingSize, llamaGpu, llamaSupportsGpuOffloading, defaultContextFlashAttention, useMmap }?: {
|
|
152
|
+
resolveModelGpuLayers(gpuLayers?: LlamaModelOptions["gpuLayers"], { ignoreMemorySafetyChecks, getVramState, llamaVramPaddingSize, llamaGpu, llamaSupportsGpuOffloading, defaultContextFlashAttention, defaultContextSwaFullCache, useMmap }?: {
|
|
151
153
|
ignoreMemorySafetyChecks?: boolean;
|
|
152
154
|
getVramState?(): Promise<{
|
|
153
155
|
total: number;
|
|
@@ -157,6 +159,7 @@ export declare class GgufInsightsConfigurationResolver {
|
|
|
157
159
|
llamaGpu?: BuildGpu;
|
|
158
160
|
llamaSupportsGpuOffloading?: boolean;
|
|
159
161
|
defaultContextFlashAttention?: boolean;
|
|
162
|
+
defaultContextSwaFullCache?: boolean;
|
|
160
163
|
useMmap?: boolean;
|
|
161
164
|
}): Promise<number>;
|
|
162
165
|
/**
|
|
@@ -164,10 +167,11 @@ export declare class GgufInsightsConfigurationResolver {
|
|
|
164
167
|
*
|
|
165
168
|
* If there's no context size that can fit the available resources, an `InsufficientMemoryError` is thrown.
|
|
166
169
|
*/
|
|
167
|
-
resolveContextContextSize(contextSize: LlamaContextOptions["contextSize"], { modelGpuLayers, batchSize, modelTrainContextSize, flashAttention, getVramState, getRamState, getSwapState, llamaGpu, ignoreMemorySafetyChecks, isEmbeddingContext, sequences }: {
|
|
170
|
+
resolveContextContextSize(contextSize: LlamaContextOptions["contextSize"], { modelGpuLayers, batchSize, modelTrainContextSize, flashAttention, swaFullCache, getVramState, getRamState, getSwapState, llamaGpu, ignoreMemorySafetyChecks, isEmbeddingContext, sequences }: {
|
|
168
171
|
modelGpuLayers: number;
|
|
169
172
|
modelTrainContextSize: number;
|
|
170
173
|
flashAttention?: boolean;
|
|
174
|
+
swaFullCache?: boolean;
|
|
171
175
|
batchSize?: LlamaContextOptions["batchSize"];
|
|
172
176
|
sequences?: number;
|
|
173
177
|
getVramState?(): Promise<{
|
|
@@ -24,9 +24,10 @@ export class GgufInsightsConfigurationResolver {
|
|
|
24
24
|
* @param options
|
|
25
25
|
* @param hardwareOverrides
|
|
26
26
|
*/
|
|
27
|
-
async resolveAndScoreConfig({ targetGpuLayers, targetContextSize, embeddingContext = false, flashAttention = false, useMmap = this._ggufInsights._llama.supportsMmap } = {}, { getVramState = (() => this._ggufInsights._llama._vramOrchestrator.getMemoryState()), getRamState = (async () => this._ggufInsights._llama._ramOrchestrator.getMemoryState()), getSwapState = (() => this._ggufInsights._llama._swapOrchestrator.getMemoryState()), llamaVramPaddingSize = this._ggufInsights._llama.vramPaddingSize, llamaGpu = this._ggufInsights._llama.gpu, llamaSupportsGpuOffloading = this._ggufInsights._llama.supportsGpuOffloading } = {}) {
|
|
27
|
+
async resolveAndScoreConfig({ targetGpuLayers, targetContextSize, embeddingContext = false, flashAttention = false, swaFullCache = false, useMmap = this._ggufInsights._llama.supportsMmap } = {}, { getVramState = (() => this._ggufInsights._llama._vramOrchestrator.getMemoryState()), getRamState = (async () => this._ggufInsights._llama._ramOrchestrator.getMemoryState()), getSwapState = (() => this._ggufInsights._llama._swapOrchestrator.getMemoryState()), llamaVramPaddingSize = this._ggufInsights._llama.vramPaddingSize, llamaGpu = this._ggufInsights._llama.gpu, llamaSupportsGpuOffloading = this._ggufInsights._llama.supportsGpuOffloading } = {}) {
|
|
28
28
|
const compatibilityScore = await this.scoreModelConfigurationCompatibility({
|
|
29
29
|
flashAttention,
|
|
30
|
+
swaFullCache,
|
|
30
31
|
contextSize: targetContextSize,
|
|
31
32
|
embeddingContext,
|
|
32
33
|
forceGpuLayers: targetGpuLayers,
|
|
@@ -63,7 +64,7 @@ export class GgufInsightsConfigurationResolver {
|
|
|
63
64
|
*
|
|
64
65
|
* `contextSize` defaults to `4096` (if the model train context size is lower than this, the model train context size is used instead).
|
|
65
66
|
*/
|
|
66
|
-
async scoreModelConfigurationCompatibility({ contextSize = Math.min(4096, this._ggufInsights.trainContextSize ?? 4096), embeddingContext = false, flashAttention = false, maximumFittedContextSizeMultiplier = 100, maximumUnfitConfigurationResourceMultiplier = 100, forceStrictContextSize = false, forceGpuLayers, useMmap = this._ggufInsights._llama.supportsMmap } = {}, { getVramState = (() => this._ggufInsights._llama._vramOrchestrator.getMemoryState()), getRamState = (async () => this._ggufInsights._llama._ramOrchestrator.getMemoryState()), getSwapState = (() => this._ggufInsights._llama._swapOrchestrator.getMemoryState()), llamaVramPaddingSize = this._ggufInsights._llama.vramPaddingSize, llamaGpu = this._ggufInsights._llama.gpu, llamaSupportsGpuOffloading = this._ggufInsights._llama.supportsGpuOffloading } = {}) {
|
|
67
|
+
async scoreModelConfigurationCompatibility({ contextSize = Math.min(4096, this._ggufInsights.trainContextSize ?? 4096), embeddingContext = false, flashAttention = false, swaFullCache = false, maximumFittedContextSizeMultiplier = 100, maximumUnfitConfigurationResourceMultiplier = 100, forceStrictContextSize = false, forceGpuLayers, useMmap = this._ggufInsights._llama.supportsMmap } = {}, { getVramState = (() => this._ggufInsights._llama._vramOrchestrator.getMemoryState()), getRamState = (async () => this._ggufInsights._llama._ramOrchestrator.getMemoryState()), getSwapState = (() => this._ggufInsights._llama._swapOrchestrator.getMemoryState()), llamaVramPaddingSize = this._ggufInsights._llama.vramPaddingSize, llamaGpu = this._ggufInsights._llama.gpu, llamaSupportsGpuOffloading = this._ggufInsights._llama.supportsGpuOffloading } = {}) {
|
|
67
68
|
const [vramState, ramState, swapState] = await Promise.all([
|
|
68
69
|
getVramState(),
|
|
69
70
|
getRamState(),
|
|
@@ -93,6 +94,7 @@ export class GgufInsightsConfigurationResolver {
|
|
|
93
94
|
llamaGpu,
|
|
94
95
|
llamaSupportsGpuOffloading,
|
|
95
96
|
defaultContextFlashAttention: flashAttention,
|
|
97
|
+
defaultContextSwaFullCache: swaFullCache,
|
|
96
98
|
ignoreMemorySafetyChecks: forceGpuLayers != null,
|
|
97
99
|
useMmap
|
|
98
100
|
});
|
|
@@ -132,7 +134,8 @@ export class GgufInsightsConfigurationResolver {
|
|
|
132
134
|
modelGpuLayers: resolvedGpuLayers,
|
|
133
135
|
modelTrainContextSize: this._ggufInsights.trainContextSize ?? defaultTrainContextSizeForEstimationPurposes,
|
|
134
136
|
ignoreMemorySafetyChecks: forceStrictContextSize,
|
|
135
|
-
flashAttention
|
|
137
|
+
flashAttention,
|
|
138
|
+
swaFullCache
|
|
136
139
|
});
|
|
137
140
|
contextFitsMemory = true;
|
|
138
141
|
}
|
|
@@ -144,7 +147,8 @@ export class GgufInsightsConfigurationResolver {
|
|
|
144
147
|
contextSize: resolvedContextSize,
|
|
145
148
|
isEmbeddingContext: embeddingContext,
|
|
146
149
|
modelGpuLayers: resolvedGpuLayers,
|
|
147
|
-
flashAttention
|
|
150
|
+
flashAttention,
|
|
151
|
+
swaFullCache
|
|
148
152
|
});
|
|
149
153
|
const rankPoints = {
|
|
150
154
|
gpuLayers: 60,
|
|
@@ -200,7 +204,7 @@ export class GgufInsightsConfigurationResolver {
|
|
|
200
204
|
}
|
|
201
205
|
};
|
|
202
206
|
}
|
|
203
|
-
async resolveModelGpuLayers(gpuLayers, { ignoreMemorySafetyChecks = false, getVramState = (() => this._ggufInsights._llama._vramOrchestrator.getMemoryState()), llamaVramPaddingSize = this._ggufInsights._llama.vramPaddingSize, llamaGpu = this._ggufInsights._llama.gpu, llamaSupportsGpuOffloading = this._ggufInsights._llama.supportsGpuOffloading, defaultContextFlashAttention = false, useMmap = this._ggufInsights._llama.supportsMmap } = {}) {
|
|
207
|
+
async resolveModelGpuLayers(gpuLayers, { ignoreMemorySafetyChecks = false, getVramState = (() => this._ggufInsights._llama._vramOrchestrator.getMemoryState()), llamaVramPaddingSize = this._ggufInsights._llama.vramPaddingSize, llamaGpu = this._ggufInsights._llama.gpu, llamaSupportsGpuOffloading = this._ggufInsights._llama.supportsGpuOffloading, defaultContextFlashAttention = false, defaultContextSwaFullCache = false, useMmap = this._ggufInsights._llama.supportsMmap } = {}) {
|
|
204
208
|
return resolveModelGpuLayersOption(gpuLayers, {
|
|
205
209
|
ggufInsights: this._ggufInsights,
|
|
206
210
|
ignoreMemorySafetyChecks,
|
|
@@ -209,6 +213,7 @@ export class GgufInsightsConfigurationResolver {
|
|
|
209
213
|
llamaGpu,
|
|
210
214
|
llamaSupportsGpuOffloading,
|
|
211
215
|
defaultContextFlashAttention,
|
|
216
|
+
defaultContextSwaFullCache,
|
|
212
217
|
useMmap
|
|
213
218
|
});
|
|
214
219
|
}
|
|
@@ -217,7 +222,7 @@ export class GgufInsightsConfigurationResolver {
|
|
|
217
222
|
*
|
|
218
223
|
* If there's no context size that can fit the available resources, an `InsufficientMemoryError` is thrown.
|
|
219
224
|
*/
|
|
220
|
-
async resolveContextContextSize(contextSize, { modelGpuLayers, batchSize, modelTrainContextSize, flashAttention = false, getVramState = (() => this._ggufInsights._llama._vramOrchestrator.getMemoryState()), getRamState = (async () => this._ggufInsights._llama._ramOrchestrator.getMemoryState()), getSwapState = (() => this._ggufInsights._llama._swapOrchestrator.getMemoryState()), llamaGpu = this._ggufInsights._llama.gpu, ignoreMemorySafetyChecks = false, isEmbeddingContext = false, sequences = getDefaultContextSequences() }) {
|
|
225
|
+
async resolveContextContextSize(contextSize, { modelGpuLayers, batchSize, modelTrainContextSize, flashAttention = false, swaFullCache = false, getVramState = (() => this._ggufInsights._llama._vramOrchestrator.getMemoryState()), getRamState = (async () => this._ggufInsights._llama._ramOrchestrator.getMemoryState()), getSwapState = (() => this._ggufInsights._llama._swapOrchestrator.getMemoryState()), llamaGpu = this._ggufInsights._llama.gpu, ignoreMemorySafetyChecks = false, isEmbeddingContext = false, sequences = getDefaultContextSequences() }) {
|
|
221
226
|
return await resolveContextContextSizeOption({
|
|
222
227
|
contextSize,
|
|
223
228
|
batchSize,
|
|
@@ -226,6 +231,7 @@ export class GgufInsightsConfigurationResolver {
|
|
|
226
231
|
modelGpuLayers,
|
|
227
232
|
modelTrainContextSize,
|
|
228
233
|
flashAttention,
|
|
234
|
+
swaFullCache,
|
|
229
235
|
getVramState,
|
|
230
236
|
getRamState,
|
|
231
237
|
getSwapState,
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"GgufInsightsConfigurationResolver.js","sourceRoot":"","sources":["../../../src/gguf/insights/GgufInsightsConfigurationResolver.ts"],"names":[],"mappings":"AAGA,OAAO,EAAC,0BAA0B,EAAC,MAAM,8CAA8C,CAAC;AACxF,OAAO,EAAC,uBAAuB,EAAC,MAAM,wCAAwC,CAAC;AAC/E,OAAO,EAAC,2BAA2B,EAAC,MAAM,wCAAwC,CAAC;AACnF,OAAO,EAAC,+BAA+B,EAAC,MAAM,4CAA4C,CAAC;AAC3F,OAAO,EAAC,WAAW,EAAC,MAAM,wBAAwB,CAAC;AACnD,OAAO,EAAC,0BAA0B,EAAC,MAAM,uCAAuC,CAAC;AAGjF,MAAM,CAAC,MAAM,4CAA4C,GAAG,IAAI,CAAC;AACjE,MAAM,kDAAkD,GAAG,IAAI,CAAC;AAGhE,MAAM,OAAO,iCAAiC;IAC1C,gBAAgB,CAAkB,aAAa,CAAe;IAE9D,YAAoB,YAA0B;QAC1C,IAAI,CAAC,aAAa,GAAG,YAAY,CAAC;IACtC,CAAC;IAED,IAAW,YAAY;QACnB,OAAO,IAAI,CAAC,aAAa,CAAC;IAC9B,CAAC;IAED;;;;;;;;;OASG;IACI,KAAK,CAAC,qBAAqB,CAAC,EAC/B,eAAe,EACf,iBAAiB,EACjB,gBAAgB,GAAG,KAAK,EACxB,cAAc,GAAG,KAAK,EACtB,OAAO,GAAG,IAAI,CAAC,aAAa,CAAC,MAAM,CAAC,YAAY,
|
|
1
|
+
{"version":3,"file":"GgufInsightsConfigurationResolver.js","sourceRoot":"","sources":["../../../src/gguf/insights/GgufInsightsConfigurationResolver.ts"],"names":[],"mappings":"AAGA,OAAO,EAAC,0BAA0B,EAAC,MAAM,8CAA8C,CAAC;AACxF,OAAO,EAAC,uBAAuB,EAAC,MAAM,wCAAwC,CAAC;AAC/E,OAAO,EAAC,2BAA2B,EAAC,MAAM,wCAAwC,CAAC;AACnF,OAAO,EAAC,+BAA+B,EAAC,MAAM,4CAA4C,CAAC;AAC3F,OAAO,EAAC,WAAW,EAAC,MAAM,wBAAwB,CAAC;AACnD,OAAO,EAAC,0BAA0B,EAAC,MAAM,uCAAuC,CAAC;AAGjF,MAAM,CAAC,MAAM,4CAA4C,GAAG,IAAI,CAAC;AACjE,MAAM,kDAAkD,GAAG,IAAI,CAAC;AAGhE,MAAM,OAAO,iCAAiC;IAC1C,gBAAgB,CAAkB,aAAa,CAAe;IAE9D,YAAoB,YAA0B;QAC1C,IAAI,CAAC,aAAa,GAAG,YAAY,CAAC;IACtC,CAAC;IAED,IAAW,YAAY;QACnB,OAAO,IAAI,CAAC,aAAa,CAAC;IAC9B,CAAC;IAED;;;;;;;;;OASG;IACI,KAAK,CAAC,qBAAqB,CAAC,EAC/B,eAAe,EACf,iBAAiB,EACjB,gBAAgB,GAAG,KAAK,EACxB,cAAc,GAAG,KAAK,EACtB,YAAY,GAAG,KAAK,EACpB,OAAO,GAAG,IAAI,CAAC,aAAa,CAAC,MAAM,CAAC,YAAY,KAQhD,EAAE,EAAE,EACJ,YAAY,GAAG,CAAC,GAAG,EAAE,CAAC,IAAI,CAAC,aAAa,CAAC,MAAM,CAAC,iBAAiB,CAAC,cAAc,EAAE,CAAC,EACnF,WAAW,GAAG,CAAC,KAAK,IAAI,EAAE,CAAC,IAAI,CAAC,aAAa,CAAC,MAAM,CAAC,gBAAgB,CAAC,cAAc,EAAE,CAAC,EACvF,YAAY,GAAG,CAAC,GAAG,EAAE,CAAC,IAAI,CAAC,aAAa,CAAC,MAAM,CAAC,iBAAiB,CAAC,cAAc,EAAE,CAAC,EACnF,oBAAoB,GAAG,IAAI,CAAC,aAAa,CAAC,MAAM,CAAC,eAAe,EAChE,QAAQ,GAAG,IAAI,CAAC,aAAa,CAAC,MAAM,CAAC,GAAG,EACxC,0BAA0B,GAAG,IAAI,CAAC,aAAa,CAAC,MAAM,CAAC,qBAAqB,KAQ5E,EAAE;QACF,MAAM,kBAAkB,GAAG,MAAM,IAAI,CAAC,oCAAoC,CAAC;YACvE,cAAc;YACd,YAAY;YACZ,WAAW,EAAE,iBAAiB;YAC9B,gBAAgB;YAChB,cAAc,EAAE,eAAe;YAC/B,sBAAsB,EAAE,iBAAiB,IAAI,IAAI;YACjD,OAAO;SACV,EAAE;YACC,YAAY;YACZ,WAAW;YACX,YAAY;YACZ,oBAAoB;YACpB,QAAQ;YACR,0BAA0B;SAC7B,CAAC,CAAC;QAEH,OAAO,kBAAkB,CAAC;IAC9B,CAAC;IAED;;;;;;;;;;;;;;;;;;;;OAoBG;IACI,KAAK,CAAC,oCAAoC,CAAC,EAC9C,WAAW,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,EAAE,IAAI,CAAC,aAAa,CAAC,gBAAgB,IAAI,IAAI,CAAC,EACzE,gBAAgB,GAAG,KAAK,EACxB,cAAc,GAAG,KAAK,EACtB,YAAY,GAAG,KAAK,EACpB,kCAAkC,GAAG,GAAG,EACxC,2CAA2C,GAAG,GAAG,EACjD,sBAAsB,GAAG,KAAK,EAC9B,cAAc,EACd,OAAO,GAAG,IAAI,CAAC,aAAa,CAAC,MAAM,CAAC,YAAY,KAkBhD,EAAE,EAAE,EACJ,YAAY,GAAG,CAAC,GAAG,EAAE,CAAC,IAAI,CAAC,aAAa,CAAC,MAAM,CAAC,iBAAiB,CAAC,cAAc,EAAE,CAAC,EACnF,WAAW,GAAG,CAAC,KAAK,IAAI,EAAE,CAAC,IAAI,CAAC,aAAa,CAAC,MAAM,CAAC,gBAAgB,CAAC,cAAc,EAAE,CAAC,EACvF,YAAY,GAAG,CAAC,GAAG,EAAE,CAAC,IAAI,CAAC,aAAa,CAAC,MAAM,CAAC,iBAAiB,CAAC,cAAc,EAAE,CAAC,EACnF,oBAAoB,GAAG,IAAI,CAAC,aAAa,CAAC,MAAM,CAAC,eAAe,EAChE,QAAQ,GAAG,IAAI,CAAC,aAAa,CAAC,MAAM,CAAC,GAAG,EACxC,0BAA0B,GAAG,IAAI,CAAC,aAAa,CAAC,MAAM,CAAC,qBAAqB,KAQ5E,EAAE;QAiCF,MAAM,CACF,SAAS,EACT,QAAQ,EACR,SAAS,CACZ,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC;YAClB,YAAY,EAAE;YACd,WAAW,EAAE;YACb,YAAY,EAAE;SACjB,CAAC,CAAC;QACH,IAAI,iBAAiB,GAAG,CAAC,cAAc,IAAI,IAAI,IAAI,cAAc,IAAI,KAAK,CAAC;YACvE,CAAC,CAAC,IAAI,CAAC,YAAY,CAAC,WAAW;YAC/B,CAAC,CAAC,cAAc,CAAC;QACrB,IAAI,kBAAkB,GAAG,KAAK,CAAC;QAE/B,IAAI,CAAC;YACD,iBAAiB,GAAG,MAAM,IAAI,CAAC,qBAAqB,CAChD,cAAc,IAAI,IAAI;gBAClB,CAAC,CAAC,cAAc;gBAChB,CAAC,CAAC,gBAAgB;oBACd,CAAC,CAAC;wBACE,UAAU,EAAE;4BACR,gBAAgB,EAAE,IAAI;4BACtB,WAAW,EAAE,sBAAsB;gCAC/B,CAAC,CAAC,WAAW;gCACb,CAAC,CAAC,SAAS;yBAClB;qBACJ;oBACD,CAAC,CAAC,sBAAsB,IAAI,IAAI;wBAC5B,CAAC,CAAC,EAAC,UAAU,EAAE,EAAC,WAAW,EAAC,EAAC;wBAC7B,CAAC,CAAC,MAAM,EACpB;gBACI,YAAY,EAAE,KAAK,IAAI,EAAE,CAAC,SAAS;gBACnC,oBAAoB;gBACpB,QAAQ;gBACR,0BAA0B;gBAC1B,4BAA4B,EAAE,cAAc;gBAC5C,0BAA0B,EAAE,YAAY;gBACxC,wBAAwB,EAAE,cAAc,IAAI,IAAI;gBAChD,OAAO;aACV,CACJ,CAAC;YACF,kBAAkB,GAAG,IAAI,CAAC;QAC9B,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACX,IAAI,CAAC,CAAC,GAAG,YAAY,uBAAuB,CAAC;gBACzC,MAAM,GAAG,CAAC;QAClB,CAAC;QAED,MAAM,SAAS,GAAG,0BAA0B,IAAI,QAAQ,KAAK,KAAK,CAAC;QACnE,MAAM,2BAA2B,GAAG,IAAI,CAAC,aAAa,CAAC,iCAAiC,CAAC;YACrF,SAAS,EAAE,iBAAiB;YAC5B,OAAO;SACV,CAAC,CAAC;QAEH,IAAI,mBAAmB,GAAG,IAAI,CAAC,GAAG,CAC9B,IAAI,CAAC,YAAY,CAAC,gBAAgB,IAAI,kDAAkD,EACxF,kDAAkD,CACrD,CAAC;QACF,IAAI,iBAAiB,GAAG,KAAK,CAAC;QAE9B,IAAI,CAAC;YACD,mBAAmB,GAAG,MAAM,IAAI,CAAC,yBAAyB,CAAC,MAAM,EAAE;gBAC/D,YAAY,EAAE,KAAK,IAAI,EAAE,CAAC,CAAC;oBACvB,KAAK,EAAE,SAAS,CAAC,KAAK;oBACtB,IAAI,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,SAAS,CAAC,IAAI,GAAG,2BAA2B,CAAC,OAAO,CAAC;oBACvE,WAAW,EAAE,SAAS,CAAC,WAAW;iBACrC,CAAC;gBACF,WAAW,EAAE,KAAK,IAAI,EAAE,CAAC,CAAC;oBACtB,KAAK,EAAE,QAAQ,CAAC,KAAK;oBACrB,IAAI,EAAE,IAAI,CAAC,GAAG,CACV,CAAC,EACD,QAAQ,CAAC,IAAI,GAAG,2BAA2B,CAAC,MAAM;wBAClD,CAAC,CAAC,0BAA0B,CAAC,2BAA2B,CAAC,OAAO,EAAE,SAAS,CAAC,CAAC,CAChF;iBACJ,CAAC;gBACF,YAAY,EAAE,KAAK,IAAI,EAAE,CAAC,CAAC;oBACvB,KAAK,EAAE,SAAS,CAAC,KAAK;oBACtB,IAAI,EAAE,IAAI,CAAC,GAAG,CACV,CAAC,EACD,SAAS,CAAC,IAAI,GAAG,IAAI,CAAC,GAAG,CACrB,CAAC,EACD,2BAA2B,CAAC,MAAM;wBAClC,CAAC,CAAC,0BAA0B,CAAC,2BAA2B,CAAC,OAAO,EAAE,SAAS,CAAC,CAAC;wBAC7E,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,CACnB,CACJ;iBACJ,CAAC;gBACF,QAAQ;gBACR,kBAAkB,EAAE,gBAAgB;gBACpC,cAAc,EAAE,iBAAiB;gBACjC,qBAAqB,EAAE,IAAI,CAAC,aAAa,CAAC,gBAAgB,IAAI,4CAA4C;gBAC1G,wBAAwB,EAAE,sBAAsB;gBAChD,cAAc;gBACd,YAAY;aACf,CAAC,CAAC;YACH,iBAAiB,GAAG,IAAI,CAAC;QAC7B,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACX,IAAI,CAAC,CAAC,GAAG,YAAY,uBAAuB,CAAC;gBACzC,MAAM,GAAG,CAAC;QAClB,CAAC;QAED,MAAM,6BAA6B,GAAG,IAAI,CAAC,aAAa,CAAC,mCAAmC,CAAC;YACzF,WAAW,EAAE,mBAAmB;YAChC,kBAAkB,EAAE,gBAAgB;YACpC,cAAc,EAAE,iBAAiB;YACjC,cAAc;YACd,YAAY;SACf,CAAC,CAAC;QAEH,MAAM,UAAU,GAAG;YACf,SAAS,EAAE,EAAE;YACb,qBAAqB,EAAE,EAAE;YACzB,WAAW,EAAE,EAAE;YACf,iBAAiB,EAAE,EAAE;YACrB,qBAAqB,EAAE,EAAE,EAAE,sDAAsD;YACjF,gBAAgB,EAAE,EAAE;SACd,CAAC;QAEX,MAAM,eAAe,GAAG,UAAU,CAAC,SAAS,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,iBAAiB,GAAG,IAAI,CAAC,aAAa,CAAC,WAAW,CAAC,CAAC;QAC/G,MAAM,2BAA2B,GAAG,UAAU,CAAC,qBAAqB,GAAG,CACnE,iBAAiB,KAAK,IAAI,CAAC,aAAa,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAC/D,CAAC;QACF,MAAM,iBAAiB,GAAG,iBAAiB;YACvC,CAAC,CAAC,UAAU,CAAC,WAAW,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,mBAAmB,GAAG,WAAW,CAAC;YACzE,CAAC,CAAC,CAAC,CAAC;QACR,MAAM,uBAAuB,GAAG,UAAU,CAAC,iBAAiB,GAAG,CAC3D,2BAA2B,CAAC,MAAM,IAAI,QAAQ,CAAC,IAAI;YAC/C,CAAC,CAAC,CAAC;YACH,CAAC,CAAC,2BAA2B,CAAC,MAAM,IAAI,QAAQ,CAAC,IAAI,GAAG,SAAS,CAAC,IAAI;gBAClE,CAAC,CAAC,GAAG;gBACL,CAAC,CAAC,2BAA2B,CAAC,MAAM,IAAI,QAAQ,CAAC,KAAK;oBAClD,CAAC,CAAC,GAAG;oBACL,CAAC,CAAC,CACE,GAAG,GAAG,IAAI,CAAC,GAAG,CACV,GAAG,EACH,GAAG,GAAG,CACF,CAAC,2BAA2B,CAAC,MAAM,GAAG,QAAQ,CAAC,KAAK,CAAC,GAAG,QAAQ,CAAC,KAAK,CACzE,CACJ,CACJ,CAChB,CAAC;QACF,MAAM,sBAAsB,GAAG,iBAAiB;YAC5C,CAAC,CAAC,CACE,EAAE,GAAG,IAAI,CAAC,GAAG,CACT,CAAC,EACD,CACI,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,mBAAmB,GAAG,WAAW,CAAC,GAAG,WAAW,CAC/D,GAAG,kCAAkC,CACzC,CACJ;YACD,CAAC,CAAC,CAAC,CAAC;QAER,IAAI,kBAAkB,GAAG,SAAS;YAC9B,CAAC,CAAC,CACE,CAAC,eAAe,GAAG,2BAA2B,GAAG,iBAAiB,GAAG,uBAAuB,CAAC;gBAC7F,CAAC,UAAU,CAAC,SAAS,GAAG,UAAU,CAAC,qBAAqB,GAAG,UAAU,CAAC,WAAW,GAAG,UAAU,CAAC,iBAAiB,CAAC,CACpH;YACD,CAAC,CAAC,CACE,CAAC,iBAAiB,GAAG,uBAAuB,GAAG,6BAA6B,CAAC,IAAI,CAAC,aAAa,CAAC,SAAS,CAAC,CAAC;gBAC3G,CAAC,UAAU,CAAC,WAAW,GAAG,UAAU,CAAC,iBAAiB,GAAG,UAAU,CAAC,qBAAqB,CAAC,CAAC,CAAC;QACpG,IAAI,UAAU,GAAG,sBAAsB,GAAG,UAAU,CAAC,gBAAgB,CAAC;QAEtE,IAAI,CAAC,kBAAkB,IAAI,CAAC,iBAAiB;YACzC,2BAA2B,CAAC,OAAO,GAAG,6BAA6B,CAAC,OAAO,GAAG,SAAS,CAAC,KAAK;YAC7F,2BAA2B,CAAC,MAAM,GAAG,6BAA6B,CAAC,MAAM,GAAG,QAAQ,CAAC,KAAK,GAAG,SAAS,CAAC,KAAK,EAC9G,CAAC;YACC,MAAM,oBAAoB,GAAG,2BAA2B,CAAC,OAAO,GAAG,6BAA6B,CAAC,OAAO,CAAC;YACzG,MAAM,mBAAmB,GAAG,2BAA2B,CAAC,MAAM,GAAG,6BAA6B,CAAC,MAAM,CAAC;YAEtG,kBAAkB,GAAG,CAAC,CAAC;YACvB,UAAU,GAAG,CACT,CAAC,CAAC,GAAG,CAAC,oBAAoB,GAAG,CAAC,SAAS,CAAC,KAAK,GAAG,2CAA2C,CAAC,CAAC,CAAC;gBAC9F,CAAC,CAAC,GAAG,CAAC,mBAAmB,GAAG,CAAC,CAAC,QAAQ,CAAC,KAAK,GAAG,SAAS,CAAC,KAAK,CAAC,GAAG,2CAA2C,CAAC,CAAC,CAAC,CACnH,GAAG,CAAC,CAAC;QACV,CAAC;QAED,OAAO;YACH,kBAAkB;YAClB,UAAU;YACV,UAAU,EAAE,kBAAkB,GAAG,UAAU;YAE3C,cAAc,EAAE;gBACZ,SAAS,EAAE,iBAAiB;gBAC5B,WAAW,EAAE,mBAAmB;gBAEhC,aAAa,EAAE,2BAA2B,CAAC,MAAM;gBACjD,eAAe,EAAE,6BAA6B,CAAC,MAAM;gBACrD,aAAa,EAAE,2BAA2B,CAAC,MAAM,GAAG,6BAA6B,CAAC,MAAM;gBAExF,cAAc,EAAE,2BAA2B,CAAC,OAAO;gBACnD,gBAAgB,EAAE,6BAA6B,CAAC,OAAO;gBACvD,cAAc,EAAE,2BAA2B,CAAC,OAAO,GAAG,6BAA6B,CAAC,OAAO;aAC9F;SACJ,CAAC;IACN,CAAC;IAEM,KAAK,CAAC,qBAAqB,CAAC,SAA0C,EAAE,EAC3E,wBAAwB,GAAG,KAAK,EAChC,YAAY,GAAG,CAAC,GAAG,EAAE,CAAC,IAAI,CAAC,aAAa,CAAC,MAAM,CAAC,iBAAiB,CAAC,cAAc,EAAE,CAAC,EACnF,oBAAoB,GAAG,IAAI,CAAC,aAAa,CAAC,MAAM,CAAC,eAAe,EAAE,QAAQ,GAAG,IAAI,CAAC,aAAa,CAAC,MAAM,CAAC,GAAG,EAC1G,0BAA0B,GAAG,IAAI,CAAC,aAAa,CAAC,MAAM,CAAC,qBAAqB,EAC5E,4BAA4B,GAAG,KAAK,EACpC,0BAA0B,GAAG,KAAK,EAClC,OAAO,GAAG,IAAI,CAAC,aAAa,CAAC,MAAM,CAAC,YAAY,KAKhD,EAAE;QACF,OAAO,2BAA2B,CAAC,SAAS,EAAE;YAC1C,YAAY,EAAE,IAAI,CAAC,aAAa;YAChC,wBAAwB;YACxB,YAAY;YACZ,oBAAoB;YACpB,QAAQ;YACR,0BAA0B;YAC1B,4BAA4B;YAC5B,0BAA0B;YAC1B,OAAO;SACV,CAAC,CAAC;IACP,CAAC;IAED;;;;OAIG;IACI,KAAK,CAAC,yBAAyB,CAAC,WAA+C,EAAE,EACpF,cAAc,EACd,SAAS,EACT,qBAAqB,EACrB,cAAc,GAAG,KAAK,EACtB,YAAY,GAAG,KAAK,EACpB,YAAY,GAAG,CAAC,GAAG,EAAE,CAAC,IAAI,CAAC,aAAa,CAAC,MAAM,CAAC,iBAAiB,CAAC,cAAc,EAAE,CAAC,EACnF,WAAW,GAAG,CAAC,KAAK,IAAI,EAAE,CAAC,IAAI,CAAC,aAAa,CAAC,MAAM,CAAC,gBAAgB,CAAC,cAAc,EAAE,CAAC,EACvF,YAAY,GAAG,CAAC,GAAG,EAAE,CAAC,IAAI,CAAC,aAAa,CAAC,MAAM,CAAC,iBAAiB,CAAC,cAAc,EAAE,CAAC,EACnF,QAAQ,GAAG,IAAI,CAAC,aAAa,CAAC,MAAM,CAAC,GAAG,EACxC,wBAAwB,GAAG,KAAK,EAChC,kBAAkB,GAAG,KAAK,EAC1B,SAAS,GAAG,0BAA0B,EAAE,EAc3C;QACG,OAAO,MAAM,+BAA+B,CAAC;YACzC,WAAW;YACX,SAAS;YACT,SAAS;YACT,iBAAiB,EAAE,IAAI,CAAC,aAAa;YACrC,cAAc;YACd,qBAAqB;YACrB,cAAc;YACd,YAAY;YACZ,YAAY;YACZ,WAAW;YACX,YAAY;YACZ,QAAQ;YACR,wBAAwB;YACxB,kBAAkB;SACrB,CAAC,CAAC;IACP,CAAC;IAED,gBAAgB;IACT,MAAM,CAAC,OAAO,CAAC,YAA0B;QAC5C,OAAO,IAAI,iCAAiC,CAAC,YAAY,CAAC,CAAC;IAC/D,CAAC;CACJ;AAED,SAAS,6BAA6B,CAAC,SAAiB;IACpD,MAAM,IAAI,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC;IAC/B,OAAO,EAAE,GAAG,WAAW,CAAC,SAAS,EAAE,CAAC;YAChC,KAAK,EAAE,IAAI;YACX,GAAG,EAAE,IAAI,GAAG,GAAG;YACf,MAAM,EAAE,EAAE;SACb,EAAE;YACC,KAAK,EAAE,IAAI,GAAG,GAAG;YACjB,GAAG,EAAE,IAAI,GAAG,CAAC;YACb,MAAM,EAAE,EAAE;SACb,EAAE;YACC,KAAK,EAAE,IAAI,GAAG,CAAC;YACf,MAAM,EAAE,CAAC;SACZ,CAAC,CAAC,CAAC;AACR,CAAC"}
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import { LlamaContextOptions } from "../../../evaluator/LlamaContext/types.js";
|
|
2
2
|
import { GgufInsights } from "../GgufInsights.js";
|
|
3
3
|
import { BuildGpu } from "../../../bindings/types.js";
|
|
4
|
-
export declare function resolveContextContextSizeOption({ contextSize, batchSize, sequences, modelFileInsights, modelGpuLayers, modelTrainContextSize, flashAttention, getVramState, getRamState, getSwapState, ignoreMemorySafetyChecks, isEmbeddingContext, maxContextSizeSwapUse }: {
|
|
4
|
+
export declare function resolveContextContextSizeOption({ contextSize, batchSize, sequences, modelFileInsights, modelGpuLayers, modelTrainContextSize, flashAttention, swaFullCache, getVramState, getRamState, getSwapState, ignoreMemorySafetyChecks, isEmbeddingContext, maxContextSizeSwapUse }: {
|
|
5
5
|
contextSize?: LlamaContextOptions["contextSize"];
|
|
6
6
|
batchSize?: LlamaContextOptions["batchSize"];
|
|
7
7
|
sequences: number;
|
|
@@ -9,6 +9,7 @@ export declare function resolveContextContextSizeOption({ contextSize, batchSize
|
|
|
9
9
|
modelGpuLayers: number;
|
|
10
10
|
modelTrainContextSize: number;
|
|
11
11
|
flashAttention: boolean;
|
|
12
|
+
swaFullCache: boolean;
|
|
12
13
|
getVramState(): Promise<{
|
|
13
14
|
total: number;
|
|
14
15
|
free: number;
|
|
@@ -3,7 +3,7 @@ import { getDefaultContextBatchSize, getDefaultModelContextSize } from "../../..
|
|
|
3
3
|
import { InsufficientMemoryError } from "../../../utils/InsufficientMemoryError.js";
|
|
4
4
|
import { getRamUsageFromUnifiedVram } from "./getRamUsageFromUnifiedVram.js";
|
|
5
5
|
const defaultMaxContextSizeSwapUse = 2048;
|
|
6
|
-
export async function resolveContextContextSizeOption({ contextSize, batchSize, sequences, modelFileInsights, modelGpuLayers, modelTrainContextSize, flashAttention, getVramState, getRamState, getSwapState, ignoreMemorySafetyChecks = false, isEmbeddingContext = false, maxContextSizeSwapUse = defaultMaxContextSizeSwapUse }) {
|
|
6
|
+
export async function resolveContextContextSizeOption({ contextSize, batchSize, sequences, modelFileInsights, modelGpuLayers, modelTrainContextSize, flashAttention, swaFullCache, getVramState, getRamState, getSwapState, ignoreMemorySafetyChecks = false, isEmbeddingContext = false, maxContextSizeSwapUse = defaultMaxContextSizeSwapUse }) {
|
|
7
7
|
if (contextSize == null)
|
|
8
8
|
contextSize = "auto";
|
|
9
9
|
if (typeof contextSize === "number") {
|
|
@@ -21,6 +21,7 @@ export async function resolveContextContextSizeOption({ contextSize, batchSize,
|
|
|
21
21
|
modelGpuLayers: modelGpuLayers,
|
|
22
22
|
sequences,
|
|
23
23
|
flashAttention,
|
|
24
|
+
swaFullCache,
|
|
24
25
|
isEmbeddingContext
|
|
25
26
|
});
|
|
26
27
|
if (contextResourceRequirements.gpuVram > vramState.free)
|
|
@@ -50,6 +51,7 @@ export async function resolveContextContextSizeOption({ contextSize, batchSize,
|
|
|
50
51
|
modelGpuLayers: modelGpuLayers,
|
|
51
52
|
sequences,
|
|
52
53
|
flashAttention,
|
|
54
|
+
swaFullCache,
|
|
53
55
|
isEmbeddingContext
|
|
54
56
|
});
|
|
55
57
|
if (contextResourceRequirements.gpuVram <= vramState.free &&
|
|
@@ -88,6 +90,7 @@ export async function resolveContextContextSizeOption({ contextSize, batchSize,
|
|
|
88
90
|
modelGpuLayers: modelGpuLayers,
|
|
89
91
|
sequences,
|
|
90
92
|
flashAttention,
|
|
93
|
+
swaFullCache,
|
|
91
94
|
isEmbeddingContext
|
|
92
95
|
});
|
|
93
96
|
const unifiedRamUsage = getRamUsageFromUnifiedVram(minContextSizeResourceRequirements.gpuVram, vramState);
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"resolveContextContextSizeOption.js","sourceRoot":"","sources":["../../../../src/gguf/insights/utils/resolveContextContextSizeOption.ts"],"names":[],"mappings":"AAGA,OAAO,EAAC,mCAAmC,EAAC,MAAM,oBAAoB,CAAC;AACvE,OAAO,EAAC,0BAA0B,EAAE,0BAA0B,EAAC,MAAM,iDAAiD,CAAC;AACvH,OAAO,EAAC,uBAAuB,EAAC,MAAM,2CAA2C,CAAC;AAClF,OAAO,EAAC,0BAA0B,EAAC,MAAM,iCAAiC,CAAC;AAE3E,MAAM,4BAA4B,GAAG,IAAI,CAAC;AAE1C,MAAM,CAAC,KAAK,UAAU,+BAA+B,CAAC,EAClD,WAAW,EAAE,SAAS,EAAE,SAAS,EAAE,iBAAiB,EAAE,cAAc,EAAE,qBAAqB,EAAE,cAAc,
|
|
1
|
+
{"version":3,"file":"resolveContextContextSizeOption.js","sourceRoot":"","sources":["../../../../src/gguf/insights/utils/resolveContextContextSizeOption.ts"],"names":[],"mappings":"AAGA,OAAO,EAAC,mCAAmC,EAAC,MAAM,oBAAoB,CAAC;AACvE,OAAO,EAAC,0BAA0B,EAAE,0BAA0B,EAAC,MAAM,iDAAiD,CAAC;AACvH,OAAO,EAAC,uBAAuB,EAAC,MAAM,2CAA2C,CAAC;AAClF,OAAO,EAAC,0BAA0B,EAAC,MAAM,iCAAiC,CAAC;AAE3E,MAAM,4BAA4B,GAAG,IAAI,CAAC;AAE1C,MAAM,CAAC,KAAK,UAAU,+BAA+B,CAAC,EAClD,WAAW,EAAE,SAAS,EAAE,SAAS,EAAE,iBAAiB,EAAE,cAAc,EAAE,qBAAqB,EAAE,cAAc,EAAE,YAAY,EACzH,YAAY,EAAE,WAAW,EAAE,YAAY,EAAE,wBAAwB,GAAG,KAAK,EAAE,kBAAkB,GAAG,KAAK,EACrG,qBAAqB,GAAG,4BAA4B,EAiBvD;IACG,IAAI,WAAW,IAAI,IAAI;QACnB,WAAW,GAAG,MAAM,CAAC;IAEzB,IAAI,OAAO,WAAW,KAAK,QAAQ,EAAE,CAAC;QAClC,MAAM,mBAAmB,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,KAAK,CAAC,WAAW,CAAC,CAAC,CAAC;QAEjE,IAAI,wBAAwB;YACxB,OAAO,mBAAmB,CAAC;QAE/B,MAAM,CACF,SAAS,EACT,QAAQ,EACR,SAAS,CACZ,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC;YAClB,YAAY,EAAE;YACd,WAAW,EAAE;YACb,YAAY,EAAE;SACjB,CAAC,CAAC;QACH,MAAM,2BAA2B,GAAG,iBAAiB,CAAC,mCAAmC,CAAC;YACtF,WAAW,EAAE,mBAAmB;YAChC,SAAS,EAAE,SAAS,IAAI,0BAA0B,CAAC,EAAC,WAAW,EAAE,mBAAmB,EAAE,SAAS,EAAC,CAAC;YACjG,cAAc,EAAE,cAAc;YAC9B,SAAS;YACT,cAAc;YACd,YAAY;YACZ,kBAAkB;SACrB,CAAC,CAAC;QAEH,IAAI,2BAA2B,CAAC,OAAO,GAAG,SAAS,CAAC,IAAI;YACpD,MAAM,IAAI,uBAAuB,CAAC,qBAAqB,mBAAmB,GAAG,SAAS,GAAG,CAAC,CAAC,CAAC,CAAC,SAAS,SAAS,YAAY,CAAC,CAAC,CAAC,EAAE,sCAAsC,CAAC,CAAC;aACvK,IAAI,2BAA2B,CAAC,MAAM,GAAG,CAC1C,QAAQ,CAAC,IAAI,GAAG,SAAS,CAAC,IAAI,GAAG,0BAA0B,CAAC,2BAA2B,CAAC,OAAO,EAAE,SAAS,CAAC,CAC9G;YACG,MAAM,IAAI,uBAAuB,CAAC,qBAAqB,mBAAmB,GAAG,SAAS,GAAG,CAAC,CAAC,CAAC,CAAC,SAAS,SAAS,YAAY,CAAC,CAAC,CAAC,EAAE,sCAAsC,SAAS,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC,CAAC,mBAAmB,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;QAE5N,OAAO,mBAAmB,CAAC;IAC/B,CAAC;SAAM,IAAI,WAAW,KAAK,MAAM,IAAI,OAAO,WAAW,KAAK,QAAQ,EAAE,CAAC;QACnE,MAAM,CACF,SAAS,EACT,QAAQ,EACR,SAAS,CACZ,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC;YAClB,YAAY,EAAE;YACd,WAAW,EAAE;YACb,YAAY,EAAE;SACjB,CAAC,CAAC;QAEH,MAAM,cAAc,GAAG,WAAW,KAAK,MAAM;YACzC,CAAC,CAAC,0BAA0B,CAAC,EAAC,gBAAgB,EAAE,qBAAqB,EAAC,CAAC;YACvE,CAAC,CAAC,IAAI,CAAC,GAAG,CACN,WAAW,CAAC,GAAG,IAAI,0BAA0B,CAAC,EAAC,gBAAgB,EAAE,qBAAqB,EAAC,CAAC,EACxF,0BAA0B,CAAC,EAAC,gBAAgB,EAAE,qBAAqB,EAAC,CAAC,CACxE,CAAC;QAEN,MAAM,cAAc,GAAG,WAAW,KAAK,MAAM;YACzC,CAAC,CAAC,mCAAmC;YACrC,CAAC,CAAC,IAAI,CAAC,GAAG,CACN,WAAW,CAAC,GAAG,IAAI,mCAAmC,EACtD,mCAAmC,CACtC,CAAC;QAEN,IAAI,4BAA4B,GAAkB,IAAI,CAAC;QACvD,IAAI,IAAI,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC,cAAc,GAAG,cAAc,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;QAC3E,KAAK,IAAI,eAAe,GAAG,cAAc,EAAE,eAAe,IAAI,cAAc,IAAI,eAAe,IAAI,cAAc,GAAG,CAAC;YACjH,MAAM,2BAA2B,GAAG,iBAAiB,CAAC,mCAAmC,CAAC;gBACtF,WAAW,EAAE,eAAe;gBAC5B,SAAS,EAAE,SAAS,IAAI,0BAA0B,CAAC,EAAC,WAAW,EAAE,eAAe,EAAE,SAAS,EAAC,CAAC;gBAC7F,cAAc,EAAE,cAAc;gBAC9B,SAAS;gBACT,cAAc;gBACd,YAAY;gBACZ,kBAAkB;aACrB,CAAC,CAAC;YAEH,IAAI,2BAA2B,CAAC,OAAO,IAAI,SAAS,CAAC,IAAI;gBACrD,2BAA2B,CAAC,MAAM,IAAI,CAClC,QAAQ,CAAC,IAAI,GAAG,0BAA0B,CAAC,2BAA2B,CAAC,OAAO,EAAE,SAAS,CAAC,GAAG,CACzF,eAAe,IAAI,qBAAqB;oBACpC,CAAC,CAAC,SAAS,CAAC,IAAI;oBAChB,CAAC,CAAC,CAAC,CACV,CACJ,EACH,CAAC;gBACC,IAAI,4BAA4B,IAAI,IAAI,IAAI,eAAe,IAAI,4BAA4B,EAAE,CAAC;oBAC1F,4BAA4B,GAAG,eAAe,CAAC;oBAE/C,IAAI,IAAI,KAAK,CAAC,CAAC;wBACX,MAAM;yBACL,IAAI,IAAI,GAAG,CAAC;wBACb,IAAI,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC,CAAC,CAAC;gBAClD,CAAC;YACL,CAAC;iBAAM,IAAI,IAAI,GAAG,CAAC;gBACf,IAAI,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,KAAK,CAAC,IAAI,GAAG,CAAC,CAAC,CAAC,CAAC;YAE9C,IAAI,eAAe,IAAI,cAAc,IAAI,IAAI,KAAK,CAAC,CAAC;gBAChD,MAAM;YAEV,eAAe,IAAI,IAAI,CAAC;YACxB,IAAI,eAAe,GAAG,cAAc,EAAE,CAAC;gBACnC,eAAe,GAAG,cAAc,CAAC;gBACjC,IAAI,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;YACvD,CAAC;iBAAM,IAAI,eAAe,GAAG,cAAc,EAAE,CAAC;gBAC1C,eAAe,GAAG,cAAc,CAAC;gBACjC,IAAI,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;YACxD,CAAC;QACL,CAAC;QAED,IAAI,4BAA4B,IAAI,IAAI;YACpC,OAAO,4BAA4B,CAAC;QAExC,IAAI,wBAAwB;YACxB,OAAO,cAAc,CAAC;QAE1B,MAAM,kCAAkC,GAAG,iBAAiB,CAAC,mCAAmC,CAAC;YAC7F,WAAW,EAAE,cAAc;YAC3B,SAAS,EAAE,SAAS,IAAI,0BAA0B,CAAC,EAAC,WAAW,EAAE,cAAc,EAAE,SAAS,EAAC,CAAC;YAC5F,cAAc,EAAE,cAAc;YAC9B,SAAS;YACT,cAAc;YACd,YAAY;YACZ,kBAAkB;SACrB,CAAC,CAAC;QAEH,MAAM,eAAe,GAAG,0BAA0B,CAAC,kCAAkC,CAAC,OAAO,EAAE,SAAS,CAAC,CAAC;QAC1G,IAAI,kCAAkC,CAAC,OAAO,GAAG,SAAS,CAAC,IAAI;YAC3D,kCAAkC,CAAC,MAAM,GAAG,QAAQ,CAAC,IAAI,GAAG,SAAS,CAAC,IAAI,GAAG,eAAe;YAE5F,MAAM,IAAI,uBAAuB,CAAC,qBAAqB,cAAc,GAAG,SAAS,GAAG,CAAC,CAAC,CAAC,CAAC,SAAS,SAAS,YAAY,CAAC,CAAC,CAAC,EAAE,+CAA+C,SAAS,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC,CAAC,mBAAmB,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;aAC3N,IAAI,kCAAkC,CAAC,OAAO,GAAG,SAAS,CAAC,IAAI;YAChE,MAAM,IAAI,uBAAuB,CAAC,qBAAqB,cAAc,GAAG,SAAS,GAAG,CAAC,CAAC,CAAC,CAAC,SAAS,SAAS,YAAY,CAAC,CAAC,CAAC,EAAE,sCAAsC,CAAC,CAAC;aAClK,IAAI,kCAAkC,CAAC,MAAM,GAAG,QAAQ,CAAC,IAAI,GAAG,SAAS,CAAC,IAAI,GAAG,eAAe;YACjG,MAAM,IAAI,uBAAuB,CAAC,qBAAqB,cAAc,GAAG,SAAS,GAAG,CAAC,CAAC,CAAC,CAAC,SAAS,SAAS,YAAY,CAAC,CAAC,CAAC,EAAE,sCAAsC,SAAS,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC,CAAC,mBAAmB,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;aAClN,IAAI,kCAAkC,CAAC,MAAM,GAAG,QAAQ,CAAC,IAAI,GAAG,eAAe;YAChF,MAAM,IAAI,uBAAuB,CAAC,qBAAqB,cAAc,GAAG,SAAS,GAAG,CAAC,CAAC,CAAC,CAAC,SAAS,SAAS,YAAY,CAAC,CAAC,CAAC,EAAE,qCAAqC,CAAC,CAAC;;YAElK,MAAM,IAAI,uBAAuB,CAAC,qBAAqB,cAAc,GAAG,SAAS,GAAG,CAAC,CAAC,CAAC,CAAC,SAAS,SAAS,YAAY,CAAC,CAAC,CAAC,EAAE,2CAA2C,CAAC,CAAC;IAChL,CAAC;IAED,MAAM,IAAI,KAAK,CAAC,0BAA0B,WAAW,GAAG,CAAC,CAAC;AAC9D,CAAC"}
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import { LlamaModelOptions } from "../../../evaluator/LlamaModel/LlamaModel.js";
|
|
2
2
|
import { BuildGpu } from "../../../bindings/types.js";
|
|
3
3
|
import type { GgufInsights } from "../GgufInsights.js";
|
|
4
|
-
export declare function resolveModelGpuLayersOption(gpuLayers: LlamaModelOptions["gpuLayers"], { ggufInsights, ignoreMemorySafetyChecks, getVramState, llamaVramPaddingSize, llamaGpu, llamaSupportsGpuOffloading, defaultContextFlashAttention, useMmap }: {
|
|
4
|
+
export declare function resolveModelGpuLayersOption(gpuLayers: LlamaModelOptions["gpuLayers"], { ggufInsights, ignoreMemorySafetyChecks, getVramState, llamaVramPaddingSize, llamaGpu, llamaSupportsGpuOffloading, defaultContextFlashAttention, defaultContextSwaFullCache, useMmap }: {
|
|
5
5
|
ggufInsights: GgufInsights;
|
|
6
6
|
ignoreMemorySafetyChecks?: boolean;
|
|
7
7
|
getVramState(): Promise<{
|
|
@@ -12,5 +12,6 @@ export declare function resolveModelGpuLayersOption(gpuLayers: LlamaModelOptions
|
|
|
12
12
|
llamaGpu: BuildGpu;
|
|
13
13
|
llamaSupportsGpuOffloading: boolean;
|
|
14
14
|
defaultContextFlashAttention: boolean;
|
|
15
|
+
defaultContextSwaFullCache: boolean;
|
|
15
16
|
useMmap?: boolean;
|
|
16
17
|
}): Promise<number>;
|
|
@@ -4,7 +4,7 @@ import { getDefaultContextBatchSize, getDefaultModelContextSize } from "../../..
|
|
|
4
4
|
import { minAllowedContextSizeInCalculations } from "../../../config.js";
|
|
5
5
|
import { scoreLevels } from "./scoreLevels.js";
|
|
6
6
|
const fitContextExtraMemoryPaddingPercentage = 0.5;
|
|
7
|
-
export async function resolveModelGpuLayersOption(gpuLayers, { ggufInsights, ignoreMemorySafetyChecks = false, getVramState, llamaVramPaddingSize, llamaGpu, llamaSupportsGpuOffloading, defaultContextFlashAttention, useMmap }) {
|
|
7
|
+
export async function resolveModelGpuLayersOption(gpuLayers, { ggufInsights, ignoreMemorySafetyChecks = false, getVramState, llamaVramPaddingSize, llamaGpu, llamaSupportsGpuOffloading, defaultContextFlashAttention, defaultContextSwaFullCache, useMmap }) {
|
|
8
8
|
if (gpuLayers == null)
|
|
9
9
|
gpuLayers = "auto";
|
|
10
10
|
if (!llamaSupportsGpuOffloading)
|
|
@@ -21,6 +21,7 @@ export async function resolveModelGpuLayersOption(gpuLayers, { ggufInsights, ign
|
|
|
21
21
|
ggufInsights,
|
|
22
22
|
currentVram: vramState.free,
|
|
23
23
|
defaultContextFlashAttention,
|
|
24
|
+
defaultContextSwaFullCache,
|
|
24
25
|
useMmap
|
|
25
26
|
});
|
|
26
27
|
if (maxLayersRequirements == null)
|
|
@@ -52,6 +53,7 @@ export async function resolveModelGpuLayersOption(gpuLayers, { ggufInsights, ign
|
|
|
52
53
|
? gpuLayers.max
|
|
53
54
|
: undefined,
|
|
54
55
|
defaultContextFlashAttention,
|
|
56
|
+
defaultContextSwaFullCache,
|
|
55
57
|
useMmap
|
|
56
58
|
});
|
|
57
59
|
const hasGpuLayersRequirements = typeof gpuLayers === "object" &&
|
|
@@ -62,7 +64,7 @@ export async function resolveModelGpuLayersOption(gpuLayers, { ggufInsights, ign
|
|
|
62
64
|
}
|
|
63
65
|
throw new Error(`Invalid gpuLayers value: ${gpuLayers}`);
|
|
64
66
|
}
|
|
65
|
-
function getBestGpuLayersForFreeVram({ ggufInsights, freeVram, fitContext, minGpuLayers, maxGpuLayers, defaultContextFlashAttention, useMmap }) {
|
|
67
|
+
function getBestGpuLayersForFreeVram({ ggufInsights, freeVram, fitContext, minGpuLayers, maxGpuLayers, defaultContextFlashAttention, defaultContextSwaFullCache, useMmap }) {
|
|
66
68
|
return findBestOption({
|
|
67
69
|
*generator() {
|
|
68
70
|
const minLayers = Math.floor(Math.max(0, minGpuLayers ?? 0));
|
|
@@ -80,6 +82,7 @@ function getBestGpuLayersForFreeVram({ ggufInsights, freeVram, fitContext, minGp
|
|
|
80
82
|
currentVram: freeVram,
|
|
81
83
|
fitContext,
|
|
82
84
|
defaultContextFlashAttention,
|
|
85
|
+
defaultContextSwaFullCache,
|
|
83
86
|
useMmap
|
|
84
87
|
});
|
|
85
88
|
if (layersRequirements == null)
|
|
@@ -127,7 +130,7 @@ function scoreGpuLayersAndContextCombination({ gpuLayers, contextSize }, { total
|
|
|
127
130
|
}
|
|
128
131
|
return scoreGpuLayers() + scoreContextSize();
|
|
129
132
|
}
|
|
130
|
-
function getVramRequiredForGpuLayers({ gpuLayers, ggufInsights, currentVram, fitContext, defaultContextFlashAttention = false, useMmap }) {
|
|
133
|
+
function getVramRequiredForGpuLayers({ gpuLayers, ggufInsights, currentVram, fitContext, defaultContextFlashAttention = false, defaultContextSwaFullCache = false, useMmap }) {
|
|
131
134
|
const modelVram = ggufInsights.estimateModelResourceRequirements({
|
|
132
135
|
gpuLayers,
|
|
133
136
|
useMmap
|
|
@@ -141,7 +144,8 @@ function getVramRequiredForGpuLayers({ gpuLayers, ggufInsights, currentVram, fit
|
|
|
141
144
|
modelGpuLayers: gpuLayers,
|
|
142
145
|
sequences: 1,
|
|
143
146
|
isEmbeddingContext: fitContext.embeddingContext ?? false,
|
|
144
|
-
flashAttention: defaultContextFlashAttention
|
|
147
|
+
flashAttention: defaultContextFlashAttention,
|
|
148
|
+
swaFullCache: defaultContextSwaFullCache
|
|
145
149
|
}).gpuVram;
|
|
146
150
|
const totalVram = modelVram + contextVram;
|
|
147
151
|
if (totalVram > currentVram)
|
|
@@ -157,7 +161,8 @@ function getVramRequiredForGpuLayers({ gpuLayers, ggufInsights, currentVram, fit
|
|
|
157
161
|
ggufInsights,
|
|
158
162
|
vram: currentVram - modelVram,
|
|
159
163
|
isEmbeddingContext: fitContext?.embeddingContext ?? false,
|
|
160
|
-
flashAttention: defaultContextFlashAttention
|
|
164
|
+
flashAttention: defaultContextFlashAttention,
|
|
165
|
+
swaFullCache: defaultContextSwaFullCache
|
|
161
166
|
});
|
|
162
167
|
if (maxContext == null || modelVram + maxContext.vram > currentVram)
|
|
163
168
|
return null;
|
|
@@ -167,7 +172,7 @@ function getVramRequiredForGpuLayers({ gpuLayers, ggufInsights, currentVram, fit
|
|
|
167
172
|
totalVram: modelVram + maxContext.vram
|
|
168
173
|
};
|
|
169
174
|
}
|
|
170
|
-
function findMaxPossibleContextSizeForVram({ gpuLayers, ggufInsights, vram, isEmbeddingContext, flashAttention }) {
|
|
175
|
+
function findMaxPossibleContextSizeForVram({ gpuLayers, ggufInsights, vram, isEmbeddingContext, flashAttention, swaFullCache }) {
|
|
171
176
|
const maxContextSize = getDefaultModelContextSize({ trainContextSize: ggufInsights.trainContextSize });
|
|
172
177
|
return findMaxValidValue({
|
|
173
178
|
maxValue: maxContextSize,
|
|
@@ -180,7 +185,8 @@ function findMaxPossibleContextSizeForVram({ gpuLayers, ggufInsights, vram, isEm
|
|
|
180
185
|
modelGpuLayers: gpuLayers,
|
|
181
186
|
sequences: 1,
|
|
182
187
|
isEmbeddingContext,
|
|
183
|
-
flashAttention
|
|
188
|
+
flashAttention,
|
|
189
|
+
swaFullCache
|
|
184
190
|
}).gpuVram;
|
|
185
191
|
if (contextVram <= vram)
|
|
186
192
|
return {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"resolveModelGpuLayersOption.js","sourceRoot":"","sources":["../../../../src/gguf/insights/utils/resolveModelGpuLayersOption.ts"],"names":[],"mappings":"AAEA,OAAO,EAAC,uBAAuB,EAAC,MAAM,2CAA2C,CAAC;AAClF,OAAO,EAAC,cAAc,EAAC,MAAM,kCAAkC,CAAC;AAChE,OAAO,EAAC,0BAA0B,EAAE,0BAA0B,EAAC,MAAM,iDAAiD,CAAC;AACvH,OAAO,EAAC,mCAAmC,EAAC,MAAM,oBAAoB,CAAC;AACvE,OAAO,EAAC,WAAW,EAAC,MAAM,kBAAkB,CAAC;AAG7C,MAAM,sCAAsC,GAAG,GAAG,CAAC;AAEnD,MAAM,CAAC,KAAK,UAAU,2BAA2B,CAAC,SAAyC,EAAE,EACzF,YAAY,EAAE,wBAAwB,GAAG,KAAK,EAAE,YAAY,EAAE,oBAAoB,EAClF,QAAQ,EAAE,0BAA0B,EAAE,4BAA4B,EAAE,OAAO,
|
|
1
|
+
{"version":3,"file":"resolveModelGpuLayersOption.js","sourceRoot":"","sources":["../../../../src/gguf/insights/utils/resolveModelGpuLayersOption.ts"],"names":[],"mappings":"AAEA,OAAO,EAAC,uBAAuB,EAAC,MAAM,2CAA2C,CAAC;AAClF,OAAO,EAAC,cAAc,EAAC,MAAM,kCAAkC,CAAC;AAChE,OAAO,EAAC,0BAA0B,EAAE,0BAA0B,EAAC,MAAM,iDAAiD,CAAC;AACvH,OAAO,EAAC,mCAAmC,EAAC,MAAM,oBAAoB,CAAC;AACvE,OAAO,EAAC,WAAW,EAAC,MAAM,kBAAkB,CAAC;AAG7C,MAAM,sCAAsC,GAAG,GAAG,CAAC;AAEnD,MAAM,CAAC,KAAK,UAAU,2BAA2B,CAAC,SAAyC,EAAE,EACzF,YAAY,EAAE,wBAAwB,GAAG,KAAK,EAAE,YAAY,EAAE,oBAAoB,EAClF,QAAQ,EAAE,0BAA0B,EAAE,4BAA4B,EAAE,0BAA0B,EAAE,OAAO,EAK1G;IACG,IAAI,SAAS,IAAI,IAAI;QACjB,SAAS,GAAG,MAAM,CAAC;IAEvB,IAAI,CAAC,0BAA0B;QAC3B,OAAO,CAAC,CAAC;IAEb,IAAI,SAAS,KAAK,KAAK,IAAI,OAAO,SAAS,KAAK,QAAQ,EAAE,CAAC;QACvD,MAAM,iBAAiB,GAAG,OAAO,SAAS,KAAK,QAAQ;YACnD,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,YAAY,CAAC,WAAW,EAAE,SAAS,CAAC,CAAC;YAC5D,CAAC,CAAC,YAAY,CAAC,WAAW,CAAC;QAE/B,IAAI,wBAAwB;YACxB,OAAO,iBAAiB,CAAC;QAE7B,MAAM,SAAS,GAAG,MAAM,YAAY,EAAE,CAAC;QACvC,MAAM,qBAAqB,GAAG,2BAA2B,CAAC;YACtD,SAAS,EAAE,iBAAiB;YAC5B,YAAY;YACZ,WAAW,EAAE,SAAS,CAAC,IAAI;YAC3B,4BAA4B;YAC5B,0BAA0B;YAC1B,OAAO;SACV,CAAC,CAAC;QAEH,IAAI,qBAAqB,IAAI,IAAI;YAC7B,MAAM,IAAI,uBAAuB,CAAC,8DAA8D,CAAC,CAAC;QAEtG,OAAO,iBAAiB,CAAC;IAC7B,CAAC;SAAM,IAAI,SAAS,KAAK,MAAM,IAAI,OAAO,SAAS,KAAK,QAAQ,EAAE,CAAC;QAC/D,IAAI,QAAQ,KAAK,KAAK;YAClB,OAAO,CAAC,CAAC;QAEb,MAAM,SAAS,GAAG,MAAM,YAAY,EAAE,CAAC;QACvC,IAAI,SAAS,CAAC,KAAK,KAAK,CAAC;YACrB,OAAO,CAAC,CAAC;QAEb,IAAI,QAAQ,GAAG,SAAS,CAAC,IAAI,CAAC;QAC9B,IAAI,OAAO,SAAS,KAAK,QAAQ,IAAI,SAAS,CAAC,UAAU,EAAE,WAAW,IAAI,IAAI,EAAE,CAAC;YAC7E,QAAQ,IAAI,oBAAoB,GAAG,sCAAsC,CAAC;YAE1E,IAAI,QAAQ,GAAG,CAAC;gBACZ,QAAQ,GAAG,CAAC,CAAC;QACrB,CAAC;QAED,MAAM,mBAAmB,GAAG,2BAA2B,CAAC;YACpD,YAAY;YACZ,QAAQ;YACR,UAAU,EAAE,OAAO,SAAS,KAAK,QAAQ;gBACrC,CAAC,CAAC,SAAS,CAAC,UAAU;gBACtB,CAAC,CAAC,SAAS;YACf,YAAY,EAAE,OAAO,SAAS,KAAK,QAAQ;gBACvC,CAAC,CAAC,SAAS,CAAC,GAAG;gBACf,CAAC,CAAC,SAAS;YACf,YAAY,EAAE,OAAO,SAAS,KAAK,QAAQ;gBACvC,CAAC,CAAC,SAAS,CAAC,GAAG;gBACf,CAAC,CAAC,SAAS;YACf,4BAA4B;YAC5B,0BAA0B;YAC1B,OAAO;SACV,CAAC,CAAC;QAEH,MAAM,wBAAwB,GAAG,OAAO,SAAS,KAAK,QAAQ;YAC1D,CAAC,SAAS,CAAC,GAAG,IAAI,IAAI,IAAI,SAAS,CAAC,GAAG,IAAI,IAAI,IAAI,SAAS,CAAC,UAAU,EAAE,WAAW,IAAI,IAAI,CAAC,CAAC;QAElG,IAAI,CAAC,wBAAwB,IAAI,mBAAmB,IAAI,IAAI,IAAI,wBAAwB;YACpF,MAAM,IAAI,uBAAuB,CAAC,8DAA8D,CAAC,CAAC;QAEtG,OAAO,mBAAmB,IAAI,CAAC,CAAC;IACpC,CAAC;IAED,MAAM,IAAI,KAAK,CAAC,4BAA4B,SAAS,EAAE,CAAC,CAAC;AAC7D,CAAC;AAED,SAAS,2BAA2B,CAAC,EACjC,YAAY,EACZ,QAAQ,EACR,UAAU,EACV,YAAY,EACZ,YAAY,EACZ,4BAA4B,EAC5B,0BAA0B,EAC1B,OAAO,EAUV;IACG,OAAO,cAAc,CAAC;QAClB,CAAC,SAAS;YACN,MAAM,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,YAAY,IAAI,CAAC,CAAC,CAAC,CAAC;YAC7D,MAAM,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,YAAY,CAAC,WAAW,EAAE,YAAY,IAAI,YAAY,CAAC,WAAW,CAAC,CAAC,CAAC;YAE3G,KAAK,IAAI,MAAM,GAAG,SAAS,EAAE,MAAM,IAAI,SAAS,EAAE,MAAM,EAAE,EAAE,CAAC;gBACzD,MAAM;oBACF,SAAS,EAAE,MAAM;iBACpB,CAAC;YACN,CAAC;QACL,CAAC;QACD,KAAK,CAAC,MAAM;YACR,MAAM,kBAAkB,GAAG,2BAA2B,CAAC;gBACnD,SAAS,EAAE,MAAM,CAAC,SAAS;gBAC3B,YAAY;gBACZ,WAAW,EAAE,QAAQ;gBACrB,UAAU;gBACV,4BAA4B;gBAC5B,0BAA0B;gBAC1B,OAAO;aACV,CAAC,CAAC;YAEH,IAAI,kBAAkB,IAAI,IAAI;gBAC1B,OAAO,IAAI,CAAC;YAEhB,OAAO,mCAAmC,CAAC,EAAC,SAAS,EAAE,MAAM,CAAC,SAAS,EAAE,WAAW,EAAE,kBAAkB,CAAC,WAAW,EAAC,EAAE;gBACnH,cAAc,EAAE,YAAY,CAAC,WAAW;gBACxC,gBAAgB,EAAE,0BAA0B,CAAC,EAAC,gBAAgB,EAAE,YAAY,CAAC,gBAAgB,EAAC,CAAC;aAClG,CAAC,CAAC;QACP,CAAC;KACJ,CAAC,EAAE,SAAS,IAAI,IAAI,CAAC;AAC1B,CAAC;AAED,SAAS,mCAAmC,CAAC,EAAC,SAAS,EAAE,WAAW,EAA2C,EAAE,EAC7G,cAAc,EAAE,gBAAgB,EAGnC;IACG,SAAS,cAAc;QACnB,OAAO,WAAW,CAAC,SAAS,EAAE,CAAC;gBAC3B,KAAK,EAAE,CAAC;gBACR,MAAM,EAAE,CAAC;aACZ,EAAE;gBACC,KAAK,EAAE,CAAC;gBACR,MAAM,EAAE,EAAE;aACb,EAAE;gBACC,KAAK,EAAE,cAAc;gBACrB,MAAM,EAAE,EAAE;gBACV,GAAG,EAAE,cAAc;aACtB,CAAC,CAAC,CAAC;IACR,CAAC;IAED,SAAS,gBAAgB;QACrB,MAAM,mBAAmB,GAAG,SAAS,GAAG,cAAc,CAAC;QAEvD,OAAO,WAAW,CAAC,WAAW,EAAE,CAAC;gBAC7B,KAAK,EAAE,CAAC;gBACR,MAAM,EAAE,CAAC;aACZ,EAAE;gBACC,KAAK,EAAE,IAAI;gBACX,MAAM,EAAE,CAAC;aACZ,EAAE;gBACC,KAAK,EAAE,IAAI;gBACX,MAAM,EAAE,mBAAmB,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;aAC5C,EAAE;gBACC,KAAK,EAAE,IAAI;gBACX,MAAM,EAAE,mBAAmB,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE;aAC7C,EAAE;gBACC,KAAK,EAAE,IAAI;gBACX,MAAM,EAAE,mBAAmB,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;gBACzC,GAAG,EAAE,IAAI,CAAC,GAAG,CAAC,gBAAgB,EAAE,KAAK,CAAC;aACzC,CAAC,CAAC,CAAC;IACR,CAAC;IAED,OAAO,cAAc,EAAE,GAAG,gBAAgB,EAAE,CAAC;AACjD,CAAC;AAED,SAAS,2BAA2B,CAAC,EACjC,SAAS,EAAE,YAAY,EAAE,WAAW,EAAE,UAAU,EAAE,4BAA4B,GAAG,KAAK,EAAE,0BAA0B,GAAG,KAAK,EAAE,OAAO,EAItI;IACG,MAAM,SAAS,GAAG,YAAY,CAAC,iCAAiC,CAAC;QAC7D,SAAS;QACT,OAAO;KACV,CAAC,CAAC,OAAO,CAAC;IAEX,IAAI,SAAS,GAAG,WAAW;QACvB,OAAO,IAAI,CAAC;IAEhB,IAAI,UAAU,IAAI,IAAI,IAAI,UAAU,CAAC,WAAW,IAAI,IAAI,EAAE,CAAC;QACvD,MAAM,WAAW,GAAG,YAAY,CAAC,mCAAmC,CAAC;YACjE,WAAW,EAAE,UAAU,CAAC,WAAW;YACnC,SAAS,EAAE,0BAA0B,CAAC,EAAC,WAAW,EAAE,UAAU,CAAC,WAAW,EAAE,SAAS,EAAE,CAAC,EAAC,CAAC;YAC1F,cAAc,EAAE,SAAS;YACzB,SAAS,EAAE,CAAC;YACZ,kBAAkB,EAAE,UAAU,CAAC,gBAAgB,IAAI,KAAK;YACxD,cAAc,EAAE,4BAA4B;YAC5C,YAAY,EAAE,0BAA0B;SAC3C,CAAC,CAAC,OAAO,CAAC;QAEX,MAAM,SAAS,GAAG,SAAS,GAAG,WAAW,CAAC;QAC1C,IAAI,SAAS,GAAG,WAAW;YACvB,OAAO,IAAI,CAAC;QAEhB,OAAO;YACH,WAAW,EAAE,UAAU,CAAC,WAAW;YACnC,WAAW;YACX,SAAS;SACZ,CAAC;IACN,CAAC;IAED,MAAM,UAAU,GAAG,iCAAiC,CAAC;QACjD,SAAS;QACT,YAAY;QACZ,IAAI,EAAE,WAAW,GAAG,SAAS;QAC7B,kBAAkB,EAAE,UAAU,EAAE,gBAAgB,IAAI,KAAK;QACzD,cAAc,EAAE,4BAA4B;QAC5C,YAAY,EAAE,0BAA0B;KAC3C,CAAC,CAAC;IAEH,IAAI,UAAU,IAAI,IAAI,IAAI,SAAS,GAAG,UAAU,CAAC,IAAI,GAAG,WAAW;QAC/D,OAAO,IAAI,CAAC;IAEhB,OAAO;QACH,WAAW,EAAE,UAAU,CAAC,WAAW;QACnC,WAAW,EAAE,UAAU,CAAC,IAAI;QAC5B,SAAS,EAAE,SAAS,GAAG,UAAU,CAAC,IAAI;KACzC,CAAC;AACN,CAAC;AAED,SAAS,iCAAiC,CAAC,EAAC,SAAS,EAAE,YAAY,EAAE,IAAI,EAAE,kBAAkB,EAAE,cAAc,EAAE,YAAY,EAE1H;IACG,MAAM,cAAc,GAAG,0BAA0B,CAAC,EAAC,gBAAgB,EAAE,YAAY,CAAC,gBAAgB,EAAC,CAAC,CAAC;IAErG,OAAO,iBAAiB,CAAC;QACrB,QAAQ,EAAE,cAAc;QACxB,QAAQ,EAAE,mCAAmC;QAC7C,OAAO,EAAE,CAAC;QACV,IAAI,CAAC,WAAW;YACZ,MAAM,WAAW,GAAG,YAAY,CAAC,mCAAmC,CAAC;gBACjE,WAAW;gBACX,SAAS,EAAE,0BAA0B,CAAC,EAAC,WAAW,EAAE,SAAS,EAAE,CAAC,EAAC,CAAC;gBAClE,cAAc,EAAE,SAAS;gBACzB,SAAS,EAAE,CAAC;gBACZ,kBAAkB;gBAClB,cAAc;gBACd,YAAY;aACf,CAAC,CAAC,OAAO,CAAC;YAEX,IAAI,WAAW,IAAI,IAAI;gBACnB,OAAO;oBACH,WAAW;oBACX,IAAI,EAAE,WAAW;iBACpB,CAAC;YAEN,OAAO,IAAI,CAAC;QAChB,CAAC;KACJ,CAAC,CAAC;AACP,CAAC;AAED,SAAS,iBAAiB,CAAI,EAC1B,QAAQ,EACR,QAAQ,EACR,OAAO,GAAG,CAAC,EACX,IAAI,EAMP;IACG,IAAI,IAAI,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,OAAO,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC,QAAQ,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;IACrE,IAAI,SAAS,GAAsC,IAAI,CAAC;IAExD,KAAK,IAAI,KAAK,GAAG,QAAQ,EAAE,KAAK,IAAI,QAAQ,GAAG,CAAC;QAC5C,MAAM,MAAM,GAAa,CAAC,SAAS,IAAI,IAAI,IAAI,KAAK,KAAK,SAAS,CAAC,KAAK,CAAC;YACrE,CAAC,CAAC,SAAS,CAAC,MAAM;YAClB,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QAElB,IAAI,MAAM,IAAI,IAAI,EAAE,CAAC;YACjB,IAAI,SAAS,IAAI,IAAI,IAAI,KAAK,IAAI,SAAS,CAAC,KAAK,EAAE,CAAC;gBAChD,SAAS,GAAG,EAAC,KAAK,EAAE,KAAK,EAAE,MAAM,EAAE,MAAM,EAAC,CAAC;gBAE3C,IAAI,IAAI,KAAK,CAAC,OAAO;oBACjB,MAAM;qBACL,IAAI,IAAI,GAAG,CAAC;oBACb,IAAI,GAAG,IAAI,CAAC,GAAG,CAAC,OAAO,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC,CAAC,CAAC;YACxD,CAAC;QACL,CAAC;aAAM,IAAI,SAAS,IAAI,IAAI,IAAI,KAAK,GAAG,SAAS,CAAC,KAAK,EAAE,CAAC;YACtD,KAAK,GAAG,SAAS,CAAC,KAAK,CAAC;YACxB,IAAI,GAAG,IAAI,CAAC,GAAG,CAAC,OAAO,EAAE,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;YACzD,SAAS;QACb,CAAC;aAAM,IAAI,IAAI,GAAG,CAAC;YACf,IAAI,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,OAAO,EAAE,IAAI,CAAC,KAAK,CAAC,IAAI,GAAG,CAAC,CAAC,CAAC,CAAC;QAEpD,IAAI,KAAK,KAAK,QAAQ,IAAI,IAAI,KAAK,CAAC,OAAO;YACvC,MAAM;QAEV,KAAK,IAAI,IAAI,CAAC;QACd,IAAI,KAAK,GAAG,QAAQ,EAAE,CAAC;YACnB,KAAK,GAAG,QAAQ,CAAC;YACjB,IAAI,GAAG,IAAI,CAAC,GAAG,CAAC,OAAO,EAAE,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;QAC7D,CAAC;aAAM,IAAI,KAAK,GAAG,QAAQ,EAAE,CAAC;YAC1B,KAAK,GAAG,QAAQ,CAAC;YACjB,IAAI,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,OAAO,EAAE,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;QAC9D,CAAC;IACL,CAAC;IAED,IAAI,SAAS,IAAI,IAAI;QACjB,OAAO,SAAS,CAAC,MAAM,CAAC;IAE5B,OAAO,IAAI,CAAC;AAChB,CAAC"}
|
|
@@ -123,7 +123,7 @@ export declare enum GgufFileType {
|
|
|
123
123
|
MOSTLY_Q4_0_4_4 = 33,// deprecated
|
|
124
124
|
MOSTLY_Q4_0_4_8 = 34,// deprecated
|
|
125
125
|
MOSTLY_Q4_0_8_8 = 35,// deprecated
|
|
126
|
-
MOSTLY_TQ1_0 = 36
|
|
126
|
+
MOSTLY_TQ1_0 = 36,
|
|
127
127
|
MOSTLY_TQ2_0 = 37
|
|
128
128
|
}
|
|
129
129
|
export type GgufMetadataGeneral<A extends GgufArchitectureType = GgufArchitectureType> = {
|
|
@@ -283,6 +283,7 @@ export type GgufMetadataDefaultArchitectureType = {
|
|
|
283
283
|
readonly layer_norm_rms_epsilon?: number;
|
|
284
284
|
readonly key_length?: number;
|
|
285
285
|
readonly value_length?: number;
|
|
286
|
+
readonly sliding_window?: number;
|
|
286
287
|
readonly causal?: boolean;
|
|
287
288
|
};
|
|
288
289
|
readonly rope?: {
|
|
@@ -109,7 +109,7 @@ export var GgufFileType;
|
|
|
109
109
|
GgufFileType[GgufFileType["MOSTLY_Q4_0_4_8"] = 34] = "MOSTLY_Q4_0_4_8";
|
|
110
110
|
GgufFileType[GgufFileType["MOSTLY_Q4_0_8_8"] = 35] = "MOSTLY_Q4_0_8_8";
|
|
111
111
|
GgufFileType[GgufFileType["MOSTLY_TQ1_0"] = 36] = "MOSTLY_TQ1_0";
|
|
112
|
-
GgufFileType[GgufFileType["MOSTLY_TQ2_0"] = 37] = "MOSTLY_TQ2_0";
|
|
112
|
+
GgufFileType[GgufFileType["MOSTLY_TQ2_0"] = 37] = "MOSTLY_TQ2_0";
|
|
113
113
|
})(GgufFileType || (GgufFileType = {}));
|
|
114
114
|
export var GgufMetadataTokenizerTokenType;
|
|
115
115
|
(function (GgufMetadataTokenizerTokenType) {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"GgufMetadataTypes.js","sourceRoot":"","sources":["../../../src/gguf/types/GgufMetadataTypes.ts"],"names":[],"mappings":"AAAA,MAAM,CAAN,IAAkB,oBAqEjB;AArED,WAAkB,oBAAoB;IAClC,uCAAe,CAAA;IACf,yCAAiB,CAAA;IACjB,qCAAa,CAAA;IACb,yCAAiB,CAAA;IACjB,qCAAa,CAAA;IACb,qCAAa,CAAA;IACb,qCAAa,CAAA;IACb,2CAAmB,CAAA;IACnB,mCAAW,CAAA;IACX,6CAAqB,CAAA;IACrB,+CAAuB,CAAA;IACvB,yCAAiB,CAAA;IACjB,qCAAa,CAAA;IACb,gDAAwB,CAAA;IACxB,uDAA+B,CAAA;IAC/B,mDAA2B,CAAA;IAC3B,uCAAe,CAAA;IACf,6CAAqB,CAAA;IACrB,qCAAa,CAAA;IACb,uCAAe,CAAA;IACf,6CAAqB,CAAA;IACrB,2CAAmB,CAAA;IACnB,uCAAe,CAAA;IACf,6CAAqB,CAAA;IACrB,qCAAa,CAAA;IACb,qCAAa,CAAA;IACb,yCAAiB,CAAA;IACjB,uCAAe,CAAA;IACf,+CAAuB,CAAA;IACvB,uCAAe,CAAA;IACf,+CAAuB,CAAA;IACvB,2CAAmB,CAAA;IACnB,6CAAqB,CAAA;IACrB,uCAAe,CAAA;IACf,yCAAiB,CAAA;IACjB,yCAAiB,CAAA;IACjB,iDAAyB,CAAA;IACzB,uCAAe,CAAA;IACf,yCAAiB,CAAA;IACjB,8CAAsB,CAAA;IACtB,2CAAmB,CAAA;IACnB,qCAAa,CAAA;IACb,qCAAa,CAAA;IACb,uCAAe,CAAA;IACf,uCAAe,CAAA;IACf,2CAAmB,CAAA;IACnB,yCAAiB,CAAA;IACjB,6CAAqB,CAAA;IACrB,+CAAuB,CAAA;IACvB,2CAAmB,CAAA;IACnB,qCAAa,CAAA;IACb,yCAAiB,CAAA;IACjB,iCAAS,CAAA;IACT,+CAAuB,CAAA;IACvB,qCAAa,CAAA;IACb,6CAAqB,CAAA;IACrB,yCAAiB,CAAA;IACjB,uCAAe,CAAA;IACf,iDAAyB,CAAA;IACzB,uCAAe,CAAA;IACf,yCAAiB,CAAA;IACjB,2CAAmB,CAAA;IACnB,iDAAyB,CAAA;IACzB,+CAAuB,CAAA;IACvB,4DAAoC,CAAA;IACpC,mCAAW,CAAA;IACX,iDAAyB,CAAA;IACzB,6CAAqB,CAAA;AACzB,CAAC,EArEiB,oBAAoB,KAApB,oBAAoB,QAqErC;AA8BD,yEAAyE;AACzE,MAAM,CAAN,IAAY,YAuCX;AAvCD,WAAY,YAAY;IACpB,qDAAW,CAAA;IACX,2DAAc,CAAA;IACd,6DAAe,CAAA;IACf,6DAAe,CAAA;IACf,+EAAwB,CAAA;IACxB,6DAAe,CAAA;IACf,6DAAe,CAAA;IACf,6DAAe,CAAA;IACf,6DAAe,CAAA;IACf,6DAAe,CAAA;IACf,8DAAgB,CAAA;IAChB,kEAAkB,CAAA;IAClB,kEAAkB,CAAA;IAClB,kEAAkB,CAAA;IAClB,kEAAkB,CAAA;IAClB,kEAAkB,CAAA;IAClB,kEAAkB,CAAA;IAClB,kEAAkB,CAAA;IAClB,8DAAgB,CAAA;IAChB,oEAAmB,CAAA;IACnB,kEAAkB,CAAA;IAClB,kEAAkB,CAAA;IAClB,kEAAkB,CAAA;IAClB,oEAAmB,CAAA;IACnB,gEAAiB,CAAA;IACjB,kEAAkB,CAAA;IAClB,gEAAiB,CAAA;IACjB,gEAAiB,CAAA;IACjB,gEAAiB,CAAA;IACjB,gEAAiB,CAAA;IACjB,kEAAkB,CAAA;IAClB,gEAAiB,CAAA;IACjB,8DAAgB,CAAA;IAChB,sEAAoB,CAAA;IACpB,sEAAoB,CAAA;IACpB,sEAAoB,CAAA;IACpB,gEAAiB,CAAA;IACjB,gEAAiB,CAAA
|
|
1
|
+
{"version":3,"file":"GgufMetadataTypes.js","sourceRoot":"","sources":["../../../src/gguf/types/GgufMetadataTypes.ts"],"names":[],"mappings":"AAAA,MAAM,CAAN,IAAkB,oBAqEjB;AArED,WAAkB,oBAAoB;IAClC,uCAAe,CAAA;IACf,yCAAiB,CAAA;IACjB,qCAAa,CAAA;IACb,yCAAiB,CAAA;IACjB,qCAAa,CAAA;IACb,qCAAa,CAAA;IACb,qCAAa,CAAA;IACb,2CAAmB,CAAA;IACnB,mCAAW,CAAA;IACX,6CAAqB,CAAA;IACrB,+CAAuB,CAAA;IACvB,yCAAiB,CAAA;IACjB,qCAAa,CAAA;IACb,gDAAwB,CAAA;IACxB,uDAA+B,CAAA;IAC/B,mDAA2B,CAAA;IAC3B,uCAAe,CAAA;IACf,6CAAqB,CAAA;IACrB,qCAAa,CAAA;IACb,uCAAe,CAAA;IACf,6CAAqB,CAAA;IACrB,2CAAmB,CAAA;IACnB,uCAAe,CAAA;IACf,6CAAqB,CAAA;IACrB,qCAAa,CAAA;IACb,qCAAa,CAAA;IACb,yCAAiB,CAAA;IACjB,uCAAe,CAAA;IACf,+CAAuB,CAAA;IACvB,uCAAe,CAAA;IACf,+CAAuB,CAAA;IACvB,2CAAmB,CAAA;IACnB,6CAAqB,CAAA;IACrB,uCAAe,CAAA;IACf,yCAAiB,CAAA;IACjB,yCAAiB,CAAA;IACjB,iDAAyB,CAAA;IACzB,uCAAe,CAAA;IACf,yCAAiB,CAAA;IACjB,8CAAsB,CAAA;IACtB,2CAAmB,CAAA;IACnB,qCAAa,CAAA;IACb,qCAAa,CAAA;IACb,uCAAe,CAAA;IACf,uCAAe,CAAA;IACf,2CAAmB,CAAA;IACnB,yCAAiB,CAAA;IACjB,6CAAqB,CAAA;IACrB,+CAAuB,CAAA;IACvB,2CAAmB,CAAA;IACnB,qCAAa,CAAA;IACb,yCAAiB,CAAA;IACjB,iCAAS,CAAA;IACT,+CAAuB,CAAA;IACvB,qCAAa,CAAA;IACb,6CAAqB,CAAA;IACrB,yCAAiB,CAAA;IACjB,uCAAe,CAAA;IACf,iDAAyB,CAAA;IACzB,uCAAe,CAAA;IACf,yCAAiB,CAAA;IACjB,2CAAmB,CAAA;IACnB,iDAAyB,CAAA;IACzB,+CAAuB,CAAA;IACvB,4DAAoC,CAAA;IACpC,mCAAW,CAAA;IACX,iDAAyB,CAAA;IACzB,6CAAqB,CAAA;AACzB,CAAC,EArEiB,oBAAoB,KAApB,oBAAoB,QAqErC;AA8BD,yEAAyE;AACzE,MAAM,CAAN,IAAY,YAuCX;AAvCD,WAAY,YAAY;IACpB,qDAAW,CAAA;IACX,2DAAc,CAAA;IACd,6DAAe,CAAA;IACf,6DAAe,CAAA;IACf,+EAAwB,CAAA;IACxB,6DAAe,CAAA;IACf,6DAAe,CAAA;IACf,6DAAe,CAAA;IACf,6DAAe,CAAA;IACf,6DAAe,CAAA;IACf,8DAAgB,CAAA;IAChB,kEAAkB,CAAA;IAClB,kEAAkB,CAAA;IAClB,kEAAkB,CAAA;IAClB,kEAAkB,CAAA;IAClB,kEAAkB,CAAA;IAClB,kEAAkB,CAAA;IAClB,kEAAkB,CAAA;IAClB,8DAAgB,CAAA;IAChB,oEAAmB,CAAA;IACnB,kEAAkB,CAAA;IAClB,kEAAkB,CAAA;IAClB,kEAAkB,CAAA;IAClB,oEAAmB,CAAA;IACnB,gEAAiB,CAAA;IACjB,kEAAkB,CAAA;IAClB,gEAAiB,CAAA;IACjB,gEAAiB,CAAA;IACjB,gEAAiB,CAAA;IACjB,gEAAiB,CAAA;IACjB,kEAAkB,CAAA;IAClB,gEAAiB,CAAA;IACjB,8DAAgB,CAAA;IAChB,sEAAoB,CAAA;IACpB,sEAAoB,CAAA;IACpB,sEAAoB,CAAA;IACpB,gEAAiB,CAAA;IACjB,gEAAiB,CAAA;AACrB,CAAC,EAvCW,YAAY,KAAZ,YAAY,QAuCvB;AA2FD,MAAM,CAAN,IAAkB,8BAQjB;AARD,WAAkB,8BAA8B;IAC5C,6FAAa,CAAA;IACb,uFAAU,CAAA;IACV,yFAAW,CAAA;IACX,yFAAW,CAAA;IACX,iGAAe,CAAA;IACf,uFAAU,CAAA;IACV,mFAAQ,CAAA;AACZ,CAAC,EARiB,8BAA8B,KAA9B,8BAA8B,QAQ/C;AAgDD,MAAM,CAAN,IAAkB,mCAOjB;AAPD,WAAkB,mCAAmC;IACjD,4GAAgB,CAAA;IAChB,6FAAQ,CAAA;IACR,6FAAQ,CAAA;IACR,2FAAO,CAAA;IACP,6FAAQ,CAAA;IACR,6FAAQ,CAAA;AACZ,CAAC,EAPiB,mCAAmC,KAAnC,mCAAmC,QAOpD;AAoND,MAAM,UAAU,gCAAgC,CAC5C,QAAsB,EAAE,IAAO;IAE/B,OAAO,QAAQ,EAAE,OAAO,EAAE,YAAY,KAAK,IAAI,CAAC;AACpD,CAAC"}
|
|
@@ -46,5 +46,15 @@ export declare const enum GgmlType {
|
|
|
46
46
|
I16 = 25,
|
|
47
47
|
I32 = 26,
|
|
48
48
|
I64 = 27,
|
|
49
|
-
F64 = 28
|
|
49
|
+
F64 = 28,
|
|
50
|
+
IQ1_M = 29,
|
|
51
|
+
BF16 = 30,
|
|
52
|
+
Q4_0_4_4 = 31,
|
|
53
|
+
Q4_0_4_8 = 32,
|
|
54
|
+
Q4_0_8_8 = 33,
|
|
55
|
+
TQ1_0 = 34,
|
|
56
|
+
TQ2_0 = 35,
|
|
57
|
+
IQ4_NL_4_4 = 36,
|
|
58
|
+
IQ4_NL_4_8 = 37,
|
|
59
|
+
IQ4_NL_8_8 = 38
|
|
50
60
|
}
|
|
@@ -29,5 +29,15 @@ export var GgmlType;
|
|
|
29
29
|
GgmlType[GgmlType["I32"] = 26] = "I32";
|
|
30
30
|
GgmlType[GgmlType["I64"] = 27] = "I64";
|
|
31
31
|
GgmlType[GgmlType["F64"] = 28] = "F64";
|
|
32
|
+
GgmlType[GgmlType["IQ1_M"] = 29] = "IQ1_M";
|
|
33
|
+
GgmlType[GgmlType["BF16"] = 30] = "BF16";
|
|
34
|
+
GgmlType[GgmlType["Q4_0_4_4"] = 31] = "Q4_0_4_4";
|
|
35
|
+
GgmlType[GgmlType["Q4_0_4_8"] = 32] = "Q4_0_4_8";
|
|
36
|
+
GgmlType[GgmlType["Q4_0_8_8"] = 33] = "Q4_0_8_8";
|
|
37
|
+
GgmlType[GgmlType["TQ1_0"] = 34] = "TQ1_0";
|
|
38
|
+
GgmlType[GgmlType["TQ2_0"] = 35] = "TQ2_0";
|
|
39
|
+
GgmlType[GgmlType["IQ4_NL_4_4"] = 36] = "IQ4_NL_4_4";
|
|
40
|
+
GgmlType[GgmlType["IQ4_NL_4_8"] = 37] = "IQ4_NL_4_8";
|
|
41
|
+
GgmlType[GgmlType["IQ4_NL_8_8"] = 38] = "IQ4_NL_8_8";
|
|
32
42
|
})(GgmlType || (GgmlType = {}));
|
|
33
43
|
//# sourceMappingURL=GgufTensorInfoTypes.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"GgufTensorInfoTypes.js","sourceRoot":"","sources":["../../../src/gguf/types/GgufTensorInfoTypes.ts"],"names":[],"mappings":"AAsBA,MAAM,CAAN,IAAkB,
|
|
1
|
+
{"version":3,"file":"GgufTensorInfoTypes.js","sourceRoot":"","sources":["../../../src/gguf/types/GgufTensorInfoTypes.ts"],"names":[],"mappings":"AAsBA,MAAM,CAAN,IAAkB,QAwCjB;AAxCD,WAAkB,QAAQ;IACtB,qCAAO,CAAA;IACP,qCAAO,CAAA;IACP,uCAAQ,CAAA;IACR,uCAAQ,CAAA;IACR,uCAAQ,CAAA;IACR,uCAAQ,CAAA;IACR,uCAAQ,CAAA;IACR,uCAAQ,CAAA;IACR,uCAAQ,CAAA;IACR,uCAAQ,CAAA;IACR,wCAAS,CAAA;IACT,wCAAS,CAAA;IACT,wCAAS,CAAA;IACT,wCAAS,CAAA;IACT,wCAAS,CAAA;IACT,wCAAS,CAAA;IACT,8CAAY,CAAA;IACZ,4CAAW,CAAA;IACX,8CAAY,CAAA;IACZ,0CAAU,CAAA;IACV,4CAAW,CAAA;IACX,0CAAU,CAAA;IACV,0CAAU,CAAA;IACV,4CAAW,CAAA;IACX,oCAAO,CAAA;IACP,sCAAQ,CAAA;IACR,sCAAQ,CAAA;IACR,sCAAQ,CAAA;IACR,sCAAQ,CAAA;IACR,0CAAU,CAAA;IACV,wCAAS,CAAA;IACT,gDAAa,CAAA;IACb,gDAAa,CAAA;IACb,gDAAa,CAAA;IACb,0CAAU,CAAA;IACV,0CAAU,CAAA;IACV,oDAAe,CAAA;IACf,oDAAe,CAAA;IACf,oDAAe,CAAA;AACnB,CAAC,EAxCiB,QAAQ,KAAR,QAAQ,QAwCzB"}
|
|
@@ -393,6 +393,7 @@ AddonContext::AddonContext(const Napi::CallbackInfo& info) : Napi::ObjectWrap<Ad
|
|
|
393
393
|
context_params.n_threads = std::max(cpu_get_num_math(), 1);
|
|
394
394
|
context_params.n_threads_batch = context_params.n_threads;
|
|
395
395
|
context_params.no_perf = true;
|
|
396
|
+
context_params.swa_full = false;
|
|
396
397
|
|
|
397
398
|
if (info.Length() > 1 && info[1].IsObject()) {
|
|
398
399
|
Napi::Object options = info[1].As<Napi::Object>();
|
|
@@ -433,6 +434,10 @@ AddonContext::AddonContext(const Napi::CallbackInfo& info) : Napi::ObjectWrap<Ad
|
|
|
433
434
|
if (options.Has("performanceTracking")) {
|
|
434
435
|
context_params.no_perf = !(options.Get("performanceTracking").As<Napi::Boolean>().Value());
|
|
435
436
|
}
|
|
437
|
+
|
|
438
|
+
if (options.Has("swaFullCache")) {
|
|
439
|
+
context_params.swa_full = options.Get("swaFullCache").As<Napi::Boolean>().Value();
|
|
440
|
+
}
|
|
436
441
|
}
|
|
437
442
|
}
|
|
438
443
|
AddonContext::~AddonContext() {
|
|
@@ -620,6 +625,32 @@ Napi::Value AddonContext::ShiftSequenceTokenCells(const Napi::CallbackInfo& info
|
|
|
620
625
|
|
|
621
626
|
return info.Env().Undefined();
|
|
622
627
|
}
|
|
628
|
+
Napi::Value AddonContext::GetSequenceKvCacheMinPosition(const Napi::CallbackInfo& info) {
|
|
629
|
+
if (disposed) {
|
|
630
|
+
Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
|
|
631
|
+
return info.Env().Undefined();
|
|
632
|
+
}
|
|
633
|
+
|
|
634
|
+
int32_t sequenceId = info[0].As<Napi::Number>().Int32Value();
|
|
635
|
+
|
|
636
|
+
|
|
637
|
+
const auto minPosition = llama_kv_self_seq_pos_min(ctx, sequenceId);
|
|
638
|
+
|
|
639
|
+
return Napi::Number::New(info.Env(), minPosition);
|
|
640
|
+
}
|
|
641
|
+
Napi::Value AddonContext::GetSequenceKvCacheMaxPosition(const Napi::CallbackInfo& info) {
|
|
642
|
+
if (disposed) {
|
|
643
|
+
Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
|
|
644
|
+
return info.Env().Undefined();
|
|
645
|
+
}
|
|
646
|
+
|
|
647
|
+
int32_t sequenceId = info[0].As<Napi::Number>().Int32Value();
|
|
648
|
+
|
|
649
|
+
|
|
650
|
+
const auto maxPosition = llama_kv_self_seq_pos_max(ctx, sequenceId);
|
|
651
|
+
|
|
652
|
+
return Napi::Number::New(info.Env(), maxPosition);
|
|
653
|
+
}
|
|
623
654
|
Napi::Value AddonContext::DecodeBatch(const Napi::CallbackInfo& info) {
|
|
624
655
|
AddonContextDecodeBatchWorker* worker = new AddonContextDecodeBatchWorker(info.Env(), this);
|
|
625
656
|
worker->Queue();
|
|
@@ -926,6 +957,8 @@ void AddonContext::init(Napi::Object exports) {
|
|
|
926
957
|
InstanceMethod("disposeSequence", &AddonContext::DisposeSequence),
|
|
927
958
|
InstanceMethod("removeTokenCellsFromSequence", &AddonContext::RemoveTokenCellsFromSequence),
|
|
928
959
|
InstanceMethod("shiftSequenceTokenCells", &AddonContext::ShiftSequenceTokenCells),
|
|
960
|
+
InstanceMethod("getSequenceKvCacheMinPosition", &AddonContext::GetSequenceKvCacheMinPosition),
|
|
961
|
+
InstanceMethod("getSequenceKvCacheMaxPosition", &AddonContext::GetSequenceKvCacheMaxPosition),
|
|
929
962
|
InstanceMethod("decodeBatch", &AddonContext::DecodeBatch),
|
|
930
963
|
InstanceMethod("sampleToken", &AddonContext::SampleToken),
|
|
931
964
|
InstanceMethod("getEmbedding", &AddonContext::GetEmbedding),
|
|
@@ -36,6 +36,8 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
|
|
|
36
36
|
Napi::Value DisposeSequence(const Napi::CallbackInfo& info);
|
|
37
37
|
Napi::Value RemoveTokenCellsFromSequence(const Napi::CallbackInfo& info);
|
|
38
38
|
Napi::Value ShiftSequenceTokenCells(const Napi::CallbackInfo& info);
|
|
39
|
+
Napi::Value GetSequenceKvCacheMinPosition(const Napi::CallbackInfo& info);
|
|
40
|
+
Napi::Value GetSequenceKvCacheMaxPosition(const Napi::CallbackInfo& info);
|
|
39
41
|
Napi::Value DecodeBatch(const Napi::CallbackInfo& info);
|
|
40
42
|
Napi::Value SampleToken(const Napi::CallbackInfo& info);
|
|
41
43
|
|
package/llama/addon/addon.cpp
CHANGED
|
@@ -73,6 +73,19 @@ Napi::Value addonGetTypeSizeForGgmlType(const Napi::CallbackInfo& info) {
|
|
|
73
73
|
return Napi::Number::New(info.Env(), typeSize);
|
|
74
74
|
}
|
|
75
75
|
|
|
76
|
+
Napi::Value addonGetGgmlGraphOverheadCustom(const Napi::CallbackInfo& info) {
|
|
77
|
+
if (info.Length() < 2 || !info[0].IsNumber() || !info[1].IsBoolean()) {
|
|
78
|
+
return Napi::Number::New(info.Env(), 0);
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
const size_t size = info[0].As<Napi::Number>().Uint32Value();
|
|
82
|
+
const bool grads = info[1].As<Napi::Boolean>().Value();
|
|
83
|
+
|
|
84
|
+
const auto graphOverhead = ggml_graph_overhead_custom(size, grads);
|
|
85
|
+
|
|
86
|
+
return Napi::Number::New(info.Env(), graphOverhead);
|
|
87
|
+
}
|
|
88
|
+
|
|
76
89
|
Napi::Value addonGetConsts(const Napi::CallbackInfo& info) {
|
|
77
90
|
Napi::Object consts = Napi::Object::New(info.Env());
|
|
78
91
|
consts.Set("ggmlMaxDims", Napi::Number::New(info.Env(), GGML_MAX_DIMS));
|
|
@@ -231,6 +244,7 @@ Napi::Object registerCallback(Napi::Env env, Napi::Object exports) {
|
|
|
231
244
|
Napi::PropertyDescriptor::Function("getMathCores", addonGetMathCores),
|
|
232
245
|
Napi::PropertyDescriptor::Function("getBlockSizeForGgmlType", addonGetBlockSizeForGgmlType),
|
|
233
246
|
Napi::PropertyDescriptor::Function("getTypeSizeForGgmlType", addonGetTypeSizeForGgmlType),
|
|
247
|
+
Napi::PropertyDescriptor::Function("getGgmlGraphOverheadCustom", addonGetGgmlGraphOverheadCustom),
|
|
234
248
|
Napi::PropertyDescriptor::Function("getConsts", addonGetConsts),
|
|
235
249
|
Napi::PropertyDescriptor::Function("setLogger", setLogger),
|
|
236
250
|
Napi::PropertyDescriptor::Function("setLoggerLogLevel", setLoggerLogLevel),
|
package/llama/gitRelease.bundle
CHANGED
|
Binary file
|