node-llama-cpp 3.0.0-beta.25 → 3.0.0-beta.27
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bins/linux-arm64/_nlcBuildMetadata.json +1 -1
- package/bins/linux-arm64/llama-addon.node +0 -0
- package/bins/linux-armv7l/_nlcBuildMetadata.json +1 -1
- package/bins/linux-armv7l/llama-addon.node +0 -0
- package/bins/linux-x64/_nlcBuildMetadata.json +1 -1
- package/bins/linux-x64/llama-addon.node +0 -0
- package/bins/linux-x64-cuda/_nlcBuildMetadata.json +1 -1
- package/bins/linux-x64-cuda/llama-addon.node +0 -0
- package/bins/linux-x64-vulkan/_nlcBuildMetadata.json +1 -1
- package/bins/linux-x64-vulkan/llama-addon.node +0 -0
- package/bins/mac-arm64-metal/_nlcBuildMetadata.json +1 -1
- package/bins/mac-arm64-metal/llama-addon.node +0 -0
- package/bins/mac-x64/_nlcBuildMetadata.json +1 -1
- package/bins/mac-x64/llama-addon.node +0 -0
- package/bins/win-arm64/_nlcBuildMetadata.json +1 -1
- package/bins/win-arm64/llama-addon.node +0 -0
- package/bins/win-x64/_nlcBuildMetadata.json +1 -1
- package/bins/win-x64/llama-addon.node +0 -0
- package/bins/win-x64-cuda/_nlcBuildMetadata.json +1 -1
- package/bins/win-x64-cuda/llama-addon.node +0 -0
- package/bins/win-x64-vulkan/_nlcBuildMetadata.json +1 -1
- package/bins/win-x64-vulkan/llama-addon.node +0 -0
- package/dist/bindings/Llama.d.ts +3 -3
- package/dist/bindings/Llama.js +25 -19
- package/dist/bindings/Llama.js.map +1 -1
- package/dist/bindings/utils/MemoryOrchestrator.d.ts +2 -2
- package/dist/bindings/utils/MemoryOrchestrator.js +1 -1
- package/dist/bindings/utils/MemoryOrchestrator.js.map +1 -1
- package/dist/cli/commands/ChatCommand.js +1 -1
- package/dist/cli/commands/ChatCommand.js.map +1 -1
- package/dist/cli/commands/CompleteCommand.js +1 -1
- package/dist/cli/commands/CompleteCommand.js.map +1 -1
- package/dist/cli/commands/DebugCommand.js +1 -1
- package/dist/cli/commands/DebugCommand.js.map +1 -1
- package/dist/cli/commands/InfillCommand.js +1 -1
- package/dist/cli/commands/InfillCommand.js.map +1 -1
- package/dist/cli/commands/inspect/commands/InspectGpuCommand.js +2 -2
- package/dist/cli/commands/inspect/commands/InspectGpuCommand.js.map +1 -1
- package/dist/cli/commands/inspect/commands/InspectMeasureCommand.js +5 -5
- package/dist/cli/commands/inspect/commands/InspectMeasureCommand.js.map +1 -1
- package/dist/cli/utils/interactivelyAskForModel.js +9 -9
- package/dist/cli/utils/interactivelyAskForModel.js.map +1 -1
- package/dist/cli/utils/printCommonInfoLines.d.ts +1 -1
- package/dist/cli/utils/printCommonInfoLines.js +7 -3
- package/dist/cli/utils/printCommonInfoLines.js.map +1 -1
- package/dist/evaluator/LlamaContext/LlamaContext.js +1 -1
- package/dist/evaluator/LlamaContext/LlamaContext.js.map +1 -1
- package/dist/evaluator/LlamaModel/LlamaModel.js +1 -1
- package/dist/evaluator/LlamaModel/LlamaModel.js.map +1 -1
- package/dist/gguf/insights/GgufInsightsConfigurationResolver.d.ts +13 -13
- package/dist/gguf/insights/GgufInsightsConfigurationResolver.js +15 -10
- package/dist/gguf/insights/GgufInsightsConfigurationResolver.js.map +1 -1
- package/dist/gguf/insights/utils/resolveContextContextSizeOption.d.ts +3 -3
- package/dist/gguf/insights/utils/resolveContextContextSizeOption.js +3 -3
- package/dist/gguf/insights/utils/resolveContextContextSizeOption.js.map +1 -1
- package/dist/gguf/insights/utils/resolveModelGpuLayersOption.d.ts +3 -3
- package/dist/gguf/insights/utils/resolveModelGpuLayersOption.js +3 -3
- package/dist/gguf/insights/utils/resolveModelGpuLayersOption.js.map +1 -1
- package/llama/binariesGithubRelease.json +1 -1
- package/llama/gitRelease.bundle +0 -0
- package/llama/grammars/README.md +47 -4
- package/llama/grammars/json.gbnf +3 -3
- package/llama/grammars/json_arr.gbnf +3 -3
- package/llama/llama.cpp.info.json +1 -1
- package/package.json +1 -1
- package/templates/packed/electron-typescript-react.json +1 -1
|
@@ -22,18 +22,18 @@ export declare class GgufInsightsConfigurationResolver {
|
|
|
22
22
|
contextSize?: number;
|
|
23
23
|
embeddingContext?: boolean;
|
|
24
24
|
}, { getVramState, getRamState, llamaVramPaddingSize, llamaGpu, llamaSupportsGpuOffloading }?: {
|
|
25
|
-
getVramState?(): {
|
|
25
|
+
getVramState?(): Promise<{
|
|
26
26
|
total: number;
|
|
27
27
|
free: number;
|
|
28
|
-
}
|
|
29
|
-
getRamState?(): {
|
|
28
|
+
}>;
|
|
29
|
+
getRamState?(): Promise<{
|
|
30
30
|
total: number;
|
|
31
31
|
free: number;
|
|
32
|
-
}
|
|
32
|
+
}>;
|
|
33
33
|
llamaVramPaddingSize?: number;
|
|
34
34
|
llamaGpu?: BuildGpu;
|
|
35
35
|
llamaSupportsGpuOffloading?: boolean;
|
|
36
|
-
}): {
|
|
36
|
+
}): Promise<{
|
|
37
37
|
/**
|
|
38
38
|
* A number between `0` (inclusive) and `1` (inclusive) representing the compatibility score.
|
|
39
39
|
*/
|
|
@@ -60,28 +60,28 @@ export declare class GgufInsightsConfigurationResolver {
|
|
|
60
60
|
contextVramUsage: number;
|
|
61
61
|
totalVramUsage: number;
|
|
62
62
|
};
|
|
63
|
-
}
|
|
63
|
+
}>;
|
|
64
64
|
resolveModelGpuLayers(gpuLayers: LlamaModelOptions["gpuLayers"], { ignoreMemorySafetyChecks, getVramState, llamaVramPaddingSize, llamaGpu, llamaSupportsGpuOffloading }?: {
|
|
65
65
|
ignoreMemorySafetyChecks?: boolean;
|
|
66
|
-
getVramState?(): {
|
|
66
|
+
getVramState?(): Promise<{
|
|
67
67
|
total: number;
|
|
68
68
|
free: number;
|
|
69
|
-
}
|
|
69
|
+
}>;
|
|
70
70
|
llamaVramPaddingSize?: number;
|
|
71
71
|
llamaGpu?: BuildGpu;
|
|
72
72
|
llamaSupportsGpuOffloading?: boolean;
|
|
73
|
-
}): number
|
|
74
|
-
resolveContextContextSize(contextSize: LlamaContextOptions["contextSize"], { modelGpuLayers, batchSize,
|
|
73
|
+
}): Promise<number>;
|
|
74
|
+
resolveContextContextSize(contextSize: LlamaContextOptions["contextSize"], { modelGpuLayers, batchSize, modelTrainContextSize, getVramState, llamaGpu, ignoreMemorySafetyChecks, isEmbeddingContext, sequences }: {
|
|
75
75
|
modelGpuLayers: number;
|
|
76
76
|
modelTrainContextSize: number;
|
|
77
77
|
batchSize?: LlamaContextOptions["batchSize"];
|
|
78
78
|
sequences?: number;
|
|
79
|
-
getVramState?(): {
|
|
79
|
+
getVramState?(): Promise<{
|
|
80
80
|
total: number;
|
|
81
81
|
free: number;
|
|
82
|
-
}
|
|
82
|
+
}>;
|
|
83
83
|
llamaGpu?: BuildGpu;
|
|
84
84
|
ignoreMemorySafetyChecks?: boolean;
|
|
85
85
|
isEmbeddingContext?: boolean;
|
|
86
|
-
}): number
|
|
86
|
+
}): Promise<number>;
|
|
87
87
|
}
|
|
@@ -24,12 +24,14 @@ export class GgufInsightsConfigurationResolver {
|
|
|
24
24
|
*
|
|
25
25
|
* `contextSize` defaults to `4096` (if the model train context size is lower than this, the model train context size is used instead).
|
|
26
26
|
*/
|
|
27
|
-
scoreModelConfigurationCompatibility({ contextSize = Math.min(4096, this._ggufInsights.trainContextSize ?? 4096), embeddingContext = false } = {}, { getVramState = (() => this._ggufInsights._llama._vramOrchestrator.getMemoryState()), getRamState = (() => ({ total: os.totalmem(), free: os.freemem() })), llamaVramPaddingSize = this._ggufInsights._llama.vramPaddingSize, llamaGpu = this._ggufInsights._llama.gpu, llamaSupportsGpuOffloading = this._ggufInsights._llama.supportsGpuOffloading } = {}) {
|
|
27
|
+
async scoreModelConfigurationCompatibility({ contextSize = Math.min(4096, this._ggufInsights.trainContextSize ?? 4096), embeddingContext = false } = {}, { getVramState = (() => this._ggufInsights._llama._vramOrchestrator.getMemoryState()), getRamState = (async () => ({ total: os.totalmem(), free: os.freemem() })), llamaVramPaddingSize = this._ggufInsights._llama.vramPaddingSize, llamaGpu = this._ggufInsights._llama.gpu, llamaSupportsGpuOffloading = this._ggufInsights._llama.supportsGpuOffloading } = {}) {
|
|
28
28
|
const defaultTrainContextSize = 4096;
|
|
29
|
-
const vramState =
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
29
|
+
const [vramState, ramState] = await Promise.all([
|
|
30
|
+
getVramState(),
|
|
31
|
+
getRamState()
|
|
32
|
+
]);
|
|
33
|
+
const resolvedGpuLayers = await this.resolveModelGpuLayers("auto", {
|
|
34
|
+
getVramState: async () => vramState,
|
|
33
35
|
llamaVramPaddingSize,
|
|
34
36
|
llamaGpu,
|
|
35
37
|
llamaSupportsGpuOffloading
|
|
@@ -38,8 +40,11 @@ export class GgufInsightsConfigurationResolver {
|
|
|
38
40
|
const estimatedModelResourceUsage = this._ggufInsights.estimateModelResourceRequirements({
|
|
39
41
|
gpuLayers: resolvedGpuLayers
|
|
40
42
|
});
|
|
41
|
-
const resolvedContextSize = this.resolveContextContextSize("auto", {
|
|
42
|
-
getVramState: () => ({
|
|
43
|
+
const resolvedContextSize = await this.resolveContextContextSize("auto", {
|
|
44
|
+
getVramState: async () => ({
|
|
45
|
+
total: vramState.total,
|
|
46
|
+
free: Math.max(0, vramState.free - estimatedModelResourceUsage.gpuVram)
|
|
47
|
+
}),
|
|
43
48
|
llamaGpu,
|
|
44
49
|
isEmbeddingContext: embeddingContext,
|
|
45
50
|
modelGpuLayers: resolvedGpuLayers,
|
|
@@ -89,7 +94,7 @@ export class GgufInsightsConfigurationResolver {
|
|
|
89
94
|
}
|
|
90
95
|
};
|
|
91
96
|
}
|
|
92
|
-
resolveModelGpuLayers(gpuLayers, { ignoreMemorySafetyChecks = false, getVramState = (() => this._ggufInsights._llama._vramOrchestrator.getMemoryState()), llamaVramPaddingSize = this._ggufInsights._llama.vramPaddingSize, llamaGpu = this._ggufInsights._llama.gpu, llamaSupportsGpuOffloading = this._ggufInsights._llama.supportsGpuOffloading } = {}) {
|
|
97
|
+
async resolveModelGpuLayers(gpuLayers, { ignoreMemorySafetyChecks = false, getVramState = (() => this._ggufInsights._llama._vramOrchestrator.getMemoryState()), llamaVramPaddingSize = this._ggufInsights._llama.vramPaddingSize, llamaGpu = this._ggufInsights._llama.gpu, llamaSupportsGpuOffloading = this._ggufInsights._llama.supportsGpuOffloading } = {}) {
|
|
93
98
|
return resolveModelGpuLayersOption(gpuLayers, {
|
|
94
99
|
ggufInsights: this._ggufInsights,
|
|
95
100
|
ignoreMemorySafetyChecks,
|
|
@@ -99,8 +104,8 @@ export class GgufInsightsConfigurationResolver {
|
|
|
99
104
|
llamaSupportsGpuOffloading
|
|
100
105
|
});
|
|
101
106
|
}
|
|
102
|
-
resolveContextContextSize(contextSize, { modelGpuLayers, batchSize,
|
|
103
|
-
return resolveContextContextSizeOption({
|
|
107
|
+
async resolveContextContextSize(contextSize, { modelGpuLayers, batchSize, modelTrainContextSize, getVramState = (() => this._ggufInsights._llama._vramOrchestrator.getMemoryState()), llamaGpu = this._ggufInsights._llama.gpu, ignoreMemorySafetyChecks = false, isEmbeddingContext = false, sequences = getDefaultContextSequences() }) {
|
|
108
|
+
return await resolveContextContextSizeOption({
|
|
104
109
|
contextSize,
|
|
105
110
|
batchSize,
|
|
106
111
|
sequences,
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"GgufInsightsConfigurationResolver.js","sourceRoot":"","sources":["../../../src/gguf/insights/GgufInsightsConfigurationResolver.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,IAAI,CAAC;AAIpB,OAAO,EAAC,0BAA0B,EAAC,MAAM,8CAA8C,CAAC;AACxF,OAAO,EAAC,2BAA2B,EAAC,MAAM,wCAAwC,CAAC;AACnF,OAAO,EAAC,+BAA+B,EAAC,MAAM,4CAA4C,CAAC;AAC3F,OAAO,EAAC,WAAW,EAAC,MAAM,wBAAwB,CAAC;AAInD,MAAM,OAAO,iCAAiC;IAC1C,gBAAgB,CAAkB,aAAa,CAAe;IAE9D,YAAoB,YAA0B;QAC1C,IAAI,CAAC,aAAa,GAAG,YAAY,CAAC;IACtC,CAAC;IAED,IAAW,YAAY;QACnB,OAAO,IAAI,CAAC,aAAa,CAAC;IAC9B,CAAC;IAED;;;;;;;;;;;;OAYG;IACI,oCAAoC,CAAC,
|
|
1
|
+
{"version":3,"file":"GgufInsightsConfigurationResolver.js","sourceRoot":"","sources":["../../../src/gguf/insights/GgufInsightsConfigurationResolver.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,IAAI,CAAC;AAIpB,OAAO,EAAC,0BAA0B,EAAC,MAAM,8CAA8C,CAAC;AACxF,OAAO,EAAC,2BAA2B,EAAC,MAAM,wCAAwC,CAAC;AACnF,OAAO,EAAC,+BAA+B,EAAC,MAAM,4CAA4C,CAAC;AAC3F,OAAO,EAAC,WAAW,EAAC,MAAM,wBAAwB,CAAC;AAInD,MAAM,OAAO,iCAAiC;IAC1C,gBAAgB,CAAkB,aAAa,CAAe;IAE9D,YAAoB,YAA0B;QAC1C,IAAI,CAAC,aAAa,GAAG,YAAY,CAAC;IACtC,CAAC;IAED,IAAW,YAAY;QACnB,OAAO,IAAI,CAAC,aAAa,CAAC;IAC9B,CAAC;IAED;;;;;;;;;;;;OAYG;IACI,KAAK,CAAC,oCAAoC,CAAC,EAC9C,WAAW,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,EAAE,IAAI,CAAC,aAAa,CAAC,gBAAgB,IAAI,IAAI,CAAC,EACzE,gBAAgB,GAAG,KAAK,KAIxB,EAAE,EAAE,EACJ,YAAY,GAAG,CAAC,GAAG,EAAE,CAAC,IAAI,CAAC,aAAa,CAAC,MAAM,CAAC,iBAAiB,CAAC,cAAc,EAAE,CAAC,EACnF,WAAW,GAAG,CAAC,KAAK,IAAI,EAAE,CAAC,CAAC,EAAC,KAAK,EAAE,EAAE,CAAC,QAAQ,EAAE,EAAE,IAAI,EAAE,EAAE,CAAC,OAAO,EAAE,EAAC,CAAC,CAAC,EACxE,oBAAoB,GAAG,IAAI,CAAC,aAAa,CAAC,MAAM,CAAC,eAAe,EAChE,QAAQ,GAAG,IAAI,CAAC,aAAa,CAAC,MAAM,CAAC,GAAG,EACxC,0BAA0B,GAAG,IAAI,CAAC,aAAa,CAAC,MAAM,CAAC,qBAAqB,KAO5E,EAAE;QAiCF,MAAM,uBAAuB,GAAG,IAAI,CAAC;QAErC,MAAM,CACF,SAAS,EACT,QAAQ,CACX,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC;YAClB,YAAY,EAAE;YACd,WAAW,EAAE;SAChB,CAAC,CAAC;QACH,MAAM,iBAAiB,GAAG,MAAM,IAAI,CAAC,qBAAqB,CAAC,MAAM,EAAE;YAC/D,YAAY,EAAE,KAAK,IAAI,EAAE,CAAC,SAAS;YACnC,oBAAoB;YACpB,QAAQ;YACR,0BAA0B;SAC7B,CAAC,CAAC;QACH,MAAM,SAAS,GAAG,0BAA0B,IAAI,QAAQ,KAAK,KAAK,CAAC;QACnE,MAAM,2BAA2B,GAAG,IAAI,CAAC,aAAa,CAAC,iCAAiC,CAAC;YACrF,SAAS,EAAE,iBAAiB;SAC/B,CAAC,CAAC;QAEH,MAAM,mBAAmB,GAAG,MAAM,IAAI,CAAC,yBAAyB,CAAC,MAAM,EAAE;YACrE,YAAY,EAAE,KAAK,IAAI,EAAE,CAAC,CAAC;gBACvB,KAAK,EAAE,SAAS,CAAC,KAAK;gBACtB,IAAI,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,SAAS,CAAC,IAAI,GAAG,2BAA2B,CAAC,OAAO,CAAC;aAC1E,CAAC;YACF,QAAQ;YACR,kBAAkB,EAAE,gBAAgB;YACpC,cAAc,EAAE,iBAAiB;YACjC,qBAAqB,EAAE,IAAI,CAAC,aAAa,CAAC,gBAAgB,IAAI,uBAAuB;SACxF,CAAC,CAAC;QACH,MAAM,6BAA6B,GAAG,IAAI,CAAC,aAAa,CAAC,mCAAmC,CAAC;YACzF,WAAW,EAA
E,mBAAmB;YAChC,kBAAkB,EAAE,gBAAgB;YACpC,cAAc,EAAE,iBAAiB;SACpC,CAAC,CAAC;QAEH,MAAM,UAAU,GAAG;YACf,SAAS,EAAE,EAAE;YACb,qBAAqB,EAAE,EAAE;YACzB,WAAW,EAAE,EAAE;YACf,iBAAiB,EAAE,EAAE;YACrB,qBAAqB,EAAE,EAAE,EAAE,sDAAsD;YACjF,gBAAgB,EAAE,EAAE;SACd,CAAC;QAEX,MAAM,eAAe,GAAG,UAAU,CAAC,SAAS,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,iBAAiB,GAAG,IAAI,CAAC,aAAa,CAAC,WAAW,CAAC,CAAC;QAC/G,MAAM,2BAA2B,GAAG,UAAU,CAAC,qBAAqB,GAAG,CACnE,iBAAiB,KAAK,IAAI,CAAC,aAAa,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAC/D,CAAC;QACF,MAAM,iBAAiB,GAAG,UAAU,CAAC,WAAW,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,mBAAmB,GAAG,WAAW,CAAC,CAAC;QAClG,MAAM,uBAAuB,GAAG,UAAU,CAAC,iBAAiB,GAAG,CAC3D,2BAA2B,CAAC,MAAM,IAAI,QAAQ,CAAC,IAAI;YAC/C,CAAC,CAAC,CAAC;YACH,CAAC,CAAC,2BAA2B,CAAC,MAAM,IAAI,QAAQ,CAAC,KAAK;gBAClD,CAAC,CAAC,GAAG;gBACL,CAAC,CAAC,CACE,GAAG,GAAG,IAAI,CAAC,GAAG,CACV,GAAG,EACH,GAAG,GAAG,CACF,CAAC,2BAA2B,CAAC,MAAM,GAAG,QAAQ,CAAC,KAAK,CAAC,GAAG,QAAQ,CAAC,KAAK,CACzE,CACJ,CACJ,CACZ,CAAC;QACF,MAAM,sBAAsB,GAAG,EAAE,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,mBAAmB,GAAG,WAAW,CAAC,GAAG,WAAW,CAAC,CAAC;QAE9G,MAAM,kBAAkB,GAAG,SAAS;YAChC,CAAC,CAAC,CACE,CAAC,eAAe,GAAG,2BAA2B,GAAG,iBAAiB,GAAG,uBAAuB,CAAC;gBAC7F,CAAC,UAAU,CAAC,SAAS,GAAG,UAAU,CAAC,qBAAqB,GAAG,UAAU,CAAC,WAAW,GAAG,UAAU,CAAC,iBAAiB,CAAC,CACpH;YACD,CAAC,CAAC,CACE,CAAC,iBAAiB,GAAG,uBAAuB,GAAG,6BAA6B,CAAC,IAAI,CAAC,aAAa,CAAC,SAAS,CAAC,CAAC;gBAC3G,CAAC,UAAU,CAAC,WAAW,GAAG,UAAU,CAAC,iBAAiB,GAAG,UAAU,CAAC,qBAAqB,CAAC,CAAC,CAAC;QACpG,MAAM,UAAU,GAAG,sBAAsB,GAAG,UAAU,CAAC,gBAAgB,CAAC;QAExE,OAAO;YACH,kBAAkB;YAClB,UAAU;YACV,UAAU,EAAE,kBAAkB,GAAG,UAAU;YAE3C,cAAc,EAAE;gBACZ,SAAS,EAAE,iBAAiB;gBAC5B,WAAW,EAAE,mBAAmB;gBAEhC,aAAa,EAAE,2BAA2B,CAAC,MAAM;gBACjD,eAAe,EAAE,6BAA6B,CAAC,MAAM;gBACrD,aAAa,EAAE,2BAA2B,CAAC,MAAM,GAAG,6BAA6B,CAAC,MAAM;gBAExF,cAAc,EAAE,2BAA2B,CAAC,OAAO;gBACnD,gBAAgB,EAAE,6BAA6B,CAAC,OAAO;gBACvD,cAAc,EAAE,2BAA2B,CAAC,OAAO,GAAG,6BAA6B,CAAC,OAAO;aAC9F;SACJ,CAAC;IACN,CAAC;IAEM,KAAK,CAAC,qBAAqB,CAAC,SAAyC,EAAE,EAC1E,wBAAwB,GAAG,KAAK,EAChC,YA
AY,GAAG,CAAC,GAAG,EAAE,CAAC,IAAI,CAAC,aAAa,CAAC,MAAM,CAAC,iBAAiB,CAAC,cAAc,EAAE,CAAC,EACnF,oBAAoB,GAAG,IAAI,CAAC,aAAa,CAAC,MAAM,CAAC,eAAe,EAAE,QAAQ,GAAG,IAAI,CAAC,aAAa,CAAC,MAAM,CAAC,GAAG,EAC1G,0BAA0B,GAAG,IAAI,CAAC,aAAa,CAAC,MAAM,CAAC,qBAAqB,KAI5E,EAAE;QACF,OAAO,2BAA2B,CAAC,SAAS,EAAE;YAC1C,YAAY,EAAE,IAAI,CAAC,aAAa;YAChC,wBAAwB;YACxB,YAAY;YACZ,oBAAoB;YACpB,QAAQ;YACR,0BAA0B;SAC7B,CAAC,CAAC;IACP,CAAC;IAEM,KAAK,CAAC,yBAAyB,CAAC,WAA+C,EAAE,EACpF,cAAc,EACd,SAAS,EACT,qBAAqB,EACrB,YAAY,GAAG,CAAC,GAAG,EAAE,CAAC,IAAI,CAAC,aAAa,CAAC,MAAM,CAAC,iBAAiB,CAAC,cAAc,EAAE,CAAC,EACnF,QAAQ,GAAG,IAAI,CAAC,aAAa,CAAC,MAAM,CAAC,GAAG,EACxC,wBAAwB,GAAG,KAAK,EAChC,kBAAkB,GAAG,KAAK,EAC1B,SAAS,GAAG,0BAA0B,EAAE,EAU3C;QACG,OAAO,MAAM,+BAA+B,CAAC;YACzC,WAAW;YACX,SAAS;YACT,SAAS;YACT,iBAAiB,EAAE,IAAI,CAAC,aAAa;YACrC,cAAc;YACd,qBAAqB;YACrB,YAAY;YACZ,QAAQ;YACR,wBAAwB;YACxB,kBAAkB;SACrB,CAAC,CAAC;IACP,CAAC;IAED,gBAAgB;IACT,MAAM,CAAC,OAAO,CAAC,YAA0B;QAC5C,OAAO,IAAI,iCAAiC,CAAC,YAAY,CAAC,CAAC;IAC/D,CAAC;CACJ;AAED,SAAS,6BAA6B,CAAC,SAAiB;IACpD,MAAM,IAAI,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC;IAC/B,OAAO,EAAE,GAAG,WAAW,CAAC,SAAS,EAAE,CAAC;YAChC,KAAK,EAAE,IAAI;YACX,GAAG,EAAE,IAAI,GAAG,GAAG;YACf,MAAM,EAAE,EAAE;SACb,EAAE;YACC,KAAK,EAAE,IAAI,GAAG,GAAG;YACjB,GAAG,EAAE,IAAI,GAAG,CAAC;YACb,MAAM,EAAE,EAAE;SACb,EAAE;YACC,KAAK,EAAE,IAAI,GAAG,CAAC;YACf,MAAM,EAAE,CAAC;SACZ,CAAC,CAAC,CAAC;AACR,CAAC"}
|
|
@@ -8,11 +8,11 @@ export declare function resolveContextContextSizeOption({ contextSize, batchSize
|
|
|
8
8
|
modelFileInsights: GgufInsights;
|
|
9
9
|
modelGpuLayers: number;
|
|
10
10
|
modelTrainContextSize: number;
|
|
11
|
-
getVramState(): {
|
|
11
|
+
getVramState(): Promise<{
|
|
12
12
|
total: number;
|
|
13
13
|
free: number;
|
|
14
|
-
}
|
|
14
|
+
}>;
|
|
15
15
|
llamaGpu: BuildGpu;
|
|
16
16
|
ignoreMemorySafetyChecks?: boolean;
|
|
17
17
|
isEmbeddingContext?: boolean;
|
|
18
|
-
}): number
|
|
18
|
+
}): Promise<number>;
|
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
import { minAllowedContextSizeInCalculations } from "../../../config.js";
|
|
2
2
|
import { getDefaultContextBatchSize, getDefaultModelContextSize } from "../../../evaluator/LlamaContext/LlamaContext.js";
|
|
3
|
-
export function resolveContextContextSizeOption({ contextSize, batchSize, sequences, modelFileInsights, modelGpuLayers, modelTrainContextSize, getVramState, llamaGpu, ignoreMemorySafetyChecks = false, isEmbeddingContext = false }) {
|
|
3
|
+
export async function resolveContextContextSizeOption({ contextSize, batchSize, sequences, modelFileInsights, modelGpuLayers, modelTrainContextSize, getVramState, llamaGpu, ignoreMemorySafetyChecks = false, isEmbeddingContext = false }) {
|
|
4
4
|
if (contextSize == null)
|
|
5
5
|
contextSize = "auto";
|
|
6
6
|
if (typeof contextSize === "number") {
|
|
7
7
|
const resolvedContextSize = Math.max(1, Math.floor(contextSize));
|
|
8
8
|
if (ignoreMemorySafetyChecks)
|
|
9
9
|
return resolvedContextSize;
|
|
10
|
-
const vramState = getVramState();
|
|
10
|
+
const vramState = await getVramState();
|
|
11
11
|
const contextVram = modelFileInsights.estimateContextResourceRequirements({
|
|
12
12
|
contextSize: resolvedContextSize,
|
|
13
13
|
batchSize: batchSize ?? getDefaultContextBatchSize({ contextSize: resolvedContextSize, sequences }),
|
|
@@ -22,7 +22,7 @@ export function resolveContextContextSizeOption({ contextSize, batchSize, sequen
|
|
|
22
22
|
else if (contextSize === "auto" || typeof contextSize === "object") {
|
|
23
23
|
if (llamaGpu === false)
|
|
24
24
|
return modelTrainContextSize;
|
|
25
|
-
const vramState = getVramState();
|
|
25
|
+
const vramState = await getVramState();
|
|
26
26
|
if (vramState.total === 0)
|
|
27
27
|
return modelTrainContextSize;
|
|
28
28
|
const freeVram = vramState.free;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"resolveContextContextSizeOption.js","sourceRoot":"","sources":["../../../../src/gguf/insights/utils/resolveContextContextSizeOption.ts"],"names":[],"mappings":"AAGA,OAAO,EAAC,mCAAmC,EAAC,MAAM,oBAAoB,CAAC;AACvE,OAAO,EAAC,0BAA0B,EAAE,0BAA0B,EAAC,MAAM,iDAAiD,CAAC;AAEvH,MAAM,UAAU,+BAA+B,CAAC,
|
|
1
|
+
{"version":3,"file":"resolveContextContextSizeOption.js","sourceRoot":"","sources":["../../../../src/gguf/insights/utils/resolveContextContextSizeOption.ts"],"names":[],"mappings":"AAGA,OAAO,EAAC,mCAAmC,EAAC,MAAM,oBAAoB,CAAC;AACvE,OAAO,EAAC,0BAA0B,EAAE,0BAA0B,EAAC,MAAM,iDAAiD,CAAC;AAEvH,MAAM,CAAC,KAAK,UAAU,+BAA+B,CAAC,EAClD,WAAW,EAAE,SAAS,EAAE,SAAS,EAAE,iBAAiB,EAAE,cAAc,EAAE,qBAAqB,EAAE,YAAY,EAAE,QAAQ,EACnH,wBAAwB,GAAG,KAAK,EAAE,kBAAkB,GAAG,KAAK,EAY/D;IACG,IAAI,WAAW,IAAI,IAAI;QACnB,WAAW,GAAG,MAAM,CAAC;IAEzB,IAAI,OAAO,WAAW,KAAK,QAAQ,EAAE,CAAC;QAClC,MAAM,mBAAmB,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,KAAK,CAAC,WAAW,CAAC,CAAC,CAAC;QAEjE,IAAI,wBAAwB;YACxB,OAAO,mBAAmB,CAAC;QAE/B,MAAM,SAAS,GAAG,MAAM,YAAY,EAAE,CAAC;QACvC,MAAM,WAAW,GAAG,iBAAiB,CAAC,mCAAmC,CAAC;YACtE,WAAW,EAAE,mBAAmB;YAChC,SAAS,EAAE,SAAS,IAAI,0BAA0B,CAAC,EAAC,WAAW,EAAE,mBAAmB,EAAE,SAAS,EAAC,CAAC;YACjG,cAAc,EAAE,cAAc;YAC9B,SAAS;YACT,kBAAkB;SACrB,CAAC,CAAC,OAAO,CAAC;QAEX,IAAI,WAAW,GAAG,SAAS,CAAC,IAAI;YAC5B,MAAM,IAAI,KAAK,CAAC,uBAAuB,mBAAmB,GAAG,SAAS,GAAG,CAAC,CAAC,CAAC,CAAC,SAAS,SAAS,YAAY,CAAC,CAAC,CAAC,EAAE,sCAAsC,CAAC,CAAC;QAE5J,OAAO,mBAAmB,CAAC;IAC/B,CAAC;SAAM,IAAI,WAAW,KAAK,MAAM,IAAI,OAAO,WAAW,KAAK,QAAQ,EAAE,CAAC;QACnE,IAAI,QAAQ,KAAK,KAAK;YAClB,OAAO,qBAAqB,CAAC;QAEjC,MAAM,SAAS,GAAG,MAAM,YAAY,EAAE,CAAC;QAEvC,IAAI,SAAS,CAAC,KAAK,KAAK,CAAC;YACrB,OAAO,qBAAqB,CAAC;QAEjC,MAAM,QAAQ,GAAG,SAAS,CAAC,IAAI,CAAC;QAEhC,MAAM,cAAc,GAAG,WAAW,KAAK,MAAM;YACzC,CAAC,CAAC,0BAA0B,CAAC,EAAC,gBAAgB,EAAE,qBAAqB,EAAC,CAAC;YACvE,CAAC,CAAC,IAAI,CAAC,GAAG,CACN,WAAW,CAAC,GAAG,IAAI,0BAA0B,CAAC,EAAC,gBAAgB,EAAE,qBAAqB,EAAC,CAAC,EACxF,0BAA0B,CAAC,EAAC,gBAAgB,EAAE,qBAAqB,EAAC,CAAC,CACxE,CAAC;QAEN,MAAM,cAAc,GAAG,WAAW,KAAK,MAAM;YACzC,CAAC,CAAC,mCAAmC;YACrC,CAAC,CAAC,IAAI,CAAC,GAAG,CACN,WAAW,CAAC,GAAG,IAAI,mCAAmC,EACtD,mCAAmC,CACtC,CAAC;QAEN,IAAI,4BAA4B,GAAkB,IAAI,CAAC;QACvD,IAAI,IAAI,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC,cAAc,GAAG,cAAc,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;QAC3E,KAAK,IAAI,eAAe,GAAG,cAAc,EAAE,eAAe,IAAI,
cAAc,IAAI,eAAe,IAAI,cAAc,GAAG,CAAC;YACjH,MAAM,WAAW,GAAG,iBAAiB,CAAC,mCAAmC,CAAC;gBACtE,WAAW,EAAE,eAAe;gBAC5B,SAAS,EAAE,SAAS,IAAI,0BAA0B,CAAC,EAAC,WAAW,EAAE,eAAe,EAAE,SAAS,EAAC,CAAC;gBAC7F,cAAc,EAAE,cAAc;gBAC9B,SAAS;gBACT,kBAAkB;aACrB,CAAC,CAAC,OAAO,CAAC;YAEX,IAAI,WAAW,IAAI,QAAQ,EAAE,CAAC;gBAC1B,IAAI,4BAA4B,IAAI,IAAI,IAAI,eAAe,GAAG,4BAA4B,EAAE,CAAC;oBACzF,4BAA4B,GAAG,eAAe,CAAC;oBAE/C,IAAI,IAAI,KAAK,CAAC,CAAC;wBACX,MAAM;yBACL,IAAI,IAAI,GAAG,CAAC;wBACb,IAAI,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC,CAAC,CAAC;gBAClD,CAAC;YACL,CAAC;iBAAM,IAAI,IAAI,GAAG,CAAC;gBACf,IAAI,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,KAAK,CAAC,IAAI,GAAG,CAAC,CAAC,CAAC,CAAC;YAE9C,IAAI,eAAe,IAAI,cAAc,IAAI,IAAI,KAAK,CAAC,CAAC;gBAChD,MAAM;YAEV,eAAe,IAAI,IAAI,CAAC;YACxB,IAAI,eAAe,GAAG,cAAc,EAAE,CAAC;gBACnC,eAAe,GAAG,cAAc,CAAC;gBACjC,IAAI,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;YACvD,CAAC;iBAAM,IAAI,eAAe,GAAG,cAAc,EAAE,CAAC;gBAC1C,eAAe,GAAG,cAAc,CAAC;gBACjC,IAAI,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;YACxD,CAAC;QACL,CAAC;QAED,IAAI,4BAA4B,IAAI,IAAI;YACpC,OAAO,4BAA4B,CAAC;QAExC,IAAI,wBAAwB;YACxB,OAAO,cAAc,CAAC;QAE1B,MAAM,IAAI,KAAK,CAAC,8DAA8D,cAAc,GAAG,SAAS,GAAG,CAAC,CAAC,CAAC,CAAC,SAAS,SAAS,YAAY,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;IAC1J,CAAC;IAED,MAAM,IAAI,KAAK,CAAC,0BAA0B,WAAW,GAAG,CAAC,CAAC;AAC9D,CAAC"}
|
|
@@ -4,11 +4,11 @@ import type { GgufInsights } from "../GgufInsights.js";
|
|
|
4
4
|
export declare function resolveModelGpuLayersOption(gpuLayers: LlamaModelOptions["gpuLayers"], { ggufInsights, ignoreMemorySafetyChecks, getVramState, llamaVramPaddingSize, llamaGpu, llamaSupportsGpuOffloading }: {
|
|
5
5
|
ggufInsights: GgufInsights;
|
|
6
6
|
ignoreMemorySafetyChecks?: boolean;
|
|
7
|
-
getVramState(): {
|
|
7
|
+
getVramState(): Promise<{
|
|
8
8
|
total: number;
|
|
9
9
|
free: number;
|
|
10
|
-
}
|
|
10
|
+
}>;
|
|
11
11
|
llamaVramPaddingSize: number;
|
|
12
12
|
llamaGpu: BuildGpu;
|
|
13
13
|
llamaSupportsGpuOffloading: boolean;
|
|
14
|
-
}): number
|
|
14
|
+
}): Promise<number>;
|
|
@@ -4,7 +4,7 @@ import { getDefaultContextBatchSize, getDefaultModelContextSize } from "../../..
|
|
|
4
4
|
import { minAllowedContextSizeInCalculations } from "../../../config.js";
|
|
5
5
|
import { scoreLevels } from "./scoreLevels.js";
|
|
6
6
|
const fitContextExtraMemoryPaddingPercentage = 0.5;
|
|
7
|
-
export function resolveModelGpuLayersOption(gpuLayers, { ggufInsights, ignoreMemorySafetyChecks = false, getVramState, llamaVramPaddingSize, llamaGpu, llamaSupportsGpuOffloading }) {
|
|
7
|
+
export async function resolveModelGpuLayersOption(gpuLayers, { ggufInsights, ignoreMemorySafetyChecks = false, getVramState, llamaVramPaddingSize, llamaGpu, llamaSupportsGpuOffloading }) {
|
|
8
8
|
if (gpuLayers == null)
|
|
9
9
|
gpuLayers = "auto";
|
|
10
10
|
if (!llamaSupportsGpuOffloading)
|
|
@@ -15,7 +15,7 @@ export function resolveModelGpuLayersOption(gpuLayers, { ggufInsights, ignoreMem
|
|
|
15
15
|
: ggufInsights.totalLayers;
|
|
16
16
|
if (ignoreMemorySafetyChecks)
|
|
17
17
|
return resolvedGpuLayers;
|
|
18
|
-
const vramState = getVramState();
|
|
18
|
+
const vramState = await getVramState();
|
|
19
19
|
const maxLayersRequirements = getVramRequiredForGpuLayers({
|
|
20
20
|
gpuLayers: resolvedGpuLayers,
|
|
21
21
|
ggufInsights,
|
|
@@ -28,7 +28,7 @@ export function resolveModelGpuLayersOption(gpuLayers, { ggufInsights, ignoreMem
|
|
|
28
28
|
else if (gpuLayers === "auto" || typeof gpuLayers === "object") {
|
|
29
29
|
if (llamaGpu === false)
|
|
30
30
|
return 0;
|
|
31
|
-
const vramState = getVramState();
|
|
31
|
+
const vramState = await getVramState();
|
|
32
32
|
if (vramState.total === 0)
|
|
33
33
|
return 0;
|
|
34
34
|
let freeVram = vramState.free;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"resolveModelGpuLayersOption.js","sourceRoot":"","sources":["../../../../src/gguf/insights/utils/resolveModelGpuLayersOption.ts"],"names":[],"mappings":"AAEA,OAAO,EAAC,uBAAuB,EAAC,MAAM,2CAA2C,CAAC;AAClF,OAAO,EAAC,cAAc,EAAC,MAAM,kCAAkC,CAAC;AAChE,OAAO,EAAC,0BAA0B,EAAE,0BAA0B,EAAC,MAAM,iDAAiD,CAAC;AACvH,OAAO,EAAC,mCAAmC,EAAC,MAAM,oBAAoB,CAAC;AACvE,OAAO,EAAC,WAAW,EAAC,MAAM,kBAAkB,CAAC;AAG7C,MAAM,sCAAsC,GAAG,GAAG,CAAC;AAEnD,MAAM,UAAU,2BAA2B,CAAC,SAAyC,EAAE,
|
|
1
|
+
{"version":3,"file":"resolveModelGpuLayersOption.js","sourceRoot":"","sources":["../../../../src/gguf/insights/utils/resolveModelGpuLayersOption.ts"],"names":[],"mappings":"AAEA,OAAO,EAAC,uBAAuB,EAAC,MAAM,2CAA2C,CAAC;AAClF,OAAO,EAAC,cAAc,EAAC,MAAM,kCAAkC,CAAC;AAChE,OAAO,EAAC,0BAA0B,EAAE,0BAA0B,EAAC,MAAM,iDAAiD,CAAC;AACvH,OAAO,EAAC,mCAAmC,EAAC,MAAM,oBAAoB,CAAC;AACvE,OAAO,EAAC,WAAW,EAAC,MAAM,kBAAkB,CAAC;AAG7C,MAAM,sCAAsC,GAAG,GAAG,CAAC;AAEnD,MAAM,CAAC,KAAK,UAAU,2BAA2B,CAAC,SAAyC,EAAE,EACzF,YAAY,EAAE,wBAAwB,GAAG,KAAK,EAAE,YAAY,EAAE,oBAAoB,EAClF,QAAQ,EAAE,0BAA0B,EAKvC;IACG,IAAI,SAAS,IAAI,IAAI;QACjB,SAAS,GAAG,MAAM,CAAC;IAEvB,IAAI,CAAC,0BAA0B;QAC3B,OAAO,CAAC,CAAC;IAEb,IAAI,SAAS,KAAK,KAAK,IAAI,OAAO,SAAS,KAAK,QAAQ,EAAE,CAAC;QACvD,MAAM,iBAAiB,GAAG,OAAO,SAAS,KAAK,QAAQ;YACnD,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,YAAY,CAAC,WAAW,EAAE,SAAS,CAAC,CAAC;YAC5D,CAAC,CAAC,YAAY,CAAC,WAAW,CAAC;QAE/B,IAAI,wBAAwB;YACxB,OAAO,iBAAiB,CAAC;QAE7B,MAAM,SAAS,GAAG,MAAM,YAAY,EAAE,CAAC;QACvC,MAAM,qBAAqB,GAAG,2BAA2B,CAAC;YACtD,SAAS,EAAE,iBAAiB;YAC5B,YAAY;YACZ,WAAW,EAAE,SAAS,CAAC,IAAI;SAC9B,CAAC,CAAC;QAEH,IAAI,qBAAqB,IAAI,IAAI;YAC7B,MAAM,IAAI,uBAAuB,CAAC,8DAA8D,CAAC,CAAC;QAEtG,OAAO,iBAAiB,CAAC;IAC7B,CAAC;SAAM,IAAI,SAAS,KAAK,MAAM,IAAI,OAAO,SAAS,KAAK,QAAQ,EAAE,CAAC;QAC/D,IAAI,QAAQ,KAAK,KAAK;YAClB,OAAO,CAAC,CAAC;QAEb,MAAM,SAAS,GAAG,MAAM,YAAY,EAAE,CAAC;QACvC,IAAI,SAAS,CAAC,KAAK,KAAK,CAAC;YACrB,OAAO,CAAC,CAAC;QAEb,IAAI,QAAQ,GAAG,SAAS,CAAC,IAAI,CAAC;QAC9B,IAAI,OAAO,SAAS,KAAK,QAAQ,IAAI,SAAS,CAAC,UAAU,EAAE,WAAW,IAAI,IAAI,EAAE,CAAC;YAC7E,QAAQ,IAAI,oBAAoB,GAAG,sCAAsC,CAAC;YAE1E,IAAI,QAAQ,GAAG,CAAC;gBACZ,QAAQ,GAAG,CAAC,CAAC;QACrB,CAAC;QAED,MAAM,mBAAmB,GAAG,2BAA2B,CAAC;YACpD,YAAY;YACZ,QAAQ;YACR,UAAU,EAAE,OAAO,SAAS,KAAK,QAAQ;gBACrC,CAAC,CAAC,SAAS,CAAC,UAAU;gBACtB,CAAC,CAAC,SAAS;YACf,YAAY,EAAE,OAAO,SAAS,KAAK,QAAQ;gBACvC,CAAC,CAAC,SAAS,CAAC,GAAG;gBACf,CAAC,CAAC,SAAS;YACf,YAAY,EAAE,OAAO,SAAS,KAAK,QAAQ;gBACvC,CAAC,CAAC,SAAS,CAAC,GAAG;gBACf,CAAC,CAAC,SAAS;SAClB,CAAC,CAAC;QAEH,MAAM,wBAAwB,GAAG,OAAO,
SAAS,KAAK,QAAQ;YAC1D,CAAC,SAAS,CAAC,GAAG,IAAI,IAAI,IAAI,SAAS,CAAC,GAAG,IAAI,IAAI,IAAI,SAAS,CAAC,UAAU,EAAE,WAAW,IAAI,IAAI,CAAC,CAAC;QAElG,IAAI,CAAC,wBAAwB,IAAI,mBAAmB,IAAI,IAAI,IAAI,wBAAwB;YACpF,MAAM,IAAI,uBAAuB,CAAC,8DAA8D,CAAC,CAAC;QAEtG,OAAO,mBAAmB,IAAI,CAAC,CAAC;IACpC,CAAC;IAED,MAAM,IAAI,KAAK,CAAC,4BAA4B,SAAS,EAAE,CAAC,CAAC;AAC7D,CAAC;AAED,SAAS,2BAA2B,CAAC,EACjC,YAAY,EACZ,QAAQ,EACR,UAAU,EACV,YAAY,EACZ,YAAY,EAOf;IACG,OAAO,cAAc,CAAC;QAClB,CAAC,SAAS;YACN,MAAM,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,YAAY,IAAI,CAAC,CAAC,CAAC,CAAC;YAC7D,MAAM,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,YAAY,CAAC,WAAW,EAAE,YAAY,IAAI,YAAY,CAAC,WAAW,CAAC,CAAC,CAAC;YAE3G,KAAK,IAAI,MAAM,GAAG,SAAS,EAAE,MAAM,IAAI,SAAS,EAAE,MAAM,EAAE,EAAE,CAAC;gBACzD,MAAM;oBACF,SAAS,EAAE,MAAM;iBACpB,CAAC;YACN,CAAC;QACL,CAAC;QACD,KAAK,CAAC,MAAM;YACR,MAAM,kBAAkB,GAAG,2BAA2B,CAAC;gBACnD,SAAS,EAAE,MAAM,CAAC,SAAS;gBAC3B,YAAY;gBACZ,WAAW,EAAE,QAAQ;gBACrB,UAAU;aACb,CAAC,CAAC;YAEH,IAAI,kBAAkB,IAAI,IAAI;gBAC1B,OAAO,IAAI,CAAC;YAEhB,OAAO,mCAAmC,CAAC,EAAC,SAAS,EAAE,MAAM,CAAC,SAAS,EAAE,WAAW,EAAE,kBAAkB,CAAC,WAAW,EAAC,EAAE;gBACnH,cAAc,EAAE,YAAY,CAAC,WAAW;gBACxC,gBAAgB,EAAE,0BAA0B,CAAC,EAAC,gBAAgB,EAAE,YAAY,CAAC,gBAAgB,EAAC,CAAC;aAClG,CAAC,CAAC;QACP,CAAC;KACJ,CAAC,EAAE,SAAS,IAAI,IAAI,CAAC;AAC1B,CAAC;AAED,SAAS,mCAAmC,CAAC,EAAC,SAAS,EAAE,WAAW,EAA2C,EAAE,EAC7G,cAAc,EAAE,gBAAgB,EAGnC;IACG,SAAS,cAAc;QACnB,OAAO,WAAW,CAAC,SAAS,EAAE,CAAC;gBAC3B,KAAK,EAAE,CAAC;gBACR,MAAM,EAAE,CAAC;aACZ,EAAE;gBACC,KAAK,EAAE,CAAC;gBACR,MAAM,EAAE,EAAE;aACb,EAAE;gBACC,KAAK,EAAE,cAAc;gBACrB,MAAM,EAAE,EAAE;gBACV,GAAG,EAAE,cAAc;aACtB,CAAC,CAAC,CAAC;IACR,CAAC;IAED,SAAS,gBAAgB;QACrB,MAAM,mBAAmB,GAAG,SAAS,GAAG,cAAc,CAAC;QAEvD,OAAO,WAAW,CAAC,WAAW,EAAE,CAAC;gBAC7B,KAAK,EAAE,CAAC;gBACR,MAAM,EAAE,CAAC;aACZ,EAAE;gBACC,KAAK,EAAE,IAAI;gBACX,MAAM,EAAE,CAAC;aACZ,EAAE;gBACC,KAAK,EAAE,IAAI;gBACX,MAAM,EAAE,mBAAmB,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;aAC5C,EAAE;gBACC,KAAK,EAAE,IAAI;gBACX,MAAM,EAAE,mBAAmB,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC,CA
AC,CAAC,CAAC,EAAE;aAC7C,EAAE;gBACC,KAAK,EAAE,IAAI;gBACX,MAAM,EAAE,mBAAmB,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;gBACzC,GAAG,EAAE,IAAI,CAAC,GAAG,CAAC,gBAAgB,EAAE,KAAK,CAAC;aACzC,CAAC,CAAC,CAAC;IACR,CAAC;IAED,OAAO,cAAc,EAAE,GAAG,gBAAgB,EAAE,CAAC;AACjD,CAAC;AAED,SAAS,2BAA2B,CAAC,EACjC,SAAS,EAAE,YAAY,EAAE,WAAW,EAAE,UAAU,EAGnD;IACG,MAAM,SAAS,GAAG,YAAY,CAAC,iCAAiC,CAAC,EAAC,SAAS,EAAC,CAAC,CAAC,OAAO,CAAC;IAEtF,IAAI,SAAS,GAAG,WAAW;QACvB,OAAO,IAAI,CAAC;IAEhB,IAAI,UAAU,IAAI,IAAI,IAAI,UAAU,CAAC,WAAW,IAAI,IAAI,EAAE,CAAC;QACvD,MAAM,WAAW,GAAG,YAAY,CAAC,mCAAmC,CAAC;YACjE,WAAW,EAAE,UAAU,CAAC,WAAW;YACnC,SAAS,EAAE,0BAA0B,CAAC,EAAC,WAAW,EAAE,UAAU,CAAC,WAAW,EAAE,SAAS,EAAE,CAAC,EAAC,CAAC;YAC1F,cAAc,EAAE,SAAS;YACzB,SAAS,EAAE,CAAC;YACZ,kBAAkB,EAAE,UAAU,CAAC,gBAAgB,IAAI,KAAK;SAC3D,CAAC,CAAC,OAAO,CAAC;QAEX,MAAM,SAAS,GAAG,SAAS,GAAG,WAAW,CAAC;QAC1C,IAAI,SAAS,GAAG,WAAW;YACvB,OAAO,IAAI,CAAC;QAEhB,OAAO;YACH,WAAW,EAAE,UAAU,CAAC,WAAW;YACnC,WAAW;YACX,SAAS;SACZ,CAAC;IACN,CAAC;IAED,MAAM,UAAU,GAAG,iCAAiC,CAAC;QACjD,SAAS;QACT,YAAY;QACZ,IAAI,EAAE,WAAW,GAAG,SAAS;QAC7B,kBAAkB,EAAE,UAAU,EAAE,gBAAgB,IAAI,KAAK;KAC5D,CAAC,CAAC;IAEH,IAAI,UAAU,IAAI,IAAI,IAAI,SAAS,GAAG,UAAU,CAAC,IAAI,GAAG,WAAW;QAC/D,OAAO,IAAI,CAAC;IAEhB,OAAO;QACH,WAAW,EAAE,UAAU,CAAC,WAAW;QACnC,WAAW,EAAE,UAAU,CAAC,IAAI;QAC5B,SAAS,EAAE,SAAS,GAAG,UAAU,CAAC,IAAI;KACzC,CAAC;AACN,CAAC;AAED,SAAS,iCAAiC,CAAC,EAAC,SAAS,EAAE,YAAY,EAAE,IAAI,EAAE,kBAAkB,EAE5F;IACG,MAAM,cAAc,GAAG,0BAA0B,CAAC,EAAC,gBAAgB,EAAE,YAAY,CAAC,gBAAgB,EAAC,CAAC,CAAC;IAErG,KAAK,IAAI,WAAW,GAAG,cAAc,EAAE,WAAW,IAAI,mCAAmC,EAAE,WAAW,EAAE,EAAE,CAAC;QACvG,MAAM,WAAW,GAAG,YAAY,CAAC,mCAAmC,CAAC;YACjE,WAAW;YACX,SAAS,EAAE,0BAA0B,CAAC,EAAC,WAAW,EAAE,SAAS,EAAE,CAAC,EAAC,CAAC;YAClE,cAAc,EAAE,SAAS;YACzB,SAAS,EAAE,CAAC;YACZ,kBAAkB;SACrB,CAAC,CAAC,OAAO,CAAC;QAEX,IAAI,WAAW,IAAI,IAAI;YACnB,OAAO;gBACH,WAAW;gBACX,IAAI,EAAE,WAAW;aACpB,CAAC;IACV,CAAC;IAED,OAAO,IAAI,CAAC;AAChB,CAAC"}
|
package/llama/gitRelease.bundle
CHANGED
|
Binary file
|
package/llama/grammars/README.md
CHANGED
|
@@ -59,9 +59,13 @@ Parentheses `()` can be used to group sequences, which allows for embedding alte
|
|
|
59
59
|
|
|
60
60
|
## Repetition and Optional Symbols
|
|
61
61
|
|
|
62
|
-
- `*` after a symbol or sequence means that it can be repeated zero or more times.
|
|
63
|
-
- `+` denotes that the symbol or sequence should appear one or more times.
|
|
64
|
-
- `?` makes the preceding symbol or sequence optional.
|
|
62
|
+
- `*` after a symbol or sequence means that it can be repeated zero or more times (equivalent to `{0,}`).
|
|
63
|
+
- `+` denotes that the symbol or sequence should appear one or more times (equivalent to `{1,}`).
|
|
64
|
+
- `?` makes the preceding symbol or sequence optional (equivalent to `{0,1}`).
|
|
65
|
+
- `{m}` repeats the preceding symbol or sequence exactly `m` times
|
|
66
|
+
- `{m,}` repeats the preceding symbol or sequence at least `m` times
|
|
67
|
+
- `{m,n}` repeats the preceding symbol or sequence between `m` and `n` times (inclusive)
|
|
68
|
+
- `{0,n}` repeats the preceding symbol or sequence at most `n` times (inclusive)
|
|
65
69
|
|
|
66
70
|
## Comments and newlines
|
|
67
71
|
|
|
@@ -90,6 +94,8 @@ This guide provides a brief overview. Check out the GBNF files in this directory
|
|
|
90
94
|
./main -m <model> --grammar-file grammars/some-grammar.gbnf -p 'Some prompt'
|
|
91
95
|
```
|
|
92
96
|
|
|
97
|
+
`llama.cpp` can also convert JSON schemas to grammars, either ahead of time or at each request; see below.
|
|
98
|
+
|
|
93
99
|
## Troubleshooting
|
|
94
100
|
|
|
95
101
|
Grammars currently have performance gotchas (see https://github.com/ggerganov/llama.cpp/issues/4218).
|
|
@@ -98,4 +104,41 @@ Grammars currently have performance gotchas (see https://github.com/ggerganov/ll
|
|
|
98
104
|
|
|
99
105
|
A common pattern is to allow repetitions of a pattern `x` up to N times.
|
|
100
106
|
|
|
101
|
-
While semantically correct, the syntax `x? x? x?.... x?` (with N repetitions)
|
|
107
|
+
While semantically correct, the syntax `x? x? x? ... x?` (with N repetitions) may result in extremely slow sampling. Instead, you can write `x{0,N}` (or `(x (x (x ... (x)?...)?)?)?` with N-deep nesting in earlier llama.cpp versions).
|
|
108
|
+
|
|
109
|
+
## Using GBNF grammars
|
|
110
|
+
|
|
111
|
+
You can use GBNF grammars:
|
|
112
|
+
|
|
113
|
+
- In the [server](../examples/server)'s completion endpoints, passed as the `grammar` body field
|
|
114
|
+
- In the [main](../examples/main) CLI, passed as the `--grammar` & `--grammar-file` flags
|
|
115
|
+
- With the [gbnf-validator](../examples/gbnf-validator) tool, to test them against strings.
|
|
116
|
+
|
|
117
|
+
## JSON Schemas → GBNF
|
|
118
|
+
|
|
119
|
+
`llama.cpp` supports converting a subset of https://json-schema.org/ to GBNF grammars:
|
|
120
|
+
|
|
121
|
+
- In the [server](../examples/server):
|
|
122
|
+
- For any completion endpoints, passed as the `json_schema` body field
|
|
123
|
+
- For the `/chat/completions` endpoint, passed inside the `response_format` body field (e.g. `{"type": "json_object", "schema": {"items": {}}}`)
|
|
124
|
+
- In the [main](../examples/main) CLI, passed as the `--json` / `-j` flag
|
|
125
|
+
- To convert to a grammar ahead of time:
|
|
126
|
+
- in CLI, with [json_schema_to_grammar.py](../examples/json_schema_to_grammar.py)
|
|
127
|
+
- in JavaScript with [json-schema-to-grammar.mjs](../examples/server/public/json-schema-to-grammar.mjs) (this is used by the [server](../examples/server)'s Web UI)
|
|
128
|
+
|
|
129
|
+
Take a look at the [tests](../tests/test-json-schema-to-grammar.cpp) to see which features are likely supported (you'll also find usage examples in https://github.com/ggerganov/llama.cpp/pull/5978, https://github.com/ggerganov/llama.cpp/pull/6659 & https://github.com/ggerganov/llama.cpp/pull/6555).
|
|
130
|
+
|
|
131
|
+
Here is also a non-exhaustive list of **unsupported** features:
|
|
132
|
+
|
|
133
|
+
- `additionalProperties`: to be fixed in https://github.com/ggerganov/llama.cpp/pull/7840
|
|
134
|
+
- `minimum`, `exclusiveMinimum`, `maximum`, `exclusiveMaximum`
|
|
135
|
+
- `integer` constraints to be implemented in https://github.com/ggerganov/llama.cpp/pull/7797
|
|
136
|
+
- Remote `$ref`s in the C++ version (Python & JavaScript versions fetch https refs)
|
|
137
|
+
- Mixing `properties` with `anyOf` / `oneOf` in the same type (https://github.com/ggerganov/llama.cpp/issues/7703)
|
|
138
|
+
- `string` formats `uri`, `email`
|
|
139
|
+
- [`contains`](https://json-schema.org/draft/2020-12/json-schema-core#name-contains) / `minContains`
|
|
140
|
+
- `uniqueItems`
|
|
141
|
+
- `$anchor` (cf. [dereferencing](https://json-schema.org/draft/2020-12/json-schema-core#name-dereferencing))
|
|
142
|
+
- [`not`](https://json-schema.org/draft/2020-12/json-schema-core#name-not)
|
|
143
|
+
- [Conditionals](https://json-schema.org/draft/2020-12/json-schema-core#name-keywords-for-applying-subsche) `if` / `then` / `else` / `dependentSchemas`
|
|
144
|
+
- [`patternProperties`](https://json-schema.org/draft/2020-12/json-schema-core#name-patternproperties)
|
package/llama/grammars/json.gbnf
CHANGED
|
@@ -16,10 +16,10 @@ array ::=
|
|
|
16
16
|
string ::=
|
|
17
17
|
"\"" (
|
|
18
18
|
[^"\\\x7F\x00-\x1F] |
|
|
19
|
-
"\\" (["
|
|
19
|
+
"\\" (["\\bfnrt] | "u" [0-9a-fA-F]{4}) # escapes
|
|
20
20
|
)* "\"" ws
|
|
21
21
|
|
|
22
|
-
number ::= ("-"? ([0-9] | [1-9] [0-9]
|
|
22
|
+
number ::= ("-"? ([0-9] | [1-9] [0-9]{0,15})) ("." [0-9]+)? ([eE] [-+]? [1-9] [0-9]{0,15})? ws # exponent: 1-16 digits, no leading zero (so e.g. 1e10 is accepted)
|
|
23
23
|
|
|
24
24
|
# Optional space: by convention, applied in this grammar after literal chars when allowed
|
|
25
|
-
ws ::=
|
|
25
|
+
ws ::= | " " | "\n" [ \t]{0,20}
|
|
@@ -25,10 +25,10 @@ array ::=
|
|
|
25
25
|
string ::=
|
|
26
26
|
"\"" (
|
|
27
27
|
[^"\\\x7F\x00-\x1F] |
|
|
28
|
-
"\\" (["
|
|
28
|
+
"\\" (["\\bfnrt] | "u" [0-9a-fA-F]{4}) # escapes
|
|
29
29
|
)* "\"" ws
|
|
30
30
|
|
|
31
|
-
number ::= ("-"? ([0-9] | [1-9] [0-9]
|
|
31
|
+
number ::= ("-"? ([0-9] | [1-9] [0-9]{0,15})) ("." [0-9]+)? ([eE] [-+]? [1-9] [0-9]{0,15})? ws
|
|
32
32
|
|
|
33
33
|
# Optional space: by convention, applied in this grammar after literal chars when allowed
|
|
34
|
-
ws ::=
|
|
34
|
+
ws ::= | " " | "\n" [ \t]{0,20}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "node-llama-cpp",
|
|
3
|
-
"version": "3.0.0-beta.
|
|
3
|
+
"version": "3.0.0-beta.27",
|
|
4
4
|
"description": "Run AI models locally on your machine with node.js bindings for llama.cpp. Force a JSON schema on the model output on the generation level",
|
|
5
5
|
"main": "./dist/index.js",
|
|
6
6
|
"type": "module",
|