node-llama-cpp 2.8.4 → 2.8.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/dist/cli/commands/ChatCommand.d.ts +1 -0
- package/dist/cli/commands/ChatCommand.js +11 -3
- package/dist/cli/commands/ChatCommand.js.map +1 -1
- package/dist/llamaEvaluator/LlamaContext.d.ts +1 -0
- package/dist/llamaEvaluator/LlamaContext.js +3 -0
- package/dist/llamaEvaluator/LlamaContext.js.map +1 -1
- package/dist/utils/getBin.d.ts +1 -0
- package/llama/addon.cpp +8 -0
- package/llama/binariesGithubRelease.json +1 -1
- package/llama/gitRelease.bundle +0 -0
- package/llamaBins/linux-arm64/llama-addon.node +0 -0
- package/llamaBins/linux-armv7l/llama-addon.node +0 -0
- package/llamaBins/linux-x64/llama-addon.node +0 -0
- package/llamaBins/mac-arm64/ggml-metal.metal +303 -4
- package/llamaBins/mac-arm64/llama-addon.node +0 -0
- package/llamaBins/mac-x64/ggml-metal.metal +303 -4
- package/llamaBins/mac-x64/llama-addon.node +0 -0
- package/llamaBins/win-x64/llama-addon.node +0 -0
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -15,7 +15,7 @@
|
|
|
15
15
|
|
|
16
16
|
</div>
|
|
17
17
|
|
|
18
|
-
✨ New! [Try the beta of version `3.0.0`](https://github.com/withcatai/node-llama-cpp/pull/105) ✨ (included: function calling, automatic chat wrapper detection, and more)
|
|
18
|
+
✨ New! [Try the beta of version `3.0.0`](https://github.com/withcatai/node-llama-cpp/pull/105) ✨ (included: function calling, automatic chat wrapper detection, embedding support, and more)
|
|
19
19
|
|
|
20
20
|
## Features
|
|
21
21
|
* Run a text generation model locally on your machine
|
|
@@ -32,6 +32,12 @@ export const ChatCommand = {
|
|
|
32
32
|
default: false,
|
|
33
33
|
description: "Print llama.cpp system info",
|
|
34
34
|
group: "Optional:"
|
|
35
|
+
})
|
|
36
|
+
.option("printTimings", {
|
|
37
|
+
type: "boolean",
|
|
38
|
+
default: false,
|
|
39
|
+
description: "Print llama.cpp timings",
|
|
40
|
+
group: "Optional:"
|
|
35
41
|
})
|
|
36
42
|
.option("systemPrompt", {
|
|
37
43
|
alias: "s",
|
|
@@ -157,12 +163,12 @@ export const ChatCommand = {
|
|
|
157
163
|
group: "Optional:"
|
|
158
164
|
});
|
|
159
165
|
},
|
|
160
|
-
async handler({ model, systemInfo, systemPrompt, prompt, wrapper, contextSize, grammar, jsonSchemaGrammarFile, threads, temperature, topK, topP, gpuLayers, repeatPenalty, lastTokensRepeatPenalty, penalizeRepeatingNewLine, repeatFrequencyPenalty, repeatPresencePenalty, maxTokens, noHistory }) {
|
|
166
|
+
async handler({ model, systemInfo, systemPrompt, prompt, wrapper, contextSize, grammar, jsonSchemaGrammarFile, threads, temperature, topK, topP, gpuLayers, repeatPenalty, lastTokensRepeatPenalty, penalizeRepeatingNewLine, repeatFrequencyPenalty, repeatPresencePenalty, maxTokens, noHistory, printTimings }) {
|
|
161
167
|
try {
|
|
162
168
|
await RunChat({
|
|
163
169
|
model, systemInfo, systemPrompt, prompt, wrapper, contextSize, grammar, jsonSchemaGrammarFile, threads, temperature, topK,
|
|
164
170
|
topP, gpuLayers, lastTokensRepeatPenalty, repeatPenalty, penalizeRepeatingNewLine, repeatFrequencyPenalty,
|
|
165
|
-
repeatPresencePenalty, maxTokens, noHistory
|
|
171
|
+
repeatPresencePenalty, maxTokens, noHistory, printTimings
|
|
166
172
|
});
|
|
167
173
|
}
|
|
168
174
|
catch (err) {
|
|
@@ -171,7 +177,7 @@ export const ChatCommand = {
|
|
|
171
177
|
}
|
|
172
178
|
}
|
|
173
179
|
};
|
|
174
|
-
async function RunChat({ model: modelArg, systemInfo, systemPrompt, prompt, wrapper, contextSize, grammar: grammarArg, jsonSchemaGrammarFile: jsonSchemaGrammarFilePath, threads, temperature, topK, topP, gpuLayers, lastTokensRepeatPenalty, repeatPenalty, penalizeRepeatingNewLine, repeatFrequencyPenalty, repeatPresencePenalty, maxTokens, noHistory }) {
|
|
180
|
+
async function RunChat({ model: modelArg, systemInfo, systemPrompt, prompt, wrapper, contextSize, grammar: grammarArg, jsonSchemaGrammarFile: jsonSchemaGrammarFilePath, threads, temperature, topK, topP, gpuLayers, lastTokensRepeatPenalty, repeatPenalty, penalizeRepeatingNewLine, repeatFrequencyPenalty, repeatPresencePenalty, maxTokens, noHistory, printTimings }) {
|
|
175
181
|
const { LlamaChatSession } = await import("../../llamaEvaluator/LlamaChatSession.js");
|
|
176
182
|
const { LlamaModel } = await import("../../llamaEvaluator/LlamaModel.js");
|
|
177
183
|
const { LlamaContext } = await import("../../llamaEvaluator/LlamaContext.js");
|
|
@@ -276,6 +282,8 @@ async function RunChat({ model: modelArg, systemInfo, systemPrompt, prompt, wrap
|
|
|
276
282
|
});
|
|
277
283
|
process.stdout.write(endColor);
|
|
278
284
|
console.log();
|
|
285
|
+
if (printTimings)
|
|
286
|
+
context.printTimings();
|
|
279
287
|
}
|
|
280
288
|
}
|
|
281
289
|
function getChatWrapper(wrapper, bos) {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"ChatCommand.js","sourceRoot":"","sources":["../../../src/cli/commands/ChatCommand.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,QAAQ,MAAM,UAAU,CAAC;AACrC,OAAO,OAAO,MAAM,SAAS,CAAC;AAC9B,OAAO,IAAI,MAAM,MAAM,CAAC;AAExB,OAAO,KAAK,MAAM,OAAO,CAAC;AAC1B,OAAO,EAAE,MAAM,UAAU,CAAC;AAC1B,OAAO,OAAO,MAAM,wBAAwB,CAAC;AAC7C,OAAO,EAAC,0BAA0B,EAAE,uBAAuB,EAAC,MAAM,iBAAiB,CAAC;AACpF,OAAO,EAAC,sBAAsB,EAAC,MAAM,8CAA8C,CAAC;AACpF,OAAO,EAAC,wBAAwB,EAAC,MAAM,gDAAgD,CAAC;AACxF,OAAO,EAAC,uBAAuB,EAAC,MAAM,+CAA+C,CAAC;AACtF,OAAO,EAAC,mBAAmB,EAAC,MAAM,8CAA8C,CAAC;AAEjF,OAAO,EAAC,uBAAuB,EAAC,MAAM,+CAA+C,CAAC;AACtF,OAAO,EAAC,wBAAwB,EAAC,MAAM,gBAAgB,CAAC;AACxD,OAAO,EAAC,WAAW,EAAC,MAAM,4BAA4B,CAAC;AAGvD,MAAM,aAAa,GAAG,CAAC,MAAM,EAAE,SAAS,EAAE,WAAW,EAAE,QAAQ,EAAE,YAAY,CAAU,CAAC;
|
|
1
|
+
{"version":3,"file":"ChatCommand.js","sourceRoot":"","sources":["../../../src/cli/commands/ChatCommand.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,QAAQ,MAAM,UAAU,CAAC;AACrC,OAAO,OAAO,MAAM,SAAS,CAAC;AAC9B,OAAO,IAAI,MAAM,MAAM,CAAC;AAExB,OAAO,KAAK,MAAM,OAAO,CAAC;AAC1B,OAAO,EAAE,MAAM,UAAU,CAAC;AAC1B,OAAO,OAAO,MAAM,wBAAwB,CAAC;AAC7C,OAAO,EAAC,0BAA0B,EAAE,uBAAuB,EAAC,MAAM,iBAAiB,CAAC;AACpF,OAAO,EAAC,sBAAsB,EAAC,MAAM,8CAA8C,CAAC;AACpF,OAAO,EAAC,wBAAwB,EAAC,MAAM,gDAAgD,CAAC;AACxF,OAAO,EAAC,uBAAuB,EAAC,MAAM,+CAA+C,CAAC;AACtF,OAAO,EAAC,mBAAmB,EAAC,MAAM,8CAA8C,CAAC;AAEjF,OAAO,EAAC,uBAAuB,EAAC,MAAM,+CAA+C,CAAC;AACtF,OAAO,EAAC,wBAAwB,EAAC,MAAM,gBAAgB,CAAC;AACxD,OAAO,EAAC,WAAW,EAAC,MAAM,4BAA4B,CAAC;AAGvD,MAAM,aAAa,GAAG,CAAC,MAAM,EAAE,SAAS,EAAE,WAAW,EAAE,QAAQ,EAAE,YAAY,CAAU,CAAC;AA0BxF,MAAM,CAAC,MAAM,WAAW,GAAuC;IAC3D,OAAO,EAAE,MAAM;IACf,QAAQ,EAAE,yBAAyB;IACnC,OAAO,CAAC,KAAK;QACT,MAAM,qBAAqB,GAAG,wBAAwB,EAAE,CAAC;QAEzD,OAAO,KAAK;aACP,MAAM,CAAC,OAAO,EAAE;YACb,KAAK,EAAE,GAAG;YACV,IAAI,EAAE,QAAQ;YACd,YAAY,EAAE,IAAI;YAClB,WAAW,EAAE,sCAAsC;YACnD,KAAK,EAAE,WAAW;SACrB,CAAC;aACD,MAAM,CAAC,YAAY,EAAE;YAClB,KAAK,EAAE,GAAG;YACV,IAAI,EAAE,SAAS;YACf,OAAO,EAAE,KAAK;YACd,WAAW,EAAE,6BAA6B;YAC1C,KAAK,EAAE,WAAW;SACrB,CAAC;aACD,MAAM,CAAC,cAAc,EAAE;YACpB,IAAI,EAAE,SAAS;YACf,OAAO,EAAE,KAAK;YACd,WAAW,EAAE,yBAAyB;YACtC,KAAK,EAAE,WAAW;SACrB,CAAC;aACD,MAAM,CAAC,cAAc,EAAE;YACpB,KAAK,EAAE,GAAG;YACV,IAAI,EAAE,QAAQ;YACd,OAAO,EAAE,uBAAuB;YAChC,kBAAkB,EAAE,GAAG;YACvB,WAAW,EACP,wCAAwC;gBACxC,CAAC,qBAAqB,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,oBAAoB,GAAG,uBAAuB,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,GAAG,CAAC,CAAC;YAC/G,KAAK,EAAE,WAAW;SACrB,CAAC;aACD,MAAM,CAAC,QAAQ,EAAE;YACd,IAAI,EAAE,QAAQ;YACd,WAAW,EAAE,wEAAwE;YACrF,KAAK,EAAE,WAAW;SACrB,CAAC;aACD,MAAM,CAAC,SAAS,EAAE;YACf,KAAK,EAAE,GAAG;YACV,IAAI,EAAE,QAAQ;YACd,OAAO,EAAE,SAAmC;YAC5C,OAAO,EAAE,aAAa;YACtB,WAAW,EAAE,kGAAkG;YAC/G,KAAK,EAAE,WAAW;SACrB,CAAC;aACD,MAAM,CAAC,aAAa,EAAE;YACnB,KAAK,EAAE,GAAG;YACV,IAAI,EAAE,QAAQ;YACd,OAAO,EAAE,IAAI,GAAG,CAAC;YAC
jB,WAAW,EAAE,mCAAmC;YAChD,KAAK,EAAE,WAAW;SACrB,CAAC;aACD,MAAM,CAAC,SAAS,EAAE;YACf,KAAK,EAAE,GAAG;YACV,IAAI,EAAE,QAAQ;YACd,OAAO,EAAE,MAAgC;YACzC,OAAO,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,YAAY,EAAE,UAAU,EAAE,OAAO,CAAoC;YACvG,WAAW,EAAE,0EAA0E;YACvF,KAAK,EAAE,WAAW;SACrB,CAAC;aACD,MAAM,CAAC,uBAAuB,EAAE;YAC7B,KAAK,EAAE,CAAC,MAAM,CAAC;YACf,IAAI,EAAE,QAAQ;YACd,WAAW,EAAE,0HAA0H;YACvI,KAAK,EAAE,WAAW;SACrB,CAAC;aACD,MAAM,CAAC,SAAS,EAAE;YACf,IAAI,EAAE,QAAQ;YACd,OAAO,EAAE,CAAC;YACV,WAAW,EAAE,uDAAuD;YACpE,KAAK,EAAE,WAAW;SACrB,CAAC;aACD,MAAM,CAAC,aAAa,EAAE;YACnB,KAAK,EAAE,GAAG;YACV,IAAI,EAAE,QAAQ;YACd,OAAO,EAAE,CAAC;YACV,WAAW,EAAE,6jBAA6jB;YAC1kB,KAAK,EAAE,WAAW;SACrB,CAAC;aACD,MAAM,CAAC,MAAM,EAAE;YACZ,KAAK,EAAE,GAAG;YACV,IAAI,EAAE,QAAQ;YACd,OAAO,EAAE,EAAE;YACX,WAAW,EAAE,4SAA4S;YACzT,KAAK,EAAE,WAAW;SACrB,CAAC;aACD,MAAM,CAAC,MAAM,EAAE;YACZ,KAAK,EAAE,GAAG;YACV,IAAI,EAAE,QAAQ;YACd,OAAO,EAAE,IAAI;YACb,WAAW,EAAE,qRAAqR;YAClS,KAAK,EAAE,WAAW;SACrB,CAAC;aACD,MAAM,CAAC,WAAW,EAAE;YACjB,KAAK,EAAE,IAAI;YACX,IAAI,EAAE,QAAQ;YACd,WAAW,EAAE,mCAAmC;YAChD,KAAK,EAAE,WAAW;SACrB,CAAC;aACD,MAAM,CAAC,eAAe,EAAE;YACrB,KAAK,EAAE,IAAI;YACX,IAAI,EAAE,QAAQ;YACd,OAAO,EAAE,GAAG;YACZ,WAAW,EAAE,kFAAkF;YAC/F,KAAK,EAAE,WAAW;SACrB,CAAC;aACD,MAAM,CAAC,yBAAyB,EAAE;YAC/B,KAAK,EAAE,KAAK;YACZ,IAAI,EAAE,QAAQ;YACd,OAAO,EAAE,EAAE;YACX,WAAW,EAAE,oFAAoF;YACjG,KAAK,EAAE,WAAW;SACrB,CAAC;aACD,MAAM,CAAC,0BAA0B,EAAE;YAChC,KAAK,EAAE,MAAM;YACb,IAAI,EAAE,SAAS;YACf,OAAO,EAAE,IAAI;YACb,WAAW,EAAE,6FAA6F;YAC1G,KAAK,EAAE,WAAW;SACrB,CAAC;aACD,MAAM,CAAC,wBAAwB,EAAE;YAC9B,KAAK,EAAE,KAAK;YACZ,IAAI,EAAE,QAAQ;YACd,WAAW,EAAE,yJAAyJ;YACtK,KAAK,EAAE,WAAW;SACrB,CAAC;aACD,MAAM,CAAC,uBAAuB,EAAE;YAC7B,KAAK,EAAE,KAAK;YACZ,IAAI,EAAE,QAAQ;YACd,WAAW,EAAE,+IAA+I;YAC5J,KAAK,EAAE,WAAW;SACrB,CAAC;aACD,MAAM,CAAC,WAAW,EAAE;YACjB,KAAK,EAAE,IAAI;YACX,IAAI,EAAE,QAAQ;YACd,OAAO,EAAE,CAAC;YACV,WAAW,EAAE,kHAAkH;YAC/H,KAAK,EAAE,WAAW;SACrB,CAAC;aACD,MAAM,CAAC,WAAW,EAAE;YACjB,KAAK,EAAE,IAAI;YACX,IAAI,EAAE,SAAS;YACf,OAAO,EAAE,KAAK;YACd,WAAW,EAAE,iCAAiC;YAC9C,KAAK,EAAE,W
AAW;SACrB,CAAC,CAAC;IACX,CAAC;IACD,KAAK,CAAC,OAAO,CAAC,EACV,KAAK,EAAE,UAAU,EAAE,YAAY,EAAE,MAAM,EAAE,OAAO,EAAE,WAAW,EAC7D,OAAO,EAAE,qBAAqB,EAAE,OAAO,EAAE,WAAW,EAAE,IAAI,EAAE,IAAI,EAChE,SAAS,EAAE,aAAa,EAAE,uBAAuB,EAAE,wBAAwB,EAC3E,sBAAsB,EAAE,qBAAqB,EAAE,SAAS,EAAE,SAAS,EAAE,YAAY,EACpF;QACG,IAAI;YACA,MAAM,OAAO,CAAC;gBACV,KAAK,EAAE,UAAU,EAAE,YAAY,EAAE,MAAM,EAAE,OAAO,EAAE,WAAW,EAAE,OAAO,EAAE,qBAAqB,EAAE,OAAO,EAAE,WAAW,EAAE,IAAI;gBACzH,IAAI,EAAE,SAAS,EAAE,uBAAuB,EAAE,aAAa,EAAE,wBAAwB,EAAE,sBAAsB;gBACzG,qBAAqB,EAAE,SAAS,EAAE,SAAS,EAAE,YAAY;aAC5D,CAAC,CAAC;SACN;QAAC,OAAO,GAAG,EAAE;YACV,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;YACnB,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;SACnB;IACL,CAAC;CACJ,CAAC;AAGF,KAAK,UAAU,OAAO,CAAC,EACnB,KAAK,EAAE,QAAQ,EAAE,UAAU,EAAE,YAAY,EAAE,MAAM,EAAE,OAAO,EAAE,WAAW,EAAE,OAAO,EAAE,UAAU,EAC5F,qBAAqB,EAAE,yBAAyB,EAAE,OAAO,EAAE,WAAW,EAAE,IAAI,EAAE,IAAI,EAAE,SAAS,EAAE,uBAAuB,EAAE,aAAa,EACrI,wBAAwB,EAAE,sBAAsB,EAAE,qBAAqB,EAAE,SAAS,EAAE,SAAS,EAAE,YAAY,EACjG;IACV,MAAM,EAAC,gBAAgB,EAAC,GAAG,MAAM,MAAM,CAAC,0CAA0C,CAAC,CAAC;IACpF,MAAM,EAAC,UAAU,EAAC,GAAG,MAAM,MAAM,CAAC,oCAAoC,CAAC,CAAC;IACxE,MAAM,EAAC,YAAY,EAAC,GAAG,MAAM,MAAM,CAAC,sCAAsC,CAAC,CAAC;IAC5E,MAAM,EAAC,YAAY,EAAC,GAAG,MAAM,MAAM,CAAC,sCAAsC,CAAC,CAAC;IAC5E,MAAM,EAAC,sBAAsB,EAAC,GAAG,MAAM,MAAM,CAAC,gDAAgD,CAAC,CAAC;IAEhG,IAAI,aAAa,GAAG,MAAM,IAAI,IAAI,CAAC;IACnC,MAAM,KAAK,GAAG,IAAI,UAAU,CAAC;QACzB,SAAS,EAAE,IAAI,CAAC,OAAO,CAAC,OAAO,CAAC,GAAG,EAAE,EAAE,QAAQ,CAAC;QAChD,SAAS,EAAE,SAAS,IAAI,IAAI,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,SAAS;KACvD,CAAC,CAAC;IACH,MAAM,OAAO,GAAG,IAAI,YAAY,CAAC;QAC7B,KAAK;QACL,WAAW;QACX,OAAO;KACV,CAAC,CAAC;IACH,MAAM,OAAO,GAAG,yBAAyB,IAAI,IAAI;QAC7C,CAAC,CAAC,IAAI,sBAAsB,CACxB,MAAM,EAAE,CAAC,QAAQ,CACb,IAAI,CAAC,OAAO,CAAC,OAAO,CAAC,GAAG,EAAE,EAAE,yBAAyB,CAAC,CACzD,CACJ;QACD,CAAC,CAAC,UAAU,KAAK,MAAM;YACnB,CAAC,CAAC,MAAM,YAAY,CAAC,MAAM,CAAC,UAAU,CAAC;YACvC,CAAC,CAAC,SAAS,CAAC;IACpB,MAAM,GAAG,GAAG,OAAO,CAAC,YAAY,EAAE,CAAC,CAAC,8BAA8B;IAClE,MAAM,GAAG,GAAG,OAAO,CAAC,YAAY,EAAE,CAAC,CAAC,wBAAwB;IAC5D,
MAAM,aAAa,GAAG,cAAc,CAAC,OAAO,EAAE,GAAG,CAAC,CAAC;IACnD,MAAM,OAAO,GAAG,IAAI,gBAAgB,CAAC;QACjC,OAAO;QACP,oBAAoB,EAAE,UAAU;QAChC,YAAY;QACZ,aAAa;KAChB,CAAC,CAAC;IAEH,IAAI,UAAU,IAAI,MAAM,IAAI,yBAAyB,IAAI,IAAI;QACzD,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,kGAAkG,CAAC,CAAC,CAAC;IAEnI,OAAO,CAAC,IAAI,CAAC,GAAG,KAAK,CAAC,MAAM,CAAC,MAAM,CAAC,IAAI,GAAG,EAAE,CAAC,CAAC;IAC/C,OAAO,CAAC,IAAI,CAAC,GAAG,KAAK,CAAC,MAAM,CAAC,MAAM,CAAC,IAAI,GAAG,EAAE,CAAC,CAAC;IAC/C,OAAO,CAAC,IAAI,CAAC,GAAG,KAAK,CAAC,MAAM,CAAC,eAAe,CAAC,IAAI,aAAa,CAAC,WAAW,EAAE,CAAC,CAAC;IAC9E,OAAO,CAAC,IAAI,CAAC,GAAG,KAAK,CAAC,MAAM,CAAC,iBAAiB,CAAC,IAAI,aAAa,mBAAmB,uBAAuB,UAAU,CAAC,CAAC;IAEtH,IAAI,sBAAsB,IAAI,IAAI;QAC9B,OAAO,CAAC,IAAI,CAAC,GAAG,KAAK,CAAC,MAAM,CAAC,2BAA2B,CAAC,IAAI,sBAAsB,EAAE,CAAC,CAAC;IAE3F,IAAI,qBAAqB,IAAI,IAAI;QAC7B,OAAO,CAAC,IAAI,CAAC,GAAG,KAAK,CAAC,MAAM,CAAC,0BAA0B,CAAC,IAAI,qBAAqB,EAAE,CAAC,CAAC;IAEzF,IAAI,CAAC,wBAAwB;QACzB,OAAO,CAAC,IAAI,CAAC,GAAG,KAAK,CAAC,MAAM,CAAC,8BAA8B,CAAC,WAAW,CAAC,CAAC;IAE7E,IAAI,yBAAyB,IAAI,IAAI;QACjC,OAAO,CAAC,IAAI,CAAC,GAAG,KAAK,CAAC,MAAM,CAAC,2BAA2B,CAAC,IACrD,IAAI,CAAC,QAAQ,CAAC,OAAO,CAAC,GAAG,EAAE,EAAE,IAAI,CAAC,OAAO,CAAC,OAAO,CAAC,GAAG,EAAE,EAAE,yBAAyB,CAAC,CACvF,EAAE,CAAC,CAAC;SACH,IAAI,UAAU,KAAK,MAAM;QAC1B,OAAO,CAAC,IAAI,CAAC,GAAG,KAAK,CAAC,MAAM,CAAC,UAAU,CAAC,IAAI,UAAU,EAAE,CAAC,CAAC;IAE9D,MAAM,OAAO,CAAC;QACV,OAAO,EAAE,KAAK,CAAC,IAAI,CAAC,eAAe,CAAC;QACpC,OAAO,EAAE,KAAK,CAAC,IAAI,CAAC,cAAc,CAAC;QACnC,IAAI,EAAE,KAAK,CAAC,IAAI,CAAC,sBAAsB,CAAC;KAC3C,EAAE,KAAK,IAAI,EAAE;QACV,MAAM,OAAO,CAAC,IAAI,EAAE,CAAC;IACzB,CAAC,CAAC,CAAC;IAEH,iDAAiD;IACjD,MAAM,IAAI,OAAO,CAAC,OAAO,CAAC,EAAE,CAAC,UAAU,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC,CAAC;IAErD,MAAM,WAAW,GAAG,MAAM,WAAW,CAAC,IAAI,CAAC,0BAA0B,EAAE,CAAC,SAAS,CAAC,CAAC;IAEnF,KAAK,UAAU,SAAS;QACpB,MAAM,EAAE,GAAG,QAAQ,CAAC,eAAe,CAAC;YAChC,KAAK,EAAE,OAAO,CAAC,KAAK;YACpB,MAAM,EAAE,OAAO,CAAC,MAAM;YACtB,OAAO,EAAE,WAAW,CAAC,OAAO,CAAC,KAAK,EAAE;SACvC,CAAC,CAAC;QAEH,MAAM,GAAG,GAAW,MAAM,IAAI,OAAO,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC,EAAE,CAAC,QAAQ,CAAC,
KAAK,CAAC,MAAM,CAAC,IAAI,CAAC,EAAE,MAAM,CAAC,CAAC,CAAC;QAC3F,EAAE,CAAC,KAAK,EAAE,CAAC;QAEX,OAAO,GAAG,CAAC;IACf,CAAC;IAED,iDAAiD;IACjD,OAAO,IAAI,EAAE;QACT,MAAM,KAAK,GAAG,aAAa,IAAI,IAAI;YAC/B,CAAC,CAAC,aAAa;YACf,CAAC,CAAC,MAAM,SAAS,EAAE,CAAC;QAExB,IAAI,aAAa,IAAI,IAAI,EAAE;YACvB,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,aAAa,CAAC,CAAC;YAC/C,aAAa,GAAG,IAAI,CAAC;SACxB;;YACG,MAAM,WAAW,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC;QAEjC,IAAI,KAAK,KAAK,OAAO;YACjB,MAAM;QAEV,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,KAAK,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC;QAE3C,MAAM,CAAC,UAAU,EAAE,QAAQ,CAAC,GAAG,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC;QAEpE,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,UAAU,CAAC,CAAC;QACjC,MAAM,OAAO,CAAC,MAAM,CAAC,KAAK,EAAE;YACxB,OAAO;YACP,WAAW;YACX,IAAI;YACJ,IAAI;YACJ,aAAa,EAAE;gBACX,OAAO,EAAE,aAAa;gBACtB,gBAAgB,EAAE,sBAAsB,IAAI,IAAI,CAAC,CAAC,CAAC,sBAAsB,CAAC,CAAC,CAAC,SAAS;gBACrF,eAAe,EAAE,qBAAqB,IAAI,IAAI,CAAC,CAAC,CAAC,qBAAqB,CAAC,CAAC,CAAC,SAAS;gBAClF,eAAe,EAAE,wBAAwB;gBACzC,UAAU,EAAE,uBAAuB;aACtC;YACD,SAAS,EAAE,SAAS,KAAK,CAAC,CAAC;gBACvB,CAAC,CAAC,OAAO,CAAC,cAAc,EAAE;gBAC1B,CAAC,CAAC,SAAS,IAAI,CAAC;oBACZ,CAAC,CAAC,SAAS;oBACX,CAAC,CAAC,SAAS;YACnB,OAAO,CAAC,KAAK;gBACT,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,OAAO,CAAC,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC;YACxD,CAAC;SACJ,CAAC,CAAC;QACH,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC;QAC/B,OAAO,CAAC,GAAG,EAAE,CAAC;QAEd,IAAI,YAAY;YACZ,OAAO,CAAC,YAAY,EAAE,CAAC;KAC9B;AACL,CAAC;AAED,SAAS,cAAc,CAAC,OAA+B,EAAE,GAAkB;IACvE,QAAQ,OAAO,EAAE;QACb,KAAK,SAAS;YACV,OAAO,IAAI,wBAAwB,EAAE,CAAC;QAC1C,KAAK,WAAW;YACZ,OAAO,IAAI,sBAAsB,EAAE,CAAC;QACxC,KAAK,QAAQ;YACT,OAAO,IAAI,uBAAuB,EAAE,CAAC;QACzC,KAAK,YAAY;YACb,OAAO,IAAI,uBAAuB,EAAE,CAAC;QACzC,QAAQ;KACX;IAED,IAAI,OAAO,KAAK,MAAM,EAAE;QACpB,MAAM,WAAW,GAAG,mBAAmB,CAAC,GAAG,CAAC,CAAC;QAE7C,IAAI,WAAW,IAAI,IAAI;YACnB,OAAO,IAAI,WAAW,EAAE,CAAC;QAE7B,OAAO,IAAI,wBAAwB,EAAE,CAAC;KACzC;IAED,KAAK,CAAC,OAAuB,CAAC,CAAC;IAE/B,MAAM,IAAI,KAAK,CAAC,mBAAmB,GAAG,OAAO,CAAC,CAAC;AACnD,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"LlamaContext.js","sourceRoot":"","sources":["../../src/llamaEvaluator/LlamaContext.ts"],"names":[],"mappings":"AAAA,OAAO,EAAC,gBAAgB,EAAC,MAAM,8BAA8B,CAAC;AAE9D,OAAO,EAAC,YAAY,EAAC,MAAM,gBAAgB,CAAC;AA8D5C,MAAM,OAAO,YAAY;IACJ,MAAM,CAAa;IACnB,IAAI,CAAe;IACnB,WAAW,CAAU;IAC9B,cAAc,CAAU;IAEhC,gBAAgB;IACA,YAAY,CAAgB;IAG5C;;OAEG;IACH,YAAmB,EACf,KAAK,EACL,UAAU,GAAG,IAAI,EACjB,OAAO,EACP,IAAI,GAAG,KAAK,CAAC,eAAe,CAAC,IAAI,EACjC,WAAW,GAAG,KAAK,CAAC,eAAe,CAAC,WAAW,EAC/C,SAAS,GAAG,KAAK,CAAC,eAAe,CAAC,SAAS,EAC3C,SAAS,GAAG,KAAK,CAAC,eAAe,CAAC,SAAS,EAC3C,SAAS,GAAG,KAAK,CAAC,eAAe,CAAC,SAAS,EAC3C,OAAO,GAAG,KAAK,CAAC,eAAe,CAAC,OAAO,EACrB;QAClB,IAAI,CAAC,MAAM,GAAG,KAAK,CAAC;QACpB,IAAI,CAAC,IAAI,GAAG,IAAI,YAAY,CAAC,KAAK,CAAC,MAAM,EAAE,gBAAgB,CAAC;YACxD,IAAI,EAAE,IAAI,IAAI,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC,CAAC,CAAC,SAAS;YACnD,WAAW;YACX,SAAS;YACT,SAAS;YACT,SAAS;YACT,OAAO;SACV,CAAC,CAAC,CAAC;QACJ,IAAI,CAAC,WAAW,GAAG,UAAU,CAAC;QAC9B,IAAI,CAAC,cAAc,GAAG,EAAE,CAAC;QACzB,IAAI,CAAC,YAAY,GAAG,OAAO,CAAC;QAE5B,IAAI,UAAU,EAAE;YACZ,IAAI,CAAC,cAAc,CAAC,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,CAAC,CAAC;SACrD;IACL,CAAC;IAEM,MAAM,CAAC,IAAY;QACtB,IAAI,IAAI,KAAK,EAAE;YACX,OAAO,IAAI,WAAW,EAAE,CAAC;QAE7B,OAAO,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;IAClC,CAAC;IAEM,MAAM,CAAC,MAA6B;QACvC,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC;YACnB,OAAO,EAAE,CAAC;QAEd,IAAI,MAAM,YAAY,WAAW;YAC7B,OAAO,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;QAEpC,OAAO,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,WAAW,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC;IACtD,CAAC;IAED,IAAW,UAAU;QACjB,OAAO,IAAI,CAAC,WAAW,CAAC;IAC5B,CAAC;IAED;;OAEG;IACI,WAAW;QACd,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,CAAC;QAEtC,IAAI,QAAQ,KAAK,CAAC,CAAC;YACf,OAAO,IAAI,CAAC;QAEhB,OAAO,QAAQ,CAAC;IACpB,CAAC;IAED;;OAEG;IACI,WAAW;QACd,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,CAAC;QAEtC,IAAI,QAAQ,KAAK,CAAC,CAAC;YACf,OAAO,IAAI,CAAC;QAEhB,OAAO,QAAQ,CAAC;IACpB,CAAC;IAED;;OAEG;IACI,UAAU;QACb,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,CA
AC;QAEpC,IAAI,OAAO,KAAK,CAAC,CAAC;YACd,OAAO,IAAI,CAAC;QAEhB,OAAO,OAAO,CAAC;IACnB,CAAC;IAED;;OAEG;IACI,YAAY;QACf,MAAM,QAAQ,GAAG,IAAI,CAAC,WAAW,EAAE,CAAC;QAEpC,IAAI,QAAQ,IAAI,IAAI;YAChB,OAAO,IAAI,CAAC;QAEhB,OAAO,IAAI,CAAC,IAAI,CAAC,cAAc,CAAC,QAAQ,CAAC,CAAC;IAC9C,CAAC;IAED;;OAEG;IACI,YAAY;QACf,MAAM,QAAQ,GAAG,IAAI,CAAC,WAAW,EAAE,CAAC;QAEpC,IAAI,QAAQ,IAAI,IAAI;YAChB,OAAO,IAAI,CAAC;QAEhB,OAAO,IAAI,CAAC,IAAI,CAAC,cAAc,CAAC,QAAQ,CAAC,CAAC;IAC9C,CAAC;IAED;;OAEG;IACI,WAAW;QACd,MAAM,OAAO,GAAG,IAAI,CAAC,UAAU,EAAE,CAAC;QAElC,IAAI,OAAO,IAAI,IAAI;YACf,OAAO,IAAI,CAAC;QAEhB,OAAO,IAAI,CAAC,IAAI,CAAC,cAAc,CAAC,OAAO,CAAC,CAAC;IAC7C,CAAC;IAEM,cAAc;QACjB,OAAO,IAAI,CAAC,IAAI,CAAC,cAAc,EAAE,CAAC;IACtC,CAAC;IAED;;;;OAIG;IACI,KAAK,CAAC,CAAC,QAAQ,CAAC,MAAmB,EAAE,EACxC,WAAW,GAAG,IAAI,CAAC,MAAM,CAAC,kBAAkB,CAAC,WAAW,EACxD,IAAI,GAAG,IAAI,CAAC,MAAM,CAAC,kBAAkB,CAAC,IAAI,EAC1C,IAAI,GAAG,IAAI,CAAC,MAAM,CAAC,kBAAkB,CAAC,IAAI,EAC1C,sBAAsB,EACtB,aAAa,KAIb,EAAE;QACF,IAAI,UAAU,GAAG,MAAM,CAAC;QAExB,IAAI,IAAI,CAAC,cAAc,CAAC,MAAM,GAAG,CAAC,EAAE;YAChC,MAAM,UAAU,GAAY,IAAI,CAAC,cAAc,CAAC,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC;YAE3E,UAAU,GAAG,WAAW,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;YAC1C,IAAI,CAAC,cAAc,GAAG,EAAE,CAAC;SAC5B;QAED,IAAI,UAAU,CAAC,MAAM,KAAK,CAAC;YACvB,OAAO;QAEX,iDAAiD;QACjD,OAAO,IAAI,EAAE;YACT,kCAAkC;YAClC,MAAM,SAAS,GAAU,MAAM,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,UAAU,EAAE,gBAAgB,CAAC;gBACvE,WAAW;gBACX,IAAI;gBACJ,IAAI;gBACJ,aAAa,EAAE,aAAa,EAAE,OAAO;gBACrC,mBAAmB,EAAE,aAAa,EAAE,YAAY,YAAY,QAAQ;oBAChE,CAAC,CAAC,aAAa,CAAC,YAAY,EAAE;oBAC9B,CAAC,CAAC,aAAa,EAAE,YAAY;gBACjC,4BAA4B,EAAE,aAAa,EAAE,eAAe;gBAC5D,6BAA6B,EAAE,aAAa,EAAE,gBAAgB;gBAC9D,sBAAsB,EAAE,sBAAsB,EAAE,MAAM;aACzD,CAAC,CAAC,CAAC;YAEJ,mCAAmC;YACnC,IAAI,SAAS,KAAK,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE;gBAClC,MAAM;YAEV,MAAM,SAAS,CAAC;YAEhB,mCAAmC;YACnC,UAAU,GAAG,WAAW,CAAC,IAAI,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC;SAC9C;IACL,CAAC;CACJ"}
|
|
1
|
+
{"version":3,"file":"LlamaContext.js","sourceRoot":"","sources":["../../src/llamaEvaluator/LlamaContext.ts"],"names":[],"mappings":"AAAA,OAAO,EAAC,gBAAgB,EAAC,MAAM,8BAA8B,CAAC;AAE9D,OAAO,EAAC,YAAY,EAAC,MAAM,gBAAgB,CAAC;AA8D5C,MAAM,OAAO,YAAY;IACJ,MAAM,CAAa;IACnB,IAAI,CAAe;IACnB,WAAW,CAAU;IAC9B,cAAc,CAAU;IAEhC,gBAAgB;IACA,YAAY,CAAgB;IAG5C;;OAEG;IACH,YAAmB,EACf,KAAK,EACL,UAAU,GAAG,IAAI,EACjB,OAAO,EACP,IAAI,GAAG,KAAK,CAAC,eAAe,CAAC,IAAI,EACjC,WAAW,GAAG,KAAK,CAAC,eAAe,CAAC,WAAW,EAC/C,SAAS,GAAG,KAAK,CAAC,eAAe,CAAC,SAAS,EAC3C,SAAS,GAAG,KAAK,CAAC,eAAe,CAAC,SAAS,EAC3C,SAAS,GAAG,KAAK,CAAC,eAAe,CAAC,SAAS,EAC3C,OAAO,GAAG,KAAK,CAAC,eAAe,CAAC,OAAO,EACrB;QAClB,IAAI,CAAC,MAAM,GAAG,KAAK,CAAC;QACpB,IAAI,CAAC,IAAI,GAAG,IAAI,YAAY,CAAC,KAAK,CAAC,MAAM,EAAE,gBAAgB,CAAC;YACxD,IAAI,EAAE,IAAI,IAAI,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC,CAAC,CAAC,SAAS;YACnD,WAAW;YACX,SAAS;YACT,SAAS;YACT,SAAS;YACT,OAAO;SACV,CAAC,CAAC,CAAC;QACJ,IAAI,CAAC,WAAW,GAAG,UAAU,CAAC;QAC9B,IAAI,CAAC,cAAc,GAAG,EAAE,CAAC;QACzB,IAAI,CAAC,YAAY,GAAG,OAAO,CAAC;QAE5B,IAAI,UAAU,EAAE;YACZ,IAAI,CAAC,cAAc,CAAC,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,CAAC,CAAC;SACrD;IACL,CAAC;IAEM,MAAM,CAAC,IAAY;QACtB,IAAI,IAAI,KAAK,EAAE;YACX,OAAO,IAAI,WAAW,EAAE,CAAC;QAE7B,OAAO,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;IAClC,CAAC;IAEM,MAAM,CAAC,MAA6B;QACvC,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC;YACnB,OAAO,EAAE,CAAC;QAEd,IAAI,MAAM,YAAY,WAAW;YAC7B,OAAO,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;QAEpC,OAAO,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,WAAW,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC;IACtD,CAAC;IAED,IAAW,UAAU;QACjB,OAAO,IAAI,CAAC,WAAW,CAAC;IAC5B,CAAC;IAED;;OAEG;IACI,WAAW;QACd,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,CAAC;QAEtC,IAAI,QAAQ,KAAK,CAAC,CAAC;YACf,OAAO,IAAI,CAAC;QAEhB,OAAO,QAAQ,CAAC;IACpB,CAAC;IAED;;OAEG;IACI,WAAW;QACd,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,CAAC;QAEtC,IAAI,QAAQ,KAAK,CAAC,CAAC;YACf,OAAO,IAAI,CAAC;QAEhB,OAAO,QAAQ,CAAC;IACpB,CAAC;IAED;;OAEG;IACI,UAAU;QACb,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,CA
AC;QAEpC,IAAI,OAAO,KAAK,CAAC,CAAC;YACd,OAAO,IAAI,CAAC;QAEhB,OAAO,OAAO,CAAC;IACnB,CAAC;IAED;;OAEG;IACI,YAAY;QACf,MAAM,QAAQ,GAAG,IAAI,CAAC,WAAW,EAAE,CAAC;QAEpC,IAAI,QAAQ,IAAI,IAAI;YAChB,OAAO,IAAI,CAAC;QAEhB,OAAO,IAAI,CAAC,IAAI,CAAC,cAAc,CAAC,QAAQ,CAAC,CAAC;IAC9C,CAAC;IAED;;OAEG;IACI,YAAY;QACf,MAAM,QAAQ,GAAG,IAAI,CAAC,WAAW,EAAE,CAAC;QAEpC,IAAI,QAAQ,IAAI,IAAI;YAChB,OAAO,IAAI,CAAC;QAEhB,OAAO,IAAI,CAAC,IAAI,CAAC,cAAc,CAAC,QAAQ,CAAC,CAAC;IAC9C,CAAC;IAED;;OAEG;IACI,WAAW;QACd,MAAM,OAAO,GAAG,IAAI,CAAC,UAAU,EAAE,CAAC;QAElC,IAAI,OAAO,IAAI,IAAI;YACf,OAAO,IAAI,CAAC;QAEhB,OAAO,IAAI,CAAC,IAAI,CAAC,cAAc,CAAC,OAAO,CAAC,CAAC;IAC7C,CAAC;IAEM,cAAc;QACjB,OAAO,IAAI,CAAC,IAAI,CAAC,cAAc,EAAE,CAAC;IACtC,CAAC;IAEM,YAAY;QACf,IAAI,CAAC,IAAI,CAAC,YAAY,EAAE,CAAC;IAC7B,CAAC;IAED;;;;OAIG;IACI,KAAK,CAAC,CAAC,QAAQ,CAAC,MAAmB,EAAE,EACxC,WAAW,GAAG,IAAI,CAAC,MAAM,CAAC,kBAAkB,CAAC,WAAW,EACxD,IAAI,GAAG,IAAI,CAAC,MAAM,CAAC,kBAAkB,CAAC,IAAI,EAC1C,IAAI,GAAG,IAAI,CAAC,MAAM,CAAC,kBAAkB,CAAC,IAAI,EAC1C,sBAAsB,EACtB,aAAa,KAIb,EAAE;QACF,IAAI,UAAU,GAAG,MAAM,CAAC;QAExB,IAAI,IAAI,CAAC,cAAc,CAAC,MAAM,GAAG,CAAC,EAAE;YAChC,MAAM,UAAU,GAAY,IAAI,CAAC,cAAc,CAAC,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC;YAE3E,UAAU,GAAG,WAAW,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;YAC1C,IAAI,CAAC,cAAc,GAAG,EAAE,CAAC;SAC5B;QAED,IAAI,UAAU,CAAC,MAAM,KAAK,CAAC;YACvB,OAAO;QAEX,iDAAiD;QACjD,OAAO,IAAI,EAAE;YACT,kCAAkC;YAClC,MAAM,SAAS,GAAU,MAAM,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,UAAU,EAAE,gBAAgB,CAAC;gBACvE,WAAW;gBACX,IAAI;gBACJ,IAAI;gBACJ,aAAa,EAAE,aAAa,EAAE,OAAO;gBACrC,mBAAmB,EAAE,aAAa,EAAE,YAAY,YAAY,QAAQ;oBAChE,CAAC,CAAC,aAAa,CAAC,YAAY,EAAE;oBAC9B,CAAC,CAAC,aAAa,EAAE,YAAY;gBACjC,4BAA4B,EAAE,aAAa,EAAE,eAAe;gBAC5D,6BAA6B,EAAE,aAAa,EAAE,gBAAgB;gBAC9D,sBAAsB,EAAE,sBAAsB,EAAE,MAAM;aACzD,CAAC,CAAC,CAAC;YAEJ,mCAAmC;YACnC,IAAI,SAAS,KAAK,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE;gBAClC,MAAM;YAEV,MAAM,SAAS,CAAC;YAEhB,mCAAmC;YACnC,UAAU,GAAG,WAAW,CAAC,IAAI,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC;SAC9C;IACL,CAAC;CACJ"}
|
package/dist/utils/getBin.d.ts
CHANGED
package/llama/addon.cpp
CHANGED
|
@@ -215,6 +215,13 @@ class LLAMAContext : public Napi::ObjectWrap<LLAMAContext> {
|
|
|
215
215
|
Napi::Value GetContextSize(const Napi::CallbackInfo& info) {
|
|
216
216
|
return Napi::Number::From(info.Env(), llama_n_ctx(ctx));
|
|
217
217
|
}
|
|
218
|
+
|
|
219
|
+
Napi::Value PrintTimings(const Napi::CallbackInfo& info) {
|
|
220
|
+
llama_print_timings(ctx);
|
|
221
|
+
llama_reset_timings(ctx);
|
|
222
|
+
return info.Env().Undefined();
|
|
223
|
+
}
|
|
224
|
+
|
|
218
225
|
Napi::Value GetTokenString(const Napi::CallbackInfo& info) {
|
|
219
226
|
int token = info[0].As<Napi::Number>().Int32Value();
|
|
220
227
|
std::stringstream ss;
|
|
@@ -242,6 +249,7 @@ class LLAMAContext : public Napi::ObjectWrap<LLAMAContext> {
|
|
|
242
249
|
InstanceMethod("getContextSize", &LLAMAContext::GetContextSize),
|
|
243
250
|
InstanceMethod("getTokenString", &LLAMAContext::GetTokenString),
|
|
244
251
|
InstanceMethod("eval", &LLAMAContext::Eval),
|
|
252
|
+
InstanceMethod("printTimings", &LLAMAContext::PrintTimings),
|
|
245
253
|
}));
|
|
246
254
|
}
|
|
247
255
|
};
|
package/llama/gitRelease.bundle
CHANGED
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -1775,9 +1775,29 @@ kernel void kernel_rope(
|
|
|
1775
1775
|
template [[host_name("kernel_rope_f32")]] kernel rope_t kernel_rope<float>;
|
|
1776
1776
|
template [[host_name("kernel_rope_f16")]] kernel rope_t kernel_rope<half>;
|
|
1777
1777
|
|
|
1778
|
-
|
|
1778
|
+
typedef void (im2col_t)(
|
|
1779
1779
|
device const float * x,
|
|
1780
|
-
device
|
|
1780
|
+
device char * dst,
|
|
1781
|
+
constant int32_t & ofs0,
|
|
1782
|
+
constant int32_t & ofs1,
|
|
1783
|
+
constant int32_t & IW,
|
|
1784
|
+
constant int32_t & IH,
|
|
1785
|
+
constant int32_t & CHW,
|
|
1786
|
+
constant int32_t & s0,
|
|
1787
|
+
constant int32_t & s1,
|
|
1788
|
+
constant int32_t & p0,
|
|
1789
|
+
constant int32_t & p1,
|
|
1790
|
+
constant int32_t & d0,
|
|
1791
|
+
constant int32_t & d1,
|
|
1792
|
+
uint3 tgpig[[threadgroup_position_in_grid]],
|
|
1793
|
+
uint3 tgpg[[threadgroups_per_grid]],
|
|
1794
|
+
uint3 tpitg[[thread_position_in_threadgroup]],
|
|
1795
|
+
uint3 ntg[[threads_per_threadgroup]]);
|
|
1796
|
+
|
|
1797
|
+
template <typename T>
|
|
1798
|
+
kernel void kernel_im2col(
|
|
1799
|
+
device const float * x,
|
|
1800
|
+
device char * dst,
|
|
1781
1801
|
constant int32_t & ofs0,
|
|
1782
1802
|
constant int32_t & ofs1,
|
|
1783
1803
|
constant int32_t & IW,
|
|
@@ -1800,14 +1820,19 @@ kernel void kernel_im2col_f16(
|
|
|
1800
1820
|
(tpitg[0] * tgpg[1] * tgpg[2] + tgpig[1] * tgpg[2] + tgpig[2]) * CHW +
|
|
1801
1821
|
(tgpig[0] * (ntg[1] * ntg[2]) + tpitg[1] * ntg[2] + tpitg[2]);
|
|
1802
1822
|
|
|
1823
|
+
device T * pdst = (device T *) (dst);
|
|
1824
|
+
|
|
1803
1825
|
if (iih < 0 || iih >= IH || iiw < 0 || iiw >= IW) {
|
|
1804
|
-
|
|
1826
|
+
pdst[offset_dst] = 0.0f;
|
|
1805
1827
|
} else {
|
|
1806
1828
|
const int32_t offset_src = tpitg[0] * ofs0 + tgpig[0] * ofs1;
|
|
1807
|
-
|
|
1829
|
+
pdst[offset_dst] = x[offset_src + iih * IW + iiw];
|
|
1808
1830
|
}
|
|
1809
1831
|
}
|
|
1810
1832
|
|
|
1833
|
+
template [[host_name("kernel_im2col_f32")]] kernel im2col_t kernel_im2col<float>;
|
|
1834
|
+
template [[host_name("kernel_im2col_f16")]] kernel im2col_t kernel_im2col<half>;
|
|
1835
|
+
|
|
1811
1836
|
kernel void kernel_upscale_f32(
|
|
1812
1837
|
device const char * src0,
|
|
1813
1838
|
device char * dst,
|
|
@@ -2459,6 +2484,12 @@ typedef struct {
|
|
|
2459
2484
|
} block_iq2_xs;
|
|
2460
2485
|
// 74 bytes / block for QK_K = 256, so 2.3125 bpw
|
|
2461
2486
|
|
|
2487
|
+
typedef struct {
|
|
2488
|
+
half d;
|
|
2489
|
+
uint8_t qs[3*QK_K/8];
|
|
2490
|
+
} block_iq3_xxs;
|
|
2491
|
+
// 98 bytes / block for QK_K = 256, so 3.0625 bpw
|
|
2492
|
+
|
|
2462
2493
|
//====================================== dot products =========================
|
|
2463
2494
|
|
|
2464
2495
|
void kernel_mul_mv_q2_K_f32_impl(
|
|
@@ -3681,6 +3712,42 @@ constexpr constant static uint64_t iq2xs_grid[512] = {
|
|
|
3681
3712
|
0x2b2b2b2b082b2b08, 0x2b2b2b2b082b2b2b, 0x2b2b2b2b2b190819, 0x2b2b2b2b2b2b2b2b,
|
|
3682
3713
|
};
|
|
3683
3714
|
|
|
3715
|
+
constexpr constant static uint32_t iq3xxs_grid[256] = {
|
|
3716
|
+
0x04040404, 0x04040414, 0x04040424, 0x04040c0c, 0x04040c1c, 0x04040c3e, 0x04041404, 0x04041414,
|
|
3717
|
+
0x04041c0c, 0x04042414, 0x04043e1c, 0x04043e2c, 0x040c040c, 0x040c041c, 0x040c0c04, 0x040c0c14,
|
|
3718
|
+
0x040c140c, 0x040c142c, 0x040c1c04, 0x040c1c14, 0x040c240c, 0x040c2c24, 0x040c3e04, 0x04140404,
|
|
3719
|
+
0x04140414, 0x04140424, 0x04140c0c, 0x04141404, 0x04141414, 0x04141c0c, 0x04141c1c, 0x04141c3e,
|
|
3720
|
+
0x04142c0c, 0x04142c3e, 0x04143e2c, 0x041c040c, 0x041c043e, 0x041c0c04, 0x041c0c14, 0x041c142c,
|
|
3721
|
+
0x041c3e04, 0x04240c1c, 0x04241c3e, 0x04242424, 0x04242c3e, 0x04243e1c, 0x04243e2c, 0x042c040c,
|
|
3722
|
+
0x042c043e, 0x042c1c14, 0x042c2c14, 0x04341c2c, 0x04343424, 0x043e0c04, 0x043e0c24, 0x043e0c34,
|
|
3723
|
+
0x043e241c, 0x043e340c, 0x0c04040c, 0x0c04041c, 0x0c040c04, 0x0c040c14, 0x0c04140c, 0x0c04141c,
|
|
3724
|
+
0x0c041c04, 0x0c041c14, 0x0c041c24, 0x0c04243e, 0x0c042c04, 0x0c0c0404, 0x0c0c0414, 0x0c0c0c0c,
|
|
3725
|
+
0x0c0c1404, 0x0c0c1414, 0x0c14040c, 0x0c14041c, 0x0c140c04, 0x0c140c14, 0x0c14140c, 0x0c141c04,
|
|
3726
|
+
0x0c143e14, 0x0c1c0404, 0x0c1c0414, 0x0c1c1404, 0x0c1c1c0c, 0x0c1c2434, 0x0c1c3434, 0x0c24040c,
|
|
3727
|
+
0x0c24042c, 0x0c242c04, 0x0c2c1404, 0x0c2c1424, 0x0c2c2434, 0x0c2c3e0c, 0x0c34042c, 0x0c3e1414,
|
|
3728
|
+
0x0c3e2404, 0x14040404, 0x14040414, 0x14040c0c, 0x14040c1c, 0x14041404, 0x14041414, 0x14041434,
|
|
3729
|
+
0x14041c0c, 0x14042414, 0x140c040c, 0x140c041c, 0x140c042c, 0x140c0c04, 0x140c0c14, 0x140c140c,
|
|
3730
|
+
0x140c1c04, 0x140c341c, 0x140c343e, 0x140c3e04, 0x14140404, 0x14140414, 0x14140c0c, 0x14140c3e,
|
|
3731
|
+
0x14141404, 0x14141414, 0x14141c3e, 0x14142404, 0x14142c2c, 0x141c040c, 0x141c0c04, 0x141c0c24,
|
|
3732
|
+
0x141c3e04, 0x141c3e24, 0x14241c2c, 0x14242c1c, 0x142c041c, 0x142c143e, 0x142c240c, 0x142c3e24,
|
|
3733
|
+
0x143e040c, 0x143e041c, 0x143e0c34, 0x143e242c, 0x1c04040c, 0x1c040c04, 0x1c040c14, 0x1c04140c,
|
|
3734
|
+
0x1c04141c, 0x1c042c04, 0x1c04342c, 0x1c043e14, 0x1c0c0404, 0x1c0c0414, 0x1c0c1404, 0x1c0c1c0c,
|
|
3735
|
+
0x1c0c2424, 0x1c0c2434, 0x1c14040c, 0x1c14041c, 0x1c140c04, 0x1c14142c, 0x1c142c14, 0x1c143e14,
|
|
3736
|
+
0x1c1c0c0c, 0x1c1c1c1c, 0x1c241c04, 0x1c24243e, 0x1c243e14, 0x1c2c0404, 0x1c2c0434, 0x1c2c1414,
|
|
3737
|
+
0x1c2c2c2c, 0x1c340c24, 0x1c341c34, 0x1c34341c, 0x1c3e1c1c, 0x1c3e3404, 0x24040424, 0x24040c3e,
|
|
3738
|
+
0x24041c2c, 0x24041c3e, 0x24042c1c, 0x24042c3e, 0x240c3e24, 0x24141404, 0x24141c3e, 0x24142404,
|
|
3739
|
+
0x24143404, 0x24143434, 0x241c043e, 0x241c242c, 0x24240424, 0x24242c0c, 0x24243424, 0x242c142c,
|
|
3740
|
+
0x242c241c, 0x242c3e04, 0x243e042c, 0x243e0c04, 0x243e0c14, 0x243e1c04, 0x2c040c14, 0x2c04240c,
|
|
3741
|
+
0x2c043e04, 0x2c0c0404, 0x2c0c0434, 0x2c0c1434, 0x2c0c2c2c, 0x2c140c24, 0x2c141c14, 0x2c143e14,
|
|
3742
|
+
0x2c1c0414, 0x2c1c2c1c, 0x2c240c04, 0x2c24141c, 0x2c24143e, 0x2c243e14, 0x2c2c0414, 0x2c2c1c0c,
|
|
3743
|
+
0x2c342c04, 0x2c3e1424, 0x2c3e2414, 0x34041424, 0x34042424, 0x34042434, 0x34043424, 0x340c140c,
|
|
3744
|
+
0x340c340c, 0x34140c3e, 0x34143424, 0x341c1c04, 0x341c1c34, 0x34242424, 0x342c042c, 0x342c2c14,
|
|
3745
|
+
0x34341c1c, 0x343e041c, 0x343e140c, 0x3e04041c, 0x3e04042c, 0x3e04043e, 0x3e040c04, 0x3e041c14,
|
|
3746
|
+
0x3e042c14, 0x3e0c1434, 0x3e0c2404, 0x3e140c14, 0x3e14242c, 0x3e142c14, 0x3e1c0404, 0x3e1c0c2c,
|
|
3747
|
+
0x3e1c1c1c, 0x3e1c3404, 0x3e24140c, 0x3e24240c, 0x3e2c0404, 0x3e2c0414, 0x3e2c1424, 0x3e341c04,
|
|
3748
|
+
};
|
|
3749
|
+
|
|
3750
|
+
|
|
3684
3751
|
constexpr constant static uint8_t ksigns_iq2xs[128] = {
|
|
3685
3752
|
0, 129, 130, 3, 132, 5, 6, 135, 136, 9, 10, 139, 12, 141, 142, 15,
|
|
3686
3753
|
144, 17, 18, 147, 20, 149, 150, 23, 24, 153, 154, 27, 156, 29, 30, 159,
|
|
@@ -3970,6 +4037,143 @@ kernel void kernel_mul_mv_iq2_xs_f32(
|
|
|
3970
4037
|
kernel_mul_mv_iq2_xs_f32_impl(src0, src1, dst, ne00, ne01, ne02, ne10, ne12, ne0, ne1, r2, r3, shared_values, tgpig, tiisg, sgitg);
|
|
3971
4038
|
}
|
|
3972
4039
|
|
|
4040
|
+
void kernel_mul_mv_iq3_xxs_f32_impl(
|
|
4041
|
+
device const void * src0,
|
|
4042
|
+
device const float * src1,
|
|
4043
|
+
device float * dst,
|
|
4044
|
+
constant int64_t & ne00,
|
|
4045
|
+
constant int64_t & ne01,
|
|
4046
|
+
constant int64_t & ne02,
|
|
4047
|
+
constant int64_t & ne10,
|
|
4048
|
+
constant int64_t & ne12,
|
|
4049
|
+
constant int64_t & ne0,
|
|
4050
|
+
constant int64_t & ne1,
|
|
4051
|
+
constant uint & r2,
|
|
4052
|
+
constant uint & r3,
|
|
4053
|
+
threadgroup int8_t * shared_values [[threadgroup(0)]],
|
|
4054
|
+
uint3 tgpig[[threadgroup_position_in_grid]],
|
|
4055
|
+
uint tiisg[[thread_index_in_simdgroup]],
|
|
4056
|
+
uint sgitg[[simdgroup_index_in_threadgroup]]) {
|
|
4057
|
+
|
|
4058
|
+
const int nb = ne00/QK_K;
|
|
4059
|
+
const int r0 = tgpig.x;
|
|
4060
|
+
const int r1 = tgpig.y;
|
|
4061
|
+
const int im = tgpig.z;
|
|
4062
|
+
|
|
4063
|
+
const int first_row = (r0 * N_SIMDGROUP + sgitg) * N_DST;
|
|
4064
|
+
const int ib_row = first_row * nb;
|
|
4065
|
+
|
|
4066
|
+
const uint i12 = im%ne12;
|
|
4067
|
+
const uint i13 = im/ne12;
|
|
4068
|
+
|
|
4069
|
+
const uint offset0 = (i12/r2)*(nb*ne01) + (i13/r3)*(nb*ne01*ne02);
|
|
4070
|
+
|
|
4071
|
+
device const block_iq3_xxs * x = (device const block_iq3_xxs *) src0 + ib_row + offset0;
|
|
4072
|
+
device const float * y = (device const float *) src1 + r1*ne10 + im*ne00*ne1;
|
|
4073
|
+
|
|
4074
|
+
float yl[32];
|
|
4075
|
+
float sumf[N_DST]={0.f}, all_sum;
|
|
4076
|
+
|
|
4077
|
+
const int nb32 = nb * (QK_K / 32);
|
|
4078
|
+
|
|
4079
|
+
threadgroup uint32_t * values = (threadgroup uint32_t *)shared_values;
|
|
4080
|
+
threadgroup uint8_t * shared_signs = (threadgroup uint8_t *)(values + 256);
|
|
4081
|
+
{
|
|
4082
|
+
int nval = 4;
|
|
4083
|
+
int pos = (32*sgitg + tiisg)*nval;
|
|
4084
|
+
for (int i = 0; i < nval; ++i) values[pos + i] = iq3xxs_grid[pos + i];
|
|
4085
|
+
nval = 2;
|
|
4086
|
+
pos = (32*sgitg + tiisg)*nval;
|
|
4087
|
+
for (int i = 0; i < nval; ++i) shared_signs[pos+i] = ksigns_iq2xs[pos+i];
|
|
4088
|
+
threadgroup_barrier(mem_flags::mem_threadgroup);
|
|
4089
|
+
}
|
|
4090
|
+
|
|
4091
|
+
#if QK_K == 256
|
|
4092
|
+
const int ix = tiisg;
|
|
4093
|
+
|
|
4094
|
+
device const float * y4 = y + 32 * ix;
|
|
4095
|
+
|
|
4096
|
+
for (int ib32 = ix; ib32 < nb32; ib32 += 32) {
|
|
4097
|
+
|
|
4098
|
+
for (int i = 0; i < 32; ++i) {
|
|
4099
|
+
yl[i] = y4[i];
|
|
4100
|
+
}
|
|
4101
|
+
|
|
4102
|
+
const int ibl = ib32 / (QK_K / 32);
|
|
4103
|
+
const int ib = ib32 % (QK_K / 32);
|
|
4104
|
+
|
|
4105
|
+
device const block_iq3_xxs * xr = x + ibl;
|
|
4106
|
+
device const uint8_t * q3 = xr->qs + 8 * ib;
|
|
4107
|
+
device const uint16_t * gas = (device const uint16_t *)(xr->qs + QK_K/4) + 2 * ib;
|
|
4108
|
+
device const half * dh = &xr->d;
|
|
4109
|
+
|
|
4110
|
+
for (int row = 0; row < N_DST; row++) {
|
|
4111
|
+
|
|
4112
|
+
const float db = dh[0];
|
|
4113
|
+
const uint32_t aux32 = gas[0] | (gas[1] << 16);
|
|
4114
|
+
const float d = db * (0.5f + (aux32 >> 28));
|
|
4115
|
+
|
|
4116
|
+
float2 sum = {0};
|
|
4117
|
+
for (int l = 0; l < 4; ++l) {
|
|
4118
|
+
const threadgroup uint8_t * grid1 = (const threadgroup uint8_t *)(values + q3[2*l+0]);
|
|
4119
|
+
const threadgroup uint8_t * grid2 = (const threadgroup uint8_t *)(values + q3[2*l+1]);
|
|
4120
|
+
const uint8_t signs = shared_signs[(aux32 >> 7*l) & 127];
|
|
4121
|
+
for (int j = 0; j < 4; ++j) {
|
|
4122
|
+
sum[0] += yl[8*l + j + 0] * grid1[j] * (signs & kmask_iq2xs[j+0] ? -1.f : 1.f);
|
|
4123
|
+
sum[1] += yl[8*l + j + 4] * grid2[j] * (signs & kmask_iq2xs[j+4] ? -1.f : 1.f);
|
|
4124
|
+
}
|
|
4125
|
+
}
|
|
4126
|
+
sumf[row] += d * (sum[0] + sum[1]);
|
|
4127
|
+
|
|
4128
|
+
dh += nb*sizeof(block_iq3_xxs)/2;
|
|
4129
|
+
q3 += nb*sizeof(block_iq3_xxs);
|
|
4130
|
+
gas += nb*sizeof(block_iq3_xxs)/2;
|
|
4131
|
+
}
|
|
4132
|
+
|
|
4133
|
+
y4 += 32 * 32;
|
|
4134
|
+
}
|
|
4135
|
+
#else
|
|
4136
|
+
// TODO
|
|
4137
|
+
#endif
|
|
4138
|
+
|
|
4139
|
+
for (int row = 0; row < N_DST; ++row) {
|
|
4140
|
+
all_sum = simd_sum(sumf[row]);
|
|
4141
|
+
if (tiisg == 0) {
|
|
4142
|
+
dst[r1*ne0 + im*ne0*ne1 + first_row + row] = all_sum * 0.5f;
|
|
4143
|
+
}
|
|
4144
|
+
}
|
|
4145
|
+
}
|
|
4146
|
+
|
|
4147
|
+
[[host_name("kernel_mul_mv_iq3_xxs_f32")]]
|
|
4148
|
+
kernel void kernel_mul_mv_iq3_xxs_f32(
|
|
4149
|
+
device const void * src0,
|
|
4150
|
+
device const float * src1,
|
|
4151
|
+
device float * dst,
|
|
4152
|
+
constant int64_t & ne00,
|
|
4153
|
+
constant int64_t & ne01,
|
|
4154
|
+
constant int64_t & ne02,
|
|
4155
|
+
constant uint64_t & nb00,
|
|
4156
|
+
constant uint64_t & nb01,
|
|
4157
|
+
constant uint64_t & nb02,
|
|
4158
|
+
constant int64_t & ne10,
|
|
4159
|
+
constant int64_t & ne11,
|
|
4160
|
+
constant int64_t & ne12,
|
|
4161
|
+
constant uint64_t & nb10,
|
|
4162
|
+
constant uint64_t & nb11,
|
|
4163
|
+
constant uint64_t & nb12,
|
|
4164
|
+
constant int64_t & ne0,
|
|
4165
|
+
constant int64_t & ne1,
|
|
4166
|
+
constant uint & r2,
|
|
4167
|
+
constant uint & r3,
|
|
4168
|
+
threadgroup int8_t * shared_values [[threadgroup(0)]],
|
|
4169
|
+
uint3 tgpig[[threadgroup_position_in_grid]],
|
|
4170
|
+
uint tiisg[[thread_index_in_simdgroup]],
|
|
4171
|
+
uint sgitg[[simdgroup_index_in_threadgroup]]) {
|
|
4172
|
+
|
|
4173
|
+
kernel_mul_mv_iq3_xxs_f32_impl(src0, src1, dst, ne00, ne01, ne02, ne10, ne12, ne0, ne1, r2, r3, shared_values, tgpig, tiisg, sgitg);
|
|
4174
|
+
}
|
|
4175
|
+
|
|
4176
|
+
|
|
3973
4177
|
//============================= templates and their specializations =============================
|
|
3974
4178
|
|
|
3975
4179
|
// NOTE: this is not dequantizing - we are simply fitting the template
|
|
@@ -4287,6 +4491,33 @@ void dequantize_iq2_xs(device const block_iq2_xs * xb, short il, thread type4x4
|
|
|
4287
4491
|
}
|
|
4288
4492
|
}
|
|
4289
4493
|
|
|
4494
|
+
template <typename type4x4>
|
|
4495
|
+
void dequantize_iq3_xxs(device const block_iq3_xxs * xb, short il, thread type4x4 & reg) {
|
|
4496
|
+
// il is 0...15 for QK_K = 256 => index of block of 32 is il/2
|
|
4497
|
+
const float d = xb->d;
|
|
4498
|
+
const int ib32 = il/2;
|
|
4499
|
+
il = il%2;
|
|
4500
|
+
// il = 0 or 1. il = 0 processes the first 16 quants in a block of 32, il = 1 the second 16
|
|
4501
|
+
device const uint8_t * q3 = xb->qs + 8*ib32;
|
|
4502
|
+
device const uint16_t * gas = (device const uint16_t *)(xb->qs + QK_K/4) + 2*ib32;
|
|
4503
|
+
const uint32_t aux32 = gas[0] | (gas[1] << 16);
|
|
4504
|
+
const float dl = d * (0.5f + (aux32 >> 28)) * 0.5f;
|
|
4505
|
+
constant uint8_t * grid1 = (constant uint8_t *)(iq3xxs_grid + q3[4*il+0]);
|
|
4506
|
+
constant uint8_t * grid2 = (constant uint8_t *)(iq3xxs_grid + q3[4*il+1]);
|
|
4507
|
+
uint8_t signs = ksigns_iq2xs[(aux32 >> 14*il) & 127];
|
|
4508
|
+
for (int i = 0; i < 4; ++i) {
|
|
4509
|
+
reg[0][i] = dl * grid1[i] * (signs & kmask_iq2xs[i+0] ? -1.f : 1.f);
|
|
4510
|
+
reg[1][i] = dl * grid2[i] * (signs & kmask_iq2xs[i+4] ? -1.f : 1.f);
|
|
4511
|
+
}
|
|
4512
|
+
grid1 = (constant uint8_t *)(iq3xxs_grid + q3[4*il+2]);
|
|
4513
|
+
grid2 = (constant uint8_t *)(iq3xxs_grid + q3[4*il+3]);
|
|
4514
|
+
signs = ksigns_iq2xs[(aux32 >> (14*il+7)) & 127];
|
|
4515
|
+
for (int i = 0; i < 4; ++i) {
|
|
4516
|
+
reg[2][i] = dl * grid1[i] * (signs & kmask_iq2xs[i+0] ? -1.f : 1.f);
|
|
4517
|
+
reg[3][i] = dl * grid2[i] * (signs & kmask_iq2xs[i+4] ? -1.f : 1.f);
|
|
4518
|
+
}
|
|
4519
|
+
}
|
|
4520
|
+
|
|
4290
4521
|
template<typename block_q, short nl, void (*dequantize_func)(device const block_q *, short, thread float4x4 &)>
|
|
4291
4522
|
kernel void kernel_get_rows(
|
|
4292
4523
|
device const void * src0,
|
|
@@ -4828,6 +5059,7 @@ template [[host_name("kernel_get_rows_q5_K")]] kernel get_rows_t kernel_get_rows
|
|
|
4828
5059
|
template [[host_name("kernel_get_rows_q6_K")]] kernel get_rows_t kernel_get_rows<block_q6_K, QK_NL, dequantize_q6_K>;
|
|
4829
5060
|
template [[host_name("kernel_get_rows_iq2_xxs")]] kernel get_rows_t kernel_get_rows<block_iq2_xxs, QK_NL, dequantize_iq2_xxs>;
|
|
4830
5061
|
template [[host_name("kernel_get_rows_iq2_xs")]] kernel get_rows_t kernel_get_rows<block_iq2_xs, QK_NL, dequantize_iq2_xs>;
|
|
5062
|
+
template [[host_name("kernel_get_rows_iq3_xxs")]] kernel get_rows_t kernel_get_rows<block_iq3_xxs, QK_NL, dequantize_iq3_xxs>;
|
|
4831
5063
|
|
|
4832
5064
|
//
|
|
4833
5065
|
// matrix-matrix multiplication
|
|
@@ -4866,6 +5098,7 @@ template [[host_name("kernel_mul_mm_q5_K_f32")]] kernel mat_mm_t kernel_mul_mm<b
|
|
|
4866
5098
|
template [[host_name("kernel_mul_mm_q6_K_f32")]] kernel mat_mm_t kernel_mul_mm<block_q6_K, QK_NL, dequantize_q6_K>;
|
|
4867
5099
|
template [[host_name("kernel_mul_mm_iq2_xxs_f32")]] kernel mat_mm_t kernel_mul_mm<block_iq2_xxs, QK_NL, dequantize_iq2_xxs>;
|
|
4868
5100
|
template [[host_name("kernel_mul_mm_iq2_xs_f32")]] kernel mat_mm_t kernel_mul_mm<block_iq2_xs, QK_NL, dequantize_iq2_xs>;
|
|
5101
|
+
template [[host_name("kernel_mul_mm_iq3_xxs_f32")]] kernel mat_mm_t kernel_mul_mm<block_iq3_xxs, QK_NL, dequantize_iq3_xxs>;
|
|
4869
5102
|
|
|
4870
5103
|
//
|
|
4871
5104
|
// indirect matrix-matrix multiplication
|
|
@@ -4916,6 +5149,7 @@ template [[host_name("kernel_mul_mm_id_q5_K_f32")]] kernel mat_mm_id_t kernel_mu
|
|
|
4916
5149
|
template [[host_name("kernel_mul_mm_id_q6_K_f32")]] kernel mat_mm_id_t kernel_mul_mm_id<block_q6_K, QK_NL, dequantize_q6_K>;
|
|
4917
5150
|
template [[host_name("kernel_mul_mm_id_iq2_xxs_f32")]] kernel mat_mm_id_t kernel_mul_mm_id<block_iq2_xxs, QK_NL, dequantize_iq2_xxs>;
|
|
4918
5151
|
template [[host_name("kernel_mul_mm_id_iq2_xs_f32")]] kernel mat_mm_id_t kernel_mul_mm_id<block_iq2_xs, QK_NL, dequantize_iq2_xs>;
|
|
5152
|
+
template [[host_name("kernel_mul_mm_id_iq3_xxs_f32")]] kernel mat_mm_id_t kernel_mul_mm_id<block_iq3_xxs, QK_NL, dequantize_iq3_xxs>;
|
|
4919
5153
|
|
|
4920
5154
|
//
|
|
4921
5155
|
// matrix-vector multiplication
|
|
@@ -5818,3 +6052,68 @@ kernel void kernel_mul_mv_id_iq2_xs_f32(
|
|
|
5818
6052
|
tiisg,
|
|
5819
6053
|
sgitg);
|
|
5820
6054
|
}
|
|
6055
|
+
|
|
6056
|
+
[[host_name("kernel_mul_mv_id_iq3_xxs_f32")]]
|
|
6057
|
+
kernel void kernel_mul_mv_id_iq3_xxs_f32(
|
|
6058
|
+
device const char * ids,
|
|
6059
|
+
device const char * src1,
|
|
6060
|
+
device float * dst,
|
|
6061
|
+
constant uint64_t & nbi1,
|
|
6062
|
+
constant int64_t & ne00,
|
|
6063
|
+
constant int64_t & ne01,
|
|
6064
|
+
constant int64_t & ne02,
|
|
6065
|
+
constant uint64_t & nb00,
|
|
6066
|
+
constant uint64_t & nb01,
|
|
6067
|
+
constant uint64_t & nb02,
|
|
6068
|
+
constant int64_t & ne10,
|
|
6069
|
+
constant int64_t & ne11,
|
|
6070
|
+
constant int64_t & ne12,
|
|
6071
|
+
constant int64_t & ne13,
|
|
6072
|
+
constant uint64_t & nb10,
|
|
6073
|
+
constant uint64_t & nb11,
|
|
6074
|
+
constant uint64_t & nb12,
|
|
6075
|
+
constant int64_t & ne0,
|
|
6076
|
+
constant int64_t & ne1,
|
|
6077
|
+
constant uint64_t & nb1,
|
|
6078
|
+
constant uint & r2,
|
|
6079
|
+
constant uint & r3,
|
|
6080
|
+
constant int & idx,
|
|
6081
|
+
device const char * src00,
|
|
6082
|
+
device const char * src01,
|
|
6083
|
+
device const char * src02,
|
|
6084
|
+
device const char * src03,
|
|
6085
|
+
device const char * src04,
|
|
6086
|
+
device const char * src05,
|
|
6087
|
+
device const char * src06,
|
|
6088
|
+
device const char * src07,
|
|
6089
|
+
threadgroup int8_t * shared_values [[threadgroup(0)]],
|
|
6090
|
+
uint3 tgpig[[threadgroup_position_in_grid]],
|
|
6091
|
+
uint tiitg[[thread_index_in_threadgroup]],
|
|
6092
|
+
uint tiisg[[thread_index_in_simdgroup]],
|
|
6093
|
+
uint sgitg[[simdgroup_index_in_threadgroup]]) {
|
|
6094
|
+
device const char * src0[8] = {src00, src01, src02, src03, src04, src05, src06, src07};
|
|
6095
|
+
|
|
6096
|
+
const int64_t bid = tgpig.z/(ne12*ne13);
|
|
6097
|
+
|
|
6098
|
+
tgpig.z = tgpig.z%(ne12*ne13);
|
|
6099
|
+
|
|
6100
|
+
const int32_t id = ((device int32_t *) (ids + bid*nbi1))[idx];
|
|
6101
|
+
|
|
6102
|
+
kernel_mul_mv_iq3_xxs_f32_impl(
|
|
6103
|
+
src0[id],
|
|
6104
|
+
(device const float *) (src1 + bid*nb11),
|
|
6105
|
+
dst + bid*ne0,
|
|
6106
|
+
ne00,
|
|
6107
|
+
ne01,
|
|
6108
|
+
ne02,
|
|
6109
|
+
ne10,
|
|
6110
|
+
ne12,
|
|
6111
|
+
ne0,
|
|
6112
|
+
ne1,
|
|
6113
|
+
r2,
|
|
6114
|
+
r3,
|
|
6115
|
+
shared_values,
|
|
6116
|
+
tgpig,
|
|
6117
|
+
tiisg,
|
|
6118
|
+
sgitg);
|
|
6119
|
+
}
|
|
Binary file
|
|
@@ -1775,9 +1775,29 @@ kernel void kernel_rope(
|
|
|
1775
1775
|
template [[host_name("kernel_rope_f32")]] kernel rope_t kernel_rope<float>;
|
|
1776
1776
|
template [[host_name("kernel_rope_f16")]] kernel rope_t kernel_rope<half>;
|
|
1777
1777
|
|
|
1778
|
-
kernel void kernel_im2col_f16(
|
|
1778
|
+
typedef void (im2col_t)(
|
|
1779
1779
|
device const float * x,
|
|
1780
|
-
device half * dst,
|
|
1780
|
+
device char * dst,
|
|
1781
|
+
constant int32_t & ofs0,
|
|
1782
|
+
constant int32_t & ofs1,
|
|
1783
|
+
constant int32_t & IW,
|
|
1784
|
+
constant int32_t & IH,
|
|
1785
|
+
constant int32_t & CHW,
|
|
1786
|
+
constant int32_t & s0,
|
|
1787
|
+
constant int32_t & s1,
|
|
1788
|
+
constant int32_t & p0,
|
|
1789
|
+
constant int32_t & p1,
|
|
1790
|
+
constant int32_t & d0,
|
|
1791
|
+
constant int32_t & d1,
|
|
1792
|
+
uint3 tgpig[[threadgroup_position_in_grid]],
|
|
1793
|
+
uint3 tgpg[[threadgroups_per_grid]],
|
|
1794
|
+
uint3 tpitg[[thread_position_in_threadgroup]],
|
|
1795
|
+
uint3 ntg[[threads_per_threadgroup]]);
|
|
1796
|
+
|
|
1797
|
+
template <typename T>
|
|
1798
|
+
kernel void kernel_im2col(
|
|
1799
|
+
device const float * x,
|
|
1800
|
+
device char * dst,
|
|
1781
1801
|
constant int32_t & ofs0,
|
|
1782
1802
|
constant int32_t & ofs1,
|
|
1783
1803
|
constant int32_t & IW,
|
|
@@ -1800,14 +1820,19 @@ kernel void kernel_im2col_f16(
|
|
|
1800
1820
|
(tpitg[0] * tgpg[1] * tgpg[2] + tgpig[1] * tgpg[2] + tgpig[2]) * CHW +
|
|
1801
1821
|
(tgpig[0] * (ntg[1] * ntg[2]) + tpitg[1] * ntg[2] + tpitg[2]);
|
|
1802
1822
|
|
|
1823
|
+
device T * pdst = (device T *) (dst);
|
|
1824
|
+
|
|
1803
1825
|
if (iih < 0 || iih >= IH || iiw < 0 || iiw >= IW) {
|
|
1804
|
-
dst[offset_dst] = 0.0f;
|
|
1826
|
+
pdst[offset_dst] = 0.0f;
|
|
1805
1827
|
} else {
|
|
1806
1828
|
const int32_t offset_src = tpitg[0] * ofs0 + tgpig[0] * ofs1;
|
|
1807
|
-
dst[offset_dst] = x[offset_src + iih * IW + iiw];
|
|
1829
|
+
pdst[offset_dst] = x[offset_src + iih * IW + iiw];
|
|
1808
1830
|
}
|
|
1809
1831
|
}
|
|
1810
1832
|
|
|
1833
|
+
template [[host_name("kernel_im2col_f32")]] kernel im2col_t kernel_im2col<float>;
|
|
1834
|
+
template [[host_name("kernel_im2col_f16")]] kernel im2col_t kernel_im2col<half>;
|
|
1835
|
+
|
|
1811
1836
|
kernel void kernel_upscale_f32(
|
|
1812
1837
|
device const char * src0,
|
|
1813
1838
|
device char * dst,
|
|
@@ -2459,6 +2484,12 @@ typedef struct {
|
|
|
2459
2484
|
} block_iq2_xs;
|
|
2460
2485
|
// 74 bytes / block for QK_K = 256, so 2.3125 bpw
|
|
2461
2486
|
|
|
2487
|
+
typedef struct {
|
|
2488
|
+
half d;
|
|
2489
|
+
uint8_t qs[3*QK_K/8];
|
|
2490
|
+
} block_iq3_xxs;
|
|
2491
|
+
// 98 bytes / block for QK_K = 256, so 3.0625 bpw
|
|
2492
|
+
|
|
2462
2493
|
//====================================== dot products =========================
|
|
2463
2494
|
|
|
2464
2495
|
void kernel_mul_mv_q2_K_f32_impl(
|
|
@@ -3681,6 +3712,42 @@ constexpr constant static uint64_t iq2xs_grid[512] = {
|
|
|
3681
3712
|
0x2b2b2b2b082b2b08, 0x2b2b2b2b082b2b2b, 0x2b2b2b2b2b190819, 0x2b2b2b2b2b2b2b2b,
|
|
3682
3713
|
};
|
|
3683
3714
|
|
|
3715
|
+
constexpr constant static uint32_t iq3xxs_grid[256] = {
|
|
3716
|
+
0x04040404, 0x04040414, 0x04040424, 0x04040c0c, 0x04040c1c, 0x04040c3e, 0x04041404, 0x04041414,
|
|
3717
|
+
0x04041c0c, 0x04042414, 0x04043e1c, 0x04043e2c, 0x040c040c, 0x040c041c, 0x040c0c04, 0x040c0c14,
|
|
3718
|
+
0x040c140c, 0x040c142c, 0x040c1c04, 0x040c1c14, 0x040c240c, 0x040c2c24, 0x040c3e04, 0x04140404,
|
|
3719
|
+
0x04140414, 0x04140424, 0x04140c0c, 0x04141404, 0x04141414, 0x04141c0c, 0x04141c1c, 0x04141c3e,
|
|
3720
|
+
0x04142c0c, 0x04142c3e, 0x04143e2c, 0x041c040c, 0x041c043e, 0x041c0c04, 0x041c0c14, 0x041c142c,
|
|
3721
|
+
0x041c3e04, 0x04240c1c, 0x04241c3e, 0x04242424, 0x04242c3e, 0x04243e1c, 0x04243e2c, 0x042c040c,
|
|
3722
|
+
0x042c043e, 0x042c1c14, 0x042c2c14, 0x04341c2c, 0x04343424, 0x043e0c04, 0x043e0c24, 0x043e0c34,
|
|
3723
|
+
0x043e241c, 0x043e340c, 0x0c04040c, 0x0c04041c, 0x0c040c04, 0x0c040c14, 0x0c04140c, 0x0c04141c,
|
|
3724
|
+
0x0c041c04, 0x0c041c14, 0x0c041c24, 0x0c04243e, 0x0c042c04, 0x0c0c0404, 0x0c0c0414, 0x0c0c0c0c,
|
|
3725
|
+
0x0c0c1404, 0x0c0c1414, 0x0c14040c, 0x0c14041c, 0x0c140c04, 0x0c140c14, 0x0c14140c, 0x0c141c04,
|
|
3726
|
+
0x0c143e14, 0x0c1c0404, 0x0c1c0414, 0x0c1c1404, 0x0c1c1c0c, 0x0c1c2434, 0x0c1c3434, 0x0c24040c,
|
|
3727
|
+
0x0c24042c, 0x0c242c04, 0x0c2c1404, 0x0c2c1424, 0x0c2c2434, 0x0c2c3e0c, 0x0c34042c, 0x0c3e1414,
|
|
3728
|
+
0x0c3e2404, 0x14040404, 0x14040414, 0x14040c0c, 0x14040c1c, 0x14041404, 0x14041414, 0x14041434,
|
|
3729
|
+
0x14041c0c, 0x14042414, 0x140c040c, 0x140c041c, 0x140c042c, 0x140c0c04, 0x140c0c14, 0x140c140c,
|
|
3730
|
+
0x140c1c04, 0x140c341c, 0x140c343e, 0x140c3e04, 0x14140404, 0x14140414, 0x14140c0c, 0x14140c3e,
|
|
3731
|
+
0x14141404, 0x14141414, 0x14141c3e, 0x14142404, 0x14142c2c, 0x141c040c, 0x141c0c04, 0x141c0c24,
|
|
3732
|
+
0x141c3e04, 0x141c3e24, 0x14241c2c, 0x14242c1c, 0x142c041c, 0x142c143e, 0x142c240c, 0x142c3e24,
|
|
3733
|
+
0x143e040c, 0x143e041c, 0x143e0c34, 0x143e242c, 0x1c04040c, 0x1c040c04, 0x1c040c14, 0x1c04140c,
|
|
3734
|
+
0x1c04141c, 0x1c042c04, 0x1c04342c, 0x1c043e14, 0x1c0c0404, 0x1c0c0414, 0x1c0c1404, 0x1c0c1c0c,
|
|
3735
|
+
0x1c0c2424, 0x1c0c2434, 0x1c14040c, 0x1c14041c, 0x1c140c04, 0x1c14142c, 0x1c142c14, 0x1c143e14,
|
|
3736
|
+
0x1c1c0c0c, 0x1c1c1c1c, 0x1c241c04, 0x1c24243e, 0x1c243e14, 0x1c2c0404, 0x1c2c0434, 0x1c2c1414,
|
|
3737
|
+
0x1c2c2c2c, 0x1c340c24, 0x1c341c34, 0x1c34341c, 0x1c3e1c1c, 0x1c3e3404, 0x24040424, 0x24040c3e,
|
|
3738
|
+
0x24041c2c, 0x24041c3e, 0x24042c1c, 0x24042c3e, 0x240c3e24, 0x24141404, 0x24141c3e, 0x24142404,
|
|
3739
|
+
0x24143404, 0x24143434, 0x241c043e, 0x241c242c, 0x24240424, 0x24242c0c, 0x24243424, 0x242c142c,
|
|
3740
|
+
0x242c241c, 0x242c3e04, 0x243e042c, 0x243e0c04, 0x243e0c14, 0x243e1c04, 0x2c040c14, 0x2c04240c,
|
|
3741
|
+
0x2c043e04, 0x2c0c0404, 0x2c0c0434, 0x2c0c1434, 0x2c0c2c2c, 0x2c140c24, 0x2c141c14, 0x2c143e14,
|
|
3742
|
+
0x2c1c0414, 0x2c1c2c1c, 0x2c240c04, 0x2c24141c, 0x2c24143e, 0x2c243e14, 0x2c2c0414, 0x2c2c1c0c,
|
|
3743
|
+
0x2c342c04, 0x2c3e1424, 0x2c3e2414, 0x34041424, 0x34042424, 0x34042434, 0x34043424, 0x340c140c,
|
|
3744
|
+
0x340c340c, 0x34140c3e, 0x34143424, 0x341c1c04, 0x341c1c34, 0x34242424, 0x342c042c, 0x342c2c14,
|
|
3745
|
+
0x34341c1c, 0x343e041c, 0x343e140c, 0x3e04041c, 0x3e04042c, 0x3e04043e, 0x3e040c04, 0x3e041c14,
|
|
3746
|
+
0x3e042c14, 0x3e0c1434, 0x3e0c2404, 0x3e140c14, 0x3e14242c, 0x3e142c14, 0x3e1c0404, 0x3e1c0c2c,
|
|
3747
|
+
0x3e1c1c1c, 0x3e1c3404, 0x3e24140c, 0x3e24240c, 0x3e2c0404, 0x3e2c0414, 0x3e2c1424, 0x3e341c04,
|
|
3748
|
+
};
|
|
3749
|
+
|
|
3750
|
+
|
|
3684
3751
|
constexpr constant static uint8_t ksigns_iq2xs[128] = {
|
|
3685
3752
|
0, 129, 130, 3, 132, 5, 6, 135, 136, 9, 10, 139, 12, 141, 142, 15,
|
|
3686
3753
|
144, 17, 18, 147, 20, 149, 150, 23, 24, 153, 154, 27, 156, 29, 30, 159,
|
|
@@ -3970,6 +4037,143 @@ kernel void kernel_mul_mv_iq2_xs_f32(
|
|
|
3970
4037
|
kernel_mul_mv_iq2_xs_f32_impl(src0, src1, dst, ne00, ne01, ne02, ne10, ne12, ne0, ne1, r2, r3, shared_values, tgpig, tiisg, sgitg);
|
|
3971
4038
|
}
|
|
3972
4039
|
|
|
4040
|
+
void kernel_mul_mv_iq3_xxs_f32_impl(
|
|
4041
|
+
device const void * src0,
|
|
4042
|
+
device const float * src1,
|
|
4043
|
+
device float * dst,
|
|
4044
|
+
constant int64_t & ne00,
|
|
4045
|
+
constant int64_t & ne01,
|
|
4046
|
+
constant int64_t & ne02,
|
|
4047
|
+
constant int64_t & ne10,
|
|
4048
|
+
constant int64_t & ne12,
|
|
4049
|
+
constant int64_t & ne0,
|
|
4050
|
+
constant int64_t & ne1,
|
|
4051
|
+
constant uint & r2,
|
|
4052
|
+
constant uint & r3,
|
|
4053
|
+
threadgroup int8_t * shared_values [[threadgroup(0)]],
|
|
4054
|
+
uint3 tgpig[[threadgroup_position_in_grid]],
|
|
4055
|
+
uint tiisg[[thread_index_in_simdgroup]],
|
|
4056
|
+
uint sgitg[[simdgroup_index_in_threadgroup]]) {
|
|
4057
|
+
|
|
4058
|
+
const int nb = ne00/QK_K;
|
|
4059
|
+
const int r0 = tgpig.x;
|
|
4060
|
+
const int r1 = tgpig.y;
|
|
4061
|
+
const int im = tgpig.z;
|
|
4062
|
+
|
|
4063
|
+
const int first_row = (r0 * N_SIMDGROUP + sgitg) * N_DST;
|
|
4064
|
+
const int ib_row = first_row * nb;
|
|
4065
|
+
|
|
4066
|
+
const uint i12 = im%ne12;
|
|
4067
|
+
const uint i13 = im/ne12;
|
|
4068
|
+
|
|
4069
|
+
const uint offset0 = (i12/r2)*(nb*ne01) + (i13/r3)*(nb*ne01*ne02);
|
|
4070
|
+
|
|
4071
|
+
device const block_iq3_xxs * x = (device const block_iq3_xxs *) src0 + ib_row + offset0;
|
|
4072
|
+
device const float * y = (device const float *) src1 + r1*ne10 + im*ne00*ne1;
|
|
4073
|
+
|
|
4074
|
+
float yl[32];
|
|
4075
|
+
float sumf[N_DST]={0.f}, all_sum;
|
|
4076
|
+
|
|
4077
|
+
const int nb32 = nb * (QK_K / 32);
|
|
4078
|
+
|
|
4079
|
+
threadgroup uint32_t * values = (threadgroup uint32_t *)shared_values;
|
|
4080
|
+
threadgroup uint8_t * shared_signs = (threadgroup uint8_t *)(values + 256);
|
|
4081
|
+
{
|
|
4082
|
+
int nval = 4;
|
|
4083
|
+
int pos = (32*sgitg + tiisg)*nval;
|
|
4084
|
+
for (int i = 0; i < nval; ++i) values[pos + i] = iq3xxs_grid[pos + i];
|
|
4085
|
+
nval = 2;
|
|
4086
|
+
pos = (32*sgitg + tiisg)*nval;
|
|
4087
|
+
for (int i = 0; i < nval; ++i) shared_signs[pos+i] = ksigns_iq2xs[pos+i];
|
|
4088
|
+
threadgroup_barrier(mem_flags::mem_threadgroup);
|
|
4089
|
+
}
|
|
4090
|
+
|
|
4091
|
+
#if QK_K == 256
|
|
4092
|
+
const int ix = tiisg;
|
|
4093
|
+
|
|
4094
|
+
device const float * y4 = y + 32 * ix;
|
|
4095
|
+
|
|
4096
|
+
for (int ib32 = ix; ib32 < nb32; ib32 += 32) {
|
|
4097
|
+
|
|
4098
|
+
for (int i = 0; i < 32; ++i) {
|
|
4099
|
+
yl[i] = y4[i];
|
|
4100
|
+
}
|
|
4101
|
+
|
|
4102
|
+
const int ibl = ib32 / (QK_K / 32);
|
|
4103
|
+
const int ib = ib32 % (QK_K / 32);
|
|
4104
|
+
|
|
4105
|
+
device const block_iq3_xxs * xr = x + ibl;
|
|
4106
|
+
device const uint8_t * q3 = xr->qs + 8 * ib;
|
|
4107
|
+
device const uint16_t * gas = (device const uint16_t *)(xr->qs + QK_K/4) + 2 * ib;
|
|
4108
|
+
device const half * dh = &xr->d;
|
|
4109
|
+
|
|
4110
|
+
for (int row = 0; row < N_DST; row++) {
|
|
4111
|
+
|
|
4112
|
+
const float db = dh[0];
|
|
4113
|
+
const uint32_t aux32 = gas[0] | (gas[1] << 16);
|
|
4114
|
+
const float d = db * (0.5f + (aux32 >> 28));
|
|
4115
|
+
|
|
4116
|
+
float2 sum = {0};
|
|
4117
|
+
for (int l = 0; l < 4; ++l) {
|
|
4118
|
+
const threadgroup uint8_t * grid1 = (const threadgroup uint8_t *)(values + q3[2*l+0]);
|
|
4119
|
+
const threadgroup uint8_t * grid2 = (const threadgroup uint8_t *)(values + q3[2*l+1]);
|
|
4120
|
+
const uint8_t signs = shared_signs[(aux32 >> 7*l) & 127];
|
|
4121
|
+
for (int j = 0; j < 4; ++j) {
|
|
4122
|
+
sum[0] += yl[8*l + j + 0] * grid1[j] * (signs & kmask_iq2xs[j+0] ? -1.f : 1.f);
|
|
4123
|
+
sum[1] += yl[8*l + j + 4] * grid2[j] * (signs & kmask_iq2xs[j+4] ? -1.f : 1.f);
|
|
4124
|
+
}
|
|
4125
|
+
}
|
|
4126
|
+
sumf[row] += d * (sum[0] + sum[1]);
|
|
4127
|
+
|
|
4128
|
+
dh += nb*sizeof(block_iq3_xxs)/2;
|
|
4129
|
+
q3 += nb*sizeof(block_iq3_xxs);
|
|
4130
|
+
gas += nb*sizeof(block_iq3_xxs)/2;
|
|
4131
|
+
}
|
|
4132
|
+
|
|
4133
|
+
y4 += 32 * 32;
|
|
4134
|
+
}
|
|
4135
|
+
#else
|
|
4136
|
+
// TODO
|
|
4137
|
+
#endif
|
|
4138
|
+
|
|
4139
|
+
for (int row = 0; row < N_DST; ++row) {
|
|
4140
|
+
all_sum = simd_sum(sumf[row]);
|
|
4141
|
+
if (tiisg == 0) {
|
|
4142
|
+
dst[r1*ne0 + im*ne0*ne1 + first_row + row] = all_sum * 0.5f;
|
|
4143
|
+
}
|
|
4144
|
+
}
|
|
4145
|
+
}
|
|
4146
|
+
|
|
4147
|
+
[[host_name("kernel_mul_mv_iq3_xxs_f32")]]
|
|
4148
|
+
kernel void kernel_mul_mv_iq3_xxs_f32(
|
|
4149
|
+
device const void * src0,
|
|
4150
|
+
device const float * src1,
|
|
4151
|
+
device float * dst,
|
|
4152
|
+
constant int64_t & ne00,
|
|
4153
|
+
constant int64_t & ne01,
|
|
4154
|
+
constant int64_t & ne02,
|
|
4155
|
+
constant uint64_t & nb00,
|
|
4156
|
+
constant uint64_t & nb01,
|
|
4157
|
+
constant uint64_t & nb02,
|
|
4158
|
+
constant int64_t & ne10,
|
|
4159
|
+
constant int64_t & ne11,
|
|
4160
|
+
constant int64_t & ne12,
|
|
4161
|
+
constant uint64_t & nb10,
|
|
4162
|
+
constant uint64_t & nb11,
|
|
4163
|
+
constant uint64_t & nb12,
|
|
4164
|
+
constant int64_t & ne0,
|
|
4165
|
+
constant int64_t & ne1,
|
|
4166
|
+
constant uint & r2,
|
|
4167
|
+
constant uint & r3,
|
|
4168
|
+
threadgroup int8_t * shared_values [[threadgroup(0)]],
|
|
4169
|
+
uint3 tgpig[[threadgroup_position_in_grid]],
|
|
4170
|
+
uint tiisg[[thread_index_in_simdgroup]],
|
|
4171
|
+
uint sgitg[[simdgroup_index_in_threadgroup]]) {
|
|
4172
|
+
|
|
4173
|
+
kernel_mul_mv_iq3_xxs_f32_impl(src0, src1, dst, ne00, ne01, ne02, ne10, ne12, ne0, ne1, r2, r3, shared_values, tgpig, tiisg, sgitg);
|
|
4174
|
+
}
|
|
4175
|
+
|
|
4176
|
+
|
|
3973
4177
|
//============================= templates and their specializations =============================
|
|
3974
4178
|
|
|
3975
4179
|
// NOTE: this is not dequantizing - we are simply fitting the template
|
|
@@ -4287,6 +4491,33 @@ void dequantize_iq2_xs(device const block_iq2_xs * xb, short il, thread type4x4
|
|
|
4287
4491
|
}
|
|
4288
4492
|
}
|
|
4289
4493
|
|
|
4494
|
+
template <typename type4x4>
|
|
4495
|
+
void dequantize_iq3_xxs(device const block_iq3_xxs * xb, short il, thread type4x4 & reg) {
|
|
4496
|
+
// il is 0...15 for QK_K = 256 => index of block of 32 is il/2
|
|
4497
|
+
const float d = xb->d;
|
|
4498
|
+
const int ib32 = il/2;
|
|
4499
|
+
il = il%2;
|
|
4500
|
+
// il = 0 or 1. il = 0 processes the first 16 quants in a block of 32, il = 1 the second 16
|
|
4501
|
+
device const uint8_t * q3 = xb->qs + 8*ib32;
|
|
4502
|
+
device const uint16_t * gas = (device const uint16_t *)(xb->qs + QK_K/4) + 2*ib32;
|
|
4503
|
+
const uint32_t aux32 = gas[0] | (gas[1] << 16);
|
|
4504
|
+
const float dl = d * (0.5f + (aux32 >> 28)) * 0.5f;
|
|
4505
|
+
constant uint8_t * grid1 = (constant uint8_t *)(iq3xxs_grid + q3[4*il+0]);
|
|
4506
|
+
constant uint8_t * grid2 = (constant uint8_t *)(iq3xxs_grid + q3[4*il+1]);
|
|
4507
|
+
uint8_t signs = ksigns_iq2xs[(aux32 >> 14*il) & 127];
|
|
4508
|
+
for (int i = 0; i < 4; ++i) {
|
|
4509
|
+
reg[0][i] = dl * grid1[i] * (signs & kmask_iq2xs[i+0] ? -1.f : 1.f);
|
|
4510
|
+
reg[1][i] = dl * grid2[i] * (signs & kmask_iq2xs[i+4] ? -1.f : 1.f);
|
|
4511
|
+
}
|
|
4512
|
+
grid1 = (constant uint8_t *)(iq3xxs_grid + q3[4*il+2]);
|
|
4513
|
+
grid2 = (constant uint8_t *)(iq3xxs_grid + q3[4*il+3]);
|
|
4514
|
+
signs = ksigns_iq2xs[(aux32 >> (14*il+7)) & 127];
|
|
4515
|
+
for (int i = 0; i < 4; ++i) {
|
|
4516
|
+
reg[2][i] = dl * grid1[i] * (signs & kmask_iq2xs[i+0] ? -1.f : 1.f);
|
|
4517
|
+
reg[3][i] = dl * grid2[i] * (signs & kmask_iq2xs[i+4] ? -1.f : 1.f);
|
|
4518
|
+
}
|
|
4519
|
+
}
|
|
4520
|
+
|
|
4290
4521
|
template<typename block_q, short nl, void (*dequantize_func)(device const block_q *, short, thread float4x4 &)>
|
|
4291
4522
|
kernel void kernel_get_rows(
|
|
4292
4523
|
device const void * src0,
|
|
@@ -4828,6 +5059,7 @@ template [[host_name("kernel_get_rows_q5_K")]] kernel get_rows_t kernel_get_rows
|
|
|
4828
5059
|
template [[host_name("kernel_get_rows_q6_K")]] kernel get_rows_t kernel_get_rows<block_q6_K, QK_NL, dequantize_q6_K>;
|
|
4829
5060
|
template [[host_name("kernel_get_rows_iq2_xxs")]] kernel get_rows_t kernel_get_rows<block_iq2_xxs, QK_NL, dequantize_iq2_xxs>;
|
|
4830
5061
|
template [[host_name("kernel_get_rows_iq2_xs")]] kernel get_rows_t kernel_get_rows<block_iq2_xs, QK_NL, dequantize_iq2_xs>;
|
|
5062
|
+
template [[host_name("kernel_get_rows_iq3_xxs")]] kernel get_rows_t kernel_get_rows<block_iq3_xxs, QK_NL, dequantize_iq3_xxs>;
|
|
4831
5063
|
|
|
4832
5064
|
//
|
|
4833
5065
|
// matrix-matrix multiplication
|
|
@@ -4866,6 +5098,7 @@ template [[host_name("kernel_mul_mm_q5_K_f32")]] kernel mat_mm_t kernel_mul_mm<b
|
|
|
4866
5098
|
template [[host_name("kernel_mul_mm_q6_K_f32")]] kernel mat_mm_t kernel_mul_mm<block_q6_K, QK_NL, dequantize_q6_K>;
|
|
4867
5099
|
template [[host_name("kernel_mul_mm_iq2_xxs_f32")]] kernel mat_mm_t kernel_mul_mm<block_iq2_xxs, QK_NL, dequantize_iq2_xxs>;
|
|
4868
5100
|
template [[host_name("kernel_mul_mm_iq2_xs_f32")]] kernel mat_mm_t kernel_mul_mm<block_iq2_xs, QK_NL, dequantize_iq2_xs>;
|
|
5101
|
+
template [[host_name("kernel_mul_mm_iq3_xxs_f32")]] kernel mat_mm_t kernel_mul_mm<block_iq3_xxs, QK_NL, dequantize_iq3_xxs>;
|
|
4869
5102
|
|
|
4870
5103
|
//
|
|
4871
5104
|
// indirect matrix-matrix multiplication
|
|
@@ -4916,6 +5149,7 @@ template [[host_name("kernel_mul_mm_id_q5_K_f32")]] kernel mat_mm_id_t kernel_mu
|
|
|
4916
5149
|
template [[host_name("kernel_mul_mm_id_q6_K_f32")]] kernel mat_mm_id_t kernel_mul_mm_id<block_q6_K, QK_NL, dequantize_q6_K>;
|
|
4917
5150
|
template [[host_name("kernel_mul_mm_id_iq2_xxs_f32")]] kernel mat_mm_id_t kernel_mul_mm_id<block_iq2_xxs, QK_NL, dequantize_iq2_xxs>;
|
|
4918
5151
|
template [[host_name("kernel_mul_mm_id_iq2_xs_f32")]] kernel mat_mm_id_t kernel_mul_mm_id<block_iq2_xs, QK_NL, dequantize_iq2_xs>;
|
|
5152
|
+
template [[host_name("kernel_mul_mm_id_iq3_xxs_f32")]] kernel mat_mm_id_t kernel_mul_mm_id<block_iq3_xxs, QK_NL, dequantize_iq3_xxs>;
|
|
4919
5153
|
|
|
4920
5154
|
//
|
|
4921
5155
|
// matrix-vector multiplication
|
|
@@ -5818,3 +6052,68 @@ kernel void kernel_mul_mv_id_iq2_xs_f32(
|
|
|
5818
6052
|
tiisg,
|
|
5819
6053
|
sgitg);
|
|
5820
6054
|
}
|
|
6055
|
+
|
|
6056
|
+
[[host_name("kernel_mul_mv_id_iq3_xxs_f32")]]
|
|
6057
|
+
kernel void kernel_mul_mv_id_iq3_xxs_f32(
|
|
6058
|
+
device const char * ids,
|
|
6059
|
+
device const char * src1,
|
|
6060
|
+
device float * dst,
|
|
6061
|
+
constant uint64_t & nbi1,
|
|
6062
|
+
constant int64_t & ne00,
|
|
6063
|
+
constant int64_t & ne01,
|
|
6064
|
+
constant int64_t & ne02,
|
|
6065
|
+
constant uint64_t & nb00,
|
|
6066
|
+
constant uint64_t & nb01,
|
|
6067
|
+
constant uint64_t & nb02,
|
|
6068
|
+
constant int64_t & ne10,
|
|
6069
|
+
constant int64_t & ne11,
|
|
6070
|
+
constant int64_t & ne12,
|
|
6071
|
+
constant int64_t & ne13,
|
|
6072
|
+
constant uint64_t & nb10,
|
|
6073
|
+
constant uint64_t & nb11,
|
|
6074
|
+
constant uint64_t & nb12,
|
|
6075
|
+
constant int64_t & ne0,
|
|
6076
|
+
constant int64_t & ne1,
|
|
6077
|
+
constant uint64_t & nb1,
|
|
6078
|
+
constant uint & r2,
|
|
6079
|
+
constant uint & r3,
|
|
6080
|
+
constant int & idx,
|
|
6081
|
+
device const char * src00,
|
|
6082
|
+
device const char * src01,
|
|
6083
|
+
device const char * src02,
|
|
6084
|
+
device const char * src03,
|
|
6085
|
+
device const char * src04,
|
|
6086
|
+
device const char * src05,
|
|
6087
|
+
device const char * src06,
|
|
6088
|
+
device const char * src07,
|
|
6089
|
+
threadgroup int8_t * shared_values [[threadgroup(0)]],
|
|
6090
|
+
uint3 tgpig[[threadgroup_position_in_grid]],
|
|
6091
|
+
uint tiitg[[thread_index_in_threadgroup]],
|
|
6092
|
+
uint tiisg[[thread_index_in_simdgroup]],
|
|
6093
|
+
uint sgitg[[simdgroup_index_in_threadgroup]]) {
|
|
6094
|
+
device const char * src0[8] = {src00, src01, src02, src03, src04, src05, src06, src07};
|
|
6095
|
+
|
|
6096
|
+
const int64_t bid = tgpig.z/(ne12*ne13);
|
|
6097
|
+
|
|
6098
|
+
tgpig.z = tgpig.z%(ne12*ne13);
|
|
6099
|
+
|
|
6100
|
+
const int32_t id = ((device int32_t *) (ids + bid*nbi1))[idx];
|
|
6101
|
+
|
|
6102
|
+
kernel_mul_mv_iq3_xxs_f32_impl(
|
|
6103
|
+
src0[id],
|
|
6104
|
+
(device const float *) (src1 + bid*nb11),
|
|
6105
|
+
dst + bid*ne0,
|
|
6106
|
+
ne00,
|
|
6107
|
+
ne01,
|
|
6108
|
+
ne02,
|
|
6109
|
+
ne10,
|
|
6110
|
+
ne12,
|
|
6111
|
+
ne0,
|
|
6112
|
+
ne1,
|
|
6113
|
+
r2,
|
|
6114
|
+
r3,
|
|
6115
|
+
shared_values,
|
|
6116
|
+
tgpig,
|
|
6117
|
+
tiisg,
|
|
6118
|
+
sgitg);
|
|
6119
|
+
}
|
|
Binary file
|
|
Binary file
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "node-llama-cpp",
|
|
3
|
-
"version": "2.8.4",
|
|
3
|
+
"version": "2.8.6",
|
|
4
4
|
"description": "Run AI models locally on your machine with node.js bindings for llama.cpp. Force a JSON schema on the model output on the generation level",
|
|
5
5
|
"main": "dist/index.js",
|
|
6
6
|
"type": "module",
|