node-llama-cpp 1.3.0 → 1.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -3
- package/dist/llamaEvaluator/LlamaModel.d.ts +15 -3
- package/dist/llamaEvaluator/LlamaModel.js +3 -3
- package/dist/llamaEvaluator/LlamaModel.js.map +1 -1
- package/llama/addon.cpp +2 -2
- package/llamaBins/linux-arm64-16.node +0 -0
- package/llamaBins/linux-arm64-17.node +0 -0
- package/llamaBins/linux-arm64-18.node +0 -0
- package/llamaBins/linux-arm64-19.node +0 -0
- package/llamaBins/linux-arm64-20.node +0 -0
- package/llamaBins/linux-armv7l-16.node +0 -0
- package/llamaBins/linux-armv7l-17.node +0 -0
- package/llamaBins/linux-armv7l-18.node +0 -0
- package/llamaBins/linux-armv7l-19.node +0 -0
- package/llamaBins/linux-armv7l-20.node +0 -0
- package/llamaBins/linux-ppc64le-16.node +0 -0
- package/llamaBins/linux-ppc64le-17.node +0 -0
- package/llamaBins/linux-ppc64le-18.node +0 -0
- package/llamaBins/linux-ppc64le-19.node +0 -0
- package/llamaBins/linux-ppc64le-20.node +0 -0
- package/llamaBins/linux-x64-16.node +0 -0
- package/llamaBins/linux-x64-17.node +0 -0
- package/llamaBins/linux-x64-18.node +0 -0
- package/llamaBins/linux-x64-19.node +0 -0
- package/llamaBins/linux-x64-20.node +0 -0
- package/llamaBins/mac-arm64-16.node +0 -0
- package/llamaBins/mac-arm64-17.node +0 -0
- package/llamaBins/mac-arm64-18.node +0 -0
- package/llamaBins/mac-arm64-19.node +0 -0
- package/llamaBins/mac-arm64-20.node +0 -0
- package/llamaBins/mac-x64-16.node +0 -0
- package/llamaBins/mac-x64-17.node +0 -0
- package/llamaBins/mac-x64-18.node +0 -0
- package/llamaBins/mac-x64-19.node +0 -0
- package/llamaBins/mac-x64-20.node +0 -0
- package/llamaBins/win-x64-16.node +0 -0
- package/llamaBins/win-x64-17.node +0 -0
- package/llamaBins/win-x64-18.node +0 -0
- package/llamaBins/win-x64-19.node +0 -0
- package/llamaBins/win-x64-20.node +0 -0
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -25,7 +25,7 @@ To disable this behavior set the environment variable `NODE_LLAMA_CPP_SKIP_DOWNL
|
|
|
25
25
|
```typescript
|
|
26
26
|
import {fileURLToPath} from "url";
|
|
27
27
|
import path from "path";
|
|
28
|
-
import {LlamaModel, LlamaChatSession} from "node-llama-cpp";
|
|
28
|
+
import {LlamaModel, LlamaContext, LlamaChatSession} from "node-llama-cpp";
|
|
29
29
|
|
|
30
30
|
const __dirname = path.dirname(fileURLToPath(import.meta.url));
|
|
31
31
|
|
|
@@ -54,7 +54,7 @@ console.log("AI: " + a2);
|
|
|
54
54
|
```typescript
|
|
55
55
|
import {fileURLToPath} from "url";
|
|
56
56
|
import path from "path";
|
|
57
|
-
import {LlamaModel, LlamaChatSession, ChatPromptWrapper} from "node-llama-cpp";
|
|
57
|
+
import {LlamaModel, LlamaContext, LlamaChatSession, ChatPromptWrapper} from "node-llama-cpp";
|
|
58
58
|
|
|
59
59
|
const __dirname = path.dirname(fileURLToPath(import.meta.url));
|
|
60
60
|
|
|
@@ -98,7 +98,7 @@ console.log("AI: " + a2);
|
|
|
98
98
|
```typescript
|
|
99
99
|
import {fileURLToPath} from "url";
|
|
100
100
|
import path from "path";
|
|
101
|
-
import {LlamaModel, LlamaChatSession} from "node-llama-cpp";
|
|
101
|
+
import {LlamaModel, LlamaContext, LlamaChatSession} from "node-llama-cpp";
|
|
102
102
|
|
|
103
103
|
const __dirname = path.dirname(fileURLToPath(import.meta.url));
|
|
104
104
|
|
|
@@ -7,7 +7,7 @@ export declare class LlamaModel {
|
|
|
7
7
|
* @param {number | null} [options.seed] - If null, a random seed will be used
|
|
8
8
|
* @param {number} [options.contextSize] - text context size
|
|
9
9
|
* @param {number} [options.batchSize] - prompt processing batch size
|
|
10
|
-
* @param {number} [options.
|
|
10
|
+
* @param {number} [options.gpuLayers] - number of layers to store in VRAM
|
|
11
11
|
* @param {boolean} [options.lowVram] - if true, reduce VRAM usage at the cost of performance
|
|
12
12
|
* @param {boolean} [options.f16Kv] - use fp16 for KV cache
|
|
13
13
|
* @param {boolean} [options.logitsAll] - the llama_eval() call computes all logits, not just the last one
|
|
@@ -16,18 +16,30 @@ export declare class LlamaModel {
|
|
|
16
16
|
* @param {boolean} [options.useMlock] - force system to keep model in RAM
|
|
17
17
|
* @param {boolean} [options.embedding] - embedding mode only
|
|
18
18
|
*/
|
|
19
|
-
constructor({ modelPath, seed, contextSize, batchSize,
|
|
19
|
+
constructor({ modelPath, seed, contextSize, batchSize, gpuLayers, lowVram, f16Kv, logitsAll, vocabOnly, useMmap, useMlock, embedding }: {
|
|
20
|
+
/** path to the model on the filesystem */
|
|
20
21
|
modelPath: string;
|
|
22
|
+
/** If null, a random seed will be used */
|
|
21
23
|
seed?: number | null;
|
|
24
|
+
/** text context size */
|
|
22
25
|
contextSize?: number;
|
|
26
|
+
/** prompt processing batch size */
|
|
23
27
|
batchSize?: number;
|
|
24
|
-
|
|
28
|
+
/** number of layers to store in VRAM */
|
|
29
|
+
gpuLayers?: number;
|
|
30
|
+
/** if true, reduce VRAM usage at the cost of performance */
|
|
25
31
|
lowVram?: boolean;
|
|
32
|
+
/** use fp16 for KV cache */
|
|
26
33
|
f16Kv?: boolean;
|
|
34
|
+
/** the llama_eval() call computes all logits, not just the last one */
|
|
27
35
|
logitsAll?: boolean;
|
|
36
|
+
/** only load the vocabulary, no weights */
|
|
28
37
|
vocabOnly?: boolean;
|
|
38
|
+
/** use mmap if possible */
|
|
29
39
|
useMmap?: boolean;
|
|
40
|
+
/** force system to keep model in RAM */
|
|
30
41
|
useMlock?: boolean;
|
|
42
|
+
/** embedding mode only */
|
|
31
43
|
embedding?: boolean;
|
|
32
44
|
});
|
|
33
45
|
static get systemInfo(): string;
|
|
@@ -10,7 +10,7 @@ export class LlamaModel {
|
|
|
10
10
|
* @param {number | null} [options.seed] - If null, a random seed will be used
|
|
11
11
|
* @param {number} [options.contextSize] - text context size
|
|
12
12
|
* @param {number} [options.batchSize] - prompt processing batch size
|
|
13
|
-
* @param {number} [options.
|
|
13
|
+
* @param {number} [options.gpuLayers] - number of layers to store in VRAM
|
|
14
14
|
* @param {boolean} [options.lowVram] - if true, reduce VRAM usage at the cost of performance
|
|
15
15
|
* @param {boolean} [options.f16Kv] - use fp16 for KV cache
|
|
16
16
|
* @param {boolean} [options.logitsAll] - the llama_eval() call computes all logits, not just the last one
|
|
@@ -19,12 +19,12 @@ export class LlamaModel {
|
|
|
19
19
|
* @param {boolean} [options.useMlock] - force system to keep model in RAM
|
|
20
20
|
* @param {boolean} [options.embedding] - embedding mode only
|
|
21
21
|
*/
|
|
22
|
-
constructor({ modelPath, seed = null, contextSize = 1024 * 4, batchSize,
|
|
22
|
+
constructor({ modelPath, seed = null, contextSize = 1024 * 4, batchSize, gpuLayers, lowVram, f16Kv, logitsAll, vocabOnly, useMmap, useMlock, embedding }) {
|
|
23
23
|
this._model = new LLAMAModel(modelPath, removeNullFields({
|
|
24
24
|
seed: seed != null ? Math.max(-1, seed) : undefined,
|
|
25
25
|
contextSize,
|
|
26
26
|
batchSize,
|
|
27
|
-
|
|
27
|
+
gpuLayers,
|
|
28
28
|
lowVram,
|
|
29
29
|
f16Kv,
|
|
30
30
|
logitsAll,
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"LlamaModel.js","sourceRoot":"","sources":["../../src/llamaEvaluator/LlamaModel.ts"],"names":[],"mappings":"AAAA,OAAO,EAAC,YAAY,EAAE,UAAU,EAAC,MAAM,gBAAgB,CAAC;AAGxD,MAAM,OAAO,UAAU;IACnB,gBAAgB;IACA,MAAM,CAAa;IAEnC;;;;;;;;;;;;;;;;OAgBG;IACH,YAAmB,EACf,SAAS,EAAE,IAAI,GAAG,IAAI,EAAE,WAAW,GAAG,IAAI,GAAG,CAAC,EAAE,SAAS,EAAE,
|
|
1
|
+
{"version":3,"file":"LlamaModel.js","sourceRoot":"","sources":["../../src/llamaEvaluator/LlamaModel.ts"],"names":[],"mappings":"AAAA,OAAO,EAAC,YAAY,EAAE,UAAU,EAAC,MAAM,gBAAgB,CAAC;AAGxD,MAAM,OAAO,UAAU;IACnB,gBAAgB;IACA,MAAM,CAAa;IAEnC;;;;;;;;;;;;;;;;OAgBG;IACH,YAAmB,EACf,SAAS,EAAE,IAAI,GAAG,IAAI,EAAE,WAAW,GAAG,IAAI,GAAG,CAAC,EAAE,SAAS,EAAE,SAAS,EACpE,OAAO,EAAE,KAAK,EAAE,SAAS,EAAE,SAAS,EAAE,OAAO,EAAE,QAAQ,EAAE,SAAS,EAqCrE;QACG,IAAI,CAAC,MAAM,GAAG,IAAI,UAAU,CAAC,SAAS,EAAE,gBAAgB,CAAC;YACrD,IAAI,EAAE,IAAI,IAAI,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC,CAAC,CAAC,SAAS;YACnD,WAAW;YACX,SAAS;YACT,SAAS;YACT,OAAO;YACP,KAAK;YACL,SAAS;YACT,SAAS;YACT,OAAO;YACP,QAAQ;YACR,SAAS;SACZ,CAAC,CAAC,CAAC;IACR,CAAC;IAEM,MAAM,KAAK,UAAU;QACxB,OAAO,YAAY,CAAC,UAAU,EAAE,CAAC;IACrC,CAAC;CACJ;AAED,SAAS,gBAAgB,CAAmB,GAAM;IAC9C,MAAM,MAAM,GAAM,MAAM,CAAC,MAAM,CAAC,EAAE,EAAE,GAAG,CAAC,CAAC;IAEzC,KAAK,MAAM,GAAG,IAAI,GAAG,EAAE;QACnB,IAAI,MAAM,CAAC,GAAG,CAAC,IAAI,IAAI;YACnB,OAAO,MAAM,CAAC,GAAG,CAAC,CAAC;KAC1B;IAED,OAAO,MAAM,CAAC;AAClB,CAAC"}
|
package/llama/addon.cpp
CHANGED
|
@@ -34,8 +34,8 @@ class LLAMAModel : public Napi::ObjectWrap<LLAMAModel> {
|
|
|
34
34
|
params.n_batch = options.Get("batchSize").As<Napi::Number>().Int32Value();
|
|
35
35
|
}
|
|
36
36
|
|
|
37
|
-
if (options.Has("
|
|
38
|
-
params.n_gpu_layers = options.Get("
|
|
37
|
+
if (options.Has("gpuLayers")) {
|
|
38
|
+
params.n_gpu_layers = options.Get("gpuLayers").As<Napi::Number>().Int32Value();
|
|
39
39
|
}
|
|
40
40
|
|
|
41
41
|
if (options.Has("lowVram")) {
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|