node-llama-cpp 2.5.0 → 2.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68)
  1. package/README.md +19 -301
  2. package/dist/chatWrappers/{ChatMLPromptWrapper.d.ts → ChatMLChatPromptWrapper.d.ts} +1 -1
  3. package/dist/chatWrappers/{ChatMLPromptWrapper.js → ChatMLChatPromptWrapper.js} +2 -2
  4. package/dist/chatWrappers/ChatMLChatPromptWrapper.js.map +1 -0
  5. package/dist/chatWrappers/createChatWrapperByBos.js +2 -2
  6. package/dist/chatWrappers/createChatWrapperByBos.js.map +1 -1
  7. package/dist/cli/commands/BuildCommand.js +3 -1
  8. package/dist/cli/commands/BuildCommand.js.map +1 -1
  9. package/dist/cli/commands/ChatCommand.d.ts +8 -1
  10. package/dist/cli/commands/ChatCommand.js +88 -21
  11. package/dist/cli/commands/ChatCommand.js.map +1 -1
  12. package/dist/cli/commands/DownloadCommand.d.ts +2 -2
  13. package/dist/cli/commands/DownloadCommand.js +13 -38
  14. package/dist/cli/commands/DownloadCommand.js.map +1 -1
  15. package/dist/config.d.ts +5 -0
  16. package/dist/config.js +7 -0
  17. package/dist/config.js.map +1 -1
  18. package/dist/index.d.ts +5 -4
  19. package/dist/index.js +3 -2
  20. package/dist/index.js.map +1 -1
  21. package/dist/llamaEvaluator/LlamaBins.d.ts +3 -3
  22. package/dist/llamaEvaluator/LlamaBins.js +2 -2
  23. package/dist/llamaEvaluator/LlamaBins.js.map +1 -1
  24. package/dist/llamaEvaluator/LlamaChatSession.d.ts +79 -2
  25. package/dist/llamaEvaluator/LlamaChatSession.js +52 -8
  26. package/dist/llamaEvaluator/LlamaChatSession.js.map +1 -1
  27. package/dist/llamaEvaluator/LlamaContext.d.ts +60 -3
  28. package/dist/llamaEvaluator/LlamaContext.js +36 -4
  29. package/dist/llamaEvaluator/LlamaContext.js.map +1 -1
  30. package/dist/llamaEvaluator/LlamaGrammar.d.ts +16 -3
  31. package/dist/llamaEvaluator/LlamaGrammar.js +23 -4
  32. package/dist/llamaEvaluator/LlamaGrammar.js.map +1 -1
  33. package/dist/llamaEvaluator/LlamaGrammarEvaluationState.d.ts +14 -0
  34. package/dist/llamaEvaluator/LlamaGrammarEvaluationState.js +16 -0
  35. package/dist/llamaEvaluator/LlamaGrammarEvaluationState.js.map +1 -0
  36. package/dist/llamaEvaluator/LlamaModel.d.ts +46 -14
  37. package/dist/llamaEvaluator/LlamaModel.js +23 -16
  38. package/dist/llamaEvaluator/LlamaModel.js.map +1 -1
  39. package/dist/state.d.ts +2 -0
  40. package/dist/state.js +8 -0
  41. package/dist/state.js.map +1 -0
  42. package/dist/utils/cloneLlamaCppRepo.d.ts +1 -0
  43. package/dist/utils/cloneLlamaCppRepo.js +62 -0
  44. package/dist/utils/cloneLlamaCppRepo.js.map +1 -0
  45. package/dist/utils/compileLLamaCpp.js +24 -6
  46. package/dist/utils/compileLLamaCpp.js.map +1 -1
  47. package/dist/utils/getBin.d.ts +21 -13
  48. package/dist/utils/gitReleaseBundles.d.ts +2 -0
  49. package/dist/utils/gitReleaseBundles.js +25 -0
  50. package/dist/utils/gitReleaseBundles.js.map +1 -0
  51. package/llama/addon.cpp +184 -110
  52. package/llama/binariesGithubRelease.json +1 -1
  53. package/llama/gitRelease.bundle +0 -0
  54. package/llama/toolchains/darwin.host-x64.target-arm64.cmake +8 -0
  55. package/llama/toolchains/linux.host-arm64.target-x64.cmake +5 -0
  56. package/llama/toolchains/linux.host-x64.target-arm64.cmake +5 -0
  57. package/llama/toolchains/linux.host-x64.target-arm71.cmake +5 -0
  58. package/llamaBins/linux-arm64/llama-addon.node +0 -0
  59. package/llamaBins/linux-armv7l/llama-addon.node +0 -0
  60. package/llamaBins/linux-x64/llama-addon.node +0 -0
  61. package/llamaBins/mac-arm64/ggml-metal.metal +246 -79
  62. package/llamaBins/mac-arm64/llama-addon.node +0 -0
  63. package/llamaBins/mac-x64/ggml-metal.metal +246 -79
  64. package/llamaBins/mac-x64/llama-addon.node +0 -0
  65. package/llamaBins/win-x64/llama-addon.node +0 -0
  66. package/package.json +10 -4
  67. package/dist/chatWrappers/ChatMLPromptWrapper.js.map +0 -1
  68. package/llamaBins/linux-ppc64le/llama-addon.node +0 -0
package/README.md CHANGED
@@ -6,7 +6,7 @@
6
6
  <p></p>
7
7
  </div>
8
8
 
9
- <div align="center">
9
+ <div align="center" class="main-badges">
10
10
 
11
11
  [![Build](https://github.com/withcatai/node-llama-cpp/actions/workflows/build.yml/badge.svg)](https://github.com/withcatai/node-llama-cpp/actions/workflows/build.yml)
12
12
  [![License](https://badgen.net/badge/color/MIT/green?label=license)](https://www.npmjs.com/package/node-llama-cpp)
@@ -15,6 +15,20 @@
15
15
 
16
16
  </div>
17
17
 
18
+ ## Features
19
+ * Run a text generation model locally on your machine
20
+ * Metal and CUDA support
21
+ * Pre-built binaries are provided, with a fallback to building from source without `node-gyp` or Python
22
+ * Chat with a model using a chat wrapper
23
+ * Use the CLI to chat with a model without writing any code
24
+ * Up-to-date with the latest version of `llama.cpp`. Download and compile the latest release with a single CLI command.
25
+ * Force a model to generate output in a parseable format, like JSON
26
+
27
+ ## [Documentation](https://withcatai.github.io/node-llama-cpp/)
28
+ * [Getting started guide](https://withcatai.github.io/node-llama-cpp/guide/)
29
+ * [API reference](https://withcatai.github.io/node-llama-cpp/api/classes/LlamaModel)
30
+ * [CLI help](https://withcatai.github.io/node-llama-cpp/guide/cli/)
31
+
18
32
  ## Installation
19
33
  ```bash
20
34
  npm install --save node-llama-cpp
@@ -25,11 +39,7 @@ This package comes with pre-built binaries for macOS, Linux and Windows.
25
39
  If binaries are not available for your platform, it'll fallback to download the latest version of `llama.cpp` and build it from source with `cmake`.
26
40
  To disable this behavior set the environment variable `NODE_LLAMA_CPP_SKIP_DOWNLOAD` to `true`.
27
41
 
28
- ## Documentation
29
- ### [API reference](https://withcatai.github.io/node-llama-cpp/modules.html)
30
-
31
- ### Usage
32
- #### As a chatbot
42
+ ## Usage
33
43
  ```typescript
34
44
  import {fileURLToPath} from "url";
35
45
  import path from "path";
@@ -58,302 +68,10 @@ const a2 = await session.prompt(q2);
58
68
  console.log("AI: " + a2);
59
69
  ```
60
70
 
61
- ##### Custom prompt handling against the model
62
- ```typescript
63
- import {fileURLToPath} from "url";
64
- import path from "path";
65
- import {LlamaModel, LlamaContext, LlamaChatSession, ChatPromptWrapper} from "node-llama-cpp";
66
-
67
- const __dirname = path.dirname(fileURLToPath(import.meta.url));
68
-
69
- export class MyCustomChatPromptWrapper extends ChatPromptWrapper {
70
- public override wrapPrompt(prompt: string, {systemPrompt, promptIndex}: {systemPrompt: string, promptIndex: number}) {
71
- if (promptIndex === 0) {
72
- return "SYSTEM: " + systemPrompt + "\nUSER: " + prompt + "\nASSISTANT:";
73
- } else {
74
- return "USER: " + prompt + "\nASSISTANT:";
75
- }
76
- }
77
-
78
- public override getStopStrings(): string[] {
79
- return ["USER:"];
80
- }
81
- }
82
-
83
- const model = new LlamaModel({
84
- modelPath: path.join(__dirname, "models", "codellama-13b.Q3_K_M.gguf")
85
- })
86
- const context = new LlamaContext({model});
87
- const session = new LlamaChatSession({
88
- context,
89
- promptWrapper: new MyCustomChatPromptWrapper() // by default, GeneralChatPromptWrapper is used
90
- });
91
-
92
-
93
- const q1 = "Hi there, how are you?";
94
- console.log("User: " + q1);
95
-
96
- const a1 = await session.prompt(q1);
97
- console.log("AI: " + a1);
98
-
99
-
100
- const q2 = "Summerize what you said";
101
- console.log("User: " + q2);
102
-
103
- const a2 = await session.prompt(q2);
104
- console.log("AI: " + a2);
105
- ```
106
-
107
- ##### Load existing conversation history
108
- ```typescript
109
- import {fileURLToPath} from "url";
110
- import path from "path";
111
- import {LlamaModel, LlamaContext, LlamaChatSession} from "node-llama-cpp";
112
-
113
- const __dirname = path.dirname(fileURLToPath(import.meta.url));
114
-
115
- const model = new LlamaModel({
116
- modelPath: path.join(__dirname, "models", "codellama-13b.Q3_K_M.gguf")
117
- })
118
- const context = new LlamaContext({model});
119
- const session = new LlamaChatSession({
120
- context,
121
- conversationHistory: [{
122
- prompt: `Remember the number 6 as "The number"`,
123
- response: "OK. I'll remember it"
124
- }]
125
- });
126
-
127
-
128
- const q2 = 'What is "The number"?';
129
- console.log("User: " + q2);
130
-
131
- const a2 = await session.prompt(q2);
132
- console.log("AI: " + a2);
133
- ```
134
-
135
- #### Raw
136
- ```typescript
137
- import {fileURLToPath} from "url";
138
- import path from "path";
139
- import {LlamaModel, LlamaContext, LlamaChatSession} from "node-llama-cpp";
140
-
141
- const __dirname = path.dirname(fileURLToPath(import.meta.url));
142
-
143
- const model = new LlamaModel({
144
- modelPath: path.join(__dirname, "models", "codellama-13b.Q3_K_M.gguf")
145
- });
146
-
147
- const context = new LlamaContext({model});
148
-
149
- const q1 = "Hi there, how are you?";
150
- console.log("AI: " + q1);
151
-
152
- const tokens = context.encode(q1);
153
- const res: number[] = [];
154
- for await (const modelToken of context.evaluate(tokens)) {
155
- res.push(modelToken);
156
-
157
- // it's important to not concatinate the results as strings,
158
- // as doing so will break some characters (like some emojis) that are made of multiple tokens.
159
- // by using an array of tokens, we can decode them correctly together.
160
- const resString: string = context.decode(Uint32Array.from(res));
161
-
162
- const lastPart = resString.split("ASSISTANT:").reverse()[0];
163
- if (lastPart.includes("USER:"))
164
- break;
165
- }
166
-
167
- const a1 = context.decode(Uint32Array.from(res)).split("USER:")[0];
168
- console.log("AI: " + a1);
169
- ```
170
-
171
- #### With grammar
172
- Use this to direct the model to generate a specific format of text, like `JSON` for example.
173
-
174
- > **Note:** there's an issue with some grammars where the model won't stop generating output,
175
- > so it's advised to use it together with `maxTokens` set to the context size of the model
176
-
177
- ```typescript
178
- import {fileURLToPath} from "url";
179
- import path from "path";
180
- import {LlamaModel, LlamaGrammar, LlamaContext, LlamaChatSession} from "node-llama-cpp";
181
-
182
- const __dirname = path.dirname(fileURLToPath(import.meta.url));
183
-
184
- const model = new LlamaModel({
185
- modelPath: path.join(__dirname, "models", "codellama-13b.Q3_K_M.gguf")
186
- })
187
- const grammar = await LlamaGrammar.getFor("json");
188
- const context = new LlamaContext({
189
- model,
190
- grammar
191
- });
192
- const session = new LlamaChatSession({context});
193
-
194
-
195
- const q1 = 'Create a JSON that contains a message saying "hi there"';
196
- console.log("User: " + q1);
197
-
198
- const a1 = await session.prompt(q1, {maxTokens: context.getContextSize()});
199
- console.log("AI: " + a1);
200
- console.log(JSON.parse(a1));
201
-
202
-
203
- const q2 = 'Add another field to the JSON with the key being "author" and the value being "Llama"';
204
- console.log("User: " + q2);
205
-
206
- const a2 = await session.prompt(q2, {maxTokens: context.getContextSize()});
207
- console.log("AI: " + a2);
208
- console.log(JSON.parse(a2));
209
- ```
210
-
211
- ### Metal and CUDA support
212
- **Metal:** `llama.cpp` is built with Metal support by default on macOS.
213
-
214
- **CUDA:** To load a version of `llama.cpp` that was compiled to use CUDA,
215
- you have to build it from source with the `--cuda` flag before running your code that imports `node-llama-cpp`.
216
-
217
- To do this, run this command inside of your project directory:
218
- ```bash
219
- npx node-llama-cpp download --cuda
220
- ```
221
-
222
- > If `cmake` is not installed on your machine, `node-llama-cpp` will automatically download `cmake` to an internal directory and try to use it to build `llama.cpp` from source.
223
- >
224
- > If the build fails, make sure you have the required dependencies of `cmake` installed on your machine. More info is available [here](https://github.com/cmake-js/cmake-js#:~:text=projectRoot/build%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%5Bstring%5D-,Requirements%3A,-CMake) (you don't have to install `cmake` or `cmake-js`, just the dependencies).
225
-
226
- To troubleshoot CUDA issues, visit the [CUDA documentation](https://github.com/withcatai/node-llama-cpp/blob/master/docs/CUDA.md).
227
-
228
- ### CLI
229
- ```
230
- Usage: node-llama-cpp <command> [options]
231
-
232
- Commands:
233
- node-llama-cpp download Download a release of llama.cpp and compile it
234
- node-llama-cpp build Compile the currently downloaded llama.cpp
235
- node-llama-cpp clear [type] Clear files created by node-llama-cpp [aliases: clean]
236
- node-llama-cpp chat Chat with a Llama model
237
-
238
- Options:
239
- -h, --help Show help [boolean]
240
- -v, --version Show version number [boolean]
241
- ```
242
-
243
- #### `download` command
244
- ```
245
- node-llama-cpp download
246
-
247
- Download a release of llama.cpp and compile it
248
-
249
- Options:
250
- -h, --help Show help [boolean]
251
- --repo The GitHub repository to download a release of llama.cpp from. Can also be
252
- set via the NODE_LLAMA_CPP_REPO environment variable
253
- [string] [default: "ggerganov/llama.cpp"]
254
- --release The tag of the llama.cpp release to download. Set to "latest" to download t
255
- he latest release. Can also be set via the NODE_LLAMA_CPP_REPO_RELEASE envi
256
- ronment variable [string] [default: "latest"]
257
- -a, --arch The architecture to compile llama.cpp for [string]
258
- -t, --nodeTarget The Node.js version to compile llama.cpp for. Example: v18.0.0 [string]
259
- --metal Compile llama.cpp with Metal support. Enabled by default on macOS. Can be d
260
- isabled with "--no-metal". Can also be set via the NODE_LLAMA_CPP_METAL env
261
- ironment variable [boolean] [default: true]
262
- --cuda Compile llama.cpp with CUDA support. Can also be set via the NODE_LLAMA_CPP
263
- _CUDA environment variable [boolean] [default: false]
264
- --skipBuild, --sb Skip building llama.cpp after downloading it [boolean] [default: false]
265
- -v, --version Show version number [boolean]
266
- ```
267
-
268
- #### `build` command
269
- ```
270
- node-llama-cpp build
271
-
272
- Compile the currently downloaded llama.cpp
273
-
274
- Options:
275
- -h, --help Show help [boolean]
276
- -a, --arch The architecture to compile llama.cpp for [string]
277
- -t, --nodeTarget The Node.js version to compile llama.cpp for. Example: v18.0.0 [string]
278
- --metal Compile llama.cpp with Metal support. Enabled by default on macOS. Can be disabl
279
- ed with "--no-metal". Can also be set via the NODE_LLAMA_CPP_METAL environment v
280
- ariable [boolean] [default: true]
281
- --cuda Compile llama.cpp with CUDA support. Can also be set via the NODE_LLAMA_CPP_CUDA
282
- environment variable [boolean] [default: false]
283
- -v, --version Show version number [boolean]
284
- ```
285
-
286
- > To set custom cmake options that are supported by `llama.cpp`'s cmake build,
287
- > set an environment variable of the option prefixed with `NODE_LLAMA_CPP_CMAKE_OPTION_`.
288
-
289
- #### `clear` command
290
- ```
291
- node-llama-cpp clear [type]
292
-
293
- Clear files created by node-llama-cpp
294
-
295
- Options:
296
- -h, --help Show help [boolean]
297
- --type Files to clear
298
- [string] [choices: "source", "build", "cmake", "all"] [default: "all"]
299
- -v, --version Show version number [boolean]
300
- ```
301
-
302
- #### `chat` command
303
- ```
304
- node-llama-cpp chat
305
-
306
- Chat with a Llama model
307
-
308
- Required:
309
- -m, --model Llama model file to use for the chat [string] [required]
310
-
311
- Optional:
312
- -i, --systemInfo Print llama.cpp system info [boolean] [default: false]
313
- -s, --systemPrompt System prompt to use against the model. [default value: You are a helpful,
314
- respectful and honest assistant. Always answer as helpfully as possible. If
315
- a question does not make any sense, or is not factually coherent, explain
316
- why instead of answering something not correct. If you don't know the answe
317
- r to a question, please don't share false information.]
318
- [string] [default: "You are a helpful, respectful and honest assistant. Always answer as helpfully
319
- as possible.
320
- If a question does not make any sense, or is not factually coherent, explain why ins
321
- tead of answering something not correct. If you don't know the answer to a question, please don't
322
- share false information."]
323
- -w, --wrapper Chat wrapper to use. Use `auto` to automatically select a wrapper based on
324
- the model's BOS token
325
- [string] [choices: "auto", "general", "llamaChat", "chatML", "falconChat"] [default: "general"]
326
- -c, --contextSize Context size to use for the model [number] [default: 4096]
327
- -g, --grammar Restrict the model response to a specific grammar, like JSON for example
328
- [string] [choices: "text", "json", "list", "arithmetic", "japanese", "chess"] [default: "text"]
329
- --threads Number of threads to use for the evaluation of tokens [number] [default: 6]
330
- -t, --temperature Temperature is a hyperparameter that controls the randomness of the generat
331
- ed text. It affects the probability distribution of the model's output toke
332
- ns. A higher temperature (e.g., 1.5) makes the output more random and creat
333
- ive, while a lower temperature (e.g., 0.5) makes the output more focused, d
334
- eterministic, and conservative. The suggested temperature is 0.8, which pro
335
- vides a balance between randomness and determinism. At the extreme, a tempe
336
- rature of 0 will always pick the most likely next token, leading to identic
337
- al outputs in each run. Set to `0` to disable. [number] [default: 0]
338
- -k, --topK Limits the model to consider only the K most likely next tokens for samplin
339
- g at each step of sequence generation. An integer number between `1` and th
340
- e size of the vocabulary. Set to `0` to disable (which uses the full vocabu
341
- lary). Only relevant when `temperature` is set to a value greater than 0.
342
- [number] [default: 40]
343
- -p, --topP Dynamically selects the smallest set of tokens whose cumulative probability
344
- exceeds the threshold P, and samples the next token only from this set. A
345
- float number between `0` and `1`. Set to `1` to disable. Only relevant when
346
- `temperature` is set to a value greater than `0`. [number] [default: 0.95]
347
- --maxTokens, --mt Maximum number of tokens to generate in responses. Set to `0` to disable. S
348
- et to `-1` to set to the context size [number] [default: 0]
349
-
350
- Options:
351
- -h, --help Show help [boolean]
352
- -v, --version Show version number [boolean]
353
- ```
71
+ > For more examples, see the [getting started guide](https://withcatai.github.io/node-llama-cpp/guide/)
354
72
 
355
73
  ## Contributing
356
- To contribute to `node-llama-cpp` read [CONTRIBUTING.md](https://github.com/withcatai/node-llama-cpp/blob/master/DEVELOPMENT.md).
74
+ To contribute to `node-llama-cpp` read the [contribution guide](https://withcatai.github.io/node-llama-cpp/guide/contributing).
357
75
 
358
76
  ## Acknowledgements
359
77
  * llama.cpp: [ggerganov/llama.cpp](https://github.com/ggerganov/llama.cpp)
@@ -362,7 +80,7 @@ To contribute to `node-llama-cpp` read [CONTRIBUTING.md](https://github.com/with
362
80
  <br />
363
81
 
364
82
  <div align="center" width="360">
365
- <img alt="Star please" src="https://media.githubusercontent.com/media/withcatai/node-llama-cpp/master/assets/star.please.roundEdges.png" width="360px" margin="auto" />
83
+ <img alt="Star please" src="https://media.githubusercontent.com/media/withcatai/node-llama-cpp/master/assets/star.please.roundEdges.png" width="360" margin="auto" />
366
84
  <br/>
367
85
  <p align="right">
368
86
  <i>If you like this repo, star it ✨</i>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
@@ -1,5 +1,5 @@
1
1
  import { ChatPromptWrapper } from "../ChatPromptWrapper.js";
2
- export declare class ChatMLPromptWrapper extends ChatPromptWrapper {
2
+ export declare class ChatMLChatPromptWrapper extends ChatPromptWrapper {
3
3
  readonly wrapperName: string;
4
4
  wrapPrompt(prompt: string, { systemPrompt, promptIndex, lastStopString, lastStopStringSuffix }: {
5
5
  systemPrompt: string;
@@ -1,7 +1,7 @@
1
1
  import { ChatPromptWrapper } from "../ChatPromptWrapper.js";
2
2
  import { getTextCompletion } from "../utils/getTextCompletion.js";
3
3
  // source: https://github.com/openai/openai-python/blob/120d225b91a8453e15240a49fb1c6794d8119326/chatml.md
4
- export class ChatMLPromptWrapper extends ChatPromptWrapper {
4
+ export class ChatMLChatPromptWrapper extends ChatPromptWrapper {
5
5
  wrapperName = "ChatML";
6
6
  wrapPrompt(prompt, { systemPrompt, promptIndex, lastStopString, lastStopStringSuffix }) {
7
7
  const previousCompletionEnd = (lastStopString ?? "") + (lastStopStringSuffix ?? "");
@@ -19,4 +19,4 @@ export class ChatMLPromptWrapper extends ChatPromptWrapper {
19
19
  return "<|im_end|>";
20
20
  }
21
21
  }
22
- //# sourceMappingURL=ChatMLPromptWrapper.js.map
22
+ //# sourceMappingURL=ChatMLChatPromptWrapper.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"ChatMLChatPromptWrapper.js","sourceRoot":"","sources":["../../src/chatWrappers/ChatMLChatPromptWrapper.ts"],"names":[],"mappings":"AAAA,OAAO,EAAC,iBAAiB,EAAC,MAAM,yBAAyB,CAAC;AAC1D,OAAO,EAAC,iBAAiB,EAAC,MAAM,+BAA+B,CAAC;AAEhE,0GAA0G;AAC1G,MAAM,OAAO,uBAAwB,SAAQ,iBAAiB;IAC1C,WAAW,GAAW,QAAQ,CAAC;IAE/B,UAAU,CAAC,MAAc,EAAE,EAAC,YAAY,EAAE,WAAW,EAAE,cAAc,EAAE,oBAAoB,EAE1G;QACG,MAAM,qBAAqB,GAAG,CAAC,cAAc,IAAI,EAAE,CAAC,GAAG,CAAC,oBAAoB,IAAI,EAAE,CAAC,CAAC;QAEpF,IAAI,WAAW,KAAK,CAAC,IAAI,YAAY,IAAI,EAAE;YACvC,OAAO,CAAC,iBAAiB,CAAC,qBAAqB,EAAE,sBAAsB,CAAC,IAAI,sBAAsB,CAAC;gBAC/F,YAAY,GAAG,gCAAgC,GAAG,MAAM,GAAG,qCAAqC,CAAC;;YAErG,OAAO,CAAC,iBAAiB,CAAC,qBAAqB,EAAE,gCAAgC,CAAC,IAAI,gCAAgC,CAAC;gBACnH,MAAM,GAAG,qCAAqC,CAAC;IAC3D,CAAC;IAEe,cAAc;QAC1B,OAAO,CAAC,YAAY,CAAC,CAAC;IAC1B,CAAC;IAEe,oBAAoB;QAChC,OAAO,YAAY,CAAC;IACxB,CAAC;CACJ"}
@@ -1,5 +1,5 @@
1
1
  import { LlamaChatPromptWrapper } from "./LlamaChatPromptWrapper.js";
2
- import { ChatMLPromptWrapper } from "./ChatMLPromptWrapper.js";
2
+ import { ChatMLChatPromptWrapper } from "./ChatMLChatPromptWrapper.js";
3
3
  export function getChatWrapperByBos(bos) {
4
4
  if (bos === "" || bos == null)
5
5
  return null;
@@ -7,7 +7,7 @@ export function getChatWrapperByBos(bos) {
7
7
  return LlamaChatPromptWrapper;
8
8
  }
9
9
  else if ("<|im_start|>system\n".startsWith(bos)) {
10
- return ChatMLPromptWrapper;
10
+ return ChatMLChatPromptWrapper;
11
11
  }
12
12
  return null;
13
13
  }
@@ -1 +1 @@
1
- {"version":3,"file":"createChatWrapperByBos.js","sourceRoot":"","sources":["../../src/chatWrappers/createChatWrapperByBos.ts"],"names":[],"mappings":"AAAA,OAAO,EAAC,sBAAsB,EAAC,MAAM,6BAA6B,CAAC;AACnE,OAAO,EAAC,mBAAmB,EAAC,MAAM,0BAA0B,CAAC;AAE7D,MAAM,UAAU,mBAAmB,CAAC,GAA8B;IAC9D,IAAI,GAAG,KAAK,EAAE,IAAI,GAAG,IAAI,IAAI;QACzB,OAAO,IAAI,CAAC;IAEhB,IAAI,qBAAqB,CAAC,UAAU,CAAC,GAAG,CAAC,EAAE;QACvC,OAAO,sBAAsB,CAAC;KACjC;SAAM,IAAI,sBAAsB,CAAC,UAAU,CAAC,GAAG,CAAC,EAAE;QAC/C,OAAO,mBAAmB,CAAC;KAC9B;IAED,OAAO,IAAI,CAAC;AAChB,CAAC"}
1
+ {"version":3,"file":"createChatWrapperByBos.js","sourceRoot":"","sources":["../../src/chatWrappers/createChatWrapperByBos.ts"],"names":[],"mappings":"AAAA,OAAO,EAAC,sBAAsB,EAAC,MAAM,6BAA6B,CAAC;AACnE,OAAO,EAAC,uBAAuB,EAAC,MAAM,8BAA8B,CAAC;AAErE,MAAM,UAAU,mBAAmB,CAAC,GAA8B;IAC9D,IAAI,GAAG,KAAK,EAAE,IAAI,GAAG,IAAI,IAAI;QACzB,OAAO,IAAI,CAAC;IAEhB,IAAI,qBAAqB,CAAC,UAAU,CAAC,GAAG,CAAC,EAAE;QACvC,OAAO,sBAAsB,CAAC;KACjC;SAAM,IAAI,sBAAsB,CAAC,UAAU,CAAC,GAAG,CAAC,EAAE;QAC/C,OAAO,uBAAuB,CAAC;KAClC;IAED,OAAO,IAAI,CAAC;AAChB,CAAC"}
@@ -7,10 +7,12 @@ import { clearTempFolder } from "../../utils/clearTempFolder.js";
7
7
  import { defaultLlamaCppCudaSupport, defaultLlamaCppMetalSupport, llamaCppDirectory } from "../../config.js";
8
8
  import { downloadCmakeIfNeeded } from "../../utils/cmake.js";
9
9
  import withStatusLogs from "../../utils/withStatusLogs.js";
10
+ import { getIsInDocumentationMode } from "../../state.js";
10
11
  export const BuildCommand = {
11
12
  command: "build",
12
13
  describe: "Compile the currently downloaded llama.cpp",
13
14
  builder(yargs) {
15
+ const isInDocumentationMode = getIsInDocumentationMode();
14
16
  return yargs
15
17
  .option("arch", {
16
18
  alias: "a",
@@ -24,7 +26,7 @@ export const BuildCommand = {
24
26
  })
25
27
  .option("metal", {
26
28
  type: "boolean",
27
- default: defaultLlamaCppMetalSupport,
29
+ default: defaultLlamaCppMetalSupport || isInDocumentationMode,
28
30
  description: "Compile llama.cpp with Metal support. Enabled by default on macOS. Can be disabled with \"--no-metal\". Can also be set via the NODE_LLAMA_CPP_METAL environment variable"
29
31
  })
30
32
  .option("cuda", {
@@ -1 +1 @@
1
- {"version":3,"file":"BuildCommand.js","sourceRoot":"","sources":["../../../src/cli/commands/BuildCommand.ts"],"names":[],"mappings":"AAAA,OAAO,OAAO,MAAM,SAAS,CAAC;AAE9B,OAAO,KAAK,MAAM,OAAO,CAAC;AAC1B,OAAO,EAAE,MAAM,UAAU,CAAC;AAC1B,OAAO,EAAC,eAAe,EAAC,MAAM,gCAAgC,CAAC;AAC/D,OAAO,OAAO,MAAM,wBAAwB,CAAC;AAC7C,OAAO,EAAC,eAAe,EAAC,MAAM,gCAAgC,CAAC;AAC/D,OAAO,EAAC,0BAA0B,EAAE,2BAA2B,EAAE,iBAAiB,EAAC,MAAM,iBAAiB,CAAC;AAC3G,OAAO,EAAC,qBAAqB,EAAC,MAAM,sBAAsB,CAAC;AAC3D,OAAO,cAAc,MAAM,+BAA+B,CAAC;AAS3D,MAAM,CAAC,MAAM,YAAY,GAAwC;IAC7D,OAAO,EAAE,OAAO;IAChB,QAAQ,EAAE,4CAA4C;IACtD,OAAO,CAAC,KAAK;QACT,OAAO,KAAK;aACP,MAAM,CAAC,MAAM,EAAE;YACZ,KAAK,EAAE,GAAG;YACV,IAAI,EAAE,QAAQ;YACd,WAAW,EAAE,2CAA2C;SAC3D,CAAC;aACD,MAAM,CAAC,YAAY,EAAE;YAClB,KAAK,EAAE,GAAG;YACV,IAAI,EAAE,QAAQ;YACd,WAAW,EAAE,gEAAgE;SAChF,CAAC;aACD,MAAM,CAAC,OAAO,EAAE;YACb,IAAI,EAAE,SAAS;YACf,OAAO,EAAE,2BAA2B;YACpC,WAAW,EAAE,2KAA2K;SAC3L,CAAC;aACD,MAAM,CAAC,MAAM,EAAE;YACZ,IAAI,EAAE,SAAS;YACf,OAAO,EAAE,0BAA0B;YACnC,WAAW,EAAE,uGAAuG;SACvH,CAAC,CAAC;IACX,CAAC;IACD,OAAO,EAAE,oBAAoB;CAChC,CAAC;AAEF,MAAM,CAAC,KAAK,UAAU,oBAAoB,CAAC,EAAC,IAAI,EAAE,UAAU,EAAE,KAAK,EAAE,IAAI,EAAe;IACpF,IAAI,CAAC,CAAC,MAAM,EAAE,CAAC,UAAU,CAAC,iBAAiB,CAAC,CAAC,EAAE;QAC3C,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,yEAAyE,CAAC,CAAC,CAAC;QAClG,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;KACnB;IAED,IAAI,KAAK,IAAI,OAAO,CAAC,QAAQ,KAAK,QAAQ,EAAE;QACxC,OAAO,CAAC,GAAG,CAAC,GAAG,KAAK,CAAC,MAAM,CAAC,QAAQ,CAAC,UAAU,CAAC,CAAC;KACpD;IAED,IAAI,IAAI,EAAE;QACN,OAAO,CAAC,GAAG,CAAC,GAAG,KAAK,CAAC,MAAM,CAAC,OAAO,CAAC,UAAU,CAAC,CAAC;KACnD;IAED,MAAM,qBAAqB,CAAC,IAAI,CAAC,CAAC;IAElC,MAAM,cAAc,CAAC;QACjB,OAAO,EAAE,KAAK,CAAC,IAAI,CAAC,qBAAqB,CAAC;QAC1C,OAAO,EAAE,KAAK,CAAC,IAAI,CAAC,oBAAoB,CAAC;QACzC,IAAI,EAAE,KAAK,CAAC,IAAI,CAAC,6BAA6B,CAAC;KAClD,EAAE,KAAK,IAAI,EAAE;QACV,MAAM,eAAe,CAAC;YAClB,IAAI,EAAE,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,SAAS;YAC7B,UAAU,EAAE,UAAU,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,SAAS;YAC/C,cAAc,EAAE,IAAI;YACpB,KAAK;YACL,IAAI;SACP,CAAC,CAAC;IACP,CAAC,CAAC,CAAC;IAE
H,MAAM,OAAO,CAAC;QACV,OAAO,EAAE,KAAK,CAAC,IAAI,CAAC,0BAA0B,CAAC;QAC/C,OAAO,EAAE,KAAK,CAAC,IAAI,CAAC,yBAAyB,CAAC;QAC9C,IAAI,EAAE,KAAK,CAAC,IAAI,CAAC,kCAAkC,CAAC;KACvD,EAAE,KAAK,IAAI,EAAE;QACV,MAAM,eAAe,EAAE,CAAC;IAC5B,CAAC,CAAC,CAAC;AACP,CAAC"}
1
+ {"version":3,"file":"BuildCommand.js","sourceRoot":"","sources":["../../../src/cli/commands/BuildCommand.ts"],"names":[],"mappings":"AAAA,OAAO,OAAO,MAAM,SAAS,CAAC;AAE9B,OAAO,KAAK,MAAM,OAAO,CAAC;AAC1B,OAAO,EAAE,MAAM,UAAU,CAAC;AAC1B,OAAO,EAAC,eAAe,EAAC,MAAM,gCAAgC,CAAC;AAC/D,OAAO,OAAO,MAAM,wBAAwB,CAAC;AAC7C,OAAO,EAAC,eAAe,EAAC,MAAM,gCAAgC,CAAC;AAC/D,OAAO,EAAC,0BAA0B,EAAE,2BAA2B,EAAE,iBAAiB,EAAC,MAAM,iBAAiB,CAAC;AAC3G,OAAO,EAAC,qBAAqB,EAAC,MAAM,sBAAsB,CAAC;AAC3D,OAAO,cAAc,MAAM,+BAA+B,CAAC;AAC3D,OAAO,EAAC,wBAAwB,EAAC,MAAM,gBAAgB,CAAC;AASxD,MAAM,CAAC,MAAM,YAAY,GAAwC;IAC7D,OAAO,EAAE,OAAO;IAChB,QAAQ,EAAE,4CAA4C;IACtD,OAAO,CAAC,KAAK;QACT,MAAM,qBAAqB,GAAG,wBAAwB,EAAE,CAAC;QAEzD,OAAO,KAAK;aACP,MAAM,CAAC,MAAM,EAAE;YACZ,KAAK,EAAE,GAAG;YACV,IAAI,EAAE,QAAQ;YACd,WAAW,EAAE,2CAA2C;SAC3D,CAAC;aACD,MAAM,CAAC,YAAY,EAAE;YAClB,KAAK,EAAE,GAAG;YACV,IAAI,EAAE,QAAQ;YACd,WAAW,EAAE,gEAAgE;SAChF,CAAC;aACD,MAAM,CAAC,OAAO,EAAE;YACb,IAAI,EAAE,SAAS;YACf,OAAO,EAAE,2BAA2B,IAAI,qBAAqB;YAC7D,WAAW,EAAE,2KAA2K;SAC3L,CAAC;aACD,MAAM,CAAC,MAAM,EAAE;YACZ,IAAI,EAAE,SAAS;YACf,OAAO,EAAE,0BAA0B;YACnC,WAAW,EAAE,uGAAuG;SACvH,CAAC,CAAC;IACX,CAAC;IACD,OAAO,EAAE,oBAAoB;CAChC,CAAC;AAEF,MAAM,CAAC,KAAK,UAAU,oBAAoB,CAAC,EAAC,IAAI,EAAE,UAAU,EAAE,KAAK,EAAE,IAAI,EAAe;IACpF,IAAI,CAAC,CAAC,MAAM,EAAE,CAAC,UAAU,CAAC,iBAAiB,CAAC,CAAC,EAAE;QAC3C,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,yEAAyE,CAAC,CAAC,CAAC;QAClG,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;KACnB;IAED,IAAI,KAAK,IAAI,OAAO,CAAC,QAAQ,KAAK,QAAQ,EAAE;QACxC,OAAO,CAAC,GAAG,CAAC,GAAG,KAAK,CAAC,MAAM,CAAC,QAAQ,CAAC,UAAU,CAAC,CAAC;KACpD;IAED,IAAI,IAAI,EAAE;QACN,OAAO,CAAC,GAAG,CAAC,GAAG,KAAK,CAAC,MAAM,CAAC,OAAO,CAAC,UAAU,CAAC,CAAC;KACnD;IAED,MAAM,qBAAqB,CAAC,IAAI,CAAC,CAAC;IAElC,MAAM,cAAc,CAAC;QACjB,OAAO,EAAE,KAAK,CAAC,IAAI,CAAC,qBAAqB,CAAC;QAC1C,OAAO,EAAE,KAAK,CAAC,IAAI,CAAC,oBAAoB,CAAC;QACzC,IAAI,EAAE,KAAK,CAAC,IAAI,CAAC,6BAA6B,CAAC;KAClD,EAAE,KAAK,IAAI,EAAE;QACV,MAAM,eAAe,CAAC;YAClB,IAAI,EAAE,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,SAAS;YAC7B,UAAU,EAAE,UAAU,CAAC,CAAC,CAAC,UAAU,CAA
C,CAAC,CAAC,SAAS;YAC/C,cAAc,EAAE,IAAI;YACpB,KAAK;YACL,IAAI;SACP,CAAC,CAAC;IACP,CAAC,CAAC,CAAC;IAEH,MAAM,OAAO,CAAC;QACV,OAAO,EAAE,KAAK,CAAC,IAAI,CAAC,0BAA0B,CAAC;QAC/C,OAAO,EAAE,KAAK,CAAC,IAAI,CAAC,yBAAyB,CAAC;QAC9C,IAAI,EAAE,KAAK,CAAC,IAAI,CAAC,kCAAkC,CAAC;KACvD,EAAE,KAAK,IAAI,EAAE;QACV,MAAM,eAAe,EAAE,CAAC;IAC5B,CAAC,CAAC,CAAC;AACP,CAAC"}
@@ -1,16 +1,23 @@
1
1
  import { CommandModule } from "yargs";
2
2
  import type { LlamaGrammar } from "../../llamaEvaluator/LlamaGrammar.js";
3
+ declare const modelWrappers: readonly ["auto", "general", "llamaChat", "chatML", "falconChat"];
3
4
  type ChatCommand = {
4
5
  model: string;
5
6
  systemInfo: boolean;
6
7
  systemPrompt: string;
7
- wrapper: "auto" | "general" | "llamaChat" | "chatML" | "falconChat";
8
+ prompt?: string;
9
+ wrapper: (typeof modelWrappers)[number];
8
10
  contextSize: number;
9
11
  grammar: "text" | Parameters<typeof LlamaGrammar.getFor>[0];
10
12
  threads: number;
11
13
  temperature: number;
12
14
  topK: number;
13
15
  topP: number;
16
+ repeatPenalty: number;
17
+ lastTokensRepeatPenalty: number;
18
+ penalizeRepeatingNewLine: boolean;
19
+ repeatFrequencyPenalty?: number;
20
+ repeatPresencePenalty?: number;
14
21
  maxTokens: number;
15
22
  };
16
23
  export declare const ChatCommand: CommandModule<object, ChatCommand>;