node-llama-cpp 1.3.1 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (121) hide show
  1. package/README.md +100 -28
  2. package/dist/ChatPromptWrapper.d.ts +3 -0
  3. package/dist/ChatPromptWrapper.js.map +1 -1
  4. package/dist/chatWrappers/ChatMLPromptWrapper.d.ts +11 -0
  5. package/dist/chatWrappers/ChatMLPromptWrapper.js +19 -0
  6. package/dist/chatWrappers/ChatMLPromptWrapper.js.map +1 -0
  7. package/dist/chatWrappers/EmptyChatPromptWrapper.d.ts +1 -0
  8. package/dist/chatWrappers/EmptyChatPromptWrapper.js +1 -0
  9. package/dist/chatWrappers/EmptyChatPromptWrapper.js.map +1 -1
  10. package/dist/chatWrappers/GeneralChatPromptWrapper.d.ts +11 -1
  11. package/dist/chatWrappers/GeneralChatPromptWrapper.js +28 -4
  12. package/dist/chatWrappers/GeneralChatPromptWrapper.js.map +1 -1
  13. package/dist/chatWrappers/LlamaChatPromptWrapper.d.ts +4 -1
  14. package/dist/chatWrappers/LlamaChatPromptWrapper.js +9 -5
  15. package/dist/chatWrappers/LlamaChatPromptWrapper.js.map +1 -1
  16. package/dist/chatWrappers/createChatWrapperByBos.d.ts +2 -0
  17. package/dist/chatWrappers/createChatWrapperByBos.js +14 -0
  18. package/dist/chatWrappers/createChatWrapperByBos.js.map +1 -0
  19. package/dist/cli/commands/BuildCommand.d.ts +3 -1
  20. package/dist/cli/commands/BuildCommand.js +24 -2
  21. package/dist/cli/commands/BuildCommand.js.map +1 -1
  22. package/dist/cli/commands/ChatCommand.d.ts +7 -1
  23. package/dist/cli/commands/ChatCommand.js +87 -12
  24. package/dist/cli/commands/ChatCommand.js.map +1 -1
  25. package/dist/cli/commands/ClearCommand.js +1 -1
  26. package/dist/cli/commands/ClearCommand.js.map +1 -1
  27. package/dist/cli/commands/DownloadCommand.d.ts +4 -1
  28. package/dist/cli/commands/DownloadCommand.js +71 -70
  29. package/dist/cli/commands/DownloadCommand.js.map +1 -1
  30. package/dist/cli/commands/OnPostInstallCommand.js +4 -2
  31. package/dist/cli/commands/OnPostInstallCommand.js.map +1 -1
  32. package/dist/config.d.ts +5 -0
  33. package/dist/config.js +11 -1
  34. package/dist/config.js.map +1 -1
  35. package/dist/index.d.ts +5 -1
  36. package/dist/index.js +4 -1
  37. package/dist/index.js.map +1 -1
  38. package/dist/llamaEvaluator/LlamaBins.d.ts +3 -3
  39. package/dist/llamaEvaluator/LlamaBins.js +2 -2
  40. package/dist/llamaEvaluator/LlamaBins.js.map +1 -1
  41. package/dist/llamaEvaluator/LlamaChatSession.d.ts +7 -2
  42. package/dist/llamaEvaluator/LlamaChatSession.js +51 -11
  43. package/dist/llamaEvaluator/LlamaChatSession.js.map +1 -1
  44. package/dist/llamaEvaluator/LlamaContext.d.ts +31 -2
  45. package/dist/llamaEvaluator/LlamaContext.js +74 -7
  46. package/dist/llamaEvaluator/LlamaContext.js.map +1 -1
  47. package/dist/llamaEvaluator/LlamaGrammar.d.ts +14 -0
  48. package/dist/llamaEvaluator/LlamaGrammar.js +30 -0
  49. package/dist/llamaEvaluator/LlamaGrammar.js.map +1 -0
  50. package/dist/llamaEvaluator/LlamaModel.d.ts +49 -1
  51. package/dist/llamaEvaluator/LlamaModel.js +25 -9
  52. package/dist/llamaEvaluator/LlamaModel.js.map +1 -1
  53. package/dist/types.d.ts +1 -0
  54. package/dist/types.js +2 -0
  55. package/dist/types.js.map +1 -0
  56. package/dist/utils/binariesGithubRelease.d.ts +6 -0
  57. package/dist/utils/binariesGithubRelease.js +15 -0
  58. package/dist/utils/binariesGithubRelease.js.map +1 -0
  59. package/dist/utils/compileLLamaCpp.d.ts +3 -1
  60. package/dist/utils/compileLLamaCpp.js +34 -4
  61. package/dist/utils/compileLLamaCpp.js.map +1 -1
  62. package/dist/utils/getBin.d.ts +18 -4
  63. package/dist/utils/getBin.js +4 -2
  64. package/dist/utils/getBin.js.map +1 -1
  65. package/dist/utils/getGrammarsFolder.d.ts +1 -0
  66. package/dist/utils/getGrammarsFolder.js +18 -0
  67. package/dist/utils/getGrammarsFolder.js.map +1 -0
  68. package/dist/utils/getTextCompletion.d.ts +3 -0
  69. package/dist/utils/getTextCompletion.js +12 -0
  70. package/dist/utils/getTextCompletion.js.map +1 -0
  71. package/dist/utils/removeNullFields.d.ts +1 -0
  72. package/dist/utils/removeNullFields.js +9 -0
  73. package/dist/utils/removeNullFields.js.map +1 -0
  74. package/dist/utils/spawnCommand.d.ts +2 -1
  75. package/dist/utils/spawnCommand.js +2 -2
  76. package/dist/utils/spawnCommand.js.map +1 -1
  77. package/llama/addon.cpp +180 -42
  78. package/llama/binariesGithubRelease.json +3 -0
  79. package/llama/binding.gyp +6 -3
  80. package/llama/grammars/README.md +91 -0
  81. package/llama/grammars/arithmetic.gbnf +6 -0
  82. package/llama/grammars/chess.gbnf +13 -0
  83. package/llama/grammars/japanese.gbnf +7 -0
  84. package/llama/grammars/json.gbnf +25 -0
  85. package/llama/grammars/list.gbnf +4 -0
  86. package/llamaBins/linux-arm64-16.node +0 -0
  87. package/llamaBins/linux-arm64-17.node +0 -0
  88. package/llamaBins/linux-arm64-18.node +0 -0
  89. package/llamaBins/linux-arm64-19.node +0 -0
  90. package/llamaBins/linux-arm64-20.node +0 -0
  91. package/llamaBins/linux-armv7l-16.node +0 -0
  92. package/llamaBins/linux-armv7l-17.node +0 -0
  93. package/llamaBins/linux-armv7l-18.node +0 -0
  94. package/llamaBins/linux-armv7l-19.node +0 -0
  95. package/llamaBins/linux-armv7l-20.node +0 -0
  96. package/llamaBins/linux-ppc64le-16.node +0 -0
  97. package/llamaBins/linux-ppc64le-17.node +0 -0
  98. package/llamaBins/linux-ppc64le-18.node +0 -0
  99. package/llamaBins/linux-ppc64le-19.node +0 -0
  100. package/llamaBins/linux-ppc64le-20.node +0 -0
  101. package/llamaBins/linux-x64-16.node +0 -0
  102. package/llamaBins/linux-x64-17.node +0 -0
  103. package/llamaBins/linux-x64-18.node +0 -0
  104. package/llamaBins/linux-x64-19.node +0 -0
  105. package/llamaBins/linux-x64-20.node +0 -0
  106. package/llamaBins/mac-arm64-16.node +0 -0
  107. package/llamaBins/mac-arm64-17.node +0 -0
  108. package/llamaBins/mac-arm64-18.node +0 -0
  109. package/llamaBins/mac-arm64-19.node +0 -0
  110. package/llamaBins/mac-arm64-20.node +0 -0
  111. package/llamaBins/mac-x64-16.node +0 -0
  112. package/llamaBins/mac-x64-17.node +0 -0
  113. package/llamaBins/mac-x64-18.node +0 -0
  114. package/llamaBins/mac-x64-19.node +0 -0
  115. package/llamaBins/mac-x64-20.node +0 -0
  116. package/llamaBins/win-x64-16.node +0 -0
  117. package/llamaBins/win-x64-17.node +0 -0
  118. package/llamaBins/win-x64-18.node +0 -0
  119. package/llamaBins/win-x64-19.node +0 -0
  120. package/llamaBins/win-x64-20.node +0 -0
  121. package/package.json +12 -6
@@ -1,4 +1,4 @@
1
- import { type LLAMAModel, type LLAMAContext } from "../utils/getBin.js";
1
+ import { type LLAMAModel, type LLAMAContext, type LLAMAGrammar } from "../utils/getBin.js";
2
2
  export declare const llamaCppNode: import("../utils/getBin.js").LlamaCppNodeModule;
3
- declare const LLAMAModel: LLAMAModel, LLAMAContext: LLAMAContext;
4
- export { LLAMAModel, LLAMAContext };
3
+ declare const LLAMAModel: LLAMAModel, LLAMAContext: LLAMAContext, LLAMAGrammar: LLAMAGrammar;
4
+ export { LLAMAModel, LLAMAContext, LLAMAGrammar };
@@ -1,5 +1,5 @@
1
1
  import { loadBin } from "../utils/getBin.js";
2
2
  export const llamaCppNode = await loadBin();
3
- const { LLAMAModel, LLAMAContext } = llamaCppNode;
4
- export { LLAMAModel, LLAMAContext };
3
+ const { LLAMAModel, LLAMAContext, LLAMAGrammar } = llamaCppNode;
4
+ export { LLAMAModel, LLAMAContext, LLAMAGrammar };
5
5
  //# sourceMappingURL=LlamaBins.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"LlamaBins.js","sourceRoot":"","sources":["../../src/llamaEvaluator/LlamaBins.ts"],"names":[],"mappings":"AAAA,OAAO,EAAC,OAAO,EAAqC,MAAM,oBAAoB,CAAC;AAE/E,MAAM,CAAC,MAAM,YAAY,GAAG,MAAM,OAAO,EAAE,CAAC;AAC5C,MAAM,EAAC,UAAU,EAAE,YAAY,EAAC,GAAG,YAAY,CAAC;AAEhD,OAAO,EAAC,UAAU,EAAE,YAAY,EAAC,CAAC"}
1
+ {"version":3,"file":"LlamaBins.js","sourceRoot":"","sources":["../../src/llamaEvaluator/LlamaBins.ts"],"names":[],"mappings":"AAAA,OAAO,EAAC,OAAO,EAAwD,MAAM,oBAAoB,CAAC;AAElG,MAAM,CAAC,MAAM,YAAY,GAAG,MAAM,OAAO,EAAE,CAAC;AAC5C,MAAM,EAAC,UAAU,EAAE,YAAY,EAAE,YAAY,EAAC,GAAG,YAAY,CAAC;AAE9D,OAAO,EAAC,UAAU,EAAE,YAAY,EAAE,YAAY,EAAC,CAAC"}
@@ -1,5 +1,6 @@
1
1
  /// <reference types="node" />
2
2
  import { ChatPromptWrapper } from "../ChatPromptWrapper.js";
3
+ import { Token } from "../types.js";
3
4
  import { LlamaContext } from "./LlamaContext.js";
4
5
  export declare class LlamaChatSession {
5
6
  private readonly _systemPrompt;
@@ -7,18 +8,22 @@ export declare class LlamaChatSession {
7
8
  private readonly _promptWrapper;
8
9
  private _promptIndex;
9
10
  private _initialized;
11
+ private _lastStopString;
12
+ private _lastStopStringSuffix;
10
13
  private readonly _ctx;
11
14
  constructor({ context, printLLamaSystemInfo, promptWrapper, systemPrompt }: {
12
15
  context: LlamaContext;
13
16
  printLLamaSystemInfo?: boolean;
14
- promptWrapper?: ChatPromptWrapper;
17
+ promptWrapper?: ChatPromptWrapper | "auto";
15
18
  systemPrompt?: string;
16
19
  });
17
20
  get initialized(): boolean;
18
21
  get context(): LlamaContext;
19
22
  init(): Promise<void>;
20
- prompt(prompt: string, onToken?: (tokens: number[]) => void, { signal }?: {
23
+ prompt(prompt: string, { onToken, signal, maxTokens }?: {
24
+ onToken?(tokens: Token[]): void;
21
25
  signal?: AbortSignal;
26
+ maxTokens?: number;
22
27
  }): Promise<string>;
23
28
  private _evalTokens;
24
29
  private _checkStopString;
@@ -2,6 +2,7 @@ import { defaultChatSystemPrompt } from "../config.js";
2
2
  import { withLock } from "../utils/withLock.js";
3
3
  import { AbortError } from "../AbortError.js";
4
4
  import { GeneralChatPromptWrapper } from "../chatWrappers/GeneralChatPromptWrapper.js";
5
+ import { getChatWrapperByBos } from "../chatWrappers/createChatWrapperByBos.js";
5
6
  import { LlamaModel } from "./LlamaModel.js";
6
7
  const UNKNOWN_UNICODE_CHAR = "\ufffd";
7
8
  export class LlamaChatSession {
@@ -10,12 +11,22 @@ export class LlamaChatSession {
10
11
  _promptWrapper;
11
12
  _promptIndex = 0;
12
13
  _initialized = false;
14
+ _lastStopString = null;
15
+ _lastStopStringSuffix = null;
13
16
  _ctx;
14
17
  constructor({ context, printLLamaSystemInfo = false, promptWrapper = new GeneralChatPromptWrapper(), systemPrompt = defaultChatSystemPrompt }) {
15
18
  this._ctx = context;
16
19
  this._printLLamaSystemInfo = printLLamaSystemInfo;
17
- this._promptWrapper = promptWrapper;
18
20
  this._systemPrompt = systemPrompt;
21
+ if (promptWrapper === "auto") {
22
+ const chatWrapper = getChatWrapperByBos(context.getBosString());
23
+ if (chatWrapper != null)
24
+ this._promptWrapper = new chatWrapper();
25
+ else
26
+ this._promptWrapper = new GeneralChatPromptWrapper();
27
+ }
28
+ else
29
+ this._promptWrapper = promptWrapper;
19
30
  }
20
31
  get initialized() {
21
32
  return this._initialized;
@@ -32,17 +43,30 @@ export class LlamaChatSession {
32
43
  this._initialized = true;
33
44
  });
34
45
  }
35
- async prompt(prompt, onToken, { signal } = {}) {
46
+ async prompt(prompt, { onToken, signal, maxTokens } = {}) {
36
47
  if (!this.initialized)
37
48
  await this.init();
38
49
  return await withLock(this, "prompt", async () => {
39
- const promptText = this._promptWrapper.wrapPrompt(prompt, { systemPrompt: this._systemPrompt, promptIndex: this._promptIndex });
50
+ const promptText = this._promptWrapper.wrapPrompt(prompt, {
51
+ systemPrompt: this._systemPrompt,
52
+ promptIndex: this._promptIndex,
53
+ lastStopString: this._lastStopString,
54
+ lastStopStringSuffix: this._promptIndex == 0
55
+ ? (this._ctx.prependBos
56
+ ? this._ctx.getBosString()
57
+ : null)
58
+ : this._lastStopStringSuffix
59
+ });
40
60
  this._promptIndex++;
41
- return await this._evalTokens(this._ctx.encode(promptText), onToken, { signal });
61
+ this._lastStopString = null;
62
+ this._lastStopStringSuffix = null;
63
+ const { text, stopString, stopStringSuffix } = await this._evalTokens(this._ctx.encode(promptText), { onToken, signal, maxTokens });
64
+ this._lastStopString = stopString;
65
+ this._lastStopStringSuffix = stopStringSuffix;
66
+ return text;
42
67
  });
43
68
  }
44
- async _evalTokens(tokens, onToken, { signal } = {}) {
45
- const decodeTokens = (tokens) => this._ctx.decode(Uint32Array.from(tokens));
69
+ async _evalTokens(tokens, { onToken, signal, maxTokens } = {}) {
46
70
  const stopStrings = this._promptWrapper.getStopStrings();
47
71
  const stopStringIndexes = Array(stopStrings.length).fill(0);
48
72
  const skippedChunksQueue = [];
@@ -50,10 +74,14 @@ export class LlamaChatSession {
50
74
  for await (const chunk of this._ctx.evaluate(tokens)) {
51
75
  if (signal?.aborted)
52
76
  throw new AbortError();
53
- const tokenStr = decodeTokens([chunk]);
54
- const { shouldReturn, skipTokenEvent } = this._checkStopString(tokenStr, stopStringIndexes);
77
+ const tokenStr = this._ctx.decode(Uint32Array.from([chunk]));
78
+ const { shouldReturn, skipTokenEvent, stopString, stopStringSuffix } = this._checkStopString(tokenStr, stopStringIndexes);
55
79
  if (shouldReturn)
56
- return decodeTokens(res);
80
+ return {
81
+ text: this._ctx.decode(Uint32Array.from(res)),
82
+ stopString,
83
+ stopStringSuffix
84
+ };
57
85
  // if the token is unknown, it means it's not complete character
58
86
  if (tokenStr === UNKNOWN_UNICODE_CHAR || skipTokenEvent) {
59
87
  skippedChunksQueue.push(chunk);
@@ -66,8 +94,14 @@ export class LlamaChatSession {
66
94
  }
67
95
  res.push(chunk);
68
96
  onToken?.([chunk]);
97
+ if (maxTokens != null && maxTokens > 0 && res.length >= maxTokens)
98
+ break;
69
99
  }
70
- return decodeTokens(res);
100
+ return {
101
+ text: this._ctx.decode(Uint32Array.from(res)),
102
+ stopString: null,
103
+ stopStringSuffix: null
104
+ };
71
105
  }
72
106
  _checkStopString(tokenStr, stopStringIndexes) {
73
107
  const stopStrings = this._promptWrapper.getStopStrings();
@@ -87,7 +121,13 @@ export class LlamaChatSession {
87
121
  }
88
122
  }
89
123
  if (stopStringIndexes[stopStringIndex] === stopString.length) {
90
- return { shouldReturn: true };
124
+ return {
125
+ shouldReturn: true,
126
+ stopString,
127
+ stopStringSuffix: tokenStr.length === stopString.length
128
+ ? null
129
+ : tokenStr.slice(stopString.length)
130
+ };
91
131
  }
92
132
  skipTokenEvent ||= localShouldSkipTokenEvent;
93
133
  }
@@ -1 +1 @@
1
- {"version":3,"file":"LlamaChatSession.js","sourceRoot":"","sources":["../../src/llamaEvaluator/LlamaChatSession.ts"],"names":[],"mappings":"AAAA,OAAO,EAAC,uBAAuB,EAAC,MAAM,cAAc,CAAC;AACrD,OAAO,EAAC,QAAQ,EAAC,MAAM,sBAAsB,CAAC;AAE9C,OAAO,EAAC,UAAU,EAAC,MAAM,kBAAkB,CAAC;AAC5C,OAAO,EAAC,wBAAwB,EAAC,MAAM,6CAA6C,CAAC;AACrF,OAAO,EAAC,UAAU,EAAC,MAAM,iBAAiB,CAAC;AAG3C,MAAM,oBAAoB,GAAG,QAAQ,CAAC;AAEtC,MAAM,OAAO,gBAAgB;IACR,aAAa,CAAS;IACtB,qBAAqB,CAAU;IAC/B,cAAc,CAAoB;IAC3C,YAAY,GAAW,CAAC,CAAC;IACzB,YAAY,GAAY,KAAK,CAAC;IACrB,IAAI,CAAe;IAEpC,YAAmB,EACf,OAAO,EACP,oBAAoB,GAAG,KAAK,EAC5B,aAAa,GAAG,IAAI,wBAAwB,EAAE,EAC9C,YAAY,GAAG,uBAAuB,EAMzC;QACG,IAAI,CAAC,IAAI,GAAG,OAAO,CAAC;QACpB,IAAI,CAAC,qBAAqB,GAAG,oBAAoB,CAAC;QAClD,IAAI,CAAC,cAAc,GAAG,aAAa,CAAC;QAEpC,IAAI,CAAC,aAAa,GAAG,YAAY,CAAC;IACtC,CAAC;IAED,IAAW,WAAW;QAClB,OAAO,IAAI,CAAC,YAAY,CAAC;IAC7B,CAAC;IAED,IAAW,OAAO;QACd,OAAO,IAAI,CAAC,IAAI,CAAC;IACrB,CAAC;IAEM,KAAK,CAAC,IAAI;QACb,MAAM,QAAQ,CAAC,IAAI,EAAE,MAAM,EAAE,KAAK,IAAI,EAAE;YACpC,IAAI,IAAI,CAAC,YAAY;gBACjB,OAAO;YAEX,IAAI,IAAI,CAAC,qBAAqB;gBAC1B,OAAO,CAAC,GAAG,CAAC,mBAAmB,EAAE,UAAU,CAAC,UAAU,CAAC,CAAC;YAE5D,IAAI,CAAC,YAAY,GAAG,IAAI,CAAC;QAC7B,CAAC,CAAC,CAAC;IACP,CAAC;IAEM,KAAK,CAAC,MAAM,CAAC,MAAc,EAAE,OAAoC,EAAE,EAAC,MAAM,KAA8B,EAAE;QAC7G,IAAI,CAAC,IAAI,CAAC,WAAW;YACjB,MAAM,IAAI,CAAC,IAAI,EAAE,CAAC;QAEtB,OAAO,MAAM,QAAQ,CAAC,IAAI,EAAE,QAAQ,EAAE,KAAK,IAAI,EAAE;YAC7C,MAAM,UAAU,GAAG,IAAI,CAAC,cAAc,CAAC,UAAU,CAAC,MAAM,EAAE,EAAC,YAAY,EAAE,IAAI,CAAC,aAAa,EAAE,WAAW,EAAE,IAAI,CAAC,YAAY,EAAC,CAAC,CAAC;YAC9H,IAAI,CAAC,YAAY,EAAE,CAAC;YAEpB,OAAO,MAAM,IAAI,CAAC,WAAW,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,UAAU,CAAC,EAAE,OAAO,EAAE,EAAC,MAAM,EAAC,CAAC,CAAC;QACnF,CAAC,CAAC,CAAC;IACP,CAAC;IAEO,KAAK,CAAC,WAAW,CAAC,MAAmB,EAAE,OAAoC,EAAE,EAAC,MAAM,KAA8B,EAAE;QACxH,MAAM,YAAY,GAAG,CAAC,MAAgB,EAAE,EAAE,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,WAAW,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC;QAEtF,MAAM,WAAW,GAAG,IAAI,CAAC,cAAc,CAAC,cAAc,EAAE,CAAC;QACzD,MAAM,iBAAiB,GAAG,KAAK,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAC5D,MAAM,kBAAkB,GAAa,EAAE,CAAC;QACxC,MAAM,GAAG,GAAa,EAAE,CAAC;QAGzB,IAAI,KAAK,EAAE,MAAM,KAAK,IAAI,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE;YAClD,IAAI,MAAM,EAAE,OAAO;gBACf,MAAM,IAAI,UAAU,EAAE,CAAC;YAE3B,MAAM,QAAQ,GAAG,YAAY,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC;YACvC,MAAM,EAAC,YAAY,EAAE,cAAc,EAAC,GAAG,IAAI,CAAC,gBAAgB,CAAC,QAAQ,EAAE,iBAAiB,CAAC,CAAC;YAE1F,IAAI,YAAY;gBACZ,OAAO,YAAY,CAAC,GAAG,CAAC,CAAC;YAE7B,gEAAgE;YAChE,IAAI,QAAQ,KAAK,oBAAoB,IAAI,cAAc,EAAE;gBACrD,kBAAkB,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;gBAC/B,SAAS;aACZ;YAED,IAAI,kBAAkB,CAAC,MAAM,GAAG,CAAC,EAAE;gBAC/B,GAAG,CAAC,IAAI,CAAC,GAAG,kBAAkB,CAAC,CAAC;gBAChC,OAAO,EAAE,CAAC,kBAAkB,CAAC,CAAC;gBAC9B,kBAAkB,CAAC,MAAM,GAAG,CAAC,CAAC;aACjC;YAED,GAAG,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YAChB,OAAO,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC;SACtB;QAED,OAAO,YAAY,CAAC,GAAG,CAAC,CAAC;IAC7B,CAAC;IAEO,gBAAgB,CAAC,QAAgB,EAAE,iBAA2B;QAClE,MAAM,WAAW,GAAG,IAAI,CAAC,cAAc,CAAC,cAAc,EAAE,CAAC;QACzD,IAAI,cAAc,GAAG,KAAK,CAAC;QAE3B,KAAK,IAAI,eAAe,GAAG,CAAC,EAAE,eAAe,GAAG,WAAW,CAAC,MAAM,EAAE,eAAe,EAAE,EAAE;YACnF,MAAM,UAAU,GAAG,WAAW,CAAC,eAAe,CAAC,CAAC;YAEhD,IAAI,yBAAyB,GAAG,KAAK,CAAC;YACtC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,QAAQ,CAAC,MAAM,IAAI,iBAAiB,CAAC,eAAe,CAAC,KAAK,UAAU,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;gBAClG,IAAI,QAAQ,CAAC,CAAC,CAAC,KAAK,UAAU,CAAC,iBAAiB,CAAC,eAAe,CAAC,CAAC,EAAE;oBAChE,iBAAiB,CAAC,eAAe,CAAC,EAAE,CAAC;oBACrC,yBAAyB,GAAG,IAAI,CAAC;iBACpC;qBAAM;oBACH,iBAAiB,CAAC,eAAe,CAAC,GAAG,CAAC,CAAC;oBACvC,yBAAyB,GAAG,KAAK,CAAC;oBAClC,MAAM;iBACT;aACJ;YAED,IAAI,iBAAiB,CAAC,eAAe,CAAC,KAAK,UAAU,CAAC,MAAM,EAAE;gBAC1D,OAAO,EAAC,YAAY,EAAE,IAAI,EAAC,CAAC;aAC/B;YAED,cAAc,KAAK,yBAAyB,CAAC;SAChD;QAED,OAAO,EAAC,cAAc,EAAC,CAAC;IAC5B,CAAC;CACJ"}
1
+ {"version":3,"file":"LlamaChatSession.js","sourceRoot":"","sources":["../../src/llamaEvaluator/LlamaChatSession.ts"],"names":[],"mappings":"AAAA,OAAO,EAAC,uBAAuB,EAAC,MAAM,cAAc,CAAC;AACrD,OAAO,EAAC,QAAQ,EAAC,MAAM,sBAAsB,CAAC;AAE9C,OAAO,EAAC,UAAU,EAAC,MAAM,kBAAkB,CAAC;AAC5C,OAAO,EAAC,wBAAwB,EAAC,MAAM,6CAA6C,CAAC;AACrF,OAAO,EAAC,mBAAmB,EAAC,MAAM,2CAA2C,CAAC;AAE9E,OAAO,EAAC,UAAU,EAAC,MAAM,iBAAiB,CAAC;AAG3C,MAAM,oBAAoB,GAAG,QAAQ,CAAC;AAEtC,MAAM,OAAO,gBAAgB;IACR,aAAa,CAAS;IACtB,qBAAqB,CAAU;IAC/B,cAAc,CAAoB;IAC3C,YAAY,GAAW,CAAC,CAAC;IACzB,YAAY,GAAY,KAAK,CAAC;IAC9B,eAAe,GAAkB,IAAI,CAAC;IACtC,qBAAqB,GAAkB,IAAI,CAAC;IACnC,IAAI,CAAe;IAEpC,YAAmB,EACf,OAAO,EACP,oBAAoB,GAAG,KAAK,EAC5B,aAAa,GAAG,IAAI,wBAAwB,EAAE,EAC9C,YAAY,GAAG,uBAAuB,EAMzC;QACG,IAAI,CAAC,IAAI,GAAG,OAAO,CAAC;QACpB,IAAI,CAAC,qBAAqB,GAAG,oBAAoB,CAAC;QAClD,IAAI,CAAC,aAAa,GAAG,YAAY,CAAC;QAElC,IAAI,aAAa,KAAK,MAAM,EAAE;YAC1B,MAAM,WAAW,GAAG,mBAAmB,CAAC,OAAO,CAAC,YAAY,EAAE,CAAC,CAAC;YAEhE,IAAI,WAAW,IAAI,IAAI;gBACnB,IAAI,CAAC,cAAc,GAAG,IAAI,WAAW,EAAE,CAAC;;gBAExC,IAAI,CAAC,cAAc,GAAG,IAAI,wBAAwB,EAAE,CAAC;SAC5D;;YACG,IAAI,CAAC,cAAc,GAAG,aAAa,CAAC;IAC5C,CAAC;IAED,IAAW,WAAW;QAClB,OAAO,IAAI,CAAC,YAAY,CAAC;IAC7B,CAAC;IAED,IAAW,OAAO;QACd,OAAO,IAAI,CAAC,IAAI,CAAC;IACrB,CAAC;IAEM,KAAK,CAAC,IAAI;QACb,MAAM,QAAQ,CAAC,IAAI,EAAE,MAAM,EAAE,KAAK,IAAI,EAAE;YACpC,IAAI,IAAI,CAAC,YAAY;gBACjB,OAAO;YAEX,IAAI,IAAI,CAAC,qBAAqB;gBAC1B,OAAO,CAAC,GAAG,CAAC,mBAAmB,EAAE,UAAU,CAAC,UAAU,CAAC,CAAC;YAE5D,IAAI,CAAC,YAAY,GAAG,IAAI,CAAC;QAC7B,CAAC,CAAC,CAAC;IACP,CAAC;IAEM,KAAK,CAAC,MAAM,CAAC,MAAc,EAAE,EAChC,OAAO,EAAE,MAAM,EAAE,SAAS,KACqD,EAAE;QACjF,IAAI,CAAC,IAAI,CAAC,WAAW;YACjB,MAAM,IAAI,CAAC,IAAI,EAAE,CAAC;QAEtB,OAAO,MAAM,QAAQ,CAAC,IAAI,EAAE,QAAQ,EAAE,KAAK,IAAI,EAAE;YAC7C,MAAM,UAAU,GAAG,IAAI,CAAC,cAAc,CAAC,UAAU,CAAC,MAAM,EAAE;gBACtD,YAAY,EAAE,IAAI,CAAC,aAAa;gBAChC,WAAW,EAAE,IAAI,CAAC,YAAY;gBAC9B,cAAc,EAAE,IAAI,CAAC,eAAe;gBACpC,oBAAoB,EAAE,IAAI,CAAC,YAAY,IAAI,CAAC;oBACxC,CAAC,CAAC,CACE,IAAI,CAAC,IAAI,CAAC,UAAU;wBAChB,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,YAAY,EAAE;wBAC1B,CAAC,CAAC,IAAI,CACb;oBACD,CAAC,CAAC,IAAI,CAAC,qBAAqB;aACnC,CAAC,CAAC;YACH,IAAI,CAAC,YAAY,EAAE,CAAC;YACpB,IAAI,CAAC,eAAe,GAAG,IAAI,CAAC;YAC5B,IAAI,CAAC,qBAAqB,GAAG,IAAI,CAAC;YAElC,MAAM,EAAC,IAAI,EAAE,UAAU,EAAE,gBAAgB,EAAC,GACtC,MAAM,IAAI,CAAC,WAAW,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,UAAU,CAAC,EAAE,EAAC,OAAO,EAAE,MAAM,EAAE,SAAS,EAAC,CAAC,CAAC;YACvF,IAAI,CAAC,eAAe,GAAG,UAAU,CAAC;YAClC,IAAI,CAAC,qBAAqB,GAAG,gBAAgB,CAAC;YAE9C,OAAO,IAAI,CAAC;QAChB,CAAC,CAAC,CAAC;IACP,CAAC;IAEO,KAAK,CAAC,WAAW,CAAC,MAAmB,EAAE,EAC3C,OAAO,EAAE,MAAM,EAAE,SAAS,KACqD,EAAE;QACjF,MAAM,WAAW,GAAG,IAAI,CAAC,cAAc,CAAC,cAAc,EAAE,CAAC;QACzD,MAAM,iBAAiB,GAAG,KAAK,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAC5D,MAAM,kBAAkB,GAAY,EAAE,CAAC;QACvC,MAAM,GAAG,GAAY,EAAE,CAAC;QAExB,IAAI,KAAK,EAAE,MAAM,KAAK,IAAI,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE;YAClD,IAAI,MAAM,EAAE,OAAO;gBACf,MAAM,IAAI,UAAU,EAAE,CAAC;YAE3B,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;YAC7D,MAAM,EAAC,YAAY,EAAE,cAAc,EAAE,UAAU,EAAE,gBAAgB,EAAC,GAAG,IAAI,CAAC,gBAAgB,CAAC,QAAQ,EAAE,iBAAiB,CAAC,CAAC;YAExH,IAAI,YAAY;gBACZ,OAAO;oBACH,IAAI,EAAE,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,WAAW,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;oBAC7C,UAAU;oBACV,gBAAgB;iBACnB,CAAC;YAEN,gEAAgE;YAChE,IAAI,QAAQ,KAAK,oBAAoB,IAAI,cAAc,EAAE;gBACrD,kBAAkB,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;gBAC/B,SAAS;aACZ;YAED,IAAI,kBAAkB,CAAC,MAAM,GAAG,CAAC,EAAE;gBAC/B,GAAG,CAAC,IAAI,CAAC,GAAG,kBAAkB,CAAC,CAAC;gBAChC,OAAO,EAAE,CAAC,kBAAkB,CAAC,CAAC;gBAC9B,kBAAkB,CAAC,MAAM,GAAG,CAAC,CAAC;aACjC;YAED,GAAG,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YAChB,OAAO,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC;YAEnB,IAAI,SAAS,IAAI,IAAI,IAAI,SAAS,GAAG,CAAC,IAAI,GAAG,CAAC,MAAM,IAAI,SAAS;gBAC7D,MAAM;SACb;QAED,OAAO;YACH,IAAI,EAAE,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,WAAW,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;YAC7C,UAAU,EAAE,IAAI;YAChB,gBAAgB,EAAE,IAAI;SACzB,CAAC;IACN,CAAC;IAEO,gBAAgB,CAAC,QAAgB,EAAE,iBAA2B;QAClE,MAAM,WAAW,GAAG,IAAI,CAAC,cAAc,CAAC,cAAc,EAAE,CAAC;QACzD,IAAI,cAAc,GAAG,KAAK,CAAC;QAE3B,KAAK,IAAI,eAAe,GAAG,CAAC,EAAE,eAAe,GAAG,WAAW,CAAC,MAAM,EAAE,eAAe,EAAE,EAAE;YACnF,MAAM,UAAU,GAAG,WAAW,CAAC,eAAe,CAAC,CAAC;YAEhD,IAAI,yBAAyB,GAAG,KAAK,CAAC;YACtC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,QAAQ,CAAC,MAAM,IAAI,iBAAiB,CAAC,eAAe,CAAC,KAAK,UAAU,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;gBAClG,IAAI,QAAQ,CAAC,CAAC,CAAC,KAAK,UAAU,CAAC,iBAAiB,CAAC,eAAe,CAAC,CAAC,EAAE;oBAChE,iBAAiB,CAAC,eAAe,CAAC,EAAE,CAAC;oBACrC,yBAAyB,GAAG,IAAI,CAAC;iBACpC;qBAAM;oBACH,iBAAiB,CAAC,eAAe,CAAC,GAAG,CAAC,CAAC;oBACvC,yBAAyB,GAAG,KAAK,CAAC;oBAClC,MAAM;iBACT;aACJ;YAED,IAAI,iBAAiB,CAAC,eAAe,CAAC,KAAK,UAAU,CAAC,MAAM,EAAE;gBAC1D,OAAO;oBACH,YAAY,EAAE,IAAI;oBAClB,UAAU;oBACV,gBAAgB,EAAE,QAAQ,CAAC,MAAM,KAAK,UAAU,CAAC,MAAM;wBACnD,CAAC,CAAC,IAAI;wBACN,CAAC,CAAC,QAAQ,CAAC,KAAK,CAAC,UAAU,CAAC,MAAM,CAAC;iBAC1C,CAAC;aACL;YAED,cAAc,KAAK,yBAAyB,CAAC;SAChD;QAED,OAAO,EAAC,cAAc,EAAC,CAAC;IAC5B,CAAC;CACJ"}
@@ -1,12 +1,41 @@
1
+ import { Token } from "../types.js";
1
2
  import { LlamaModel } from "./LlamaModel.js";
3
+ import { LlamaGrammar } from "./LlamaGrammar.js";
2
4
  export declare class LlamaContext {
3
5
  private readonly _ctx;
4
6
  private _prependBos;
5
- constructor({ model, prependBos }: {
7
+ constructor({ model, grammar, prependBos }: {
6
8
  model: LlamaModel;
9
+ grammar?: LlamaGrammar;
7
10
  prependBos?: boolean;
8
11
  });
9
12
  encode(text: string): Uint32Array;
10
13
  decode(tokens: Uint32Array): string;
11
- evaluate(tokens: Uint32Array, getRestrictions?: () => Uint32Array): AsyncGenerator<number, void, unknown>;
14
+ get prependBos(): boolean;
15
+ /**
16
+ * @returns {Token | null} The BOS (Beginning Of Sequence) token.
17
+ */
18
+ getBosToken(): Token | null;
19
+ /**
20
+ * @returns {Token | null} The EOS (End Of Sequence) token.
21
+ */
22
+ getEosToken(): Token | null;
23
+ /**
24
+ * @returns {Token | null} The NL (New Line) token.
25
+ */
26
+ getNlToken(): Token | null;
27
+ /**
28
+ * @returns {string | null} The BOS (Beginning Of Sequence) token as a string.
29
+ */
30
+ getBosString(): string | null;
31
+ /**
32
+ * @returns {string | null} The EOS (End Of Sequence) token as a string.
33
+ */
34
+ getEosString(): string | null;
35
+ /**
36
+ * @returns {string | null} The NL (New Line) token as a string.
37
+ */
38
+ getNlString(): string | null;
39
+ getContextSize(): number;
40
+ evaluate(tokens: Uint32Array): AsyncGenerator<Token, void>;
12
41
  }
@@ -1,31 +1,98 @@
1
- import { LLAMAContext, llamaCppNode } from "./LlamaBins.js";
1
+ import { removeNullFields } from "../utils/removeNullFields.js";
2
+ import { LLAMAContext } from "./LlamaBins.js";
2
3
  export class LlamaContext {
3
4
  _ctx;
4
5
  _prependBos;
5
- constructor({ model, prependBos = true }) {
6
- this._ctx = new LLAMAContext(model._model);
6
+ constructor({ model, grammar, prependBos = true }) {
7
+ this._ctx = new LLAMAContext(model._model, removeNullFields({
8
+ grammar: grammar?._grammar
9
+ }));
7
10
  this._prependBos = prependBos;
8
11
  }
9
12
  encode(text) {
13
+ if (text === "")
14
+ return new Uint32Array();
10
15
  return this._ctx.encode(text);
11
16
  }
12
17
  decode(tokens) {
18
+ if (tokens.length === 0)
19
+ return "";
13
20
  return this._ctx.decode(tokens);
14
21
  }
15
- async *evaluate(tokens, getRestrictions) {
22
+ get prependBos() {
23
+ return this._prependBos;
24
+ }
25
+ /**
26
+ * @returns {Token | null} The BOS (Beginning Of Sequence) token.
27
+ */
28
+ getBosToken() {
29
+ const bosToken = this._ctx.tokenBos();
30
+ if (bosToken === -1)
31
+ return null;
32
+ return bosToken;
33
+ }
34
+ /**
35
+ * @returns {Token | null} The EOS (End Of Sequence) token.
36
+ */
37
+ getEosToken() {
38
+ const eosToken = this._ctx.tokenEos();
39
+ if (eosToken === -1)
40
+ return null;
41
+ return eosToken;
42
+ }
43
+ /**
44
+ * @returns {Token | null} The NL (New Line) token.
45
+ */
46
+ getNlToken() {
47
+ const nlToken = this._ctx.tokenNl();
48
+ if (nlToken === -1)
49
+ return null;
50
+ return nlToken;
51
+ }
52
+ /**
53
+ * @returns {string | null} The BOS (Beginning Of Sequence) token as a string.
54
+ */
55
+ getBosString() {
56
+ const bosToken = this.getBosToken();
57
+ if (bosToken == null)
58
+ return null;
59
+ return this._ctx.getTokenString(bosToken);
60
+ }
61
+ /**
62
+ * @returns {string | null} The EOS (End Of Sequence) token as a string.
63
+ */
64
+ getEosString() {
65
+ const eosToken = this.getEosToken();
66
+ if (eosToken == null)
67
+ return null;
68
+ return this._ctx.getTokenString(eosToken);
69
+ }
70
+ /**
71
+ * @returns {string | null} The NL (New Line) token as a string.
72
+ */
73
+ getNlString() {
74
+ const nlToken = this.getNlToken();
75
+ if (nlToken == null)
76
+ return null;
77
+ return this._ctx.getTokenString(nlToken);
78
+ }
79
+ getContextSize() {
80
+ return this._ctx.getContextSize();
81
+ }
82
+ async *evaluate(tokens) {
16
83
  let evalTokens = tokens;
17
84
  if (this._prependBos) {
18
85
  const tokenArray = Array.from(tokens);
19
- tokenArray.unshift(llamaCppNode.tokenBos());
86
+ tokenArray.unshift(this._ctx.tokenBos());
20
87
  evalTokens = Uint32Array.from(tokenArray);
21
88
  this._prependBos = false;
22
89
  }
23
90
  // eslint-disable-next-line no-constant-condition
24
91
  while (true) {
25
92
  // Evaluate to get the next token.
26
- const nextToken = await this._ctx.eval(evalTokens, getRestrictions?.());
93
+ const nextToken = await this._ctx.eval(evalTokens);
27
94
  // the assistant finished answering
28
- if (nextToken === llamaCppNode.tokenEos())
95
+ if (nextToken === this._ctx.tokenEos())
29
96
  break;
30
97
  yield nextToken;
31
98
  // Create tokens for the next eval.
@@ -1 +1 @@
1
- {"version":3,"file":"LlamaContext.js","sourceRoot":"","sources":["../../src/llamaEvaluator/LlamaContext.ts"],"names":[],"mappings":"AAAA,OAAO,EAAC,YAAY,EAAE,YAAY,EAAC,MAAM,gBAAgB,CAAC;AAG1D,MAAM,OAAO,YAAY;IACJ,IAAI,CAAe;IAC5B,WAAW,CAAU;IAE7B,YAAmB,EAAC,KAAK,EAAE,UAAU,GAAG,IAAI,EAA4C;QACpF,IAAI,CAAC,IAAI,GAAG,IAAI,YAAY,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC;QAC3C,IAAI,CAAC,WAAW,GAAG,UAAU,CAAC;IAClC,CAAC;IAEM,MAAM,CAAC,IAAY;QACtB,OAAO,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;IAClC,CAAC;IAEM,MAAM,CAAC,MAAmB;QAC7B,OAAO,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;IACpC,CAAC;IAEM,KAAK,CAAC,CAAC,QAAQ,CAAC,MAAmB,EAAE,eAAmC;QAC3E,IAAI,UAAU,GAAG,MAAM,CAAC;QAExB,IAAI,IAAI,CAAC,WAAW,EAAE;YAClB,MAAM,UAAU,GAAG,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;YACtC,UAAU,CAAC,OAAO,CAAC,YAAY,CAAC,QAAQ,EAAE,CAAC,CAAC;YAE5C,UAAU,GAAG,WAAW,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;YAC1C,IAAI,CAAC,WAAW,GAAG,KAAK,CAAC;SAC5B;QAED,iDAAiD;QACjD,OAAO,IAAI,EAAE;YACT,kCAAkC;YAClC,MAAM,SAAS,GAAG,MAAM,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,UAAU,EAAE,eAAe,EAAE,EAAE,CAAC,CAAC;YAExE,mCAAmC;YACnC,IAAI,SAAS,KAAK,YAAY,CAAC,QAAQ,EAAE;gBACrC,MAAM;YAEV,MAAM,SAAS,CAAC;YAEhB,mCAAmC;YACnC,UAAU,GAAG,WAAW,CAAC,IAAI,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC;SAC9C;IACL,CAAC;CAEJ"}
1
+ {"version":3,"file":"LlamaContext.js","sourceRoot":"","sources":["../../src/llamaEvaluator/LlamaContext.ts"],"names":[],"mappings":"AAAA,OAAO,EAAC,gBAAgB,EAAC,MAAM,8BAA8B,CAAC;AAE9D,OAAO,EAAC,YAAY,EAAC,MAAM,gBAAgB,CAAC;AAI5C,MAAM,OAAO,YAAY;IACJ,IAAI,CAAe;IAC5B,WAAW,CAAU;IAE7B,YAAmB,EAAC,KAAK,EAAE,OAAO,EAAE,UAAU,GAAG,IAAI,EAAoE;QACrH,IAAI,CAAC,IAAI,GAAG,IAAI,YAAY,CAAC,KAAK,CAAC,MAAM,EAAE,gBAAgB,CAAC;YACxD,OAAO,EAAE,OAAO,EAAE,QAAQ;SAC7B,CAAC,CAAC,CAAC;QACJ,IAAI,CAAC,WAAW,GAAG,UAAU,CAAC;IAClC,CAAC;IAEM,MAAM,CAAC,IAAY;QACtB,IAAI,IAAI,KAAK,EAAE;YACX,OAAO,IAAI,WAAW,EAAE,CAAC;QAE7B,OAAO,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;IAClC,CAAC;IAEM,MAAM,CAAC,MAAmB;QAC7B,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC;YACnB,OAAO,EAAE,CAAC;QAEd,OAAO,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;IACpC,CAAC;IAED,IAAW,UAAU;QACjB,OAAO,IAAI,CAAC,WAAW,CAAC;IAC5B,CAAC;IAED;;OAEG;IACI,WAAW;QACd,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,CAAC;QAEtC,IAAI,QAAQ,KAAK,CAAC,CAAC;YACf,OAAO,IAAI,CAAC;QAEhB,OAAO,QAAQ,CAAC;IACpB,CAAC;IAED;;OAEG;IACI,WAAW;QACd,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,CAAC;QAEtC,IAAI,QAAQ,KAAK,CAAC,CAAC;YACf,OAAO,IAAI,CAAC;QAEhB,OAAO,QAAQ,CAAC;IACpB,CAAC;IAED;;OAEG;IACI,UAAU;QACb,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,CAAC;QAEpC,IAAI,OAAO,KAAK,CAAC,CAAC;YACd,OAAO,IAAI,CAAC;QAEhB,OAAO,OAAO,CAAC;IACnB,CAAC;IAED;;OAEG;IACI,YAAY;QACf,MAAM,QAAQ,GAAG,IAAI,CAAC,WAAW,EAAE,CAAC;QAEpC,IAAI,QAAQ,IAAI,IAAI;YAChB,OAAO,IAAI,CAAC;QAEhB,OAAO,IAAI,CAAC,IAAI,CAAC,cAAc,CAAC,QAAQ,CAAC,CAAC;IAC9C,CAAC;IAED;;OAEG;IACI,YAAY;QACf,MAAM,QAAQ,GAAG,IAAI,CAAC,WAAW,EAAE,CAAC;QAEpC,IAAI,QAAQ,IAAI,IAAI;YAChB,OAAO,IAAI,CAAC;QAEhB,OAAO,IAAI,CAAC,IAAI,CAAC,cAAc,CAAC,QAAQ,CAAC,CAAC;IAC9C,CAAC;IAED;;OAEG;IACI,WAAW;QACd,MAAM,OAAO,GAAG,IAAI,CAAC,UAAU,EAAE,CAAC;QAElC,IAAI,OAAO,IAAI,IAAI;YACf,OAAO,IAAI,CAAC;QAEhB,OAAO,IAAI,CAAC,IAAI,CAAC,cAAc,CAAC,OAAO,CAAC,CAAC;IAC7C,CAAC;IAEM,cAAc;QACjB,OAAO,IAAI,CAAC,IAAI,CAAC,cAAc,EAAE,CAAC;IACtC,CAAC;IAEM,KAAK,CAAC,CAAC,QAAQ,CAAC,MAAmB;QACtC,IAAI,UAAU,GAAG,MAAM,CAAC;QAExB,IAAI,IAAI,CAAC,WAAW,EAAE;YAClB,MAAM,UAAU,GAAY,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;YAC/C,UAAU,CAAC,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,CAAC,CAAC;YAEzC,UAAU,GAAG,WAAW,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;YAC1C,IAAI,CAAC,WAAW,GAAG,KAAK,CAAC;SAC5B;QAED,iDAAiD;QACjD,OAAO,IAAI,EAAE;YACT,kCAAkC;YAClC,MAAM,SAAS,GAAU,MAAM,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;YAE1D,mCAAmC;YACnC,IAAI,SAAS,KAAK,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE;gBAClC,MAAM;YAEV,MAAM,SAAS,CAAC;YAEhB,mCAAmC;YACnC,UAAU,GAAG,WAAW,CAAC,IAAI,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC;SAC9C;IACL,CAAC;CAEJ"}
@@ -0,0 +1,14 @@
1
+ export declare class LlamaGrammar {
2
+ /**
3
+ * GBNF files are supported.
4
+ * More info here: https://github.com/ggerganov/llama.cpp/blob/f5fe98d11bdf9e7797bcfb05c0c3601ffc4b9d26/grammars/README.md
5
+ * @param {object} options
6
+ * @param {string} options.grammar - GBNF grammar
7
+ * @param {boolean} [options.printGrammar] - print the grammar to stdout
8
+ */
9
+ constructor({ grammar, printGrammar }: {
10
+ grammar: string;
11
+ printGrammar?: boolean;
12
+ });
13
+ static getFor(type: "json" | "list" | "arithmetic" | "japanese" | "chess"): Promise<LlamaGrammar>;
14
+ }
@@ -0,0 +1,30 @@
1
+ import path from "path";
2
+ import fs from "fs-extra";
3
+ import { getGrammarsFolder } from "../utils/getGrammarsFolder.js";
4
+ import { LLAMAGrammar } from "./LlamaBins.js";
5
+ export class LlamaGrammar {
6
+ /** @internal */
7
+ _grammar;
8
+ /**
9
+ * GBNF files are supported.
10
+ * More info here: https://github.com/ggerganov/llama.cpp/blob/f5fe98d11bdf9e7797bcfb05c0c3601ffc4b9d26/grammars/README.md
11
+ * @param {object} options
12
+ * @param {string} options.grammar - GBNF grammar
13
+ * @param {boolean} [options.printGrammar] - print the grammar to stdout
14
+ */
15
+ constructor({ grammar, printGrammar = false }) {
16
+ this._grammar = new LLAMAGrammar(grammar, {
17
+ printGrammar
18
+ });
19
+ }
20
+ static async getFor(type) {
21
+ const grammarsFolder = await getGrammarsFolder();
22
+ const grammarFile = path.join(grammarsFolder, type + ".gbnf");
23
+ if (await fs.exists(grammarFile)) {
24
+ const grammar = await fs.readFile(grammarFile, "utf8");
25
+ return new LlamaGrammar({ grammar });
26
+ }
27
+ throw new Error(`Grammar file for type "${type}" was not found in "${grammarsFolder}"`);
28
+ }
29
+ }
30
+ //# sourceMappingURL=LlamaGrammar.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"LlamaGrammar.js","sourceRoot":"","sources":["../../src/llamaEvaluator/LlamaGrammar.ts"],"names":[],"mappings":"AAAA,OAAO,IAAI,MAAM,MAAM,CAAC;AACxB,OAAO,EAAE,MAAM,UAAU,CAAC;AAC1B,OAAO,EAAC,iBAAiB,EAAC,MAAM,+BAA+B,CAAC;AAChE,OAAO,EAAC,YAAY,EAAC,MAAM,gBAAgB,CAAC;AAG5C,MAAM,OAAO,YAAY;IACrB,gBAAgB;IACA,QAAQ,CAAe;IAEvC;;;;;;OAMG;IACH,YAAmB,EAAC,OAAO,EAAE,YAAY,GAAG,KAAK,EAA4C;QACzF,IAAI,CAAC,QAAQ,GAAG,IAAI,YAAY,CAAC,OAAO,EAAE;YACtC,YAAY;SACf,CAAC,CAAC;IACP,CAAC;IAEM,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,IAA2D;QAClF,MAAM,cAAc,GAAG,MAAM,iBAAiB,EAAE,CAAC;QAEjD,MAAM,WAAW,GAAG,IAAI,CAAC,IAAI,CAAC,cAAc,EAAE,IAAI,GAAG,OAAO,CAAC,CAAC;QAE9D,IAAI,MAAM,EAAE,CAAC,MAAM,CAAC,WAAW,CAAC,EAAE;YAC9B,MAAM,OAAO,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,WAAW,EAAE,MAAM,CAAC,CAAC;YACvD,OAAO,IAAI,YAAY,CAAC,EAAC,OAAO,EAAC,CAAC,CAAC;SACtC;QAED,MAAM,IAAI,KAAK,CAAC,0BAA0B,IAAI,uBAAuB,cAAc,GAAG,CAAC,CAAC;IAC5F,CAAC;CACJ"}
@@ -9,6 +9,26 @@ export declare class LlamaModel {
9
9
  * @param {number} [options.batchSize] - prompt processing batch size
10
10
  * @param {number} [options.gpuLayers] - number of layers to store in VRAM
11
11
  * @param {boolean} [options.lowVram] - if true, reduce VRAM usage at the cost of performance
12
+ * @param {number} [options.temperature] - Temperature is a hyperparameter that controls the randomness of the generated text.
13
+ * It affects the probability distribution of the model's output tokens.
14
+ * A higher temperature (e.g., 1.5) makes the output more random and creative,
15
+ * while a lower temperature (e.g., 0.5) makes the output more focused, deterministic, and conservative.
16
+ * The suggested temperature is 0.8, which provides a balance between randomness and determinism.
17
+ * At the extreme, a temperature of 0 will always pick the most likely next token, leading to identical outputs in each run.
18
+ *
19
+ * Set to `0` to disable.
20
+ * @param {number} [options.topK] - Limits the model to consider only the K most likely next tokens for sampling at each step of
21
+ * sequence generation.
22
+ * An integer number between `1` and the size of the vocabulary.
23
+ * Set to `0` to disable (which uses the full vocabulary).
24
+ *
25
+ * Only relevant when `temperature` is set to a value greater than 0.
26
+ * @param {number} [options.topP] - Dynamically selects the smallest set of tokens whose cumulative probability exceeds the threshold P,
27
+ * and samples the next token only from this set.
28
+ * A float number between `0` and `1`.
29
+ * Set to `1` to disable.
30
+ *
31
+ * Only relevant when `temperature` is set to a value greater than `0`.
12
32
  * @param {boolean} [options.f16Kv] - use fp16 for KV cache
13
33
  * @param {boolean} [options.logitsAll] - the llama_eval() call computes all logits, not just the last one
14
34
  * @param {boolean} [options.vocabOnly] - only load the vocabulary, no weights
@@ -16,7 +36,7 @@ export declare class LlamaModel {
16
36
  * @param {boolean} [options.useMlock] - force system to keep model in RAM
17
37
  * @param {boolean} [options.embedding] - embedding mode only
18
38
  */
19
- constructor({ modelPath, seed, contextSize, batchSize, gpuLayers, lowVram, f16Kv, logitsAll, vocabOnly, useMmap, useMlock, embedding }: {
39
+ constructor({ modelPath, seed, contextSize, batchSize, gpuLayers, lowVram, temperature, topK, topP, f16Kv, logitsAll, vocabOnly, useMmap, useMlock, embedding }: {
20
40
  /** path to the model on the filesystem */
21
41
  modelPath: string;
22
42
  /** If null, a random seed will be used */
@@ -29,6 +49,34 @@ export declare class LlamaModel {
29
49
  gpuLayers?: number;
30
50
  /** if true, reduce VRAM usage at the cost of performance */
31
51
  lowVram?: boolean;
52
+ /**
53
+ * Temperature is a hyperparameter that controls the randomness of the generated text.
54
+ * It affects the probability distribution of the model's output tokens.
55
+ * A higher temperature (e.g., 1.5) makes the output more random and creative,
56
+ * while a lower temperature (e.g., 0.5) makes the output more focused, deterministic, and conservative.
57
+ * The suggested temperature is 0.8, which provides a balance between randomness and determinism.
58
+ * At the extreme, a temperature of 0 will always pick the most likely next token, leading to identical outputs in each run.
59
+ *
60
+ * Set to `0` to disable.
61
+ */
62
+ temperature?: number;
63
+ /**
64
+ * Limits the model to consider only the K most likely next tokens for sampling at each step of sequence generation.
65
+ * An integer number between `1` and the size of the vocabulary.
66
+ * Set to `0` to disable (which uses the full vocabulary).
67
+ *
68
+ * Only relevant when `temperature` is set to a value greater than 0.
69
+ * */
70
+ topK?: number;
71
+ /**
72
+ * Dynamically selects the smallest set of tokens whose cumulative probability exceeds the threshold P,
73
+ * and samples the next token only from this set.
74
+ * A float number between `0` and `1`.
75
+ * Set to `1` to disable.
76
+ *
77
+ * Only relevant when `temperature` is set to a value greater than `0`.
78
+ * */
79
+ topP?: number;
32
80
  /** use fp16 for KV cache */
33
81
  f16Kv?: boolean;
34
82
  /** the llama_eval() call computes all logits, not just the last one */
@@ -1,3 +1,4 @@
1
+ import { removeNullFields } from "../utils/removeNullFields.js";
1
2
  import { llamaCppNode, LLAMAModel } from "./LlamaBins.js";
2
3
  export class LlamaModel {
3
4
  /** @internal */
@@ -12,6 +13,26 @@ export class LlamaModel {
12
13
  * @param {number} [options.batchSize] - prompt processing batch size
13
14
  * @param {number} [options.gpuLayers] - number of layers to store in VRAM
14
15
  * @param {boolean} [options.lowVram] - if true, reduce VRAM usage at the cost of performance
16
+ * @param {number} [options.temperature] - Temperature is a hyperparameter that controls the randomness of the generated text.
17
+ * It affects the probability distribution of the model's output tokens.
18
+ * A higher temperature (e.g., 1.5) makes the output more random and creative,
19
+ * while a lower temperature (e.g., 0.5) makes the output more focused, deterministic, and conservative.
20
+ * The suggested temperature is 0.8, which provides a balance between randomness and determinism.
21
+ * At the extreme, a temperature of 0 will always pick the most likely next token, leading to identical outputs in each run.
22
+ *
23
+ * Set to `0` to disable.
24
+ * @param {number} [options.topK] - Limits the model to consider only the K most likely next tokens for sampling at each step of
25
+ * sequence generation.
26
+ * An integer number between `1` and the size of the vocabulary.
27
+ * Set to `0` to disable (which uses the full vocabulary).
28
+ *
29
+ * Only relevant when `temperature` is set to a value greater than 0.
30
+ * @param {number} [options.topP] - Dynamically selects the smallest set of tokens whose cumulative probability exceeds the threshold P,
31
+ * and samples the next token only from this set.
32
+ * A float number between `0` and `1`.
33
+ * Set to `1` to disable.
34
+ *
35
+ * Only relevant when `temperature` is set to a value greater than `0`.
15
36
  * @param {boolean} [options.f16Kv] - use fp16 for KV cache
16
37
  * @param {boolean} [options.logitsAll] - the llama_eval() call computes all logits, not just the last one
17
38
  * @param {boolean} [options.vocabOnly] - only load the vocabulary, no weights
@@ -19,13 +40,16 @@ export class LlamaModel {
19
40
  * @param {boolean} [options.useMlock] - force system to keep model in RAM
20
41
  * @param {boolean} [options.embedding] - embedding mode only
21
42
  */
22
- constructor({ modelPath, seed = null, contextSize = 1024 * 4, batchSize, gpuLayers, lowVram, f16Kv, logitsAll, vocabOnly, useMmap, useMlock, embedding }) {
43
+ constructor({ modelPath, seed = null, contextSize = 1024 * 4, batchSize, gpuLayers, lowVram, temperature = 0, topK = 40, topP = 0.95, f16Kv, logitsAll, vocabOnly, useMmap, useMlock, embedding }) {
23
44
  this._model = new LLAMAModel(modelPath, removeNullFields({
24
45
  seed: seed != null ? Math.max(-1, seed) : undefined,
25
46
  contextSize,
26
47
  batchSize,
27
48
  gpuLayers,
28
49
  lowVram,
50
+ temperature,
51
+ topK,
52
+ topP,
29
53
  f16Kv,
30
54
  logitsAll,
31
55
  vocabOnly,
@@ -38,12 +62,4 @@ export class LlamaModel {
38
62
  return llamaCppNode.systemInfo();
39
63
  }
40
64
  }
41
- function removeNullFields(obj) {
42
- const newObj = Object.assign({}, obj);
43
- for (const key in obj) {
44
- if (newObj[key] == null)
45
- delete newObj[key];
46
- }
47
- return newObj;
48
- }
49
65
  //# sourceMappingURL=LlamaModel.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"LlamaModel.js","sourceRoot":"","sources":["../../src/llamaEvaluator/LlamaModel.ts"],"names":[],"mappings":"AAAA,OAAO,EAAC,YAAY,EAAE,UAAU,EAAC,MAAM,gBAAgB,CAAC;AAGxD,MAAM,OAAO,UAAU;IACnB,gBAAgB;IACA,MAAM,CAAa;IAEnC;;;;;;;;;;;;;;;;OAgBG;IACH,YAAmB,EACf,SAAS,EAAE,IAAI,GAAG,IAAI,EAAE,WAAW,GAAG,IAAI,GAAG,CAAC,EAAE,SAAS,EAAE,SAAS,EACpE,OAAO,EAAE,KAAK,EAAE,SAAS,EAAE,SAAS,EAAE,OAAO,EAAE,QAAQ,EAAE,SAAS,EAqCrE;QACG,IAAI,CAAC,MAAM,GAAG,IAAI,UAAU,CAAC,SAAS,EAAE,gBAAgB,CAAC;YACrD,IAAI,EAAE,IAAI,IAAI,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC,CAAC,CAAC,SAAS;YACnD,WAAW;YACX,SAAS;YACT,SAAS;YACT,OAAO;YACP,KAAK;YACL,SAAS;YACT,SAAS;YACT,OAAO;YACP,QAAQ;YACR,SAAS;SACZ,CAAC,CAAC,CAAC;IACR,CAAC;IAEM,MAAM,KAAK,UAAU;QACxB,OAAO,YAAY,CAAC,UAAU,EAAE,CAAC;IACrC,CAAC;CACJ;AAED,SAAS,gBAAgB,CAAmB,GAAM;IAC9C,MAAM,MAAM,GAAM,MAAM,CAAC,MAAM,CAAC,EAAE,EAAE,GAAG,CAAC,CAAC;IAEzC,KAAK,MAAM,GAAG,IAAI,GAAG,EAAE;QACnB,IAAI,MAAM,CAAC,GAAG,CAAC,IAAI,IAAI;YACnB,OAAO,MAAM,CAAC,GAAG,CAAC,CAAC;KAC1B;IAED,OAAO,MAAM,CAAC;AAClB,CAAC"}
1
+ {"version":3,"file":"LlamaModel.js","sourceRoot":"","sources":["../../src/llamaEvaluator/LlamaModel.ts"],"names":[],"mappings":"AAAA,OAAO,EAAC,gBAAgB,EAAC,MAAM,8BAA8B,CAAC;AAC9D,OAAO,EAAC,YAAY,EAAE,UAAU,EAAC,MAAM,gBAAgB,CAAC;AAGxD,MAAM,OAAO,UAAU;IACnB,gBAAgB;IACA,MAAM,CAAa;IAEnC;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;OAoCG;IACH,YAAmB,EACf,SAAS,EAAE,IAAI,GAAG,IAAI,EAAE,WAAW,GAAG,IAAI,GAAG,CAAC,EAAE,SAAS,EAAE,SAAS,EACpE,OAAO,EAAE,WAAW,GAAG,CAAC,EAAE,IAAI,GAAG,EAAE,EAAE,IAAI,GAAG,IAAI,EAAE,KAAK,EAAE,SAAS,EAAE,SAAS,EAAE,OAAO,EAAE,QAAQ,EAAE,SAAS,EAoE9G;QACG,IAAI,CAAC,MAAM,GAAG,IAAI,UAAU,CAAC,SAAS,EAAE,gBAAgB,CAAC;YACrD,IAAI,EAAE,IAAI,IAAI,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC,CAAC,CAAC,SAAS;YACnD,WAAW;YACX,SAAS;YACT,SAAS;YACT,OAAO;YACP,WAAW;YACX,IAAI;YACJ,IAAI;YACJ,KAAK;YACL,SAAS;YACT,SAAS;YACT,OAAO;YACP,QAAQ;YACR,SAAS;SACZ,CAAC,CAAC,CAAC;IACR,CAAC;IAEM,MAAM,KAAK,UAAU;QACxB,OAAO,YAAY,CAAC,UAAU,EAAE,CAAC;IACrC,CAAC;CACJ"}
@@ -0,0 +1 @@
1
+ export type Token = number;
package/dist/types.js ADDED
@@ -0,0 +1,2 @@
1
+ export {};
2
+ //# sourceMappingURL=types.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"types.js","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":""}
@@ -0,0 +1,6 @@
1
+ type BinariesGithubReleaseFile = {
2
+ release: "latest" | string;
3
+ };
4
+ export declare function getBinariesGithubRelease(): Promise<string>;
5
+ export declare function setBinariesGithubRelease(release: BinariesGithubReleaseFile["release"]): Promise<void>;
6
+ export {};
@@ -0,0 +1,15 @@
1
+ import fs from "fs-extra";
2
+ import { binariesGithubReleasePath } from "../config.js";
3
+ export async function getBinariesGithubRelease() {
4
+ const binariesGithubRelease = await fs.readJson(binariesGithubReleasePath);
5
+ return binariesGithubRelease.release;
6
+ }
7
+ export async function setBinariesGithubRelease(release) {
8
+ const binariesGithubReleaseJson = {
9
+ release: release
10
+ };
11
+ await fs.writeJson(binariesGithubReleasePath, binariesGithubReleaseJson, {
12
+ spaces: 4
13
+ });
14
+ }
15
+ //# sourceMappingURL=binariesGithubRelease.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"binariesGithubRelease.js","sourceRoot":"","sources":["../../src/utils/binariesGithubRelease.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,UAAU,CAAC;AAC1B,OAAO,EAAC,yBAAyB,EAAC,MAAM,cAAc,CAAC;AAMvD,MAAM,CAAC,KAAK,UAAU,wBAAwB;IAC1C,MAAM,qBAAqB,GAA8B,MAAM,EAAE,CAAC,QAAQ,CAAC,yBAAyB,CAAC,CAAC;IAEtG,OAAO,qBAAqB,CAAC,OAAO,CAAC;AACzC,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,wBAAwB,CAAC,OAA6C;IACxF,MAAM,yBAAyB,GAA8B;QACzD,OAAO,EAAE,OAAO;KACnB,CAAC;IAEF,MAAM,EAAE,CAAC,SAAS,CAAC,yBAAyB,EAAE,yBAAyB,EAAE;QACrE,MAAM,EAAE,CAAC;KACZ,CAAC,CAAC;AACP,CAAC"}
@@ -1,6 +1,8 @@
1
- export declare function compileLlamaCpp({ arch, nodeTarget, setUsedBingFlag }: {
1
+ export declare function compileLlamaCpp({ arch, nodeTarget, setUsedBingFlag, metal, cuda }: {
2
2
  arch?: string;
3
3
  nodeTarget?: string;
4
4
  setUsedBingFlag?: boolean;
5
+ metal?: boolean;
6
+ cuda?: boolean;
5
7
  }): Promise<void>;
6
8
  export declare function getCompiledLlamaCppBinaryPath(): Promise<string | null>;