node-llama-cpp 2.8.4 → 3.0.0-beta.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (185) hide show
  1. package/README.md +2 -2
  2. package/dist/ChatWrapper.d.ts +49 -0
  3. package/dist/ChatWrapper.js +120 -0
  4. package/dist/ChatWrapper.js.map +1 -0
  5. package/dist/chatWrappers/AlpacaChatWrapper.d.ts +12 -0
  6. package/dist/chatWrappers/AlpacaChatWrapper.js +21 -0
  7. package/dist/chatWrappers/AlpacaChatWrapper.js.map +1 -0
  8. package/dist/chatWrappers/ChatMLChatWrapper.d.ts +13 -0
  9. package/dist/chatWrappers/ChatMLChatWrapper.js +83 -0
  10. package/dist/chatWrappers/ChatMLChatWrapper.js.map +1 -0
  11. package/dist/chatWrappers/EmptyChatWrapper.d.ts +4 -0
  12. package/dist/chatWrappers/EmptyChatWrapper.js +5 -0
  13. package/dist/chatWrappers/EmptyChatWrapper.js.map +1 -0
  14. package/dist/chatWrappers/FalconChatWrapper.d.ts +21 -0
  15. package/dist/chatWrappers/FalconChatWrapper.js +104 -0
  16. package/dist/chatWrappers/FalconChatWrapper.js.map +1 -0
  17. package/dist/chatWrappers/FunctionaryChatWrapper.d.ts +41 -0
  18. package/dist/chatWrappers/FunctionaryChatWrapper.js +200 -0
  19. package/dist/chatWrappers/FunctionaryChatWrapper.js.map +1 -0
  20. package/dist/chatWrappers/GeneralChatWrapper.d.ts +21 -0
  21. package/dist/chatWrappers/GeneralChatWrapper.js +112 -0
  22. package/dist/chatWrappers/GeneralChatWrapper.js.map +1 -0
  23. package/dist/chatWrappers/LlamaChatWrapper.d.ts +13 -0
  24. package/dist/chatWrappers/LlamaChatWrapper.js +78 -0
  25. package/dist/chatWrappers/LlamaChatWrapper.js.map +1 -0
  26. package/dist/chatWrappers/resolveChatWrapperBasedOnModel.d.ts +13 -0
  27. package/dist/chatWrappers/resolveChatWrapperBasedOnModel.js +55 -0
  28. package/dist/chatWrappers/resolveChatWrapperBasedOnModel.js.map +1 -0
  29. package/dist/cli/cli.js +1 -1
  30. package/dist/cli/cli.js.map +1 -1
  31. package/dist/cli/commands/ChatCommand.d.ts +2 -1
  32. package/dist/cli/commands/ChatCommand.js +90 -42
  33. package/dist/cli/commands/ChatCommand.js.map +1 -1
  34. package/dist/config.js +1 -1
  35. package/dist/config.js.map +1 -1
  36. package/dist/index.d.ts +20 -12
  37. package/dist/index.js +19 -11
  38. package/dist/index.js.map +1 -1
  39. package/dist/llamaEvaluator/LlamaBins.d.ts +18 -4
  40. package/dist/llamaEvaluator/LlamaBins.js +3 -3
  41. package/dist/llamaEvaluator/LlamaChat/LlamaChat.d.ts +175 -0
  42. package/dist/llamaEvaluator/LlamaChat/LlamaChat.js +704 -0
  43. package/dist/llamaEvaluator/LlamaChat/LlamaChat.js.map +1 -0
  44. package/dist/llamaEvaluator/LlamaChat/utils/FunctionCallGrammar.d.ts +21 -0
  45. package/dist/llamaEvaluator/LlamaChat/utils/FunctionCallGrammar.js +120 -0
  46. package/dist/llamaEvaluator/LlamaChat/utils/FunctionCallGrammar.js.map +1 -0
  47. package/dist/llamaEvaluator/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.d.ts +16 -0
  48. package/dist/llamaEvaluator/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.js +117 -0
  49. package/dist/llamaEvaluator/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.js.map +1 -0
  50. package/dist/llamaEvaluator/LlamaChatSession/LlamaChatSession.d.ts +146 -0
  51. package/dist/llamaEvaluator/LlamaChatSession/LlamaChatSession.js +211 -0
  52. package/dist/llamaEvaluator/LlamaChatSession/LlamaChatSession.js.map +1 -0
  53. package/dist/llamaEvaluator/LlamaChatSession/utils/defineChatSessionFunction.d.ts +7 -0
  54. package/dist/llamaEvaluator/LlamaChatSession/utils/defineChatSessionFunction.js +8 -0
  55. package/dist/llamaEvaluator/LlamaChatSession/utils/defineChatSessionFunction.js.map +1 -0
  56. package/dist/llamaEvaluator/LlamaContext/LlamaContext.d.ts +107 -0
  57. package/dist/llamaEvaluator/LlamaContext/LlamaContext.js +597 -0
  58. package/dist/llamaEvaluator/LlamaContext/LlamaContext.js.map +1 -0
  59. package/dist/llamaEvaluator/LlamaContext/types.d.ts +86 -0
  60. package/dist/llamaEvaluator/LlamaContext/types.js +2 -0
  61. package/dist/llamaEvaluator/LlamaContext/types.js.map +1 -0
  62. package/dist/llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/firstInFirstOutStrategy.d.ts +5 -0
  63. package/dist/llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/firstInFirstOutStrategy.js +16 -0
  64. package/dist/llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/firstInFirstOutStrategy.js.map +1 -0
  65. package/dist/llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/maximumParallelismStrategy.d.ts +5 -0
  66. package/dist/llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/maximumParallelismStrategy.js +42 -0
  67. package/dist/llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/maximumParallelismStrategy.js.map +1 -0
  68. package/dist/llamaEvaluator/LlamaContext/utils/resolveBatchItemsPrioritizingStrategy.d.ts +2 -0
  69. package/dist/llamaEvaluator/LlamaContext/utils/resolveBatchItemsPrioritizingStrategy.js +13 -0
  70. package/dist/llamaEvaluator/LlamaContext/utils/resolveBatchItemsPrioritizingStrategy.js.map +1 -0
  71. package/dist/llamaEvaluator/LlamaGrammar.d.ts +9 -13
  72. package/dist/llamaEvaluator/LlamaGrammar.js +10 -15
  73. package/dist/llamaEvaluator/LlamaGrammar.js.map +1 -1
  74. package/dist/llamaEvaluator/LlamaGrammarEvaluationState.d.ts +6 -5
  75. package/dist/llamaEvaluator/LlamaGrammarEvaluationState.js +8 -7
  76. package/dist/llamaEvaluator/LlamaGrammarEvaluationState.js.map +1 -1
  77. package/dist/llamaEvaluator/LlamaJsonSchemaGrammar.js +2 -1
  78. package/dist/llamaEvaluator/LlamaJsonSchemaGrammar.js.map +1 -1
  79. package/dist/llamaEvaluator/LlamaModel.d.ts +101 -105
  80. package/dist/llamaEvaluator/LlamaModel.js +305 -57
  81. package/dist/llamaEvaluator/LlamaModel.js.map +1 -1
  82. package/dist/types.d.ts +44 -4
  83. package/dist/types.js +5 -1
  84. package/dist/types.js.map +1 -1
  85. package/dist/utils/LlamaText.d.ts +42 -0
  86. package/dist/utils/LlamaText.js +207 -0
  87. package/dist/utils/LlamaText.js.map +1 -0
  88. package/dist/utils/ReplHistory.js +1 -1
  89. package/dist/utils/ReplHistory.js.map +1 -1
  90. package/dist/utils/StopGenerationDetector.d.ts +28 -0
  91. package/dist/utils/StopGenerationDetector.js +205 -0
  92. package/dist/utils/StopGenerationDetector.js.map +1 -0
  93. package/dist/utils/TokenStreamRegulator.d.ts +30 -0
  94. package/dist/utils/TokenStreamRegulator.js +96 -0
  95. package/dist/utils/TokenStreamRegulator.js.map +1 -0
  96. package/dist/utils/appendUserMessageToChatHistory.d.ts +2 -0
  97. package/dist/utils/appendUserMessageToChatHistory.js +18 -0
  98. package/dist/utils/appendUserMessageToChatHistory.js.map +1 -0
  99. package/dist/utils/compareTokens.d.ts +2 -0
  100. package/dist/utils/compareTokens.js +4 -0
  101. package/dist/utils/compareTokens.js.map +1 -0
  102. package/dist/utils/findCharacterRemovalCountToFitChatHistoryInContext.d.ts +18 -0
  103. package/dist/utils/findCharacterRemovalCountToFitChatHistoryInContext.js +61 -0
  104. package/dist/utils/findCharacterRemovalCountToFitChatHistoryInContext.js.map +1 -0
  105. package/dist/utils/gbnfJson/GbnfGrammarGenerator.d.ts +1 -0
  106. package/dist/utils/gbnfJson/GbnfGrammarGenerator.js +17 -0
  107. package/dist/utils/gbnfJson/GbnfGrammarGenerator.js.map +1 -1
  108. package/dist/utils/gbnfJson/GbnfTerminal.d.ts +1 -1
  109. package/dist/utils/gbnfJson/GbnfTerminal.js.map +1 -1
  110. package/dist/utils/gbnfJson/terminals/GbnfVerbatimText.d.ts +6 -0
  111. package/dist/utils/gbnfJson/terminals/GbnfVerbatimText.js +21 -0
  112. package/dist/utils/gbnfJson/terminals/GbnfVerbatimText.js.map +1 -0
  113. package/dist/utils/gbnfJson/types.d.ts +1 -1
  114. package/dist/utils/gbnfJson/types.js.map +1 -1
  115. package/dist/utils/gbnfJson/utils/validateObjectAgainstGbnfSchema.d.ts +1 -0
  116. package/dist/utils/gbnfJson/utils/validateObjectAgainstGbnfSchema.js.map +1 -1
  117. package/dist/utils/getBin.d.ts +71 -38
  118. package/dist/utils/getBin.js.map +1 -1
  119. package/dist/utils/getGbnfGrammarForGbnfJsonSchema.js +1 -15
  120. package/dist/utils/getGbnfGrammarForGbnfJsonSchema.js.map +1 -1
  121. package/dist/utils/getReleaseInfo.d.ts +1 -1
  122. package/dist/utils/getReleaseInfo.js.map +1 -1
  123. package/dist/utils/getTypeScriptTypeStringForGbnfJsonSchema.d.ts +2 -0
  124. package/dist/utils/getTypeScriptTypeStringForGbnfJsonSchema.js +49 -0
  125. package/dist/utils/getTypeScriptTypeStringForGbnfJsonSchema.js.map +1 -0
  126. package/dist/utils/parseModelFileName.d.ts +9 -0
  127. package/dist/utils/parseModelFileName.js +68 -0
  128. package/dist/utils/parseModelFileName.js.map +1 -0
  129. package/dist/utils/parseModelTypeDescription.d.ts +6 -0
  130. package/dist/utils/parseModelTypeDescription.js +9 -0
  131. package/dist/utils/parseModelTypeDescription.js.map +1 -0
  132. package/dist/utils/resolveChatWrapper.d.ts +4 -0
  133. package/dist/utils/resolveChatWrapper.js +16 -0
  134. package/dist/utils/resolveChatWrapper.js.map +1 -0
  135. package/dist/utils/truncateTextAndRoundToWords.d.ts +8 -0
  136. package/dist/utils/truncateTextAndRoundToWords.js +27 -0
  137. package/dist/utils/truncateTextAndRoundToWords.js.map +1 -0
  138. package/llama/.clang-format +10 -9
  139. package/llama/addon.cpp +701 -352
  140. package/llama/gitRelease.bundle +0 -0
  141. package/llamaBins/linux-arm64/llama-addon.node +0 -0
  142. package/llamaBins/linux-armv7l/llama-addon.node +0 -0
  143. package/llamaBins/linux-x64/llama-addon.node +0 -0
  144. package/llamaBins/mac-arm64/llama-addon.node +0 -0
  145. package/llamaBins/mac-x64/llama-addon.node +0 -0
  146. package/llamaBins/win-x64/llama-addon.node +0 -0
  147. package/package.json +24 -14
  148. package/dist/ChatPromptWrapper.d.ts +0 -11
  149. package/dist/ChatPromptWrapper.js +0 -20
  150. package/dist/ChatPromptWrapper.js.map +0 -1
  151. package/dist/chatWrappers/ChatMLChatPromptWrapper.d.ts +0 -12
  152. package/dist/chatWrappers/ChatMLChatPromptWrapper.js +0 -22
  153. package/dist/chatWrappers/ChatMLChatPromptWrapper.js.map +0 -1
  154. package/dist/chatWrappers/EmptyChatPromptWrapper.d.ts +0 -4
  155. package/dist/chatWrappers/EmptyChatPromptWrapper.js +0 -5
  156. package/dist/chatWrappers/EmptyChatPromptWrapper.js.map +0 -1
  157. package/dist/chatWrappers/FalconChatPromptWrapper.d.ts +0 -19
  158. package/dist/chatWrappers/FalconChatPromptWrapper.js +0 -33
  159. package/dist/chatWrappers/FalconChatPromptWrapper.js.map +0 -1
  160. package/dist/chatWrappers/GeneralChatPromptWrapper.d.ts +0 -19
  161. package/dist/chatWrappers/GeneralChatPromptWrapper.js +0 -38
  162. package/dist/chatWrappers/GeneralChatPromptWrapper.js.map +0 -1
  163. package/dist/chatWrappers/LlamaChatPromptWrapper.d.ts +0 -12
  164. package/dist/chatWrappers/LlamaChatPromptWrapper.js +0 -23
  165. package/dist/chatWrappers/LlamaChatPromptWrapper.js.map +0 -1
  166. package/dist/chatWrappers/createChatWrapperByBos.d.ts +0 -2
  167. package/dist/chatWrappers/createChatWrapperByBos.js +0 -14
  168. package/dist/chatWrappers/createChatWrapperByBos.js.map +0 -1
  169. package/dist/chatWrappers/generateContextTextFromConversationHistory.d.ts +0 -23
  170. package/dist/chatWrappers/generateContextTextFromConversationHistory.js +0 -47
  171. package/dist/chatWrappers/generateContextTextFromConversationHistory.js.map +0 -1
  172. package/dist/llamaEvaluator/LlamaChatSession.d.ts +0 -122
  173. package/dist/llamaEvaluator/LlamaChatSession.js +0 -236
  174. package/dist/llamaEvaluator/LlamaChatSession.js.map +0 -1
  175. package/dist/llamaEvaluator/LlamaContext.d.ts +0 -98
  176. package/dist/llamaEvaluator/LlamaContext.js +0 -140
  177. package/dist/llamaEvaluator/LlamaContext.js.map +0 -1
  178. package/dist/utils/getTextCompletion.d.ts +0 -3
  179. package/dist/utils/getTextCompletion.js +0 -12
  180. package/dist/utils/getTextCompletion.js.map +0 -1
  181. package/dist/utils/withLock.d.ts +0 -1
  182. package/dist/utils/withLock.js +0 -19
  183. package/dist/utils/withLock.js.map +0 -1
  184. package/llamaBins/mac-arm64/ggml-metal.metal +0 -5820
  185. package/llamaBins/mac-x64/ggml-metal.metal +0 -5820
@@ -0,0 +1,86 @@
1
+ import { Token } from "../../types.js";
2
+ import { LlamaModel } from "../LlamaModel.js";
3
+ import { LlamaContextSequence } from "./LlamaContext.js";
4
+ export type LlamaContextOptions = {
5
+ model: LlamaModel;
6
+ /**
7
+ * number of sequences for the context.
8
+ * Each sequence is a different "text generation process" that can run in parallel to other sequences in the same context.
9
+ * Although a single context has multiple sequences, the sequences are separate from each other and do not share data with each other.
10
+ * This is beneficial for performance, as multiple sequences can be evaluated in parallel (on the same batch).
11
+ */
12
+ sequences?: number;
13
+ /** If null, a random seed will be used */
14
+ seed?: number | null;
15
+ /** text context size */
16
+ contextSize?: number;
17
+ /** prompt processing batch size */
18
+ batchSize?: number;
19
+ /** the llama_eval() call computes all logits, not just the last one */
20
+ logitsAll?: boolean;
21
+ /** embedding mode only */
22
+ embedding?: boolean;
23
+ /**
24
+ * number of threads to use to evaluate tokens.
25
+ * set to 0 to use the maximum threads supported by the current machine hardware
26
+ */
27
+ threads?: number;
28
+ /** control the parallel sequences processing behavior */
29
+ batching?: BatchingOptions;
30
+ };
31
+ export type LlamaContextSequenceRepeatPenalty = {
32
+ /** Tokens whose probability of being predicted as the next token should be lowered */
33
+ punishTokens: Token[] | (() => Token[]);
34
+ /**
35
+ * The relative amount to lower the probability of the tokens in `punishTokens` by.
36
+ * Defaults to `1.1`.
37
+ * Set to `1` to disable.
38
+ */
39
+ penalty?: number;
40
+ /**
41
+ * For a token that appears `n` times in the `punishTokens` array, lower its probability by `n * frequencyPenalty`.
42
+ * Disabled by default (`0`).
43
+ * Set to a value between `0` and `1` to enable.
44
+ */
45
+ frequencyPenalty?: number;
46
+ /**
47
+ * Lower the probability of all the tokens in the `punishTokens` array by `presencePenalty`
48
+ * Disabled by default (`0`).
49
+ * Set to a value between `0` and `1` to enable.
50
+ */
51
+ presencePenalty?: number;
52
+ };
53
+ export type BatchingOptions = {
54
+ dispatchSchedule?: "nextTick" | CustomBatchingDispatchSchedule;
55
+ itemsPrioritizingStrategy?: "maximumParallelism" | "firstInFirstOut" | CustomBatchingPrioritizeStrategy;
56
+ };
57
+ export type CustomBatchingDispatchSchedule = (dispatch: () => void) => void;
58
+ export type CustomBatchingPrioritizeStrategy = (options: {
59
+ items: readonly BatchItem[];
60
+ size: number;
61
+ }) => PrioritizedBatchItem[];
62
+ export type ContextShiftOptions = {
63
+ size?: number | ((sequence: LlamaContextSequence) => number | Promise<number>);
64
+ strategy?: "eraseBeginning" | ((options: {
65
+ sequence: LlamaContextSequence;
66
+ size: number;
67
+ }) => ContextTokensDeleteRange[] | Promise<ContextTokensDeleteRange[]>);
68
+ };
69
+ export type ContextTokensDeleteRange = {
70
+ start: number;
71
+ end: number;
72
+ };
73
+ /**
74
+ * 1 - low
75
+ *
76
+ * 5 - high
77
+ */
78
+ export type EvaluationPriority = 1 | 2 | 3 | 4 | 5;
79
+ export type BatchItem = {
80
+ readonly tokens: readonly Token[];
81
+ readonly evaluationPriority: EvaluationPriority;
82
+ };
83
+ export type PrioritizedBatchItem = {
84
+ item: BatchItem;
85
+ processAmount: number;
86
+ };
@@ -0,0 +1,2 @@
1
+ export {};
2
+ //# sourceMappingURL=types.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"types.js","sourceRoot":"","sources":["../../../src/llamaEvaluator/LlamaContext/types.ts"],"names":[],"mappings":""}
@@ -0,0 +1,5 @@
1
+ import { BatchItem, PrioritizedBatchItem } from "../../types.js";
2
+ export declare function firstInFirstOutStrategy({ items, size }: {
3
+ items: readonly BatchItem[];
4
+ size: number;
5
+ }): PrioritizedBatchItem[];
@@ -0,0 +1,16 @@
1
+ export function firstInFirstOutStrategy({ items, size }) {
2
+ const res = [];
3
+ const sortedItems = items
4
+ .slice()
5
+ .sort((a, b) => b.evaluationPriority - a.evaluationPriority);
6
+ let leftFreeTokens = size;
7
+ for (const item of sortedItems) {
8
+ const processAmount = Math.min(item.tokens.length, leftFreeTokens);
9
+ res.push({ item, processAmount });
10
+ leftFreeTokens -= processAmount;
11
+ if (leftFreeTokens === 0)
12
+ break;
13
+ }
14
+ return res;
15
+ }
16
+ //# sourceMappingURL=firstInFirstOutStrategy.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"firstInFirstOutStrategy.js","sourceRoot":"","sources":["../../../../../src/llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/firstInFirstOutStrategy.ts"],"names":[],"mappings":"AAEA,MAAM,UAAU,uBAAuB,CAAC,EAAC,KAAK,EAAE,IAAI,EAAgD;IAChG,MAAM,GAAG,GAA2B,EAAE,CAAC;IAEvC,MAAM,WAAW,GAAG,KAAK;SACpB,KAAK,EAAE;SACP,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,kBAAkB,GAAG,CAAC,CAAC,kBAAkB,CAAC,CAAC;IAEjE,IAAI,cAAc,GAAG,IAAI,CAAC;IAC1B,KAAK,MAAM,IAAI,IAAI,WAAW,EAAE;QAC5B,MAAM,aAAa,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,MAAM,CAAC,MAAM,EAAE,cAAc,CAAC,CAAC;QACnE,GAAG,CAAC,IAAI,CAAC,EAAC,IAAI,EAAE,aAAa,EAAC,CAAC,CAAC;QAChC,cAAc,IAAI,aAAa,CAAC;QAEhC,IAAI,cAAc,KAAK,CAAC;YACpB,MAAM;KACb;IAED,OAAO,GAAG,CAAC;AACf,CAAC"}
@@ -0,0 +1,5 @@
1
+ import { BatchItem, PrioritizedBatchItem } from "../../types.js";
2
+ export declare function maximumParallelismStrategy({ items, size }: {
3
+ items: readonly BatchItem[];
4
+ size: number;
5
+ }): PrioritizedBatchItem[];
@@ -0,0 +1,42 @@
1
+ export function maximumParallelismStrategy({ items, size }) {
2
+ let leftFreeTokens = size;
3
+ const minTokensForEachItem = Math.floor(leftFreeTokens / items.length);
4
+ const res = [];
5
+ const clippedItems = [];
6
+ for (const item of items) {
7
+ const processAmount = Math.min(item.tokens.length, leftFreeTokens, minTokensForEachItem);
8
+ const prioritizeItem = { item, processAmount };
9
+ res.push(prioritizeItem);
10
+ leftFreeTokens -= processAmount;
11
+ if (processAmount < item.tokens.length)
12
+ clippedItems.push(prioritizeItem);
13
+ if (leftFreeTokens === 0)
14
+ break;
15
+ }
16
+ for (let passesLeft = 3; leftFreeTokens > 0 && clippedItems.length > 0 && passesLeft > 0; passesLeft--) {
17
+ const minIncreaseAmount = Math.ceil(leftFreeTokens / clippedItems.length);
18
+ for (let i = 0; i < clippedItems.length && leftFreeTokens > 0; i++) {
19
+ const prioritizeItem = clippedItems[i];
20
+ const unprocessedAmount = prioritizeItem.item.tokens.length - prioritizeItem.processAmount;
21
+ const increaseAmount = Math.min(unprocessedAmount, leftFreeTokens, minIncreaseAmount);
22
+ prioritizeItem.processAmount += increaseAmount;
23
+ if (increaseAmount === unprocessedAmount) {
24
+ clippedItems.splice(i, 1);
25
+ i--;
26
+ }
27
+ }
28
+ }
29
+ clippedItems.sort((a, b) => b.item.evaluationPriority - a.item.evaluationPriority);
30
+ for (let i = 0; i < clippedItems.length && leftFreeTokens > 0; i++) {
31
+ const prioritizeItem = clippedItems[i];
32
+ const unprocessedAmount = prioritizeItem.item.tokens.length - prioritizeItem.processAmount;
33
+ const increaseAmount = Math.min(unprocessedAmount, leftFreeTokens);
34
+ prioritizeItem.processAmount += increaseAmount;
35
+ if (increaseAmount === unprocessedAmount) {
36
+ clippedItems.splice(i, 1);
37
+ i--;
38
+ }
39
+ }
40
+ return res;
41
+ }
42
+ //# sourceMappingURL=maximumParallelismStrategy.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"maximumParallelismStrategy.js","sourceRoot":"","sources":["../../../../../src/llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/maximumParallelismStrategy.ts"],"names":[],"mappings":"AAEA,MAAM,UAAU,0BAA0B,CAAC,EAAC,KAAK,EAAE,IAAI,EAAgD;IACnG,IAAI,cAAc,GAAG,IAAI,CAAC;IAC1B,MAAM,oBAAoB,GAAG,IAAI,CAAC,KAAK,CAAC,cAAc,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC;IAEvE,MAAM,GAAG,GAA2B,EAAE,CAAC;IACvC,MAAM,YAAY,GAA2B,EAAE,CAAC;IAEhD,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE;QACtB,MAAM,aAAa,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,MAAM,CAAC,MAAM,EAAE,cAAc,EAAE,oBAAoB,CAAC,CAAC;QACzF,MAAM,cAAc,GAAG,EAAC,IAAI,EAAE,aAAa,EAAC,CAAC;QAE7C,GAAG,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC;QACzB,cAAc,IAAI,aAAa,CAAC;QAEhC,IAAI,aAAa,GAAG,IAAI,CAAC,MAAM,CAAC,MAAM;YAClC,YAAY,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC;QAEtC,IAAI,cAAc,KAAK,CAAC;YACpB,MAAM;KACb;IAED,KAAK,IAAI,UAAU,GAAG,CAAC,EAAE,cAAc,GAAG,CAAC,IAAI,YAAY,CAAC,MAAM,GAAG,CAAC,IAAI,UAAU,GAAG,CAAC,EAAE,UAAU,EAAE,EAAE;QACpG,MAAM,iBAAiB,GAAG,IAAI,CAAC,IAAI,CAAC,cAAc,GAAG,YAAY,CAAC,MAAM,CAAC,CAAC;QAE1E,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,YAAY,CAAC,MAAM,IAAI,cAAc,GAAG,CAAC,EAAE,CAAC,EAAE,EAAE;YAChE,MAAM,cAAc,GAAG,YAAY,CAAC,CAAC,CAAC,CAAC;YACvC,MAAM,iBAAiB,GAAG,cAAc,CAAC,IAAI,CAAC,MAAM,CAAC,MAAM,GAAG,cAAc,CAAC,aAAa,CAAC;YAC3F,MAAM,cAAc,GAAG,IAAI,CAAC,GAAG,CAAC,iBAAiB,EAAE,cAAc,EAAE,iBAAiB,CAAC,CAAC;YACtF,cAAc,CAAC,aAAa,IAAI,cAAc,CAAC;YAE/C,IAAI,cAAc,KAAK,iBAAiB,EAAE;gBACtC,YAAY,CAAC,MAAM,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;gBAC1B,CAAC,EAAE,CAAC;aACP;SACJ;KACJ;IAED,YAAY,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,kBAAkB,GAAG,CAAC,CAAC,IAAI,CAAC,kBAAkB,CAAC,CAAC;IAEnF,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,YAAY,CAAC,MAAM,IAAI,cAAc,GAAG,CAAC,EAAE,CAAC,EAAE,EAAE;QAChE,MAAM,cAAc,GAAG,YAAY,CAAC,CAAC,CAAC,CAAC;QACvC,MAAM,iBAAiB,GAAG,cAAc,CAAC,IAAI,CAAC,MAAM,CAAC,MAAM,GAAG,cAAc,CAAC,aAAa,CAAC;QAC3F,MAAM,cAAc,GAAG,IAAI,CAAC,GAAG,CAAC,iBAAiB,EAAE,cAAc,CAAC,CAAC;QACnE,cAAc,CAAC,aAAa,IAAI,cAAc,CAAC;QAE/C,IAAI,cAAc,KAAK,iBAAiB,EAAE;YACtC,YA
AY,CAAC,MAAM,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;YAC1B,CAAC,EAAE,CAAC;SACP;KACJ;IAED,OAAO,GAAG,CAAC;AACf,CAAC"}
@@ -0,0 +1,2 @@
1
+ import { BatchingOptions } from "../types.js";
2
+ export declare function resolveBatchItemsPrioritizingStrategy(strategy: Required<BatchingOptions>["itemsPrioritizingStrategy"]): import("../types.js").CustomBatchingPrioritizeStrategy;
@@ -0,0 +1,13 @@
1
+ import { maximumParallelismStrategy } from "./batchItemsPrioritizingStrategies/maximumParallelismStrategy.js";
2
+ import { firstInFirstOutStrategy } from "./batchItemsPrioritizingStrategies/firstInFirstOutStrategy.js";
3
+ export function resolveBatchItemsPrioritizingStrategy(strategy) {
4
+ if (strategy instanceof Function)
5
+ return strategy;
6
+ else if (strategy === "maximumParallelism")
7
+ return maximumParallelismStrategy;
8
+ else if (strategy === "firstInFirstOut")
9
+ return firstInFirstOutStrategy;
10
+ void (strategy);
11
+ throw new Error(`Unknown batch items prioritize strategy: ${strategy}`);
12
+ }
13
+ //# sourceMappingURL=resolveBatchItemsPrioritizingStrategy.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"resolveBatchItemsPrioritizingStrategy.js","sourceRoot":"","sources":["../../../../src/llamaEvaluator/LlamaContext/utils/resolveBatchItemsPrioritizingStrategy.ts"],"names":[],"mappings":"AACA,OAAO,EAAC,0BAA0B,EAAC,MAAM,kEAAkE,CAAC;AAC5G,OAAO,EAAC,uBAAuB,EAAC,MAAM,+DAA+D,CAAC;AAEtG,MAAM,UAAU,qCAAqC,CAAC,QAAgE;IAClH,IAAI,QAAQ,YAAY,QAAQ;QAC5B,OAAO,QAAQ,CAAC;SACf,IAAI,QAAQ,KAAK,oBAAoB;QACtC,OAAO,0BAA0B,CAAC;SACjC,IAAI,QAAQ,KAAK,iBAAiB;QACnC,OAAO,uBAAuB,CAAC;IAEnC,KAAK,CAAC,QAAwB,CAAC,CAAC;IAEhC,MAAM,IAAI,KAAK,CAAC,4CAA4C,QAAQ,EAAE,CAAC,CAAC;AAC5E,CAAC"}
@@ -1,32 +1,28 @@
1
+ import { LlamaText } from "../utils/LlamaText.js";
2
+ import { StopGenerationTrigger } from "../utils/StopGenerationDetector.js";
1
3
  export type LlamaGrammarOptions = {
2
4
  /** GBNF grammar */
3
5
  grammar: string;
4
6
  /** print the grammar to stdout */
5
7
  printGrammar?: boolean;
6
- /** Consider any of these texts as EOS for the generated out. Only supported by `LlamaChatSession` */
7
- stopStrings?: string[];
8
- /** Trim whitespace from the end of the generated text. Only supported by `LlamaChatSession` */
8
+ /** Consider any of these as EOS for the generated text. Only supported by `LlamaChat` and `LlamaChatSession` */
9
+ stopGenerationTriggers?: readonly (StopGenerationTrigger | LlamaText)[];
10
+ /** Trim whitespace from the end of the generated text. Only supported by `LlamaChat` and `LlamaChatSession` */
9
11
  trimWhitespaceSuffix?: boolean;
10
12
  };
11
13
  export declare class LlamaGrammar {
12
- private readonly _stopStrings;
14
+ private readonly _stopGenerationTriggers;
13
15
  private readonly _trimWhitespaceSuffix;
14
16
  private readonly _grammarText;
15
17
  /**
16
18
  * > GBNF files are supported.
17
19
  * > More info here: [github:ggerganov/llama.cpp:grammars/README.md](
18
20
  * > https://github.com/ggerganov/llama.cpp/blob/f5fe98d11bdf9e7797bcfb05c0c3601ffc4b9d26/grammars/README.md)
19
- * @param {object} options
20
- * @param {string} options.grammar - GBNF grammar
21
- * @param {string[]} [options.stopStrings] - Consider any of these texts as EOS for the generated out.
22
- * Only supported by `LlamaChatSession`
23
- * @param {boolean} [options.trimWhitespaceSuffix] - Trim whitespace from the end of the generated text.
24
- * Only supported by `LlamaChatSession`
25
- * @param {boolean} [options.printGrammar] - print the grammar to stdout
21
+ * @param options
26
22
  */
27
- constructor({ grammar, stopStrings, trimWhitespaceSuffix, printGrammar }: LlamaGrammarOptions);
23
+ constructor({ grammar, stopGenerationTriggers, trimWhitespaceSuffix, printGrammar }: LlamaGrammarOptions);
28
24
  get grammar(): string;
29
- get stopStrings(): readonly string[];
25
+ get stopGenerationTriggers(): readonly (StopGenerationTrigger | LlamaText)[];
30
26
  get trimWhitespaceSuffix(): boolean;
31
27
  static getFor(type: "json" | "list" | "arithmetic" | "japanese" | "chess"): Promise<LlamaGrammar>;
32
28
  }
@@ -1,38 +1,33 @@
1
1
  import path from "path";
2
2
  import fs from "fs-extra";
3
3
  import { getGrammarsFolder } from "../utils/getGrammarsFolder.js";
4
- import { LLAMAGrammar } from "./LlamaBins.js";
4
+ import { LlamaText } from "../utils/LlamaText.js";
5
+ import { AddonGrammar } from "./LlamaBins.js";
5
6
  export class LlamaGrammar {
6
7
  /** @internal */
7
8
  _grammar;
8
- _stopStrings;
9
+ _stopGenerationTriggers;
9
10
  _trimWhitespaceSuffix;
10
11
  _grammarText;
11
12
  /**
12
13
  * > GBNF files are supported.
13
14
  * > More info here: [github:ggerganov/llama.cpp:grammars/README.md](
14
15
  * > https://github.com/ggerganov/llama.cpp/blob/f5fe98d11bdf9e7797bcfb05c0c3601ffc4b9d26/grammars/README.md)
15
- * @param {object} options
16
- * @param {string} options.grammar - GBNF grammar
17
- * @param {string[]} [options.stopStrings] - Consider any of these texts as EOS for the generated out.
18
- * Only supported by `LlamaChatSession`
19
- * @param {boolean} [options.trimWhitespaceSuffix] - Trim whitespace from the end of the generated text.
20
- * Only supported by `LlamaChatSession`
21
- * @param {boolean} [options.printGrammar] - print the grammar to stdout
16
+ * @param options
22
17
  */
23
- constructor({ grammar, stopStrings = [], trimWhitespaceSuffix = false, printGrammar = false }) {
24
- this._grammar = new LLAMAGrammar(grammar, {
18
+ constructor({ grammar, stopGenerationTriggers = [], trimWhitespaceSuffix = false, printGrammar = false }) {
19
+ this._grammar = new AddonGrammar(grammar, {
25
20
  printGrammar
26
21
  });
27
- this._stopStrings = stopStrings ?? [];
22
+ this._stopGenerationTriggers = stopGenerationTriggers ?? [];
28
23
  this._trimWhitespaceSuffix = trimWhitespaceSuffix;
29
24
  this._grammarText = grammar;
30
25
  }
31
26
  get grammar() {
32
27
  return this._grammarText;
33
28
  }
34
- get stopStrings() {
35
- return this._stopStrings;
29
+ get stopGenerationTriggers() {
30
+ return this._stopGenerationTriggers;
36
31
  }
37
32
  get trimWhitespaceSuffix() {
38
33
  return this._trimWhitespaceSuffix;
@@ -44,7 +39,7 @@ export class LlamaGrammar {
44
39
  const grammar = await fs.readFile(grammarFile, "utf8");
45
40
  return new LlamaGrammar({
46
41
  grammar,
47
- stopStrings: ["\n".repeat(10)],
42
+ stopGenerationTriggers: [LlamaText(["\n".repeat(10)])],
48
43
  trimWhitespaceSuffix: true
49
44
  });
50
45
  }
@@ -1 +1 @@
1
- {"version":3,"file":"LlamaGrammar.js","sourceRoot":"","sources":["../../src/llamaEvaluator/LlamaGrammar.ts"],"names":[],"mappings":"AAAA,OAAO,IAAI,MAAM,MAAM,CAAC;AACxB,OAAO,EAAE,MAAM,UAAU,CAAC;AAC1B,OAAO,EAAC,iBAAiB,EAAC,MAAM,+BAA+B,CAAC;AAChE,OAAO,EAAC,YAAY,EAAC,MAAM,gBAAgB,CAAC;AAiB5C,MAAM,OAAO,YAAY;IACrB,gBAAgB;IACA,QAAQ,CAAe;IACtB,YAAY,CAAoB;IAChC,qBAAqB,CAAU;IAC/B,YAAY,CAAS;IAEtC;;;;;;;;;;;OAWG;IACH,YAAmB,EACf,OAAO,EAAE,WAAW,GAAG,EAAE,EAAE,oBAAoB,GAAG,KAAK,EAAE,YAAY,GAAG,KAAK,EAC3D;QAClB,IAAI,CAAC,QAAQ,GAAG,IAAI,YAAY,CAAC,OAAO,EAAE;YACtC,YAAY;SACf,CAAC,CAAC;QACH,IAAI,CAAC,YAAY,GAAG,WAAW,IAAI,EAAE,CAAC;QACtC,IAAI,CAAC,qBAAqB,GAAG,oBAAoB,CAAC;QAClD,IAAI,CAAC,YAAY,GAAG,OAAO,CAAC;IAChC,CAAC;IAED,IAAW,OAAO;QACd,OAAO,IAAI,CAAC,YAAY,CAAC;IAC7B,CAAC;IAED,IAAW,WAAW;QAClB,OAAO,IAAI,CAAC,YAAY,CAAC;IAC7B,CAAC;IAED,IAAW,oBAAoB;QAC3B,OAAO,IAAI,CAAC,qBAAqB,CAAC;IACtC,CAAC;IAEM,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,IAA2D;QAClF,MAAM,cAAc,GAAG,MAAM,iBAAiB,EAAE,CAAC;QAEjD,MAAM,WAAW,GAAG,IAAI,CAAC,IAAI,CAAC,cAAc,EAAE,IAAI,GAAG,OAAO,CAAC,CAAC;QAE9D,IAAI,MAAM,EAAE,CAAC,UAAU,CAAC,WAAW,CAAC,EAAE;YAClC,MAAM,OAAO,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,WAAW,EAAE,MAAM,CAAC,CAAC;YACvD,OAAO,IAAI,YAAY,CAAC;gBACpB,OAAO;gBACP,WAAW,EAAE,CAAC,IAAI,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;gBAC9B,oBAAoB,EAAE,IAAI;aAC7B,CAAC,CAAC;SACN;QAED,MAAM,IAAI,KAAK,CAAC,0BAA0B,IAAI,uBAAuB,cAAc,GAAG,CAAC,CAAC;IAC5F,CAAC;CACJ"}
1
+ {"version":3,"file":"LlamaGrammar.js","sourceRoot":"","sources":["../../src/llamaEvaluator/LlamaGrammar.ts"],"names":[],"mappings":"AAAA,OAAO,IAAI,MAAM,MAAM,CAAC;AACxB,OAAO,EAAE,MAAM,UAAU,CAAC;AAC1B,OAAO,EAAC,iBAAiB,EAAC,MAAM,+BAA+B,CAAC;AAChE,OAAO,EAAC,SAAS,EAAC,MAAM,uBAAuB,CAAC;AAEhD,OAAO,EAAC,YAAY,EAAC,MAAM,gBAAgB,CAAC;AAiB5C,MAAM,OAAO,YAAY;IACrB,gBAAgB;IACA,QAAQ,CAAe;IACtB,uBAAuB,CAAiD;IACxE,qBAAqB,CAAU;IAC/B,YAAY,CAAS;IAEtC;;;;;OAKG;IACH,YAAmB,EACf,OAAO,EAAE,sBAAsB,GAAG,EAAE,EAAE,oBAAoB,GAAG,KAAK,EAAE,YAAY,GAAG,KAAK,EACtE;QAClB,IAAI,CAAC,QAAQ,GAAG,IAAI,YAAY,CAAC,OAAO,EAAE;YACtC,YAAY;SACf,CAAC,CAAC;QACH,IAAI,CAAC,uBAAuB,GAAG,sBAAsB,IAAI,EAAE,CAAC;QAC5D,IAAI,CAAC,qBAAqB,GAAG,oBAAoB,CAAC;QAClD,IAAI,CAAC,YAAY,GAAG,OAAO,CAAC;IAChC,CAAC;IAED,IAAW,OAAO;QACd,OAAO,IAAI,CAAC,YAAY,CAAC;IAC7B,CAAC;IAED,IAAW,sBAAsB;QAC7B,OAAO,IAAI,CAAC,uBAAuB,CAAC;IACxC,CAAC;IAED,IAAW,oBAAoB;QAC3B,OAAO,IAAI,CAAC,qBAAqB,CAAC;IACtC,CAAC;IAEM,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,IAA2D;QAClF,MAAM,cAAc,GAAG,MAAM,iBAAiB,EAAE,CAAC;QAEjD,MAAM,WAAW,GAAG,IAAI,CAAC,IAAI,CAAC,cAAc,EAAE,IAAI,GAAG,OAAO,CAAC,CAAC;QAE9D,IAAI,MAAM,EAAE,CAAC,UAAU,CAAC,WAAW,CAAC,EAAE;YAClC,MAAM,OAAO,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,WAAW,EAAE,MAAM,CAAC,CAAC;YACvD,OAAO,IAAI,YAAY,CAAC;gBACpB,OAAO;gBACP,sBAAsB,EAAE,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;gBACtD,oBAAoB,EAAE,IAAI;aAC7B,CAAC,CAAC;SACN;QAED,MAAM,IAAI,KAAK,CAAC,0BAA0B,IAAI,uBAAuB,cAAc,GAAG,CAAC,CAAC;IAC5F,CAAC;CACJ"}
@@ -2,13 +2,14 @@ import { LlamaGrammar } from "./LlamaGrammar.js";
2
2
  export type LlamaGrammarEvaluationStateOptions = {
3
3
  grammar: LlamaGrammar;
4
4
  };
5
+ /**
6
+ * Grammar evaluation state is used to track the model response to determine the next allowed characters for the model to generate.
7
+ * Create a new grammar evaluation state for every response you generate with the model.
8
+ * This is only needed when using the `LlamaContext` class directly, as `LlamaChatSession` already handles this for you.
9
+ */
5
10
  export declare class LlamaGrammarEvaluationState {
6
11
  /**
7
- * Grammar evaluation state is used to track the model response to determine the next allowed characters for the model to generate.
8
- * Create a new grammar evaluation state for every response you generate with the model.
9
- * This is only needed when using the `LlamaContext` class directly, as `LlamaChatSession` already handles this for you.
10
- * @param {object} options
11
- * @param {LlamaGrammar} options.grammar
12
+ * @param options
12
13
  */
13
14
  constructor({ grammar }: LlamaGrammarEvaluationStateOptions);
14
15
  }
@@ -1,16 +1,17 @@
1
- import { LLAMAGrammarEvaluationState } from "./LlamaBins.js";
1
+ import { AddonGrammarEvaluationState } from "./LlamaBins.js";
2
+ /**
3
+ * Grammar evaluation state is used to track the model response to determine the next allowed characters for the model to generate.
4
+ * Create a new grammar evaluation state for every response you generate with the model.
5
+ * This is only needed when using the `LlamaContext` class directly, as `LlamaChatSession` already handles this for you.
6
+ */
2
7
  export class LlamaGrammarEvaluationState {
3
8
  /** @internal */
4
9
  _state;
5
10
  /**
6
- * Grammar evaluation state is used to track the model response to determine the next allowed characters for the model to generate.
7
- * Create a new grammar evaluation state for every response you generate with the model.
8
- * This is only needed when using the `LlamaContext` class directly, as `LlamaChatSession` already handles this for you.
9
- * @param {object} options
10
- * @param {LlamaGrammar} options.grammar
11
+ * @param options
11
12
  */
12
13
  constructor({ grammar }) {
13
- this._state = new LLAMAGrammarEvaluationState(grammar._grammar);
14
+ this._state = new AddonGrammarEvaluationState(grammar._grammar);
14
15
  }
15
16
  }
16
17
  //# sourceMappingURL=LlamaGrammarEvaluationState.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"LlamaGrammarEvaluationState.js","sourceRoot":"","sources":["../../src/llamaEvaluator/LlamaGrammarEvaluationState.ts"],"names":[],"mappings":"AAAA,OAAO,EAAC,2BAA2B,EAAC,MAAM,gBAAgB,CAAC;AAQ3D,MAAM,OAAO,2BAA2B;IACpC,gBAAgB;IACA,MAAM,CAA8B;IAEpD;;;;;;OAMG;IACH,YAAmB,EAAC,OAAO,EAAqC;QAC5D,IAAI,CAAC,MAAM,GAAG,IAAI,2BAA2B,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC;IACpE,CAAC;CACJ"}
1
+ {"version":3,"file":"LlamaGrammarEvaluationState.js","sourceRoot":"","sources":["../../src/llamaEvaluator/LlamaGrammarEvaluationState.ts"],"names":[],"mappings":"AAAA,OAAO,EAAC,2BAA2B,EAAC,MAAM,gBAAgB,CAAC;AAQ3D;;;;GAIG;AACH,MAAM,OAAO,2BAA2B;IACpC,gBAAgB;IACA,MAAM,CAA8B;IAEpD;;OAEG;IACH,YAAmB,EAAC,OAAO,EAAqC;QAC5D,IAAI,CAAC,MAAM,GAAG,IAAI,2BAA2B,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC;IACpE,CAAC;CACJ"}
@@ -1,5 +1,6 @@
1
1
  import { getGbnfGrammarForGbnfJsonSchema } from "../utils/getGbnfGrammarForGbnfJsonSchema.js";
2
2
  import { validateObjectAgainstGbnfSchema } from "../utils/gbnfJson/utils/validateObjectAgainstGbnfSchema.js";
3
+ import { LlamaText } from "../utils/LlamaText.js";
3
4
  import { LlamaGrammar } from "./LlamaGrammar.js";
4
5
  export class LlamaJsonSchemaGrammar extends LlamaGrammar {
5
6
  _schema;
@@ -7,7 +8,7 @@ export class LlamaJsonSchemaGrammar extends LlamaGrammar {
7
8
  const grammar = getGbnfGrammarForGbnfJsonSchema(schema);
8
9
  super({
9
10
  grammar,
10
- stopStrings: ["\n".repeat(4)],
11
+ stopGenerationTriggers: [LlamaText(["\n".repeat(4)])],
11
12
  trimWhitespaceSuffix: true
12
13
  });
13
14
  this._schema = schema;
@@ -1 +1 @@
1
- {"version":3,"file":"LlamaJsonSchemaGrammar.js","sourceRoot":"","sources":["../../src/llamaEvaluator/LlamaJsonSchemaGrammar.ts"],"names":[],"mappings":"AACA,OAAO,EAAC,+BAA+B,EAAC,MAAM,6CAA6C,CAAC;AAC5F,OAAO,EAAC,+BAA+B,EAAC,MAAM,4DAA4D,CAAC;AAC3G,OAAO,EAAC,YAAY,EAAC,MAAM,mBAAmB,CAAC;AAE/C,MAAM,OAAO,sBAAiE,SAAQ,YAAY;IAC7E,OAAO,CAAI;IAE5B,YAAmB,MAAS;QACxB,MAAM,OAAO,GAAG,+BAA+B,CAAC,MAAM,CAAC,CAAC;QAExD,KAAK,CAAC;YACF,OAAO;YACP,WAAW,EAAE,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;YAC7B,oBAAoB,EAAE,IAAI;SAC7B,CAAC,CAAC;QAEH,IAAI,CAAC,OAAO,GAAG,MAAM,CAAC;IAC1B,CAAC;IAEM,KAAK,CAAC,IAAY;QACrB,MAAM,UAAU,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QAEpC,+BAA+B,CAAC,UAAU,EAAE,IAAI,CAAC,OAAO,CAAC,CAAC;QAE1D,OAAO,UAAU,CAAC;IACtB,CAAC;CACJ"}
1
+ {"version":3,"file":"LlamaJsonSchemaGrammar.js","sourceRoot":"","sources":["../../src/llamaEvaluator/LlamaJsonSchemaGrammar.ts"],"names":[],"mappings":"AACA,OAAO,EAAC,+BAA+B,EAAC,MAAM,6CAA6C,CAAC;AAC5F,OAAO,EAAC,+BAA+B,EAAC,MAAM,4DAA4D,CAAC;AAC3G,OAAO,EAAC,SAAS,EAAC,MAAM,uBAAuB,CAAC;AAChD,OAAO,EAAC,YAAY,EAAC,MAAM,mBAAmB,CAAC;AAE/C,MAAM,OAAO,sBAAiE,SAAQ,YAAY;IAC7E,OAAO,CAAI;IAE5B,YAAmB,MAAS;QACxB,MAAM,OAAO,GAAG,+BAA+B,CAAC,MAAM,CAAC,CAAC;QAExD,KAAK,CAAC;YACF,OAAO;YACP,sBAAsB,EAAE,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;YACrD,oBAAoB,EAAE,IAAI;SAC7B,CAAC,CAAC;QAEH,IAAI,CAAC,OAAO,GAAG,MAAM,CAAC;IAC1B,CAAC;IAEM,KAAK,CAAC,IAAY;QACrB,MAAM,UAAU,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QAEpC,+BAA+B,CAAC,UAAU,EAAE,IAAI,CAAC,OAAO,CAAC,CAAC;QAE1D,OAAO,UAAU,CAAC;IACtB,CAAC;CACJ"}
@@ -1,123 +1,119 @@
1
+ import { EventRelay } from "lifecycle-utils";
2
+ import { Token } from "../types.js";
3
+ import { ModelTypeDescription } from "../utils/getBin.js";
4
+ import type { BuiltinSpecialTokenValue } from "../utils/LlamaText.js";
1
5
  export type LlamaModelOptions = {
2
6
  /** path to the model on the filesystem */
3
7
  modelPath: string;
4
- /**
5
- * If null, a random seed will be used
6
- * @deprecated use the `seed` option on `LlamaContext` instead
7
- * @hidden
8
- * */
9
- seed?: number | null;
10
- /**
11
- * text context size
12
- * @deprecated use the `contextSize` option on `LlamaContext` instead
13
- * @hidden
14
- * */
15
- contextSize?: number;
16
- /**
17
- * prompt processing batch size
18
- * @deprecated use the `batchSize` option on `LlamaContext` instead
19
- * @hidden
20
- * */
21
- batchSize?: number;
22
8
  /** number of layers to store in VRAM */
23
9
  gpuLayers?: number;
24
- /**
25
- * number of threads to use to evaluate tokens
26
- * @deprecated use the `threads` option on `LlamaContext` instead
27
- * @hidden
28
- * */
29
- threads?: number;
30
- /**
31
- * Temperature is a hyperparameter that controls the randomness of the generated text.
32
- * It affects the probability distribution of the model's output tokens.
33
- * A higher temperature (e.g., 1.5) makes the output more random and creative,
34
- * while a lower temperature (e.g., 0.5) makes the output more focused, deterministic, and conservative.
35
- * The suggested temperature is 0.8, which provides a balance between randomness and determinism.
36
- * At the extreme, a temperature of 0 will always pick the most likely next token, leading to identical outputs in each run.
37
- *
38
- * Set to `0` to disable.
39
- * @deprecated use the `temperature` option on `LlamaChatSession`'s `prompt` function or `LlamaContext`'s `evaluate` function instead
40
- * @hidden
41
- */
42
- temperature?: number;
43
- /**
44
- * Limits the model to consider only the K most likely next tokens for sampling at each step of sequence generation.
45
- * An integer number between `1` and the size of the vocabulary.
46
- * Set to `0` to disable (which uses the full vocabulary).
47
- *
48
- * Only relevant when `temperature` is set to a value greater than 0.
49
- * @deprecated use the `topK` option on `LlamaChatSession`'s `prompt` function or `LlamaContext`'s `evaluate` function instead
50
- * @hidden
51
- * */
52
- topK?: number;
53
- /**
54
- * Dynamically selects the smallest set of tokens whose cumulative probability exceeds the threshold P,
55
- * and samples the next token only from this set.
56
- * A float number between `0` and `1`.
57
- * Set to `1` to disable.
58
- *
59
- * Only relevant when `temperature` is set to a value greater than `0`.
60
- * @deprecated use the `topP` option on `LlamaChatSession`'s `prompt` function or `LlamaContext`'s `evaluate` function instead
61
- * @hidden
62
- */
63
- topP?: number;
64
- /**
65
- * the llama_eval() call computes all logits, not just the last one
66
- * @deprecated use the `logitsAll` option on `LlamaContext` instead
67
- * @hidden
68
- */
69
- logitsAll?: boolean;
70
10
  /** only load the vocabulary, no weights */
71
11
  vocabOnly?: boolean;
72
12
  /** use mmap if possible */
73
13
  useMmap?: boolean;
74
14
  /** force system to keep model in RAM */
75
15
  useMlock?: boolean;
76
- /**
77
- * embedding mode only
78
- * @deprecated use the `embedding` option on `LlamaContext` instead
79
- * @hidden
80
- */
81
- embedding?: boolean;
82
16
  };
83
17
  export declare class LlamaModel {
18
+ readonly onDispose: EventRelay<void>;
84
19
  /**
85
20
  * > options source:
86
21
  * > [github:ggerganov/llama.cpp/llama.h](
87
- * > https://github.com/ggerganov/llama.cpp/blob/b5ffb2849d23afe73647f68eec7b68187af09be6/llama.h#L102) (`struct llama_context_params`)
88
- * @param {object} options
89
- * @param {string} options.modelPath - path to the model on the filesystem
90
- * @param {number | null} [options.seed] - If null, a random seed will be used
91
- * @param {number} [options.contextSize] - text context size
92
- * @param {number} [options.batchSize] - prompt processing batch size
93
- * @param {number} [options.gpuLayers] - number of layers to store in VRAM
94
- * @param {number} [options.threads] - number of threads to use to evaluate tokens
95
- * @param {number} [options.temperature] - Temperature is a hyperparameter that controls the randomness of the generated text.
96
- * It affects the probability distribution of the model's output tokens.
97
- * A higher temperature (e.g., 1.5) makes the output more random and creative,
98
- * while a lower temperature (e.g., 0.5) makes the output more focused, deterministic, and conservative.
99
- * The suggested temperature is 0.8, which provides a balance between randomness and determinism.
100
- * At the extreme, a temperature of 0 will always pick the most likely next token, leading to identical outputs in each run.
101
- *
102
- * Set to `0` to disable.
103
- * @param {number} [options.topK] - Limits the model to consider only the K most likely next tokens for sampling at each step of
104
- * sequence generation.
105
- * An integer number between `1` and the size of the vocabulary.
106
- * Set to `0` to disable (which uses the full vocabulary).
107
- *
108
- * Only relevant when `temperature` is set to a value greater than 0.
109
- * @param {number} [options.topP] - Dynamically selects the smallest set of tokens whose cumulative probability exceeds the threshold P,
110
- * and samples the next token only from this set.
111
- * A float number between `0` and `1`.
112
- * Set to `1` to disable.
113
- *
114
- * Only relevant when `temperature` is set to a value greater than `0`.
115
- * @param {boolean} [options.logitsAll] - the llama_eval() call computes all logits, not just the last one
116
- * @param {boolean} [options.vocabOnly] - only load the vocabulary, no weights
117
- * @param {boolean} [options.useMmap] - use mmap if possible
118
- * @param {boolean} [options.useMlock] - force system to keep model in RAM
119
- * @param {boolean} [options.embedding] - embedding mode only
120
- */
121
- constructor({ modelPath, seed, contextSize, batchSize, gpuLayers, threads, temperature, topK, topP, logitsAll, vocabOnly, useMmap, useMlock, embedding }: LlamaModelOptions);
22
+ * > https://github.com/ggerganov/llama.cpp/blob/05816027d649f977468fc804cdb54e99eac246d1/llama.h#L161) (`struct llama_model_params`)
23
+ * @param options
24
+ * @param options.modelPath - path to the model on the filesystem
25
+ * @param [options.gpuLayers] - number of layers to store in VRAM
26
+ * @param [options.vocabOnly] - only load the vocabulary, no weights
27
+ * @param [options.useMmap] - use mmap if possible
28
+ * @param [options.useMlock] - force system to keep model in RAM
29
+ */
30
+ constructor({ modelPath, gpuLayers, vocabOnly, useMmap, useMlock }: LlamaModelOptions);
31
+ dispose(): void;
32
+ /** @hidden */
33
+ [Symbol.dispose](): void;
34
+ get disposed(): boolean;
35
+ get tokens(): LlamaModelTokens;
36
+ get filename(): string | undefined;
37
+ /**
38
+ * Transform text into tokens that can be fed to the model
39
+ * @param text - the text to tokenize
40
+ * @param [specialTokens] - if set to true, text that corresponds to special tokens will be tokenized to those tokens.
41
+ * For example, `<s>` will be tokenized to the BOS token if `specialTokens` is set to `true`,
42
+ * otherwise it will be tokenized to tokens that correspond to the plaintext `<s>` string.
43
+ */
44
+ tokenize(text: string, specialTokens?: boolean): Token[];
45
+ tokenize(text: BuiltinSpecialTokenValue, specialTokens: "builtin"): Token[];
46
+ /** Transform tokens into text */
47
+ detokenize(tokens: readonly Token[]): string;
48
+ /** @hidden `ModelTypeDescription` type alias is too long in the documentation */
49
+ get typeDescription(): ModelTypeDescription;
50
+ /** The context size the model was trained on */
51
+ get trainContextSize(): number;
122
52
  static get systemInfo(): string;
123
53
  }
54
+ export declare class LlamaModelTokens {
55
+ private constructor();
56
+ /**
57
+ * @returns infill tokens
58
+ */
59
+ get infill(): LlamaModelInfillTokens;
60
+ /**
61
+ * @returns The BOS (Beginning Of Sequence) token.
62
+ */
63
+ get bos(): Token | null;
64
+ /**
65
+ * @returns The EOS (End Of Sequence) token.
66
+ */
67
+ get eos(): Token | null;
68
+ /**
69
+ * @returns The NL (New Line) token.
70
+ */
71
+ get nl(): Token | null;
72
+ /**
73
+ * @returns The BOS (Beginning Of Sequence) token as a string.
74
+ */
75
+ get bosString(): string | null;
76
+ /**
77
+ * @returns The EOS (End Of Sequence) token as a string.
78
+ */
79
+ get eosString(): string | null;
80
+ /**
81
+ * @returns The NL (New Line) token as a string.
82
+ */
83
+ get nlString(): string | null;
84
+ }
85
+ export declare class LlamaModelInfillTokens {
86
+ private constructor();
87
+ /**
88
+ * @returns The beginning of infill prefix token.
89
+ */
90
+ get prefix(): Token | null;
91
+ /**
92
+ * @returns The beginning of infill middle token.
93
+ */
94
+ get middle(): Token | null;
95
+ /**
96
+ * @returns The beginning of infill suffix token.
97
+ */
98
+ get suffix(): Token | null;
99
+ /**
100
+ * @returns End of infill middle token (End Of Text).
101
+ */
102
+ get eot(): Token | null;
103
+ /**
104
+ * @returns The beginning of infill prefix token as a string.
105
+ */
106
+ get prefixString(): string | null;
107
+ /**
108
+ * @returns The beginning of infill middle token as a string.
109
+ */
110
+ get middleString(): string | null;
111
+ /**
112
+ * @returns The beginning of infill suffix token as a string.
113
+ */
114
+ get suffixString(): string | null;
115
+ /**
116
+ * @returns End of infill middle token (End Of Text) as a string.
117
+ */
118
+ get eotString(): string | null;
119
+ }