modelfusion 0.106.0 → 0.108.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their public registries.
Files changed (82)
  1. package/CHANGELOG.md +59 -0
  2. package/README.md +19 -59
  3. package/model-function/generate-text/prompt-template/ChatMLPromptTemplate.test.cjs +11 -0
  4. package/model-function/generate-text/prompt-template/ChatMLPromptTemplate.test.js +11 -0
  5. package/model-function/generate-text/prompt-template/Llama2PromptTemplate.cjs +9 -7
  6. package/model-function/generate-text/prompt-template/Llama2PromptTemplate.js +9 -7
  7. package/model-function/generate-text/prompt-template/Llama2PromptTemplate.test.cjs +11 -0
  8. package/model-function/generate-text/prompt-template/Llama2PromptTemplate.test.js +11 -0
  9. package/model-function/generate-text/prompt-template/MistralInstructPromptTemplate.cjs +150 -0
  10. package/model-function/generate-text/prompt-template/MistralInstructPromptTemplate.d.ts +62 -0
  11. package/model-function/generate-text/prompt-template/MistralInstructPromptTemplate.js +143 -0
  12. package/model-function/generate-text/prompt-template/MistralInstructPromptTemplate.test.cjs +60 -0
  13. package/model-function/generate-text/prompt-template/MistralInstructPromptTemplate.test.js +58 -0
  14. package/model-function/generate-text/prompt-template/NeuralChatPromptTemplate.test.cjs +11 -0
  15. package/model-function/generate-text/prompt-template/NeuralChatPromptTemplate.test.js +11 -0
  16. package/model-function/generate-text/prompt-template/TextPromptTemplate.test.cjs +11 -0
  17. package/model-function/generate-text/prompt-template/TextPromptTemplate.test.js +11 -0
  18. package/model-function/generate-text/prompt-template/VicunaPromptTemplate.test.cjs +11 -0
  19. package/model-function/generate-text/prompt-template/VicunaPromptTemplate.test.js +11 -0
  20. package/model-function/generate-text/prompt-template/index.cjs +2 -1
  21. package/model-function/generate-text/prompt-template/index.d.ts +1 -0
  22. package/model-function/generate-text/prompt-template/index.js +1 -0
  23. package/model-provider/llamacpp/LlamaCppBakLLaVA1PromptTemplate.d.ts +3 -3
  24. package/model-provider/llamacpp/{LlamaCppTextGenerationModel.cjs → LlamaCppCompletionModel.cjs} +25 -11
  25. package/model-provider/llamacpp/{LlamaCppTextGenerationModel.d.ts → LlamaCppCompletionModel.d.ts} +125 -38
  26. package/model-provider/llamacpp/{LlamaCppTextGenerationModel.js → LlamaCppCompletionModel.js} +23 -9
  27. package/model-provider/llamacpp/{LlamaCppTextGenerationModel.test.cjs → LlamaCppCompletionModel.test.cjs} +3 -3
  28. package/model-provider/llamacpp/LlamaCppCompletionModel.test.d.ts +1 -0
  29. package/model-provider/llamacpp/{LlamaCppTextGenerationModel.test.js → LlamaCppCompletionModel.test.js} +3 -3
  30. package/model-provider/llamacpp/LlamaCppFacade.cjs +2 -2
  31. package/model-provider/llamacpp/LlamaCppFacade.d.ts +2 -2
  32. package/model-provider/llamacpp/LlamaCppFacade.js +2 -2
  33. package/model-provider/llamacpp/index.cjs +1 -1
  34. package/model-provider/llamacpp/index.d.ts +1 -1
  35. package/model-provider/llamacpp/index.js +1 -1
  36. package/model-provider/mistral/MistralChatModel.cjs +4 -4
  37. package/model-provider/mistral/MistralChatModel.d.ts +6 -6
  38. package/model-provider/mistral/MistralChatModel.js +1 -1
  39. package/model-provider/mistral/index.cjs +3 -3
  40. package/model-provider/mistral/index.d.ts +2 -2
  41. package/model-provider/mistral/index.js +2 -2
  42. package/model-provider/openai/AbstractOpenAIChatModel.cjs +2 -10
  43. package/model-provider/openai/AbstractOpenAIChatModel.d.ts +13 -195
  44. package/model-provider/openai/AbstractOpenAIChatModel.js +2 -10
  45. package/model-provider/openai/AbstractOpenAICompletionModel.cjs +167 -0
  46. package/model-provider/openai/AbstractOpenAICompletionModel.d.ts +199 -0
  47. package/model-provider/openai/AbstractOpenAICompletionModel.js +163 -0
  48. package/model-provider/openai/OpenAIChatFunctionCallStructureGenerationModel.d.ts +1 -3
  49. package/model-provider/openai/OpenAIChatModel.d.ts +3 -6
  50. package/model-provider/openai/OpenAICompletionModel.cjs +4 -156
  51. package/model-provider/openai/OpenAICompletionModel.d.ts +4 -191
  52. package/model-provider/openai/OpenAICompletionModel.js +3 -155
  53. package/model-provider/openai/index.cjs +1 -0
  54. package/model-provider/openai/index.d.ts +1 -0
  55. package/model-provider/openai/index.js +1 -0
  56. package/model-provider/openai-compatible/OpenAICompatibleChatModel.d.ts +4 -5
  57. package/model-provider/openai-compatible/OpenAICompatibleCompletionModel.cjs +74 -0
  58. package/model-provider/openai-compatible/OpenAICompatibleCompletionModel.d.ts +27 -0
  59. package/model-provider/openai-compatible/OpenAICompatibleCompletionModel.js +70 -0
  60. package/model-provider/openai-compatible/OpenAICompatibleFacade.cjs +37 -6
  61. package/model-provider/openai-compatible/OpenAICompatibleFacade.d.ts +33 -5
  62. package/model-provider/openai-compatible/OpenAICompatibleFacade.js +35 -5
  63. package/model-provider/openai-compatible/OpenAICompatibleProviderName.cjs +2 -0
  64. package/model-provider/openai-compatible/OpenAICompatibleProviderName.d.ts +1 -0
  65. package/model-provider/openai-compatible/OpenAICompatibleProviderName.js +1 -0
  66. package/model-provider/openai-compatible/TogetherAIApiConfiguration.cjs +29 -0
  67. package/model-provider/openai-compatible/TogetherAIApiConfiguration.d.ts +18 -0
  68. package/model-provider/openai-compatible/TogetherAIApiConfiguration.js +25 -0
  69. package/model-provider/openai-compatible/index.cjs +4 -1
  70. package/model-provider/openai-compatible/index.d.ts +4 -1
  71. package/model-provider/openai-compatible/index.js +4 -1
  72. package/package.json +16 -16
  73. package/tool/generate-tool-call/index.cjs +1 -0
  74. package/tool/generate-tool-call/index.d.ts +1 -0
  75. package/tool/generate-tool-call/index.js +1 -0
  76. package/tool/generate-tool-call/jsonToolCallPrompt.cjs +30 -0
  77. package/tool/generate-tool-call/jsonToolCallPrompt.d.ts +5 -0
  78. package/tool/generate-tool-call/jsonToolCallPrompt.js +27 -0
  79. /package/{model-provider/llamacpp/LlamaCppTextGenerationModel.test.d.ts → model-function/generate-text/prompt-template/MistralInstructPromptTemplate.test.d.ts} +0 -0
  80. /package/model-provider/mistral/{MistralPromptTemplate.cjs → MistralChatPromptTemplate.cjs} +0 -0
  81. /package/model-provider/mistral/{MistralPromptTemplate.d.ts → MistralChatPromptTemplate.d.ts} +0 -0
  82. /package/model-provider/mistral/{MistralPromptTemplate.js → MistralChatPromptTemplate.js} +0 -0

package/model-provider/llamacpp/{LlamaCppTextGenerationModel.d.ts → LlamaCppCompletionModel.d.ts}
@@ -8,7 +8,7 @@ import { PromptTemplateTextStreamingModel } from "../../model-function/generate-
  import { TextGenerationModelSettings, TextStreamingModel } from "../../model-function/generate-text/TextGenerationModel.js";
  import { TextGenerationPromptTemplate } from "../../model-function/generate-text/TextGenerationPromptTemplate.js";
  import { LlamaCppTokenizer } from "./LlamaCppTokenizer.js";
- export interface LlamaCppTextGenerationModelSettings<CONTEXT_WINDOW_SIZE extends number | undefined> extends TextGenerationModelSettings {
+ export interface LlamaCppCompletionModelSettings<CONTEXT_WINDOW_SIZE extends number | undefined> extends TextGenerationModelSettings {
  api?: ApiConfiguration;
  /**
  * Specify the context window size of the model that you have loaded in your
@@ -16,26 +16,113 @@ export interface LlamaCppTextGenerationModelSettings<CONTEXT_WINDOW_SIZE extends
  */
  contextWindowSize?: CONTEXT_WINDOW_SIZE;
  /**
- * Save the prompt and generation for avoid reprocess entire prompt if a part of this isn't change (default: false)
+ * Adjust the randomness of the generated text (default: 0.8).
  */
- cachePrompt?: boolean;
  temperature?: number;
+ /**
+ * Limit the next token selection to the K most probable tokens (default: 40).
+ */
  topK?: number;
+ /**
+ * Limit the next token selection to a subset of tokens with a cumulative probability above a threshold P (default: 0.95).
+ */
  topP?: number;
+ /**
+ * The minimum probability for a token to be considered, relative to the probability of the most likely token (default: 0.05).
+ */
+ minP?: number;
+ /**
+ * Specify the number of tokens from the prompt to retain when the context size is exceeded
+ * and tokens need to be discarded. By default, this value is set to 0 (meaning no tokens
+ * are kept). Use -1 to retain all tokens from the prompt.
+ */
  nKeep?: number;
+ /**
+ * Enable tail free sampling with parameter z (default: 1.0, 1.0 = disabled).
+ */
  tfsZ?: number;
+ /**
+ * Enable locally typical sampling with parameter p (default: 1.0, 1.0 = disabled).
+ */
  typicalP?: number;
+ /**
+ * Control the repetition of token sequences in the generated text (default: 1.1).
+ */
  repeatPenalty?: number;
+ /**
+ * Last n tokens to consider for penalizing repetition (default: 64, 0 = disabled, -1 = ctx-size).
+ */
  repeatLastN?: number;
+ /**
+ * Penalize newline tokens when applying the repeat penalty (default: true).
+ */
  penalizeNl?: boolean;
+ /**
+ * Repeat alpha presence penalty (default: 0.0, 0.0 = disabled).
+ */
+ presencePenalty?: number;
+ /**
+ * Repeat alpha frequency penalty (default: 0.0, 0.0 = disabled).
+ */
+ frequencyPenalty?: number;
+ /**
+ * This will replace the prompt for the purpose of the penalty evaluation.
+ * Can be either null, a string or an array of numbers representing tokens
+ * (default: null = use the original prompt).
+ */
+ penaltyPrompt?: string | number[];
+ /**
+ * Enable Mirostat sampling, controlling perplexity during text generation
+ * (default: 0, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0).
+ */
  mirostat?: number;
+ /**
+ * Set the Mirostat target entropy, parameter tau (default: 5.0).
+ */
  mirostatTau?: number;
+ /**
+ * Set the Mirostat learning rate, parameter eta (default: 0.1).
+ */
  mirostatEta?: number;
+ /**
+ * Set grammar for grammar-based sampling (default: no grammar)
+ *
+ * @see https://github.com/ggerganov/llama.cpp/blob/master/grammars/README.md
+ */
+ grammar?: string;
+ /**
+ * Set the random number generator (RNG) seed
+ * (default: -1, -1 = random seed).
+ */
  seed?: number;
+ /**
+ * Ignore end of stream token and continue generating (default: false).
+ */
  ignoreEos?: boolean;
+ /**
+ * Modify the likelihood of a token appearing in the generated text completion.
+ * For example, use "logit_bias": [[15043,1.0]] to increase the likelihood of the token
+ * 'Hello', or "logit_bias": [[15043,-1.0]] to decrease its likelihood.
+ * Setting the value to false, "logit_bias": [[15043,false]] ensures that the token Hello is
+ * never produced (default: []).
+ */
  logitBias?: Array<[number, number | false]>;
+ /**
+ * If greater than 0, the response also contains the probabilities of top N tokens
+ * for each generated token (default: 0)
+ */
+ nProbs?: number;
+ /**
+ * Save the prompt and generation for avoid reprocess entire prompt if a part of this isn't change (default: false)
+ */
+ cachePrompt?: boolean;
+ /**
+ * Assign the completion task to an specific slot.
+ * If is -1 the task will be assigned to a Idle slot (default: -1)
+ */
+ slotId?: number;
  }
- export interface LlamaCppTextGenerationPrompt {
+ export interface LlamaCppCompletionPrompt {
  /**
  * Text prompt. Images can be included through references such as `[img-ID]`, e.g. `[img-1]`.
  */
@@ -45,18 +45,132 @@ export interface LlamaCppTextGenerationPrompt {
  */
  images?: Record<number, string>;
  }
- export declare class LlamaCppTextGenerationModel<CONTEXT_WINDOW_SIZE extends number | undefined> extends AbstractModel<LlamaCppTextGenerationModelSettings<CONTEXT_WINDOW_SIZE>> implements TextStreamingModel<LlamaCppTextGenerationPrompt, LlamaCppTextGenerationModelSettings<CONTEXT_WINDOW_SIZE>> {
- constructor(settings?: LlamaCppTextGenerationModelSettings<CONTEXT_WINDOW_SIZE>);
+ export declare class LlamaCppCompletionModel<CONTEXT_WINDOW_SIZE extends number | undefined> extends AbstractModel<LlamaCppCompletionModelSettings<CONTEXT_WINDOW_SIZE>> implements TextStreamingModel<LlamaCppCompletionPrompt, LlamaCppCompletionModelSettings<CONTEXT_WINDOW_SIZE>> {
+ constructor(settings?: LlamaCppCompletionModelSettings<CONTEXT_WINDOW_SIZE>);
  readonly provider = "llamacpp";
  get modelName(): null;
  get contextWindowSize(): CONTEXT_WINDOW_SIZE;
  readonly tokenizer: LlamaCppTokenizer;
- callAPI<RESPONSE>(prompt: LlamaCppTextGenerationPrompt, options: {
- responseFormat: LlamaCppTextGenerationResponseFormatType<RESPONSE>;
+ callAPI<RESPONSE>(prompt: LlamaCppCompletionPrompt, options: {
+ responseFormat: LlamaCppCompletionResponseFormatType<RESPONSE>;
  } & FunctionOptions): Promise<RESPONSE>;
- get settingsForEvent(): Partial<LlamaCppTextGenerationModelSettings<CONTEXT_WINDOW_SIZE>>;
- countPromptTokens(prompt: LlamaCppTextGenerationPrompt): Promise<number>;
- doGenerateTexts(prompt: LlamaCppTextGenerationPrompt, options?: FunctionOptions): Promise<{
+ get settingsForEvent(): Partial<LlamaCppCompletionModelSettings<CONTEXT_WINDOW_SIZE>>;
+ countPromptTokens(prompt: LlamaCppCompletionPrompt): Promise<number>;
+ doGenerateTexts(prompt: LlamaCppCompletionPrompt, options?: FunctionOptions): Promise<{
  response: {
  model: string;
  stop: true;
@@ -81,11 +168,11 @@ export declare class LlamaCppTextGenerationModel<CONTEXT_WINDOW_SIZE extends num
  presence_penalty: number;
  repeat_last_n: number;
  repeat_penalty: number;
- temp: number;
  tfs_z: number;
  top_k: number;
  top_p: number;
  typical_p: number;
+ temperature?: number | undefined;
  };
  stopped_eos: boolean;
  stopped_limit: boolean;
@@ -96,10 +183,10 @@ export declare class LlamaCppTextGenerationModel<CONTEXT_WINDOW_SIZE extends num
  predicted_n: number;
  predicted_per_second: number | null;
  predicted_per_token_ms: number | null;
- prompt_ms: number | null;
  prompt_n: number;
  prompt_per_second: number | null;
  prompt_per_token_ms: number | null;
+ prompt_ms?: number | null | undefined;
  };
  tokens_cached: number;
  tokens_evaluated: number;
@@ -116,7 +203,7 @@ export declare class LlamaCppTextGenerationModel<CONTEXT_WINDOW_SIZE extends num
  totalTokens: number;
  };
  }>;
- doStreamText(prompt: LlamaCppTextGenerationPrompt, options?: FunctionOptions): Promise<AsyncIterable<Delta<{
+ doStreamText(prompt: LlamaCppCompletionPrompt, options?: FunctionOptions): Promise<AsyncIterable<Delta<{
  model: string;
  stop: true;
  content: string;
@@ -140,11 +227,11 @@ export declare class LlamaCppTextGenerationModel<CONTEXT_WINDOW_SIZE extends num
  presence_penalty: number;
  repeat_last_n: number;
  repeat_penalty: number;
- temp: number;
  tfs_z: number;
  top_k: number;
  top_p: number;
  typical_p: number;
+ temperature?: number | undefined;
  };
  stopped_eos: boolean;
  stopped_limit: boolean;
@@ -155,10 +242,10 @@ export declare class LlamaCppTextGenerationModel<CONTEXT_WINDOW_SIZE extends num
  predicted_n: number;
  predicted_per_second: number | null;
  predicted_per_token_ms: number | null;
- prompt_ms: number | null;
  prompt_n: number;
  prompt_per_second: number | null;
  prompt_per_token_ms: number | null;
+ prompt_ms?: number | null | undefined;
  };
  tokens_cached: number;
  tokens_evaluated: number;
@@ -169,16 +256,16 @@ export declare class LlamaCppTextGenerationModel<CONTEXT_WINDOW_SIZE extends num
  content: string;
  }>>>;
  extractTextDelta(delta: unknown): string;
- withTextPrompt(): PromptTemplateTextStreamingModel<string, LlamaCppTextGenerationPrompt, LlamaCppTextGenerationModelSettings<CONTEXT_WINDOW_SIZE>, this>;
+ withTextPrompt(): PromptTemplateTextStreamingModel<string, LlamaCppCompletionPrompt, LlamaCppCompletionModelSettings<CONTEXT_WINDOW_SIZE>, this>;
  /**
  * Maps the prompt for a text version of the Llama.cpp prompt template (without image support).
  */
- withTextPromptTemplate<INPUT_PROMPT>(promptTemplate: TextGenerationPromptTemplate<INPUT_PROMPT, string>): PromptTemplateTextStreamingModel<INPUT_PROMPT, string, LlamaCppTextGenerationModelSettings<CONTEXT_WINDOW_SIZE>, PromptTemplateTextStreamingModel<string, LlamaCppTextGenerationPrompt, LlamaCppTextGenerationModelSettings<CONTEXT_WINDOW_SIZE>, this>>;
+ withTextPromptTemplate<INPUT_PROMPT>(promptTemplate: TextGenerationPromptTemplate<INPUT_PROMPT, string>): PromptTemplateTextStreamingModel<INPUT_PROMPT, string, LlamaCppCompletionModelSettings<CONTEXT_WINDOW_SIZE>, PromptTemplateTextStreamingModel<string, LlamaCppCompletionPrompt, LlamaCppCompletionModelSettings<CONTEXT_WINDOW_SIZE>, this>>;
  /**
  * Maps the prompt for the full Llama.cpp prompt template (incl. image support).
  */
- withPromptTemplate<INPUT_PROMPT>(promptTemplate: TextGenerationPromptTemplate<INPUT_PROMPT, LlamaCppTextGenerationPrompt>): PromptTemplateTextStreamingModel<INPUT_PROMPT, LlamaCppTextGenerationPrompt, LlamaCppTextGenerationModelSettings<CONTEXT_WINDOW_SIZE>, this>;
- withSettings(additionalSettings: Partial<LlamaCppTextGenerationModelSettings<CONTEXT_WINDOW_SIZE>>): this;
+ withPromptTemplate<INPUT_PROMPT>(promptTemplate: TextGenerationPromptTemplate<INPUT_PROMPT, LlamaCppCompletionPrompt>): PromptTemplateTextStreamingModel<INPUT_PROMPT, LlamaCppCompletionPrompt, LlamaCppCompletionModelSettings<CONTEXT_WINDOW_SIZE>, this>;
+ withSettings(additionalSettings: Partial<LlamaCppCompletionModelSettings<CONTEXT_WINDOW_SIZE>>): this;
  }
  declare const llamaCppTextGenerationResponseSchema: z.ZodObject<{
  content: z.ZodString;
@@ -202,7 +289,7 @@ declare const llamaCppTextGenerationResponseSchema: z.ZodObject<{
  seed: z.ZodNumber;
  stop: z.ZodArray<z.ZodString, "many">;
  stream: z.ZodBoolean;
- temp: z.ZodNumber;
+ temperature: z.ZodOptional<z.ZodNumber>;
  tfs_z: z.ZodNumber;
  top_k: z.ZodNumber;
  top_p: z.ZodNumber;
@@ -226,11 +313,11 @@ declare const llamaCppTextGenerationResponseSchema: z.ZodObject<{
  presence_penalty: number;
  repeat_last_n: number;
  repeat_penalty: number;
- temp: number;
  tfs_z: number;
  top_k: number;
  top_p: number;
  typical_p: number;
+ temperature?: number | undefined;
  }, {
  model: string;
  stream: boolean;
@@ -250,11 +337,11 @@ declare const llamaCppTextGenerationResponseSchema: z.ZodObject<{
  presence_penalty: number;
  repeat_last_n: number;
  repeat_penalty: number;
- temp: number;
  tfs_z: number;
  top_k: number;
  top_p: number;
  typical_p: number;
+ temperature?: number | undefined;
  }>;
  model: z.ZodString;
  prompt: z.ZodString;
@@ -267,7 +354,7 @@ declare const llamaCppTextGenerationResponseSchema: z.ZodObject<{
  predicted_n: z.ZodNumber;
  predicted_per_second: z.ZodNullable<z.ZodNumber>;
  predicted_per_token_ms: z.ZodNullable<z.ZodNumber>;
- prompt_ms: z.ZodNullable<z.ZodNumber>;
+ prompt_ms: z.ZodOptional<z.ZodNullable<z.ZodNumber>>;
  prompt_n: z.ZodNumber;
  prompt_per_second: z.ZodNullable<z.ZodNumber>;
  prompt_per_token_ms: z.ZodNullable<z.ZodNumber>;
@@ -276,19 +363,19 @@ declare const llamaCppTextGenerationResponseSchema: z.ZodObject<{
  predicted_n: number;
  predicted_per_second: number | null;
  predicted_per_token_ms: number | null;
- prompt_ms: number | null;
  prompt_n: number;
  prompt_per_second: number | null;
  prompt_per_token_ms: number | null;
+ prompt_ms?: number | null | undefined;
  }, {
  predicted_ms: number;
  predicted_n: number;
  predicted_per_second: number | null;
  predicted_per_token_ms: number | null;
- prompt_ms: number | null;
  prompt_n: number;
  prompt_per_second: number | null;
  prompt_per_token_ms: number | null;
+ prompt_ms?: number | null | undefined;
  }>;
  tokens_cached: z.ZodNumber;
  tokens_evaluated: z.ZodNumber;
@@ -318,11 +405,11 @@ declare const llamaCppTextGenerationResponseSchema: z.ZodObject<{
  presence_penalty: number;
  repeat_last_n: number;
  repeat_penalty: number;
- temp: number;
  tfs_z: number;
  top_k: number;
  top_p: number;
  typical_p: number;
+ temperature?: number | undefined;
  };
  stopped_eos: boolean;
  stopped_limit: boolean;
@@ -333,10 +420,10 @@ declare const llamaCppTextGenerationResponseSchema: z.ZodObject<{
  predicted_n: number;
  predicted_per_second: number | null;
  predicted_per_token_ms: number | null;
- prompt_ms: number | null;
  prompt_n: number;
  prompt_per_second: number | null;
  prompt_per_token_ms: number | null;
+ prompt_ms?: number | null | undefined;
  };
  tokens_cached: number;
  tokens_evaluated: number;
@@ -366,11 +453,11 @@ declare const llamaCppTextGenerationResponseSchema: z.ZodObject<{
  presence_penalty: number;
  repeat_last_n: number;
  repeat_penalty: number;
- temp: number;
  tfs_z: number;
  top_k: number;
  top_p: number;
  typical_p: number;
+ temperature?: number | undefined;
  };
  stopped_eos: boolean;
  stopped_limit: boolean;
@@ -381,10 +468,10 @@ declare const llamaCppTextGenerationResponseSchema: z.ZodObject<{
  predicted_n: number;
  predicted_per_second: number | null;
  predicted_per_token_ms: number | null;
- prompt_ms: number | null;
  prompt_n: number;
  prompt_per_second: number | null;
  prompt_per_token_ms: number | null;
+ prompt_ms?: number | null | undefined;
  };
  tokens_cached: number;
  tokens_evaluated: number;
@@ -416,11 +503,11 @@ declare const llamaCppTextStreamChunkSchema: import("../../core/schema/ZodSchema
  presence_penalty: number;
  repeat_last_n: number;
  repeat_penalty: number;
- temp: number;
  tfs_z: number;
  top_k: number;
  top_p: number;
  typical_p: number;
+ temperature?: number | undefined;
  };
  stopped_eos: boolean;
  stopped_limit: boolean;
@@ -431,10 +518,10 @@ declare const llamaCppTextStreamChunkSchema: import("../../core/schema/ZodSchema
  predicted_n: number;
  predicted_per_second: number | null;
  predicted_per_token_ms: number | null;
- prompt_ms: number | null;
  prompt_n: number;
  prompt_per_second: number | null;
  prompt_per_token_ms: number | null;
+ prompt_ms?: number | null | undefined;
  };
  tokens_cached: number;
  tokens_evaluated: number;
@@ -445,11 +532,11 @@ declare const llamaCppTextStreamChunkSchema: import("../../core/schema/ZodSchema
  content: string;
  }>;
  export type LlamaCppTextStreamChunk = (typeof llamaCppTextStreamChunkSchema)["_type"];
- export type LlamaCppTextGenerationResponseFormatType<T> = {
+ export type LlamaCppCompletionResponseFormatType<T> = {
  stream: boolean;
  handler: ResponseHandler<T>;
  };
- export declare const LlamaCppTextGenerationResponseFormat: {
+ export declare const LlamaCppCompletionResponseFormat: {
  /**
  * Returns the response as a JSON object.
  */
@@ -479,11 +566,11 @@ export declare const LlamaCppTextGenerationResponseFormat: {
  presence_penalty: number;
  repeat_last_n: number;
  repeat_penalty: number;
- temp: number;
  tfs_z: number;
  top_k: number;
  top_p: number;
  typical_p: number;
+ temperature?: number | undefined;
  };
  stopped_eos: boolean;
  stopped_limit: boolean;
@@ -494,10 +581,10 @@ export declare const LlamaCppTextGenerationResponseFormat: {
  predicted_n: number;
  predicted_per_second: number | null;
  predicted_per_token_ms: number | null;
- prompt_ms: number | null;
  prompt_n: number;
  prompt_per_second: number | null;
  prompt_per_token_ms: number | null;
+ prompt_ms?: number | null | undefined;
  };
  tokens_cached: number;
  tokens_evaluated: number;
@@ -537,11 +624,11 @@ export declare const LlamaCppTextGenerationResponseFormat: {
  presence_penalty: number;
  repeat_last_n: number;
  repeat_penalty: number;
- temp: number;
  tfs_z: number;
  top_k: number;
  top_p: number;
  typical_p: number;
+ temperature?: number | undefined;
  };
  stopped_eos: boolean;
  stopped_limit: boolean;
@@ -552,10 +639,10 @@ export declare const LlamaCppTextGenerationResponseFormat: {
  predicted_n: number;
  predicted_per_second: number | null;
  predicted_per_token_ms: number | null;
- prompt_ms: number | null;
  prompt_n: number;
  prompt_per_second: number | null;
  prompt_per_token_ms: number | null;
+ prompt_ms?: number | null | undefined;
  };
  tokens_cached: number;
  tokens_evaluated: number;
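
The declaration changes above rename LlamaCppTextGenerationModel to LlamaCppCompletionModel and document the new llama.cpp sampling settings (minP, presencePenalty, frequencyPenalty, penaltyPrompt, grammar, nProbs, cachePrompt, slotId). A minimal usage sketch, assuming a local llama.cpp server on the default endpoint and that `llamacpp` and `streamText` are re-exported from the package root as in earlier releases; the prompt and setting values are illustrative only:

  import { llamacpp, streamText } from "modelfusion";

  // All settings are optional; the values below are examples, not defaults.
  const model = llamacpp
    .TextGenerator({
      temperature: 0.7,
      topK: 40,
      minP: 0.05,        // new in this release
      cachePrompt: true, // new: lets the server reuse an unchanged prompt prefix
      slotId: -1,        // new: -1 assigns the request to an idle server slot
    })
    .withTextPrompt();

  const textStream = await streamText(model, "Write a short poem about llamas:");

  for await (const textPart of textStream) {
    process.stdout.write(textPart);
  }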

package/model-provider/llamacpp/{LlamaCppTextGenerationModel.js → LlamaCppCompletionModel.js}
@@ -11,7 +11,7 @@ import { parseEventSourceStream } from "../../util/streaming/parseEventSourceStr
  import { LlamaCppApiConfiguration } from "./LlamaCppApiConfiguration.js";
  import { failedLlamaCppCallResponseHandler } from "./LlamaCppError.js";
  import { LlamaCppTokenizer } from "./LlamaCppTokenizer.js";
- export class LlamaCppTextGenerationModel extends AbstractModel {
+ export class LlamaCppCompletionModel extends AbstractModel {
  constructor(settings = {}) {
  super({ settings });
  Object.defineProperty(this, "provider", {
@@ -53,10 +53,10 @@ export class LlamaCppTextGenerationModel extends AbstractModel {
  data,
  }))
  : undefined,
- cache_prompt: this.settings.cachePrompt,
  temperature: this.settings.temperature,
  top_k: this.settings.topK,
  top_p: this.settings.topP,
+ min_p: this.settings.minP,
  n_predict: this.settings.maxGenerationTokens,
  n_keep: this.settings.nKeep,
  stop: this.settings.stopSequences,
@@ -65,12 +65,19 @@ export class LlamaCppTextGenerationModel extends AbstractModel {
  repeat_penalty: this.settings.repeatPenalty,
  repeat_last_n: this.settings.repeatLastN,
  penalize_nl: this.settings.penalizeNl,
+ presence_penalty: this.settings.presencePenalty,
+ frequency_penalty: this.settings.frequencyPenalty,
+ penalty_prompt: this.settings.penaltyPrompt,
  mirostat: this.settings.mirostat,
  mirostat_tau: this.settings.mirostatTau,
  mirostat_eta: this.settings.mirostatEta,
+ grammar: this.settings.grammar,
  seed: this.settings.seed,
  ignore_eos: this.settings.ignoreEos,
  logit_bias: this.settings.logitBias,
+ n_probs: this.settings.nProbs,
+ cache_prompt: this.settings.cachePrompt,
+ slot_id: this.settings.slotId,
  },
  failedResponseHandler: failedLlamaCppCallResponseHandler,
  successfulResponseHandler: responseFormat.handler,
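
The hunk above maps each camelCase setting onto the corresponding snake_case field of the llama.cpp `/completion` request body. As a rough illustration (field names taken from the mapping shown; the values are invented), a model configured with the new settings would send a body along these lines:

  // Sketch of the JSON body sent to the llama.cpp /completion endpoint.
  const exampleRequestBody = {
    prompt: "hello",
    temperature: 0.8,
    top_k: 40,
    top_p: 0.95,
    min_p: 0.05,                       // settings.minP
    presence_penalty: 0.0,             // settings.presencePenalty
    frequency_penalty: 0.0,            // settings.frequencyPenalty
    grammar: 'root ::= "yes" | "no"',  // settings.grammar (GBNF grammar string)
    n_probs: 0,                        // settings.nProbs
    cache_prompt: true,                // settings.cachePrompt
    slot_id: -1,                       // settings.slotId
  };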

@@ -82,22 +89,29 @@ export class LlamaCppTextGenerationModel extends AbstractModel {
  const eventSettingProperties = [
  ...textGenerationModelProperties,
  "contextWindowSize",
- "cachePrompt",
  "temperature",
  "topK",
  "topP",
+ "minP",
  "nKeep",
  "tfsZ",
  "typicalP",
  "repeatPenalty",
  "repeatLastN",
  "penalizeNl",
+ "presencePenalty",
+ "frequencyPenalty",
+ "penaltyPrompt",
  "mirostat",
  "mirostatTau",
  "mirostatEta",
+ "grammar",
  "seed",
  "ignoreEos",
  "logitBias",
+ "nProbs",
+ "cachePrompt",
+ "slotId",
  ];
  return Object.fromEntries(Object.entries(this.settings).filter(([key]) => eventSettingProperties.includes(key)));
  }
@@ -108,7 +122,7 @@ export class LlamaCppTextGenerationModel extends AbstractModel {
  async doGenerateTexts(prompt, options) {
  const response = await this.callAPI(prompt, {
  ...options,
- responseFormat: LlamaCppTextGenerationResponseFormat.json,
+ responseFormat: LlamaCppCompletionResponseFormat.json,
  });
  return {
  response,
@@ -132,7 +146,7 @@ export class LlamaCppTextGenerationModel extends AbstractModel {
  doStreamText(prompt, options) {
  return this.callAPI(prompt, {
  ...options,
- responseFormat: LlamaCppTextGenerationResponseFormat.deltaIterable,
+ responseFormat: LlamaCppCompletionResponseFormat.deltaIterable,
  });
  }
  extractTextDelta(delta) {
@@ -175,7 +189,7 @@ export class LlamaCppTextGenerationModel extends AbstractModel {
  });
  }
  withSettings(additionalSettings) {
- return new LlamaCppTextGenerationModel(Object.assign({}, this.settings, additionalSettings));
+ return new LlamaCppCompletionModel(Object.assign({}, this.settings, additionalSettings));
  }
  }
  const llamaCppTextGenerationResponseSchema = z.object({
@@ -200,7 +214,7 @@ const llamaCppTextGenerationResponseSchema = z.object({
  seed: z.number(),
  stop: z.array(z.string()),
  stream: z.boolean(),
- temp: z.number(),
+ temperature: z.number().optional(), // optional for backwards compatibility
  tfs_z: z.number(),
  top_k: z.number(),
  top_p: z.number(),
@@ -217,7 +231,7 @@ const llamaCppTextGenerationResponseSchema = z.object({
  predicted_n: z.number(),
  predicted_per_second: z.number().nullable(),
  predicted_per_token_ms: z.number().nullable(),
- prompt_ms: z.number().nullable(),
+ prompt_ms: z.number().nullable().optional(),
  prompt_n: z.number(),
  prompt_per_second: z.number().nullable(),
  prompt_per_token_ms: z.number().nullable(),
@@ -263,7 +277,7 @@ async function createLlamaCppFullDeltaIterableQueue(stream) {
  });
  return queue;
  }
- export const LlamaCppTextGenerationResponseFormat = {
+ export const LlamaCppCompletionResponseFormat = {
  /**
  * Returns the response as a JSON object.
  */

package/model-provider/llamacpp/{LlamaCppTextGenerationModel.test.cjs → LlamaCppCompletionModel.test.cjs}
@@ -3,7 +3,7 @@ Object.defineProperty(exports, "__esModule", { value: true });
  const streamText_js_1 = require("../../model-function/generate-text/streamText.cjs");
  const StreamingTestServer_js_1 = require("../../test/StreamingTestServer.cjs");
  const arrayFromAsync_js_1 = require("../../test/arrayFromAsync.cjs");
- const LlamaCppTextGenerationModel_js_1 = require("./LlamaCppTextGenerationModel.cjs");
+ const LlamaCppCompletionModel_js_1 = require("./LlamaCppCompletionModel.cjs");
  describe("streamText", () => {
  const server = new StreamingTestServer_js_1.StreamingTestServer("http://127.0.0.1:8080/completion");
  server.setupTestEnvironment();
@@ -17,7 +17,7 @@ describe("streamText", () => {
  `"mirostat_eta":0.10000000149011612,"mirostat_tau":5.0,"model":"models/llama-2-7b-chat.Q4_K_M.gguf",` +
  `"n_ctx":4096,"n_keep":0,"n_predict":-1,"n_probs":0,"penalize_nl":true,"penalty_prompt_tokens":[],` +
  `"presence_penalty":0.0,"repeat_last_n":64,"repeat_penalty":1.100000023841858,"seed":4294967295,` +
- `"stop":[],"stream":true,"temp":0.800000011920929,"tfs_z":1.0,"top_k":40,"top_p":0.949999988079071,` +
+ `"stop":[],"stream":true,"temperature":0.800000011920929,"tfs_z":1.0,"top_k":40,"top_p":0.949999988079071,` +
  `"typical_p":1.0,"use_penalty_prompt_tokens":false},"model":"models/llama-2-7b-chat.Q4_K_M.gguf",` +
  `"prompt":"hello","slot_id":0,"stop":true,"stopped_eos":true,"stopped_limit":false,` +
  `"stopped_word":false,"stopping_word":"","timings":{"predicted_ms":1054.704,"predicted_n":69,` +
@@ -26,7 +26,7 @@ describe("streamText", () => {
  `"prompt_per_token_ms":48.845600000000005},"tokens_cached":74,"tokens_evaluated":5,` +
  `"tokens_predicted":69,"truncated":false}\n\n`,
  ];
- const stream = await (0, streamText_js_1.streamText)(new LlamaCppTextGenerationModel_js_1.LlamaCppTextGenerationModel().withTextPrompt(), "hello");
+ const stream = await (0, streamText_js_1.streamText)(new LlamaCppCompletionModel_js_1.LlamaCppCompletionModel().withTextPrompt(), "hello");
  // note: space moved to last chunk bc of trimming
  expect(await (0, arrayFromAsync_js_1.arrayFromAsync)(stream)).toStrictEqual([
  "Hello",

package/model-provider/llamacpp/{LlamaCppTextGenerationModel.test.js → LlamaCppCompletionModel.test.js}
@@ -1,7 +1,7 @@
  import { streamText } from "../../model-function/generate-text/streamText.js";
  import { StreamingTestServer } from "../../test/StreamingTestServer.js";
  import { arrayFromAsync } from "../../test/arrayFromAsync.js";
- import { LlamaCppTextGenerationModel } from "./LlamaCppTextGenerationModel.js";
+ import { LlamaCppCompletionModel } from "./LlamaCppCompletionModel.js";
  describe("streamText", () => {
  const server = new StreamingTestServer("http://127.0.0.1:8080/completion");
  server.setupTestEnvironment();
@@ -15,7 +15,7 @@ describe("streamText", () => {
  `"mirostat_eta":0.10000000149011612,"mirostat_tau":5.0,"model":"models/llama-2-7b-chat.Q4_K_M.gguf",` +
  `"n_ctx":4096,"n_keep":0,"n_predict":-1,"n_probs":0,"penalize_nl":true,"penalty_prompt_tokens":[],` +
  `"presence_penalty":0.0,"repeat_last_n":64,"repeat_penalty":1.100000023841858,"seed":4294967295,` +
- `"stop":[],"stream":true,"temp":0.800000011920929,"tfs_z":1.0,"top_k":40,"top_p":0.949999988079071,` +
+ `"stop":[],"stream":true,"temperature":0.800000011920929,"tfs_z":1.0,"top_k":40,"top_p":0.949999988079071,` +
  `"typical_p":1.0,"use_penalty_prompt_tokens":false},"model":"models/llama-2-7b-chat.Q4_K_M.gguf",` +
  `"prompt":"hello","slot_id":0,"stop":true,"stopped_eos":true,"stopped_limit":false,` +
  `"stopped_word":false,"stopping_word":"","timings":{"predicted_ms":1054.704,"predicted_n":69,` +
@@ -24,7 +24,7 @@ describe("streamText", () => {
  `"prompt_per_token_ms":48.845600000000005},"tokens_cached":74,"tokens_evaluated":5,` +
  `"tokens_predicted":69,"truncated":false}\n\n`,
  ];
- const stream = await streamText(new LlamaCppTextGenerationModel().withTextPrompt(), "hello");
+ const stream = await streamText(new LlamaCppCompletionModel().withTextPrompt(), "hello");
  // note: space moved to last chunk bc of trimming
  expect(await arrayFromAsync(stream)).toStrictEqual([
  "Hello",

package/model-provider/llamacpp/LlamaCppFacade.cjs
@@ -3,10 +3,10 @@ Object.defineProperty(exports, "__esModule", { value: true });
  exports.Tokenizer = exports.TextEmbedder = exports.TextGenerator = void 0;
  const LlamaCppApiConfiguration_js_1 = require("./LlamaCppApiConfiguration.cjs");
  const LlamaCppTextEmbeddingModel_js_1 = require("./LlamaCppTextEmbeddingModel.cjs");
- const LlamaCppTextGenerationModel_js_1 = require("./LlamaCppTextGenerationModel.cjs");
+ const LlamaCppCompletionModel_js_1 = require("./LlamaCppCompletionModel.cjs");
  const LlamaCppTokenizer_js_1 = require("./LlamaCppTokenizer.cjs");
  function TextGenerator(settings = {}) {
- return new LlamaCppTextGenerationModel_js_1.LlamaCppTextGenerationModel(settings);
+ return new LlamaCppCompletionModel_js_1.LlamaCppCompletionModel(settings);
  }
  exports.TextGenerator = TextGenerator;
  function TextEmbedder(settings = {}) {

package/model-provider/llamacpp/LlamaCppFacade.d.ts
@@ -1,7 +1,7 @@
  import { ApiConfiguration } from "../../core/api/ApiConfiguration.js";
  import { LlamaCppTextEmbeddingModel, LlamaCppTextEmbeddingModelSettings } from "./LlamaCppTextEmbeddingModel.js";
- import { LlamaCppTextGenerationModel, LlamaCppTextGenerationModelSettings } from "./LlamaCppTextGenerationModel.js";
+ import { LlamaCppCompletionModel, LlamaCppCompletionModelSettings } from "./LlamaCppCompletionModel.js";
  import { LlamaCppTokenizer } from "./LlamaCppTokenizer.js";
- export declare function TextGenerator<CONTEXT_WINDOW_SIZE extends number>(settings?: LlamaCppTextGenerationModelSettings<CONTEXT_WINDOW_SIZE>): LlamaCppTextGenerationModel<CONTEXT_WINDOW_SIZE>;
+ export declare function TextGenerator<CONTEXT_WINDOW_SIZE extends number>(settings?: LlamaCppCompletionModelSettings<CONTEXT_WINDOW_SIZE>): LlamaCppCompletionModel<CONTEXT_WINDOW_SIZE>;
  export declare function TextEmbedder(settings?: LlamaCppTextEmbeddingModelSettings): LlamaCppTextEmbeddingModel;
  export declare function Tokenizer(api?: ApiConfiguration): LlamaCppTokenizer;

package/model-provider/llamacpp/LlamaCppFacade.js
@@ -1,9 +1,9 @@
  import { LlamaCppApiConfiguration } from "./LlamaCppApiConfiguration.js";
  import { LlamaCppTextEmbeddingModel, } from "./LlamaCppTextEmbeddingModel.js";
- import { LlamaCppTextGenerationModel, } from "./LlamaCppTextGenerationModel.js";
+ import { LlamaCppCompletionModel, } from "./LlamaCppCompletionModel.js";
  import { LlamaCppTokenizer } from "./LlamaCppTokenizer.js";
  export function TextGenerator(settings = {}) {
- return new LlamaCppTextGenerationModel(settings);
+ return new LlamaCppCompletionModel(settings);
  }
  export function TextEmbedder(settings = {}) {
  return new LlamaCppTextEmbeddingModel(settings);

package/model-provider/llamacpp/index.cjs
@@ -33,5 +33,5 @@ var LlamaCppError_js_1 = require("./LlamaCppError.cjs");
  Object.defineProperty(exports, "LlamaCppError", { enumerable: true, get: function () { return LlamaCppError_js_1.LlamaCppError; } });
  exports.llamacpp = __importStar(require("./LlamaCppFacade.cjs"));
  __exportStar(require("./LlamaCppTextEmbeddingModel.cjs"), exports);
- __exportStar(require("./LlamaCppTextGenerationModel.cjs"), exports);
+ __exportStar(require("./LlamaCppCompletionModel.cjs"), exports);
  __exportStar(require("./LlamaCppTokenizer.cjs"), exports);
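
Taken together, the renames above mean that consumers importing the llama.cpp text generation model directly need to switch to the new class name. A hedged migration sketch, assuming the class remains re-exported from the package root as in previous releases:

  // 0.106.0 (old name):
  // import { LlamaCppTextGenerationModel } from "modelfusion";
  // const model = new LlamaCppTextGenerationModel({ temperature: 0.8 });

  // 0.108.0 (new name, same settings shape plus the new options):
  import { LlamaCppCompletionModel } from "modelfusion";
  const model = new LlamaCppCompletionModel({ temperature: 0.8 });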