modelfusion 0.107.0 → 0.109.0
This diff shows the changes between publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.
- package/CHANGELOG.md +44 -0
- package/README.md +11 -10
- package/model-provider/llamacpp/LlamaCppCompletionModel.cjs +17 -3
- package/model-provider/llamacpp/LlamaCppCompletionModel.d.ts +99 -12
- package/model-provider/llamacpp/LlamaCppCompletionModel.js +17 -3
- package/model-provider/openai/AbstractOpenAIChatModel.cjs +2 -10
- package/model-provider/openai/AbstractOpenAIChatModel.d.ts +5 -187
- package/model-provider/openai/AbstractOpenAIChatModel.js +2 -10
- package/model-provider/openai/AbstractOpenAICompletionModel.cjs +167 -0
- package/model-provider/openai/AbstractOpenAICompletionModel.d.ts +199 -0
- package/model-provider/openai/AbstractOpenAICompletionModel.js +163 -0
- package/model-provider/openai/OpenAIChatFunctionCallStructureGenerationModel.d.ts +0 -2
- package/model-provider/openai/OpenAIChatModel.d.ts +3 -6
- package/model-provider/openai/OpenAICompletionModel.cjs +4 -156
- package/model-provider/openai/OpenAICompletionModel.d.ts +4 -191
- package/model-provider/openai/OpenAICompletionModel.js +3 -155
- package/model-provider/openai/index.cjs +1 -0
- package/model-provider/openai/index.d.ts +1 -0
- package/model-provider/openai/index.js +1 -0
- package/model-provider/openai-compatible/OpenAICompatibleChatModel.d.ts +4 -5
- package/model-provider/openai-compatible/OpenAICompatibleCompletionModel.cjs +74 -0
- package/model-provider/openai-compatible/OpenAICompatibleCompletionModel.d.ts +27 -0
- package/model-provider/openai-compatible/OpenAICompatibleCompletionModel.js +70 -0
- package/model-provider/openai-compatible/OpenAICompatibleFacade.cjs +37 -6
- package/model-provider/openai-compatible/OpenAICompatibleFacade.d.ts +33 -5
- package/model-provider/openai-compatible/OpenAICompatibleFacade.js +35 -5
- package/model-provider/openai-compatible/OpenAICompatibleProviderName.cjs +2 -0
- package/model-provider/openai-compatible/OpenAICompatibleProviderName.d.ts +1 -0
- package/model-provider/openai-compatible/OpenAICompatibleProviderName.js +1 -0
- package/model-provider/openai-compatible/TogetherAIApiConfiguration.cjs +29 -0
- package/model-provider/openai-compatible/TogetherAIApiConfiguration.d.ts +18 -0
- package/model-provider/openai-compatible/TogetherAIApiConfiguration.js +25 -0
- package/model-provider/openai-compatible/index.cjs +4 -1
- package/model-provider/openai-compatible/index.d.ts +4 -1
- package/model-provider/openai-compatible/index.js +4 -1
- package/package.json +1 -1
package/CHANGELOG.md
CHANGED
@@ -1,5 +1,49 @@
 # Changelog
 
+## v0.109.0 - 2023-12-30
+
+### Added
+
+- [Open AI compatible completion model](https://modelfusion.dev/integration/model-provider/openaicompatible/). It e.g. works with Fireworks AI.
+- Together AI API configuration (for Open AI compatible chat models):
+
+  ```ts
+  import {
+    TogetherAIApiConfiguration,
+    openaicompatible,
+    streamText,
+  } from "modelfusion";
+
+  const textStream = await streamText(
+    openaicompatible
+      .ChatTextGenerator({
+        api: new TogetherAIApiConfiguration(),
+        model: "mistralai/Mixtral-8x7B-Instruct-v0.1",
+      })
+      .withTextPrompt(),
+
+    "Write a story about a robot learning to love"
+  );
+  ```
+
+- Updated Llama.cpp model settings. GBNF grammars can be passed into the `grammar` setting:
+
+  ```ts
+  const text = await generateText(
+    llamacpp
+      .TextGenerator({
+        maxGenerationTokens: 512,
+        temperature: 0,
+        // simple list grammar:
+        grammar: `root ::= ("- " item)+
+  item ::= [^\\n]+ "\\n"`,
+      })
+      .withTextPromptTemplate(MistralInstructPrompt.text()),
+
+    "List 5 ingredients for a lasagna:\n\n"
+  );
+  ```
+
 ## v0.107.0 - 2023-12-29
 
 ### Added
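The changelog entry above introduces an OpenAI-compatible completion model but only shows the chat path. A minimal sketch of the completion path, assuming the facade function added in `OpenAICompatibleFacade` is exposed as `CompletionTextGenerator` and using an illustrative Together AI model id:

```ts
import {
  TogetherAIApiConfiguration,
  openaicompatible,
  generateText,
} from "modelfusion";

// Sketch only: the CompletionTextGenerator facade name and the model id
// are assumptions, not confirmed by this diff.
const text = await generateText(
  openaicompatible.CompletionTextGenerator({
    api: new TogetherAIApiConfiguration(),
    model: "mistralai/Mixtral-8x7B-v0.1", // illustrative model id
    maxGenerationTokens: 200,
  }),

  "Write a short poem about open source:\n\n"
);
```

Per the changelog, the same model class also works with other OpenAI-compatible providers such as Fireworks AI, given a matching API configuration.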
package/README.md
CHANGED
@@ -538,16 +538,17 @@ const textStream = await streamText(
 );
 ```
 
-| Prompt Template
-|
-| OpenAI Chat
-| Anthropic
-| Llama 2
-| ChatML
-| NeuralChat
-|
-|
-|
+| Prompt Template  | Text Prompt | Instruction Prompt | Chat Prompt |
+| ---------------- | ----------- | ------------------ | ----------- |
+| OpenAI Chat      | ✅          | ✅                 | ✅          |
+| Anthropic        | ✅          | ✅                 | ✅          |
+| Llama 2          | ✅          | ✅                 | ✅          |
+| ChatML           | ✅          | ✅                 | ✅          |
+| NeuralChat       | ✅          | ✅                 | ✅          |
+| Mistral Instruct | ✅          | ✅                 | ✅          |
+| Alpaca           | ✅          | ✅                 | ❌          |
+| Vicuna           | ❌          | ❌                 | ✅          |
+| Generic Text     | ✅          | ✅                 | ✅          |
 
 ### [Image Generation Prompt Templates](https://modelfusion.dev/guide/function/generate-image/prompt-format)
 
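In the table above, each row is a prompt-template provider and each column a prompt style it can map onto the model's native prompt. As a hedged sketch of one cell ("OpenAI Chat" row, "Text Prompt" column), assuming the standard `openai.ChatTextGenerator` facade:

```ts
import { openai, streamText } from "modelfusion";

// Treat a plain text prompt as a single user message for the OpenAI chat API.
const textStream = await streamText(
  openai
    .ChatTextGenerator({ model: "gpt-3.5-turbo" })
    .withTextPrompt(),

  "Explain the difference between instruction prompts and chat prompts."
);

for await (const textPart of textStream) {
  process.stdout.write(textPart);
}
```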
package/model-provider/llamacpp/LlamaCppCompletionModel.cjs
CHANGED
@@ -56,10 +56,10 @@ class LlamaCppCompletionModel extends AbstractModel_js_1.AbstractModel {
                     data,
                 }))
                 : undefined,
-            cache_prompt: this.settings.cachePrompt,
             temperature: this.settings.temperature,
             top_k: this.settings.topK,
             top_p: this.settings.topP,
+            min_p: this.settings.minP,
             n_predict: this.settings.maxGenerationTokens,
             n_keep: this.settings.nKeep,
             stop: this.settings.stopSequences,
@@ -68,12 +68,19 @@ class LlamaCppCompletionModel extends AbstractModel_js_1.AbstractModel {
             repeat_penalty: this.settings.repeatPenalty,
             repeat_last_n: this.settings.repeatLastN,
             penalize_nl: this.settings.penalizeNl,
+            presence_penalty: this.settings.presencePenalty,
+            frequency_penalty: this.settings.frequencyPenalty,
+            penalty_prompt: this.settings.penaltyPrompt,
             mirostat: this.settings.mirostat,
             mirostat_tau: this.settings.mirostatTau,
             mirostat_eta: this.settings.mirostatEta,
+            grammar: this.settings.grammar,
             seed: this.settings.seed,
             ignore_eos: this.settings.ignoreEos,
             logit_bias: this.settings.logitBias,
+            n_probs: this.settings.nProbs,
+            cache_prompt: this.settings.cachePrompt,
+            slot_id: this.settings.slotId,
         },
         failedResponseHandler: LlamaCppError_js_1.failedLlamaCppCallResponseHandler,
         successfulResponseHandler: responseFormat.handler,
@@ -85,22 +92,29 @@ class LlamaCppCompletionModel extends AbstractModel_js_1.AbstractModel {
         const eventSettingProperties = [
             ...TextGenerationModel_js_1.textGenerationModelProperties,
             "contextWindowSize",
-            "cachePrompt",
             "temperature",
             "topK",
             "topP",
+            "minP",
             "nKeep",
             "tfsZ",
             "typicalP",
             "repeatPenalty",
             "repeatLastN",
             "penalizeNl",
+            "presencePenalty",
+            "frequencyPenalty",
+            "penaltyPrompt",
             "mirostat",
             "mirostatTau",
            "mirostatEta",
+            "grammar",
             "seed",
             "ignoreEos",
             "logitBias",
+            "nProbs",
+            "cachePrompt",
+            "slotId",
         ];
         return Object.fromEntries(Object.entries(this.settings).filter(([key]) => eventSettingProperties.includes(key)));
     }
@@ -221,7 +235,7 @@ const llamaCppTextGenerationResponseSchema = zod_1.z.object({
     predicted_n: zod_1.z.number(),
     predicted_per_second: zod_1.z.number().nullable(),
     predicted_per_token_ms: zod_1.z.number().nullable(),
-    prompt_ms: zod_1.z.number().nullable(),
+    prompt_ms: zod_1.z.number().nullable().optional(),
     prompt_n: zod_1.z.number(),
     prompt_per_second: zod_1.z.number().nullable(),
    prompt_per_token_ms: zod_1.z.number().nullable(),
package/model-provider/llamacpp/LlamaCppCompletionModel.d.ts
CHANGED
@@ -16,24 +16,111 @@ export interface LlamaCppCompletionModelSettings<CONTEXT_WINDOW_SIZE extends num
      */
     contextWindowSize?: CONTEXT_WINDOW_SIZE;
     /**
-     *
+     * Adjust the randomness of the generated text (default: 0.8).
      */
-    cachePrompt?: boolean;
     temperature?: number;
+    /**
+     * Limit the next token selection to the K most probable tokens (default: 40).
+     */
     topK?: number;
+    /**
+     * Limit the next token selection to a subset of tokens with a cumulative probability above a threshold P (default: 0.95).
+     */
     topP?: number;
+    /**
+     * The minimum probability for a token to be considered, relative to the probability of the most likely token (default: 0.05).
+     */
+    minP?: number;
+    /**
+     * Specify the number of tokens from the prompt to retain when the context size is exceeded
+     * and tokens need to be discarded. By default, this value is set to 0 (meaning no tokens
+     * are kept). Use -1 to retain all tokens from the prompt.
+     */
     nKeep?: number;
+    /**
+     * Enable tail free sampling with parameter z (default: 1.0, 1.0 = disabled).
+     */
     tfsZ?: number;
+    /**
+     * Enable locally typical sampling with parameter p (default: 1.0, 1.0 = disabled).
+     */
     typicalP?: number;
+    /**
+     * Control the repetition of token sequences in the generated text (default: 1.1).
+     */
     repeatPenalty?: number;
+    /**
+     * Last n tokens to consider for penalizing repetition (default: 64, 0 = disabled, -1 = ctx-size).
+     */
     repeatLastN?: number;
+    /**
+     * Penalize newline tokens when applying the repeat penalty (default: true).
+     */
     penalizeNl?: boolean;
+    /**
+     * Repeat alpha presence penalty (default: 0.0, 0.0 = disabled).
+     */
+    presencePenalty?: number;
+    /**
+     * Repeat alpha frequency penalty (default: 0.0, 0.0 = disabled).
+     */
+    frequencyPenalty?: number;
+    /**
+     * This will replace the prompt for the purpose of the penalty evaluation.
+     * Can be either null, a string or an array of numbers representing tokens
+     * (default: null = use the original prompt).
+     */
+    penaltyPrompt?: string | number[];
+    /**
+     * Enable Mirostat sampling, controlling perplexity during text generation
+     * (default: 0, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0).
+     */
     mirostat?: number;
+    /**
+     * Set the Mirostat target entropy, parameter tau (default: 5.0).
+     */
     mirostatTau?: number;
+    /**
+     * Set the Mirostat learning rate, parameter eta (default: 0.1).
+     */
     mirostatEta?: number;
+    /**
+     * Set grammar for grammar-based sampling (default: no grammar)
+     *
+     * @see https://github.com/ggerganov/llama.cpp/blob/master/grammars/README.md
+     */
+    grammar?: string;
+    /**
+     * Set the random number generator (RNG) seed
+     * (default: -1, -1 = random seed).
+     */
     seed?: number;
+    /**
+     * Ignore end of stream token and continue generating (default: false).
+     */
     ignoreEos?: boolean;
+    /**
+     * Modify the likelihood of a token appearing in the generated text completion.
+     * For example, use "logit_bias": [[15043,1.0]] to increase the likelihood of the token
+     * 'Hello', or "logit_bias": [[15043,-1.0]] to decrease its likelihood.
+     * Setting the value to false, "logit_bias": [[15043,false]] ensures that the token Hello is
+     * never produced (default: []).
+     */
     logitBias?: Array<[number, number | false]>;
+    /**
+     * If greater than 0, the response also contains the probabilities of top N tokens
+     * for each generated token (default: 0)
+     */
+    nProbs?: number;
+    /**
+     * Save the prompt and generation for avoid reprocess entire prompt if a part of this isn't change (default: false)
+     */
+    cachePrompt?: boolean;
+    /**
+     * Assign the completion task to an specific slot.
+     * If is -1 the task will be assigned to a Idle slot (default: -1)
+     */
+    slotId?: number;
 }
 export interface LlamaCppCompletionPrompt {
     /**
@@ -96,10 +183,10 @@ export declare class LlamaCppCompletionModel<CONTEXT_WINDOW_SIZE extends number
             predicted_n: number;
             predicted_per_second: number | null;
             predicted_per_token_ms: number | null;
-            prompt_ms: number | null;
             prompt_n: number;
             prompt_per_second: number | null;
             prompt_per_token_ms: number | null;
+            prompt_ms?: number | null | undefined;
         };
         tokens_cached: number;
         tokens_evaluated: number;
@@ -155,10 +242,10 @@ export declare class LlamaCppCompletionModel<CONTEXT_WINDOW_SIZE extends number
             predicted_n: number;
             predicted_per_second: number | null;
             predicted_per_token_ms: number | null;
-            prompt_ms: number | null;
             prompt_n: number;
             prompt_per_second: number | null;
             prompt_per_token_ms: number | null;
+            prompt_ms?: number | null | undefined;
         };
         tokens_cached: number;
         tokens_evaluated: number;
@@ -267,7 +354,7 @@ declare const llamaCppTextGenerationResponseSchema: z.ZodObject<{
         predicted_n: z.ZodNumber;
         predicted_per_second: z.ZodNullable<z.ZodNumber>;
         predicted_per_token_ms: z.ZodNullable<z.ZodNumber>;
-        prompt_ms: z.ZodNullable<z.ZodNumber>;
+        prompt_ms: z.ZodOptional<z.ZodNullable<z.ZodNumber>>;
         prompt_n: z.ZodNumber;
         prompt_per_second: z.ZodNullable<z.ZodNumber>;
         prompt_per_token_ms: z.ZodNullable<z.ZodNumber>;
@@ -276,19 +363,19 @@ declare const llamaCppTextGenerationResponseSchema: z.ZodObject<{
         predicted_n: number;
         predicted_per_second: number | null;
         predicted_per_token_ms: number | null;
-        prompt_ms: number | null;
         prompt_n: number;
         prompt_per_second: number | null;
         prompt_per_token_ms: number | null;
+        prompt_ms?: number | null | undefined;
     }, {
         predicted_ms: number;
         predicted_n: number;
         predicted_per_second: number | null;
         predicted_per_token_ms: number | null;
-        prompt_ms: number | null;
         prompt_n: number;
         prompt_per_second: number | null;
         prompt_per_token_ms: number | null;
+        prompt_ms?: number | null | undefined;
     }>;
     tokens_cached: z.ZodNumber;
     tokens_evaluated: z.ZodNumber;
@@ -333,10 +420,10 @@ declare const llamaCppTextGenerationResponseSchema: z.ZodObject<{
         predicted_n: number;
         predicted_per_second: number | null;
         predicted_per_token_ms: number | null;
-        prompt_ms: number | null;
         prompt_n: number;
         prompt_per_second: number | null;
         prompt_per_token_ms: number | null;
+        prompt_ms?: number | null | undefined;
     };
     tokens_cached: number;
     tokens_evaluated: number;
@@ -381,10 +468,10 @@ declare const llamaCppTextGenerationResponseSchema: z.ZodObject<{
         predicted_n: number;
         predicted_per_second: number | null;
         predicted_per_token_ms: number | null;
-        prompt_ms: number | null;
         prompt_n: number;
         prompt_per_second: number | null;
         prompt_per_token_ms: number | null;
+        prompt_ms?: number | null | undefined;
     };
     tokens_cached: number;
     tokens_evaluated: number;
@@ -431,10 +518,10 @@ declare const llamaCppTextStreamChunkSchema: import("../../core/schema/ZodSchema
         predicted_n: number;
         predicted_per_second: number | null;
         predicted_per_token_ms: number | null;
-        prompt_ms: number | null;
         prompt_n: number;
         prompt_per_second: number | null;
         prompt_per_token_ms: number | null;
+        prompt_ms?: number | null | undefined;
     };
     tokens_cached: number;
     tokens_evaluated: number;
@@ -494,10 +581,10 @@ export declare const LlamaCppCompletionResponseFormat: {
         predicted_n: number;
         predicted_per_second: number | null;
         predicted_per_token_ms: number | null;
-        prompt_ms: number | null;
         prompt_n: number;
         prompt_per_second: number | null;
         prompt_per_token_ms: number | null;
+        prompt_ms?: number | null | undefined;
     };
     tokens_cached: number;
     tokens_evaluated: number;
@@ -552,10 +639,10 @@ export declare const LlamaCppCompletionResponseFormat: {
         predicted_n: number;
         predicted_per_second: number | null;
         predicted_per_token_ms: number | null;
-        prompt_ms: number | null;
         prompt_n: number;
         prompt_per_second: number | null;
         prompt_per_token_ms: number | null;
+        prompt_ms?: number | null | undefined;
     };
     tokens_cached: number;
     tokens_evaluated: number;
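The new Llama.cpp settings documented above are passed straight through to the llama.cpp server request, as the `.cjs` hunks above and the `.js` hunks below show (`minP` → `min_p`, `presencePenalty` → `presence_penalty`, `frequencyPenalty` → `frequency_penalty`, `nProbs` → `n_probs`, `cachePrompt` → `cache_prompt`, `slotId` → `slot_id`). A minimal usage sketch, assuming `MistralInstructPrompt` is exported from the package root as in the changelog example:

```ts
import { MistralInstructPrompt, generateText, llamacpp } from "modelfusion";

const text = await generateText(
  llamacpp
    .TextGenerator({
      maxGenerationTokens: 256,
      temperature: 0.7,
      minP: 0.05,            // sent as min_p
      presencePenalty: 0.1,  // sent as presence_penalty
      frequencyPenalty: 0.1, // sent as frequency_penalty
      cachePrompt: true,     // sent as cache_prompt
      slotId: -1,            // sent as slot_id (-1 = assign to an idle slot)
    })
    .withTextPromptTemplate(MistralInstructPrompt.text()),

  "Explain tail free sampling in one paragraph.\n\n"
);
```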
package/model-provider/llamacpp/LlamaCppCompletionModel.js
CHANGED
@@ -53,10 +53,10 @@ export class LlamaCppCompletionModel extends AbstractModel {
                     data,
                 }))
                 : undefined,
-            cache_prompt: this.settings.cachePrompt,
             temperature: this.settings.temperature,
             top_k: this.settings.topK,
             top_p: this.settings.topP,
+            min_p: this.settings.minP,
             n_predict: this.settings.maxGenerationTokens,
             n_keep: this.settings.nKeep,
             stop: this.settings.stopSequences,
@@ -65,12 +65,19 @@ export class LlamaCppCompletionModel extends AbstractModel {
             repeat_penalty: this.settings.repeatPenalty,
             repeat_last_n: this.settings.repeatLastN,
             penalize_nl: this.settings.penalizeNl,
+            presence_penalty: this.settings.presencePenalty,
+            frequency_penalty: this.settings.frequencyPenalty,
+            penalty_prompt: this.settings.penaltyPrompt,
             mirostat: this.settings.mirostat,
             mirostat_tau: this.settings.mirostatTau,
             mirostat_eta: this.settings.mirostatEta,
+            grammar: this.settings.grammar,
             seed: this.settings.seed,
             ignore_eos: this.settings.ignoreEos,
             logit_bias: this.settings.logitBias,
+            n_probs: this.settings.nProbs,
+            cache_prompt: this.settings.cachePrompt,
+            slot_id: this.settings.slotId,
         },
         failedResponseHandler: failedLlamaCppCallResponseHandler,
         successfulResponseHandler: responseFormat.handler,
@@ -82,22 +89,29 @@ export class LlamaCppCompletionModel extends AbstractModel {
         const eventSettingProperties = [
             ...textGenerationModelProperties,
             "contextWindowSize",
-            "cachePrompt",
             "temperature",
             "topK",
             "topP",
+            "minP",
             "nKeep",
             "tfsZ",
             "typicalP",
             "repeatPenalty",
             "repeatLastN",
             "penalizeNl",
+            "presencePenalty",
+            "frequencyPenalty",
+            "penaltyPrompt",
             "mirostat",
             "mirostatTau",
             "mirostatEta",
+            "grammar",
             "seed",
             "ignoreEos",
             "logitBias",
+            "nProbs",
+            "cachePrompt",
+            "slotId",
         ];
         return Object.fromEntries(Object.entries(this.settings).filter(([key]) => eventSettingProperties.includes(key)));
     }
@@ -217,7 +231,7 @@ const llamaCppTextGenerationResponseSchema = z.object({
     predicted_n: z.number(),
     predicted_per_second: z.number().nullable(),
     predicted_per_token_ms: z.number().nullable(),
-    prompt_ms: z.number().nullable(),
+    prompt_ms: z.number().nullable().optional(),
     prompt_n: z.number(),
     prompt_per_second: z.number().nullable(),
     prompt_per_token_ms: z.number().nullable(),
package/model-provider/openai/AbstractOpenAIChatModel.cjs
CHANGED
@@ -232,7 +232,7 @@ const openAIChatResponseSchema = zod_1.z.object({
         total_tokens: zod_1.z.number(),
     }),
 });
-const chatCompletionChunkSchema = zod_1.z.object({
+const openaiChatChunkSchema = (0, ZodSchema_js_1.zodSchema)(zod_1.z.object({
     object: zod_1.z.literal("chat.completion.chunk"),
     id: zod_1.z.string(),
     choices: zod_1.z.array(zod_1.z.object({
@@ -271,15 +271,7 @@ const chatCompletionChunkSchema = zod_1.z.object({
     created: zod_1.z.number(),
     model: zod_1.z.string(),
     system_fingerprint: zod_1.z.string().optional().nullable(),
-});
-const openaiChatChunkSchema = (0, ZodSchema_js_1.zodSchema)(zod_1.z.union([
-    chatCompletionChunkSchema,
-    zod_1.z.object({
-        object: zod_1.z.string().refine((obj) => obj !== "chat.completion.chunk", {
-            message: "Object must be 'chat.completion.chunk'",
-        }),
-    }),
-]));
+}));
 exports.OpenAIChatResponseFormat = {
     /**
      * Returns the response as a JSON object.