modelfusion 0.94.0 → 0.95.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -57,6 +57,7 @@ class LlamaCppTextGenerationModel extends AbstractModel_js_1.AbstractModel {
     "maxCompletionTokens",
     "stopSequences",
     "contextWindowSize",
+    "cachePrompt",
     "temperature",
     "topK",
     "topP",
@@ -197,13 +198,14 @@ const llamaCppTextStreamingResponseSchema = new ZodSchema_js_1.ZodSchema(zod_1.z
     }),
     llamaCppTextGenerationResponseSchema,
 ]));
-async function callLlamaCppTextGenerationAPI({ api = new LlamaCppApiConfiguration_js_1.LlamaCppApiConfiguration(), abortSignal, responseFormat, prompt, temperature, topK, topP, nPredict, nKeep, stop, tfsZ, typicalP, repeatPenalty, repeatLastN, penalizeNl, mirostat, mirostatTau, mirostatEta, seed, ignoreEos, logitBias, }) {
+async function callLlamaCppTextGenerationAPI({ api = new LlamaCppApiConfiguration_js_1.LlamaCppApiConfiguration(), abortSignal, responseFormat, prompt, cachePrompt, temperature, topK, topP, nPredict, nKeep, stop, tfsZ, typicalP, repeatPenalty, repeatLastN, penalizeNl, mirostat, mirostatTau, mirostatEta, seed, ignoreEos, logitBias, }) {
     return (0, postToApi_js_1.postJsonToApi)({
         url: api.assembleUrl(`/completion`),
         headers: api.headers,
         body: {
             stream: responseFormat.stream,
             prompt: prompt.text,
+            cache_prompt: cachePrompt,
             temperature,
             top_k: topK,
             top_p: topP,
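For illustration only (not part of the published diff): with the new setting threaded through, the JSON body posted to the llama.cpp server's /completion endpoint would look roughly like the sketch below. All concrete values are hypothetical placeholders.

// Hypothetical request body built by callLlamaCppTextGenerationAPI when
// cachePrompt is enabled; values are illustrative, not taken from the package.
const body = {
  stream: false,
  prompt: "Once upon a time", // prompt.text
  cache_prompt: true,         // new in 0.95.0: forwards the cachePrompt setting
  temperature: 0.7,
  top_k: 40,
  top_p: 0.9,
};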
@@ -15,6 +15,10 @@ export interface LlamaCppTextGenerationModelSettings<CONTEXT_WINDOW_SIZE extends
      * Llama.cpp server.
      */
     contextWindowSize?: CONTEXT_WINDOW_SIZE;
+    /**
+     * Save the prompt and generation for avoid reprocess entire prompt if a part of this isn't change (default: false)
+     */
+    cachePrompt?: boolean;
     temperature?: number;
     topK?: number;
     topP?: number;
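A minimal usage sketch (not from the diff) of the new cachePrompt setting. The generateText call shape and the withTextPrompt() helper are assumptions based on modelfusion's documented API around this release:

import { generateText, LlamaCppTextGenerationModel } from "modelfusion";

// Enable llama.cpp prompt caching so repeated calls that share a prefix
// skip re-evaluating that prefix on the server (cachePrompt -> cache_prompt).
const text = await generateText(
  new LlamaCppTextGenerationModel({
    maxCompletionTokens: 256,
    temperature: 0.7,
    cachePrompt: true,
  }).withTextPrompt(), // assumed helper that accepts a plain string prompt
  "Summarize the changes in modelfusion 0.95.0."
);

Per the new doc comment above, caching mainly helps when consecutive requests reuse a long, unchanged prompt prefix (for example, a fixed system prompt followed by varying user input).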
@@ -54,6 +54,7 @@ export class LlamaCppTextGenerationModel extends AbstractModel {
     "maxCompletionTokens",
     "stopSequences",
     "contextWindowSize",
+    "cachePrompt",
     "temperature",
     "topK",
     "topP",
@@ -193,13 +194,14 @@ const llamaCppTextStreamingResponseSchema = new ZodSchema(z.discriminatedUnion("
     }),
     llamaCppTextGenerationResponseSchema,
 ]));
-async function callLlamaCppTextGenerationAPI({ api = new LlamaCppApiConfiguration(), abortSignal, responseFormat, prompt, temperature, topK, topP, nPredict, nKeep, stop, tfsZ, typicalP, repeatPenalty, repeatLastN, penalizeNl, mirostat, mirostatTau, mirostatEta, seed, ignoreEos, logitBias, }) {
+async function callLlamaCppTextGenerationAPI({ api = new LlamaCppApiConfiguration(), abortSignal, responseFormat, prompt, cachePrompt, temperature, topK, topP, nPredict, nKeep, stop, tfsZ, typicalP, repeatPenalty, repeatLastN, penalizeNl, mirostat, mirostatTau, mirostatEta, seed, ignoreEos, logitBias, }) {
     return postJsonToApi({
         url: api.assembleUrl(`/completion`),
         headers: api.headers,
         body: {
             stream: responseFormat.stream,
             prompt: prompt.text,
+            cache_prompt: cachePrompt,
             temperature,
             top_k: topK,
             top_p: topP,