modelfusion 0.94.0 → 0.95.0

@@ -57,6 +57,7 @@ class LlamaCppTextGenerationModel extends AbstractModel_js_1.AbstractModel {
  "maxCompletionTokens",
  "stopSequences",
  "contextWindowSize",
+ "cachePrompt",
  "temperature",
  "topK",
  "topP",
@@ -197,13 +198,14 @@ const llamaCppTextStreamingResponseSchema = new ZodSchema_js_1.ZodSchema(zod_1.z
  }),
  llamaCppTextGenerationResponseSchema,
  ]));
- async function callLlamaCppTextGenerationAPI({ api = new LlamaCppApiConfiguration_js_1.LlamaCppApiConfiguration(), abortSignal, responseFormat, prompt, temperature, topK, topP, nPredict, nKeep, stop, tfsZ, typicalP, repeatPenalty, repeatLastN, penalizeNl, mirostat, mirostatTau, mirostatEta, seed, ignoreEos, logitBias, }) {
+ async function callLlamaCppTextGenerationAPI({ api = new LlamaCppApiConfiguration_js_1.LlamaCppApiConfiguration(), abortSignal, responseFormat, prompt, cachePrompt, temperature, topK, topP, nPredict, nKeep, stop, tfsZ, typicalP, repeatPenalty, repeatLastN, penalizeNl, mirostat, mirostatTau, mirostatEta, seed, ignoreEos, logitBias, }) {
  return (0, postToApi_js_1.postJsonToApi)({
  url: api.assembleUrl(`/completion`),
  headers: api.headers,
  body: {
  stream: responseFormat.stream,
  prompt: prompt.text,
+ cache_prompt: cachePrompt,
  temperature,
  top_k: topK,
  top_p: topP,
@@ -15,6 +15,10 @@ export interface LlamaCppTextGenerationModelSettings<CONTEXT_WINDOW_SIZE extends
  * Llama.cpp server.
  */
  contextWindowSize?: CONTEXT_WINDOW_SIZE;
+ /**
+  * Cache the prompt and generation so that the entire prompt is not reprocessed when only part of it changes (default: false).
+  */
+ cachePrompt?: boolean;
  temperature?: number;
  topK?: number;
  topP?: number;
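For library users, the option is enabled through the model settings declared above. A minimal sketch, assuming `LlamaCppTextGenerationModel` is available from the top-level `modelfusion` export at this version; the other setting values are illustrative.

// Sketch only: construct a llama.cpp text generation model with prompt caching enabled.
import { LlamaCppTextGenerationModel } from "modelfusion";

const model = new LlamaCppTextGenerationModel({
  cachePrompt: true, // new in 0.95.0, defaults to false
  temperature: 0.7,
  maxCompletionTokens: 256,
});

With cachePrompt enabled, repeated calls that share a long common prefix (for example, a growing chat transcript) should avoid re-evaluating that prefix on the server.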
@@ -54,6 +54,7 @@ export class LlamaCppTextGenerationModel extends AbstractModel {
  "maxCompletionTokens",
  "stopSequences",
  "contextWindowSize",
+ "cachePrompt",
  "temperature",
  "topK",
  "topP",
@@ -193,13 +194,14 @@ const llamaCppTextStreamingResponseSchema = new ZodSchema(z.discriminatedUnion("
  }),
  llamaCppTextGenerationResponseSchema,
  ]));
- async function callLlamaCppTextGenerationAPI({ api = new LlamaCppApiConfiguration(), abortSignal, responseFormat, prompt, temperature, topK, topP, nPredict, nKeep, stop, tfsZ, typicalP, repeatPenalty, repeatLastN, penalizeNl, mirostat, mirostatTau, mirostatEta, seed, ignoreEos, logitBias, }) {
+ async function callLlamaCppTextGenerationAPI({ api = new LlamaCppApiConfiguration(), abortSignal, responseFormat, prompt, cachePrompt, temperature, topK, topP, nPredict, nKeep, stop, tfsZ, typicalP, repeatPenalty, repeatLastN, penalizeNl, mirostat, mirostatTau, mirostatEta, seed, ignoreEos, logitBias, }) {
  return postJsonToApi({
  url: api.assembleUrl(`/completion`),
  headers: api.headers,
  body: {
  stream: responseFormat.stream,
  prompt: prompt.text,
+ cache_prompt: cachePrompt,
  temperature,
  top_k: topK,
  top_p: topP,
package/package.json CHANGED
@@ -1,7 +1,7 @@
  {
  "name": "modelfusion",
  "description": "The TypeScript library for building multi-modal AI applications.",
- "version": "0.94.0",
+ "version": "0.95.0",
  "author": "Lars Grammel",
  "license": "MIT",
  "keywords": [