modelfusion 0.106.0 → 0.108.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +59 -0
- package/README.md +19 -59
- package/model-function/generate-text/prompt-template/ChatMLPromptTemplate.test.cjs +11 -0
- package/model-function/generate-text/prompt-template/ChatMLPromptTemplate.test.js +11 -0
- package/model-function/generate-text/prompt-template/Llama2PromptTemplate.cjs +9 -7
- package/model-function/generate-text/prompt-template/Llama2PromptTemplate.js +9 -7
- package/model-function/generate-text/prompt-template/Llama2PromptTemplate.test.cjs +11 -0
- package/model-function/generate-text/prompt-template/Llama2PromptTemplate.test.js +11 -0
- package/model-function/generate-text/prompt-template/MistralInstructPromptTemplate.cjs +150 -0
- package/model-function/generate-text/prompt-template/MistralInstructPromptTemplate.d.ts +62 -0
- package/model-function/generate-text/prompt-template/MistralInstructPromptTemplate.js +143 -0
- package/model-function/generate-text/prompt-template/MistralInstructPromptTemplate.test.cjs +60 -0
- package/model-function/generate-text/prompt-template/MistralInstructPromptTemplate.test.js +58 -0
- package/model-function/generate-text/prompt-template/NeuralChatPromptTemplate.test.cjs +11 -0
- package/model-function/generate-text/prompt-template/NeuralChatPromptTemplate.test.js +11 -0
- package/model-function/generate-text/prompt-template/TextPromptTemplate.test.cjs +11 -0
- package/model-function/generate-text/prompt-template/TextPromptTemplate.test.js +11 -0
- package/model-function/generate-text/prompt-template/VicunaPromptTemplate.test.cjs +11 -0
- package/model-function/generate-text/prompt-template/VicunaPromptTemplate.test.js +11 -0
- package/model-function/generate-text/prompt-template/index.cjs +2 -1
- package/model-function/generate-text/prompt-template/index.d.ts +1 -0
- package/model-function/generate-text/prompt-template/index.js +1 -0
- package/model-provider/llamacpp/LlamaCppBakLLaVA1PromptTemplate.d.ts +3 -3
- package/model-provider/llamacpp/{LlamaCppTextGenerationModel.cjs → LlamaCppCompletionModel.cjs} +25 -11
- package/model-provider/llamacpp/{LlamaCppTextGenerationModel.d.ts → LlamaCppCompletionModel.d.ts} +125 -38
- package/model-provider/llamacpp/{LlamaCppTextGenerationModel.js → LlamaCppCompletionModel.js} +23 -9
- package/model-provider/llamacpp/{LlamaCppTextGenerationModel.test.cjs → LlamaCppCompletionModel.test.cjs} +3 -3
- package/model-provider/llamacpp/LlamaCppCompletionModel.test.d.ts +1 -0
- package/model-provider/llamacpp/{LlamaCppTextGenerationModel.test.js → LlamaCppCompletionModel.test.js} +3 -3
- package/model-provider/llamacpp/LlamaCppFacade.cjs +2 -2
- package/model-provider/llamacpp/LlamaCppFacade.d.ts +2 -2
- package/model-provider/llamacpp/LlamaCppFacade.js +2 -2
- package/model-provider/llamacpp/index.cjs +1 -1
- package/model-provider/llamacpp/index.d.ts +1 -1
- package/model-provider/llamacpp/index.js +1 -1
- package/model-provider/mistral/MistralChatModel.cjs +4 -4
- package/model-provider/mistral/MistralChatModel.d.ts +6 -6
- package/model-provider/mistral/MistralChatModel.js +1 -1
- package/model-provider/mistral/index.cjs +3 -3
- package/model-provider/mistral/index.d.ts +2 -2
- package/model-provider/mistral/index.js +2 -2
- package/model-provider/openai/AbstractOpenAIChatModel.cjs +2 -10
- package/model-provider/openai/AbstractOpenAIChatModel.d.ts +13 -195
- package/model-provider/openai/AbstractOpenAIChatModel.js +2 -10
- package/model-provider/openai/AbstractOpenAICompletionModel.cjs +167 -0
- package/model-provider/openai/AbstractOpenAICompletionModel.d.ts +199 -0
- package/model-provider/openai/AbstractOpenAICompletionModel.js +163 -0
- package/model-provider/openai/OpenAIChatFunctionCallStructureGenerationModel.d.ts +1 -3
- package/model-provider/openai/OpenAIChatModel.d.ts +3 -6
- package/model-provider/openai/OpenAICompletionModel.cjs +4 -156
- package/model-provider/openai/OpenAICompletionModel.d.ts +4 -191
- package/model-provider/openai/OpenAICompletionModel.js +3 -155
- package/model-provider/openai/index.cjs +1 -0
- package/model-provider/openai/index.d.ts +1 -0
- package/model-provider/openai/index.js +1 -0
- package/model-provider/openai-compatible/OpenAICompatibleChatModel.d.ts +4 -5
- package/model-provider/openai-compatible/OpenAICompatibleCompletionModel.cjs +74 -0
- package/model-provider/openai-compatible/OpenAICompatibleCompletionModel.d.ts +27 -0
- package/model-provider/openai-compatible/OpenAICompatibleCompletionModel.js +70 -0
- package/model-provider/openai-compatible/OpenAICompatibleFacade.cjs +37 -6
- package/model-provider/openai-compatible/OpenAICompatibleFacade.d.ts +33 -5
- package/model-provider/openai-compatible/OpenAICompatibleFacade.js +35 -5
- package/model-provider/openai-compatible/OpenAICompatibleProviderName.cjs +2 -0
- package/model-provider/openai-compatible/OpenAICompatibleProviderName.d.ts +1 -0
- package/model-provider/openai-compatible/OpenAICompatibleProviderName.js +1 -0
- package/model-provider/openai-compatible/TogetherAIApiConfiguration.cjs +29 -0
- package/model-provider/openai-compatible/TogetherAIApiConfiguration.d.ts +18 -0
- package/model-provider/openai-compatible/TogetherAIApiConfiguration.js +25 -0
- package/model-provider/openai-compatible/index.cjs +4 -1
- package/model-provider/openai-compatible/index.d.ts +4 -1
- package/model-provider/openai-compatible/index.js +4 -1
- package/package.json +16 -16
- package/tool/generate-tool-call/index.cjs +1 -0
- package/tool/generate-tool-call/index.d.ts +1 -0
- package/tool/generate-tool-call/index.js +1 -0
- package/tool/generate-tool-call/jsonToolCallPrompt.cjs +30 -0
- package/tool/generate-tool-call/jsonToolCallPrompt.d.ts +5 -0
- package/tool/generate-tool-call/jsonToolCallPrompt.js +27 -0
- /package/{model-provider/llamacpp/LlamaCppTextGenerationModel.test.d.ts → model-function/generate-text/prompt-template/MistralInstructPromptTemplate.test.d.ts} +0 -0
- /package/model-provider/mistral/{MistralPromptTemplate.cjs → MistralChatPromptTemplate.cjs} +0 -0
- /package/model-provider/mistral/{MistralPromptTemplate.d.ts → MistralChatPromptTemplate.d.ts} +0 -0
- /package/model-provider/mistral/{MistralPromptTemplate.js → MistralChatPromptTemplate.js} +0 -0
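The rename of `LlamaCppTextGenerationModel` to `LlamaCppCompletionModel` (with matching `LlamaCppCompletionModelSettings`, prompt, and response-format types) runs through most of the llama.cpp hunks below. A minimal usage sketch of the renamed model, assuming the package root still re-exports the `llamacpp` facade and `streamText` as in earlier releases; the prompt text and setting values are illustrative only:

```ts
import { llamacpp, streamText } from "modelfusion";

// llamacpp.TextGenerator() now constructs a LlamaCppCompletionModel
// (see the facade and model diffs below). minP and cachePrompt are among
// the settings newly exposed in this release.
const model = llamacpp
  .TextGenerator({
    temperature: 0.8,
    minP: 0.05,
    cachePrompt: true,
  })
  .withTextPrompt(); // accept plain string prompts

const textStream = await streamText(model, "Write a haiku about llamas:");
for await (const chunk of textStream) {
  process.stdout.write(chunk);
}
```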
package/model-provider/llamacpp/{LlamaCppTextGenerationModel.d.ts → LlamaCppCompletionModel.d.ts}
RENAMED
@@ -8,7 +8,7 @@ import { PromptTemplateTextStreamingModel } from "../../model-function/generate-
 import { TextGenerationModelSettings, TextStreamingModel } from "../../model-function/generate-text/TextGenerationModel.js";
 import { TextGenerationPromptTemplate } from "../../model-function/generate-text/TextGenerationPromptTemplate.js";
 import { LlamaCppTokenizer } from "./LlamaCppTokenizer.js";
-export interface
+export interface LlamaCppCompletionModelSettings<CONTEXT_WINDOW_SIZE extends number | undefined> extends TextGenerationModelSettings {
 api?: ApiConfiguration;
 /**
 * Specify the context window size of the model that you have loaded in your
@@ -16,26 +16,113 @@ export interface LlamaCppTextGenerationModelSettings<CONTEXT_WINDOW_SIZE extends
 */
 contextWindowSize?: CONTEXT_WINDOW_SIZE;
 /**
-*
+* Adjust the randomness of the generated text (default: 0.8).
 */
-cachePrompt?: boolean;
 temperature?: number;
+/**
+* Limit the next token selection to the K most probable tokens (default: 40).
+*/
 topK?: number;
+/**
+* Limit the next token selection to a subset of tokens with a cumulative probability above a threshold P (default: 0.95).
+*/
 topP?: number;
+/**
+* The minimum probability for a token to be considered, relative to the probability of the most likely token (default: 0.05).
+*/
+minP?: number;
+/**
+* Specify the number of tokens from the prompt to retain when the context size is exceeded
+* and tokens need to be discarded. By default, this value is set to 0 (meaning no tokens
+* are kept). Use -1 to retain all tokens from the prompt.
+*/
 nKeep?: number;
+/**
+* Enable tail free sampling with parameter z (default: 1.0, 1.0 = disabled).
+*/
 tfsZ?: number;
+/**
+* Enable locally typical sampling with parameter p (default: 1.0, 1.0 = disabled).
+*/
 typicalP?: number;
+/**
+* Control the repetition of token sequences in the generated text (default: 1.1).
+*/
 repeatPenalty?: number;
+/**
+* Last n tokens to consider for penalizing repetition (default: 64, 0 = disabled, -1 = ctx-size).
+*/
 repeatLastN?: number;
+/**
+* Penalize newline tokens when applying the repeat penalty (default: true).
+*/
 penalizeNl?: boolean;
+/**
+* Repeat alpha presence penalty (default: 0.0, 0.0 = disabled).
+*/
+presencePenalty?: number;
+/**
+* Repeat alpha frequency penalty (default: 0.0, 0.0 = disabled).
+*/
+frequencyPenalty?: number;
+/**
+* This will replace the prompt for the purpose of the penalty evaluation.
+* Can be either null, a string or an array of numbers representing tokens
+* (default: null = use the original prompt).
+*/
+penaltyPrompt?: string | number[];
+/**
+* Enable Mirostat sampling, controlling perplexity during text generation
+* (default: 0, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0).
+*/
 mirostat?: number;
+/**
+* Set the Mirostat target entropy, parameter tau (default: 5.0).
+*/
 mirostatTau?: number;
+/**
+* Set the Mirostat learning rate, parameter eta (default: 0.1).
+*/
 mirostatEta?: number;
+/**
+* Set grammar for grammar-based sampling (default: no grammar)
+*
+* @see https://github.com/ggerganov/llama.cpp/blob/master/grammars/README.md
+*/
+grammar?: string;
+/**
+* Set the random number generator (RNG) seed
+* (default: -1, -1 = random seed).
+*/
 seed?: number;
+/**
+* Ignore end of stream token and continue generating (default: false).
+*/
 ignoreEos?: boolean;
+/**
+* Modify the likelihood of a token appearing in the generated text completion.
+* For example, use "logit_bias": [[15043,1.0]] to increase the likelihood of the token
+* 'Hello', or "logit_bias": [[15043,-1.0]] to decrease its likelihood.
+* Setting the value to false, "logit_bias": [[15043,false]] ensures that the token Hello is
+* never produced (default: []).
+*/
 logitBias?: Array<[number, number | false]>;
+/**
+* If greater than 0, the response also contains the probabilities of top N tokens
+* for each generated token (default: 0)
+*/
+nProbs?: number;
+/**
+* Save the prompt and generation for avoid reprocess entire prompt if a part of this isn't change (default: false)
+*/
+cachePrompt?: boolean;
+/**
+* Assign the completion task to an specific slot.
+* If is -1 the task will be assigned to a Idle slot (default: -1)
+*/
+slotId?: number;
 }
-export interface
+export interface LlamaCppCompletionPrompt {
 /**
 * Text prompt. Images can be included through references such as `[img-ID]`, e.g. `[img-1]`.
 */
@@ -45,18 +132,18 @@ export interface LlamaCppTextGenerationPrompt {
 */
 images?: Record<number, string>;
 }
-export declare class
-constructor(settings?:
+export declare class LlamaCppCompletionModel<CONTEXT_WINDOW_SIZE extends number | undefined> extends AbstractModel<LlamaCppCompletionModelSettings<CONTEXT_WINDOW_SIZE>> implements TextStreamingModel<LlamaCppCompletionPrompt, LlamaCppCompletionModelSettings<CONTEXT_WINDOW_SIZE>> {
+constructor(settings?: LlamaCppCompletionModelSettings<CONTEXT_WINDOW_SIZE>);
 readonly provider = "llamacpp";
 get modelName(): null;
 get contextWindowSize(): CONTEXT_WINDOW_SIZE;
 readonly tokenizer: LlamaCppTokenizer;
-callAPI<RESPONSE>(prompt:
-responseFormat:
+callAPI<RESPONSE>(prompt: LlamaCppCompletionPrompt, options: {
+responseFormat: LlamaCppCompletionResponseFormatType<RESPONSE>;
 } & FunctionOptions): Promise<RESPONSE>;
-get settingsForEvent(): Partial<
-countPromptTokens(prompt:
-doGenerateTexts(prompt:
+get settingsForEvent(): Partial<LlamaCppCompletionModelSettings<CONTEXT_WINDOW_SIZE>>;
+countPromptTokens(prompt: LlamaCppCompletionPrompt): Promise<number>;
+doGenerateTexts(prompt: LlamaCppCompletionPrompt, options?: FunctionOptions): Promise<{
 response: {
 model: string;
 stop: true;
@@ -81,11 +168,11 @@ export declare class LlamaCppTextGenerationModel<CONTEXT_WINDOW_SIZE extends num
 presence_penalty: number;
 repeat_last_n: number;
 repeat_penalty: number;
-temp: number;
 tfs_z: number;
 top_k: number;
 top_p: number;
 typical_p: number;
+temperature?: number | undefined;
 };
 stopped_eos: boolean;
 stopped_limit: boolean;
@@ -96,10 +183,10 @@ export declare class LlamaCppTextGenerationModel<CONTEXT_WINDOW_SIZE extends num
 predicted_n: number;
 predicted_per_second: number | null;
 predicted_per_token_ms: number | null;
-prompt_ms: number | null;
 prompt_n: number;
 prompt_per_second: number | null;
 prompt_per_token_ms: number | null;
+prompt_ms?: number | null | undefined;
 };
 tokens_cached: number;
 tokens_evaluated: number;
@@ -116,7 +203,7 @@ export declare class LlamaCppTextGenerationModel<CONTEXT_WINDOW_SIZE extends num
 totalTokens: number;
 };
 }>;
-doStreamText(prompt:
+doStreamText(prompt: LlamaCppCompletionPrompt, options?: FunctionOptions): Promise<AsyncIterable<Delta<{
 model: string;
 stop: true;
 content: string;
@@ -140,11 +227,11 @@ export declare class LlamaCppTextGenerationModel<CONTEXT_WINDOW_SIZE extends num
 presence_penalty: number;
 repeat_last_n: number;
 repeat_penalty: number;
-temp: number;
 tfs_z: number;
 top_k: number;
 top_p: number;
 typical_p: number;
+temperature?: number | undefined;
 };
 stopped_eos: boolean;
 stopped_limit: boolean;
@@ -155,10 +242,10 @@ export declare class LlamaCppTextGenerationModel<CONTEXT_WINDOW_SIZE extends num
 predicted_n: number;
 predicted_per_second: number | null;
 predicted_per_token_ms: number | null;
-prompt_ms: number | null;
 prompt_n: number;
 prompt_per_second: number | null;
 prompt_per_token_ms: number | null;
+prompt_ms?: number | null | undefined;
 };
 tokens_cached: number;
 tokens_evaluated: number;
@@ -169,16 +256,16 @@ export declare class LlamaCppTextGenerationModel<CONTEXT_WINDOW_SIZE extends num
 content: string;
 }>>>;
 extractTextDelta(delta: unknown): string;
-withTextPrompt(): PromptTemplateTextStreamingModel<string,
+withTextPrompt(): PromptTemplateTextStreamingModel<string, LlamaCppCompletionPrompt, LlamaCppCompletionModelSettings<CONTEXT_WINDOW_SIZE>, this>;
 /**
 * Maps the prompt for a text version of the Llama.cpp prompt template (without image support).
 */
-withTextPromptTemplate<INPUT_PROMPT>(promptTemplate: TextGenerationPromptTemplate<INPUT_PROMPT, string>): PromptTemplateTextStreamingModel<INPUT_PROMPT, string,
+withTextPromptTemplate<INPUT_PROMPT>(promptTemplate: TextGenerationPromptTemplate<INPUT_PROMPT, string>): PromptTemplateTextStreamingModel<INPUT_PROMPT, string, LlamaCppCompletionModelSettings<CONTEXT_WINDOW_SIZE>, PromptTemplateTextStreamingModel<string, LlamaCppCompletionPrompt, LlamaCppCompletionModelSettings<CONTEXT_WINDOW_SIZE>, this>>;
 /**
 * Maps the prompt for the full Llama.cpp prompt template (incl. image support).
 */
-withPromptTemplate<INPUT_PROMPT>(promptTemplate: TextGenerationPromptTemplate<INPUT_PROMPT,
-withSettings(additionalSettings: Partial<
+withPromptTemplate<INPUT_PROMPT>(promptTemplate: TextGenerationPromptTemplate<INPUT_PROMPT, LlamaCppCompletionPrompt>): PromptTemplateTextStreamingModel<INPUT_PROMPT, LlamaCppCompletionPrompt, LlamaCppCompletionModelSettings<CONTEXT_WINDOW_SIZE>, this>;
+withSettings(additionalSettings: Partial<LlamaCppCompletionModelSettings<CONTEXT_WINDOW_SIZE>>): this;
 }
 declare const llamaCppTextGenerationResponseSchema: z.ZodObject<{
 content: z.ZodString;
@@ -202,7 +289,7 @@ declare const llamaCppTextGenerationResponseSchema: z.ZodObject<{
 seed: z.ZodNumber;
 stop: z.ZodArray<z.ZodString, "many">;
 stream: z.ZodBoolean;
-
+temperature: z.ZodOptional<z.ZodNumber>;
 tfs_z: z.ZodNumber;
 top_k: z.ZodNumber;
 top_p: z.ZodNumber;
@@ -226,11 +313,11 @@ declare const llamaCppTextGenerationResponseSchema: z.ZodObject<{
 presence_penalty: number;
 repeat_last_n: number;
 repeat_penalty: number;
-temp: number;
 tfs_z: number;
 top_k: number;
 top_p: number;
 typical_p: number;
+temperature?: number | undefined;
 }, {
 model: string;
 stream: boolean;
@@ -250,11 +337,11 @@ declare const llamaCppTextGenerationResponseSchema: z.ZodObject<{
 presence_penalty: number;
 repeat_last_n: number;
 repeat_penalty: number;
-temp: number;
 tfs_z: number;
 top_k: number;
 top_p: number;
 typical_p: number;
+temperature?: number | undefined;
 }>;
 model: z.ZodString;
 prompt: z.ZodString;
@@ -267,7 +354,7 @@ declare const llamaCppTextGenerationResponseSchema: z.ZodObject<{
 predicted_n: z.ZodNumber;
 predicted_per_second: z.ZodNullable<z.ZodNumber>;
 predicted_per_token_ms: z.ZodNullable<z.ZodNumber>;
-prompt_ms: z.ZodNullable<z.ZodNumber
+prompt_ms: z.ZodOptional<z.ZodNullable<z.ZodNumber>>;
 prompt_n: z.ZodNumber;
 prompt_per_second: z.ZodNullable<z.ZodNumber>;
 prompt_per_token_ms: z.ZodNullable<z.ZodNumber>;
@@ -276,19 +363,19 @@ declare const llamaCppTextGenerationResponseSchema: z.ZodObject<{
 predicted_n: number;
 predicted_per_second: number | null;
 predicted_per_token_ms: number | null;
-prompt_ms: number | null;
 prompt_n: number;
 prompt_per_second: number | null;
 prompt_per_token_ms: number | null;
+prompt_ms?: number | null | undefined;
 }, {
 predicted_ms: number;
 predicted_n: number;
 predicted_per_second: number | null;
 predicted_per_token_ms: number | null;
-prompt_ms: number | null;
 prompt_n: number;
 prompt_per_second: number | null;
 prompt_per_token_ms: number | null;
+prompt_ms?: number | null | undefined;
 }>;
 tokens_cached: z.ZodNumber;
 tokens_evaluated: z.ZodNumber;
@@ -318,11 +405,11 @@ declare const llamaCppTextGenerationResponseSchema: z.ZodObject<{
 presence_penalty: number;
 repeat_last_n: number;
 repeat_penalty: number;
-temp: number;
 tfs_z: number;
 top_k: number;
 top_p: number;
 typical_p: number;
+temperature?: number | undefined;
 };
 stopped_eos: boolean;
 stopped_limit: boolean;
@@ -333,10 +420,10 @@ declare const llamaCppTextGenerationResponseSchema: z.ZodObject<{
 predicted_n: number;
 predicted_per_second: number | null;
 predicted_per_token_ms: number | null;
-prompt_ms: number | null;
 prompt_n: number;
 prompt_per_second: number | null;
 prompt_per_token_ms: number | null;
+prompt_ms?: number | null | undefined;
 };
 tokens_cached: number;
 tokens_evaluated: number;
@@ -366,11 +453,11 @@ declare const llamaCppTextGenerationResponseSchema: z.ZodObject<{
 presence_penalty: number;
 repeat_last_n: number;
 repeat_penalty: number;
-temp: number;
 tfs_z: number;
 top_k: number;
 top_p: number;
 typical_p: number;
+temperature?: number | undefined;
 };
 stopped_eos: boolean;
 stopped_limit: boolean;
@@ -381,10 +468,10 @@ declare const llamaCppTextGenerationResponseSchema: z.ZodObject<{
 predicted_n: number;
 predicted_per_second: number | null;
 predicted_per_token_ms: number | null;
-prompt_ms: number | null;
 prompt_n: number;
 prompt_per_second: number | null;
 prompt_per_token_ms: number | null;
+prompt_ms?: number | null | undefined;
 };
 tokens_cached: number;
 tokens_evaluated: number;
@@ -416,11 +503,11 @@ declare const llamaCppTextStreamChunkSchema: import("../../core/schema/ZodSchema
 presence_penalty: number;
 repeat_last_n: number;
 repeat_penalty: number;
-temp: number;
 tfs_z: number;
 top_k: number;
 top_p: number;
 typical_p: number;
+temperature?: number | undefined;
 };
 stopped_eos: boolean;
 stopped_limit: boolean;
@@ -431,10 +518,10 @@ declare const llamaCppTextStreamChunkSchema: import("../../core/schema/ZodSchema
 predicted_n: number;
 predicted_per_second: number | null;
 predicted_per_token_ms: number | null;
-prompt_ms: number | null;
 prompt_n: number;
 prompt_per_second: number | null;
 prompt_per_token_ms: number | null;
+prompt_ms?: number | null | undefined;
 };
 tokens_cached: number;
 tokens_evaluated: number;
@@ -445,11 +532,11 @@ declare const llamaCppTextStreamChunkSchema: import("../../core/schema/ZodSchema
 content: string;
 }>;
 export type LlamaCppTextStreamChunk = (typeof llamaCppTextStreamChunkSchema)["_type"];
-export type
+export type LlamaCppCompletionResponseFormatType<T> = {
 stream: boolean;
 handler: ResponseHandler<T>;
 };
-export declare const
+export declare const LlamaCppCompletionResponseFormat: {
 /**
 * Returns the response as a JSON object.
 */
@@ -479,11 +566,11 @@ export declare const LlamaCppTextGenerationResponseFormat: {
 presence_penalty: number;
 repeat_last_n: number;
 repeat_penalty: number;
-temp: number;
 tfs_z: number;
 top_k: number;
 top_p: number;
 typical_p: number;
+temperature?: number | undefined;
 };
 stopped_eos: boolean;
 stopped_limit: boolean;
@@ -494,10 +581,10 @@ export declare const LlamaCppTextGenerationResponseFormat: {
 predicted_n: number;
 predicted_per_second: number | null;
 predicted_per_token_ms: number | null;
-prompt_ms: number | null;
 prompt_n: number;
 prompt_per_second: number | null;
 prompt_per_token_ms: number | null;
+prompt_ms?: number | null | undefined;
 };
 tokens_cached: number;
 tokens_evaluated: number;
@@ -537,11 +624,11 @@ export declare const LlamaCppTextGenerationResponseFormat: {
 presence_penalty: number;
 repeat_last_n: number;
 repeat_penalty: number;
-temp: number;
 tfs_z: number;
 top_k: number;
 top_p: number;
 typical_p: number;
+temperature?: number | undefined;
 };
 stopped_eos: boolean;
 stopped_limit: boolean;
@@ -552,10 +639,10 @@ export declare const LlamaCppTextGenerationResponseFormat: {
 predicted_n: number;
 predicted_per_second: number | null;
 predicted_per_token_ms: number | null;
-prompt_ms: number | null;
 prompt_n: number;
 prompt_per_second: number | null;
 prompt_per_token_ms: number | null;
+prompt_ms?: number | null | undefined;
 };
 tokens_cached: number;
 tokens_evaluated: number;
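`LlamaCppCompletionPrompt` above keeps the multimodal prompt shape of the previous type: a `text` field that can reference images via `[img-ID]` placeholders and an `images` record of base64 data keyed by id. A hedged sketch of passing such a prompt directly to the model; the `generateText` call shape and the file-reading helper are assumptions, not part of this diff:

```ts
import { readFileSync } from "node:fs";
import { generateText, llamacpp } from "modelfusion";

// LlamaCppCompletionPrompt: text with an [img-1] reference plus the image
// data, base64-encoded and keyed by the same id.
const prompt = {
  text: "[img-1] Describe the image in one sentence:",
  images: { 1: readFileSync("example.png").toString("base64") },
};

// Without .withTextPrompt(), the model accepts the raw completion prompt.
const description = await generateText(
  llamacpp.TextGenerator({ temperature: 0 }),
  prompt
);
console.log(description);
```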
package/model-provider/llamacpp/{LlamaCppTextGenerationModel.js → LlamaCppCompletionModel.js}
RENAMED
@@ -11,7 +11,7 @@ import { parseEventSourceStream } from "../../util/streaming/parseEventSourceStr
 import { LlamaCppApiConfiguration } from "./LlamaCppApiConfiguration.js";
 import { failedLlamaCppCallResponseHandler } from "./LlamaCppError.js";
 import { LlamaCppTokenizer } from "./LlamaCppTokenizer.js";
-export class
+export class LlamaCppCompletionModel extends AbstractModel {
 constructor(settings = {}) {
 super({ settings });
 Object.defineProperty(this, "provider", {
@@ -53,10 +53,10 @@ export class LlamaCppTextGenerationModel extends AbstractModel {
 data,
 }))
 : undefined,
-cache_prompt: this.settings.cachePrompt,
 temperature: this.settings.temperature,
 top_k: this.settings.topK,
 top_p: this.settings.topP,
+min_p: this.settings.minP,
 n_predict: this.settings.maxGenerationTokens,
 n_keep: this.settings.nKeep,
 stop: this.settings.stopSequences,
@@ -65,12 +65,19 @@ export class LlamaCppTextGenerationModel extends AbstractModel {
 repeat_penalty: this.settings.repeatPenalty,
 repeat_last_n: this.settings.repeatLastN,
 penalize_nl: this.settings.penalizeNl,
+presence_penalty: this.settings.presencePenalty,
+frequency_penalty: this.settings.frequencyPenalty,
+penalty_prompt: this.settings.penaltyPrompt,
 mirostat: this.settings.mirostat,
 mirostat_tau: this.settings.mirostatTau,
 mirostat_eta: this.settings.mirostatEta,
+grammar: this.settings.grammar,
 seed: this.settings.seed,
 ignore_eos: this.settings.ignoreEos,
 logit_bias: this.settings.logitBias,
+n_probs: this.settings.nProbs,
+cache_prompt: this.settings.cachePrompt,
+slot_id: this.settings.slotId,
 },
 failedResponseHandler: failedLlamaCppCallResponseHandler,
 successfulResponseHandler: responseFormat.handler,
@@ -82,22 +89,29 @@ export class LlamaCppTextGenerationModel extends AbstractModel {
 const eventSettingProperties = [
 ...textGenerationModelProperties,
 "contextWindowSize",
-"cachePrompt",
 "temperature",
 "topK",
 "topP",
+"minP",
 "nKeep",
 "tfsZ",
 "typicalP",
 "repeatPenalty",
 "repeatLastN",
 "penalizeNl",
+"presencePenalty",
+"frequencyPenalty",
+"penaltyPrompt",
 "mirostat",
 "mirostatTau",
 "mirostatEta",
+"grammar",
 "seed",
 "ignoreEos",
 "logitBias",
+"nProbs",
+"cachePrompt",
+"slotId",
 ];
 return Object.fromEntries(Object.entries(this.settings).filter(([key]) => eventSettingProperties.includes(key)));
 }
@@ -108,7 +122,7 @@ export class LlamaCppTextGenerationModel extends AbstractModel {
 async doGenerateTexts(prompt, options) {
 const response = await this.callAPI(prompt, {
 ...options,
-responseFormat:
+responseFormat: LlamaCppCompletionResponseFormat.json,
 });
 return {
 response,
@@ -132,7 +146,7 @@ export class LlamaCppTextGenerationModel extends AbstractModel {
 doStreamText(prompt, options) {
 return this.callAPI(prompt, {
 ...options,
-responseFormat:
+responseFormat: LlamaCppCompletionResponseFormat.deltaIterable,
 });
 }
 extractTextDelta(delta) {
@@ -175,7 +189,7 @@ export class LlamaCppTextGenerationModel extends AbstractModel {
 });
 }
 withSettings(additionalSettings) {
-return new
+return new LlamaCppCompletionModel(Object.assign({}, this.settings, additionalSettings));
 }
 }
 const llamaCppTextGenerationResponseSchema = z.object({
@@ -200,7 +214,7 @@ const llamaCppTextGenerationResponseSchema = z.object({
 seed: z.number(),
 stop: z.array(z.string()),
 stream: z.boolean(),
-
+temperature: z.number().optional(), // optional for backwards compatibility
 tfs_z: z.number(),
 top_k: z.number(),
 top_p: z.number(),
@@ -217,7 +231,7 @@ const llamaCppTextGenerationResponseSchema = z.object({
 predicted_n: z.number(),
 predicted_per_second: z.number().nullable(),
 predicted_per_token_ms: z.number().nullable(),
-prompt_ms: z.number().nullable(),
+prompt_ms: z.number().nullable().optional(),
 prompt_n: z.number(),
 prompt_per_second: z.number().nullable(),
 prompt_per_token_ms: z.number().nullable(),
@@ -263,7 +277,7 @@ async function createLlamaCppFullDeltaIterableQueue(stream) {
 });
 return queue;
 }
-export const
+export const LlamaCppCompletionResponseFormat = {
 /**
 * Returns the response as a JSON object.
 */
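The hunks above forward the new settings to the llama.cpp server as snake_case request fields (`minP` → `min_p`, `nProbs` → `n_probs`, `slotId` → `slot_id`, and so on). The `grammar` setting enables llama.cpp's GBNF grammar-constrained sampling; a small sketch with an assumed grammar string, using the facade call shape shown later in this diff:

```ts
import { generateText, llamacpp } from "modelfusion";

// Illustrative GBNF grammar: constrains the completion to "yes" or "no".
// It is forwarded verbatim to the llama.cpp server as the `grammar` field.
const answer = await generateText(
  llamacpp
    .TextGenerator({
      grammar: 'root ::= "yes" | "no"',
      temperature: 0,
    })
    .withTextPrompt(),
  "Is a llama a mammal? Answer yes or no: "
);
console.log(answer);
```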
package/model-provider/llamacpp/{LlamaCppTextGenerationModel.test.cjs → LlamaCppCompletionModel.test.cjs}
RENAMED
@@ -3,7 +3,7 @@ Object.defineProperty(exports, "__esModule", { value: true });
 const streamText_js_1 = require("../../model-function/generate-text/streamText.cjs");
 const StreamingTestServer_js_1 = require("../../test/StreamingTestServer.cjs");
 const arrayFromAsync_js_1 = require("../../test/arrayFromAsync.cjs");
-const
+const LlamaCppCompletionModel_js_1 = require("./LlamaCppCompletionModel.cjs");
 describe("streamText", () => {
 const server = new StreamingTestServer_js_1.StreamingTestServer("http://127.0.0.1:8080/completion");
 server.setupTestEnvironment();
@@ -17,7 +17,7 @@ describe("streamText", () => {
 `"mirostat_eta":0.10000000149011612,"mirostat_tau":5.0,"model":"models/llama-2-7b-chat.Q4_K_M.gguf",` +
 `"n_ctx":4096,"n_keep":0,"n_predict":-1,"n_probs":0,"penalize_nl":true,"penalty_prompt_tokens":[],` +
 `"presence_penalty":0.0,"repeat_last_n":64,"repeat_penalty":1.100000023841858,"seed":4294967295,` +
-`"stop":[],"stream":true,"
+`"stop":[],"stream":true,"temperature":0.800000011920929,"tfs_z":1.0,"top_k":40,"top_p":0.949999988079071,` +
 `"typical_p":1.0,"use_penalty_prompt_tokens":false},"model":"models/llama-2-7b-chat.Q4_K_M.gguf",` +
 `"prompt":"hello","slot_id":0,"stop":true,"stopped_eos":true,"stopped_limit":false,` +
 `"stopped_word":false,"stopping_word":"","timings":{"predicted_ms":1054.704,"predicted_n":69,` +
@@ -26,7 +26,7 @@ describe("streamText", () => {
 `"prompt_per_token_ms":48.845600000000005},"tokens_cached":74,"tokens_evaluated":5,` +
 `"tokens_predicted":69,"truncated":false}\n\n`,
 ];
-const stream = await (0, streamText_js_1.streamText)(new
+const stream = await (0, streamText_js_1.streamText)(new LlamaCppCompletionModel_js_1.LlamaCppCompletionModel().withTextPrompt(), "hello");
 // note: space moved to last chunk bc of trimming
 expect(await (0, arrayFromAsync_js_1.arrayFromAsync)(stream)).toStrictEqual([
 "Hello",
package/model-provider/llamacpp/LlamaCppCompletionModel.test.d.ts
ADDED
@@ -0,0 +1 @@
+export {};
package/model-provider/llamacpp/{LlamaCppTextGenerationModel.test.js → LlamaCppCompletionModel.test.js}
RENAMED
@@ -1,7 +1,7 @@
 import { streamText } from "../../model-function/generate-text/streamText.js";
 import { StreamingTestServer } from "../../test/StreamingTestServer.js";
 import { arrayFromAsync } from "../../test/arrayFromAsync.js";
-import {
+import { LlamaCppCompletionModel } from "./LlamaCppCompletionModel.js";
 describe("streamText", () => {
 const server = new StreamingTestServer("http://127.0.0.1:8080/completion");
 server.setupTestEnvironment();
@@ -15,7 +15,7 @@ describe("streamText", () => {
 `"mirostat_eta":0.10000000149011612,"mirostat_tau":5.0,"model":"models/llama-2-7b-chat.Q4_K_M.gguf",` +
 `"n_ctx":4096,"n_keep":0,"n_predict":-1,"n_probs":0,"penalize_nl":true,"penalty_prompt_tokens":[],` +
 `"presence_penalty":0.0,"repeat_last_n":64,"repeat_penalty":1.100000023841858,"seed":4294967295,` +
-`"stop":[],"stream":true,"
+`"stop":[],"stream":true,"temperature":0.800000011920929,"tfs_z":1.0,"top_k":40,"top_p":0.949999988079071,` +
 `"typical_p":1.0,"use_penalty_prompt_tokens":false},"model":"models/llama-2-7b-chat.Q4_K_M.gguf",` +
 `"prompt":"hello","slot_id":0,"stop":true,"stopped_eos":true,"stopped_limit":false,` +
 `"stopped_word":false,"stopping_word":"","timings":{"predicted_ms":1054.704,"predicted_n":69,` +
@@ -24,7 +24,7 @@ describe("streamText", () => {
 `"prompt_per_token_ms":48.845600000000005},"tokens_cached":74,"tokens_evaluated":5,` +
 `"tokens_predicted":69,"truncated":false}\n\n`,
 ];
-const stream = await streamText(new
+const stream = await streamText(new LlamaCppCompletionModel().withTextPrompt(), "hello");
 // note: space moved to last chunk bc of trimming
 expect(await arrayFromAsync(stream)).toStrictEqual([
 "Hello",
package/model-provider/llamacpp/LlamaCppFacade.cjs
CHANGED
@@ -3,10 +3,10 @@ Object.defineProperty(exports, "__esModule", { value: true });
 exports.Tokenizer = exports.TextEmbedder = exports.TextGenerator = void 0;
 const LlamaCppApiConfiguration_js_1 = require("./LlamaCppApiConfiguration.cjs");
 const LlamaCppTextEmbeddingModel_js_1 = require("./LlamaCppTextEmbeddingModel.cjs");
-const
+const LlamaCppCompletionModel_js_1 = require("./LlamaCppCompletionModel.cjs");
 const LlamaCppTokenizer_js_1 = require("./LlamaCppTokenizer.cjs");
 function TextGenerator(settings = {}) {
-return new
+return new LlamaCppCompletionModel_js_1.LlamaCppCompletionModel(settings);
 }
 exports.TextGenerator = TextGenerator;
 function TextEmbedder(settings = {}) {
package/model-provider/llamacpp/LlamaCppFacade.d.ts
CHANGED
@@ -1,7 +1,7 @@
 import { ApiConfiguration } from "../../core/api/ApiConfiguration.js";
 import { LlamaCppTextEmbeddingModel, LlamaCppTextEmbeddingModelSettings } from "./LlamaCppTextEmbeddingModel.js";
-import {
+import { LlamaCppCompletionModel, LlamaCppCompletionModelSettings } from "./LlamaCppCompletionModel.js";
 import { LlamaCppTokenizer } from "./LlamaCppTokenizer.js";
-export declare function TextGenerator<CONTEXT_WINDOW_SIZE extends number>(settings?:
+export declare function TextGenerator<CONTEXT_WINDOW_SIZE extends number>(settings?: LlamaCppCompletionModelSettings<CONTEXT_WINDOW_SIZE>): LlamaCppCompletionModel<CONTEXT_WINDOW_SIZE>;
 export declare function TextEmbedder(settings?: LlamaCppTextEmbeddingModelSettings): LlamaCppTextEmbeddingModel;
 export declare function Tokenizer(api?: ApiConfiguration): LlamaCppTokenizer;
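As the updated facade declaration above shows, `TextGenerator` keeps its `CONTEXT_WINDOW_SIZE` type parameter, so a `contextWindowSize` passed in the settings is carried on the returned `LlamaCppCompletionModel`. A brief sketch with an assumed window size:

```ts
import { llamacpp } from "modelfusion";

// 4096 is an assumed value matching a server started with a 4096-token
// context; the setting is exposed again via the model's contextWindowSize
// getter (see the class declaration earlier in this diff).
const model = llamacpp.TextGenerator({
  contextWindowSize: 4096,
});

console.log(model.contextWindowSize); // 4096
```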
package/model-provider/llamacpp/LlamaCppFacade.js
CHANGED
@@ -1,9 +1,9 @@
 import { LlamaCppApiConfiguration } from "./LlamaCppApiConfiguration.js";
 import { LlamaCppTextEmbeddingModel, } from "./LlamaCppTextEmbeddingModel.js";
-import {
+import { LlamaCppCompletionModel, } from "./LlamaCppCompletionModel.js";
 import { LlamaCppTokenizer } from "./LlamaCppTokenizer.js";
 export function TextGenerator(settings = {}) {
-return new
+return new LlamaCppCompletionModel(settings);
 }
 export function TextEmbedder(settings = {}) {
 return new LlamaCppTextEmbeddingModel(settings);
package/model-provider/llamacpp/index.cjs
CHANGED
@@ -33,5 +33,5 @@ var LlamaCppError_js_1 = require("./LlamaCppError.cjs");
 Object.defineProperty(exports, "LlamaCppError", { enumerable: true, get: function () { return LlamaCppError_js_1.LlamaCppError; } });
 exports.llamacpp = __importStar(require("./LlamaCppFacade.cjs"));
 __exportStar(require("./LlamaCppTextEmbeddingModel.cjs"), exports);
-__exportStar(require("./
+__exportStar(require("./LlamaCppCompletionModel.cjs"), exports);
 __exportStar(require("./LlamaCppTokenizer.cjs"), exports);