modelfusion 0.14.0 → 0.15.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +11 -5
- package/composed-function/summarize/summarizeRecursivelyWithTextGenerationAndTokenSplitting.cjs +1 -1
- package/composed-function/summarize/summarizeRecursivelyWithTextGenerationAndTokenSplitting.js +1 -1
- package/model-function/Model.d.ts +2 -2
- package/model-function/generate-text/TextGenerationModel.d.ts +15 -15
- package/model-function/generate-text/generateText.cjs +2 -2
- package/model-function/generate-text/generateText.js +2 -2
- package/model-provider/cohere/CohereTextGenerationModel.cjs +15 -16
- package/model-provider/cohere/CohereTextGenerationModel.d.ts +1 -6
- package/model-provider/cohere/CohereTextGenerationModel.js +15 -16
- package/model-provider/huggingface/HuggingFaceTextGenerationModel.cjs +9 -14
- package/model-provider/huggingface/HuggingFaceTextGenerationModel.d.ts +1 -5
- package/model-provider/huggingface/HuggingFaceTextGenerationModel.js +9 -14
- package/model-provider/llamacpp/LlamaCppTextGenerationModel.cjs +12 -12
- package/model-provider/llamacpp/LlamaCppTextGenerationModel.d.ts +6 -11
- package/model-provider/llamacpp/LlamaCppTextGenerationModel.js +12 -12
- package/model-provider/openai/OpenAITextGenerationModel.cjs +16 -14
- package/model-provider/openai/OpenAITextGenerationModel.d.ts +1 -6
- package/model-provider/openai/OpenAITextGenerationModel.js +16 -14
- package/model-provider/openai/chat/OpenAIChatModel.cjs +11 -14
- package/model-provider/openai/chat/OpenAIChatModel.d.ts +2 -5
- package/model-provider/openai/chat/OpenAIChatModel.js +11 -14
- package/package.json +3 -3
- package/prompt/AlpacaPromptFormat.cjs +1 -1
- package/prompt/AlpacaPromptFormat.js +1 -1
- package/prompt/Llama2PromptFormat.cjs +2 -2
- package/prompt/Llama2PromptFormat.js +2 -2
- package/prompt/OpenAIChatPromptFormat.cjs +2 -2
- package/prompt/OpenAIChatPromptFormat.js +2 -2
- package/prompt/PromptFormat.d.ts +2 -2
- package/prompt/PromptFormatTextGenerationModel.cjs +3 -16
- package/prompt/PromptFormatTextGenerationModel.d.ts +0 -3
- package/prompt/PromptFormatTextGenerationModel.js +3 -16
- package/prompt/TextPromptFormat.cjs +2 -2
- package/prompt/TextPromptFormat.js +2 -2
- package/prompt/VicunaPromptFormat.cjs +1 -1
- package/prompt/VicunaPromptFormat.js +1 -1
- package/prompt/chat/trimChatPrompt.cjs +1 -1
- package/prompt/chat/trimChatPrompt.js +1 -1
package/README.md
CHANGED
@@ -58,7 +58,10 @@ const text = await generateText(
 
 ```ts
 const textStream = await streamText(
-  new OpenAIChatModel({
+  new OpenAIChatModel({
+    model: "gpt-3.5-turbo",
+    maxCompletionTokens: 1000,
+  }),
   [
     OpenAIChatMessage.system("You are a story writer."),
     OpenAIChatMessage.user("Write a story about a robot learning to love"),
@@ -78,7 +81,7 @@ for await (const textFragment of textStream) {
 const text = await generateText(
   new LlamaCppTextGenerationModel({
     contextWindowSize: 4096, // Llama 2 context window size
-
+    maxCompletionTokens: 1000,
   }).withPromptFormat(Llama2InstructionPromptFormat()),
   {
     system: "You are a story writer.",
@@ -111,7 +114,7 @@ ModelFusion model functions return rich results that include the original respon
 const { response, metadata } = await generateText(
   new OpenAITextGenerationModel({
     model: "text-davinci-003",
-
+    maxCompletionTokens: 1000,
     n: 2, // generate 2 completions
   }),
   "Write a short story about a robot learning to love:\n\n",
@@ -134,7 +137,7 @@ const value = await generateJson(
   new OpenAIChatModel({
     model: "gpt-3.5-turbo",
     temperature: 0,
-
+    maxCompletionTokens: 50,
   }),
   {
     name: "sentiment" as const,
@@ -165,7 +168,10 @@ It either matches one of the schemas or is text reponse.
 
 ```ts
 const { schema, value, text } = await generateJsonOrText(
-  new OpenAIChatModel({
+  new OpenAIChatModel({
+    model: "gpt-3.5-turbo",
+    maxCompletionTokens: 1000,
+  }),
   [
     {
       name: "getCurrentWeather" as const, // mark 'as const' for type inference
package/composed-function/summarize/summarizeRecursivelyWithTextGenerationAndTokenSplitting.cjs
CHANGED
@@ -10,7 +10,7 @@ const summarizeRecursively_js_1 = require("./summarizeRecursively.cjs");
  * while leaving enough space for the model to generate text.
  */
 async function summarizeRecursivelyWithTextGenerationAndTokenSplitting({ text, model, prompt, tokenLimit = model.contextWindowSize -
-    (model.maxCompletionTokens ?? model.contextWindowSize / 4), join, }, options) {
+    (model.settings.maxCompletionTokens ?? model.contextWindowSize / 4), join, }, options) {
     const emptyPromptTokens = await model.countPromptTokens(await prompt({ text: "" }));
     return (0, summarizeRecursively_js_1.summarizeRecursively)({
         split: (0, splitRecursively_js_1.splitAtToken)({
package/composed-function/summarize/summarizeRecursivelyWithTextGenerationAndTokenSplitting.js
CHANGED
@@ -7,7 +7,7 @@ import { summarizeRecursively } from "./summarizeRecursively.js";
  * while leaving enough space for the model to generate text.
  */
 export async function summarizeRecursivelyWithTextGenerationAndTokenSplitting({ text, model, prompt, tokenLimit = model.contextWindowSize -
-    (model.maxCompletionTokens ?? model.contextWindowSize / 4), join, }, options) {
+    (model.settings.maxCompletionTokens ?? model.contextWindowSize / 4), join, }, options) {
    const emptyPromptTokens = await model.countPromptTokens(await prompt({ text: "" }));
    return summarizeRecursively({
        split: splitAtToken({
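The default token limit used by these summarize helpers (and by trimChatPrompt further below) is now derived from `model.settings.maxCompletionTokens`. A quick worked sketch of the formula with illustrative numbers (not taken from the package):

```ts
// tokenLimit = contextWindowSize - (maxCompletionTokens ?? contextWindowSize / 4)
const contextWindowSize = 4096;
const maxCompletionTokens = 1000;

// with maxCompletionTokens set: 4096 - 1000 = 3096 tokens remain for the prompt
const tokenLimitWithSetting = contextWindowSize - maxCompletionTokens;

// without it, a quarter of the window is reserved: 4096 - 1024 = 3072
const tokenLimitDefault = contextWindowSize - contextWindowSize / 4;

console.log(tokenLimitWithSetting, tokenLimitDefault); // 3096 3072
```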
package/model-function/Model.d.ts
CHANGED
@@ -12,11 +12,11 @@ export interface Model<SETTINGS> {
     * @example
     * const model = new OpenAITextGenerationModel({
     *   model: "text-davinci-003",
-    *
+    *   maxCompletionTokens: 500,
     * });
     *
     * const modelWithMoreTokens = model.withSettings({
-    *
+    *   maxCompletionTokens: 1000,
     * });
     */
    withSettings(additionalSettings: Partial<SETTINGS>): this;
package/model-function/generate-text/TextGenerationModel.d.ts
CHANGED
@@ -5,7 +5,21 @@ import { Model, ModelSettings } from "../Model.js";
 import { BasicTokenizer, FullTokenizer } from "../tokenize-text/Tokenizer.js";
 import { DeltaEvent } from "./DeltaEvent.js";
 export interface TextGenerationModelSettings extends ModelSettings {
-
+    /**
+     * Maximum number of tokens to generate.
+     * Does nothing if the model does not support this setting.
+     */
+    maxCompletionTokens?: number | undefined;
+    /**
+     * Stop sequences to use. Stop sequences are not included in the generated text.
+     * Does nothing if the model does not support this setting.
+     */
+    stopSequences?: string[] | undefined;
+    /**
+     * When true, the leading and trailing white space and line terminator characters
+     * are removed from the generated text.
+     */
+    trimWhitespace?: boolean;
 }
 export interface TextGenerationModel<PROMPT, RESPONSE, FULL_DELTA, SETTINGS extends TextGenerationModelSettings> extends Model<SETTINGS> {
     readonly contextWindowSize: number | undefined;
@@ -25,18 +39,4 @@ export interface TextGenerationModel<PROMPT, RESPONSE, FULL_DELTA, SETTINGS exte
     */
    readonly extractTextDelta: ((fullDelta: FULL_DELTA) => string | undefined) | undefined;
    withPromptFormat<INPUT_PROMPT>(promptFormat: PromptFormat<INPUT_PROMPT, PROMPT>): PromptFormatTextGenerationModel<INPUT_PROMPT, PROMPT, RESPONSE, FULL_DELTA, SETTINGS, this>;
-    /**
-     * Maximum number of tokens to generate.
-     */
-    readonly maxCompletionTokens: number | undefined;
-    /**
-     * Sets the maximum number of tokens to generate.
-     * Does nothing if the model does not support this setting.
-     */
-    withMaxCompletionTokens(maxCompletionTokens: number): this;
-    /**
-     * Sets the stop tokens to use. Stop tokens are not included in the generated text.
-     * Does nothing if the model does not support this setting.
-     */
-    withStopTokens(stopTokens: string[]): this;
 }
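These interface changes are the core of 0.15.0: `maxCompletionTokens`, `stopSequences`, and `trimWhitespace` move into the shared `TextGenerationModelSettings`, and the per-model `withMaxCompletionTokens` / `withStopTokens` helpers are removed. A minimal migration sketch, assuming the model classes are imported from the `modelfusion` package root:

```ts
import { OpenAITextGenerationModel } from "modelfusion";

// before (0.14.x): fluent helpers on the model
// const model = new OpenAITextGenerationModel({ model: "text-davinci-003" })
//   .withMaxCompletionTokens(500)
//   .withStopTokens(["\n"]);

// after (0.15.0): plain settings shared across all text generation models
const model = new OpenAITextGenerationModel({
  model: "text-davinci-003",
  maxCompletionTokens: 500,
  stopSequences: ["\n"],
});

// settings can still be adjusted immutably via withSettings
const modelWithMoreTokens = model.withSettings({ maxCompletionTokens: 1000 });
```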
package/model-function/generate-text/generateText.cjs
CHANGED
@@ -10,8 +10,8 @@ model, prompt, options) {
     options,
     generateResponse: (options) => model.generateTextResponse(prompt, options),
     extractOutputValue: (result) => {
-        const
-        return
+        const shouldTrimWhitespace = model.settings.trimWhitespace ?? true;
+        return shouldTrimWhitespace
             ? model.extractText(result).trim()
             : model.extractText(result);
     },
package/model-function/generate-text/generateText.js
CHANGED
@@ -7,8 +7,8 @@ model, prompt, options) {
     options,
     generateResponse: (options) => model.generateTextResponse(prompt, options),
     extractOutputValue: (result) => {
-        const
-        return
+        const shouldTrimWhitespace = model.settings.trimWhitespace ?? true;
+        return shouldTrimWhitespace
             ? model.extractText(result).trim()
             : model.extractText(result);
     },
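generateText now trims leading and trailing whitespace from the output by default and reads the switch from `model.settings.trimWhitespace`. A small sketch of opting out; the setting name comes from the diff above, the model and prompt are illustrative:

```ts
import { generateText, OpenAITextGenerationModel } from "modelfusion";

const model = new OpenAITextGenerationModel({
  model: "text-davinci-003",
  maxCompletionTokens: 200,
  trimWhitespace: false, // keep leading/trailing whitespace and line terminators
});

const text = await generateText(model, "Write a haiku about version diffs:\n\n");
```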
package/model-provider/cohere/CohereTextGenerationModel.cjs
CHANGED
@@ -37,7 +37,7 @@ exports.COHERE_TEXT_GENERATION_MODELS = {
  * const model = new CohereTextGenerationModel({
  *   model: "command-nightly",
  *   temperature: 0.7,
- *
+ *   maxCompletionTokens: 500,
  * });
  *
  * const text = await generateText(
@@ -91,13 +91,21 @@ class CohereTextGenerationModel extends AbstractModel_js_1.AbstractModel {
     }
     async callAPI(prompt, options) {
         const { run, settings, responseFormat } = options;
-        const
+        const combinedSettings = {
+            ...this.settings,
+            settings,
+        };
+        const callSettings = {
             apiKey: this.apiKey,
-
+            ...combinedSettings,
+            // use endSequences instead of stopSequences
+            // to exclude stop tokens from the generated text
+            endSequences: combinedSettings.stopSequences,
+            maxTokens: combinedSettings.maxCompletionTokens,
             abortSignal: run?.abortSignal,
             prompt,
             responseFormat,
-        }
+        };
         return (0, callWithRetryAndThrottle_js_1.callWithRetryAndThrottle)({
             retry: this.settings.retry,
             throttle: this.settings.throttle,
@@ -124,24 +132,15 @@ class CohereTextGenerationModel extends AbstractModel_js_1.AbstractModel {
     }
     withPromptFormat(promptFormat) {
         return new PromptFormatTextGenerationModel_js_1.PromptFormatTextGenerationModel({
-            model: this.
+            model: this.withSettings({
+                stopSequences: promptFormat.stopSequences,
+            }),
             promptFormat,
         });
     }
     withSettings(additionalSettings) {
         return new CohereTextGenerationModel(Object.assign({}, this.settings, additionalSettings));
     }
-    get maxCompletionTokens() {
-        return this.settings.maxTokens;
-    }
-    withMaxCompletionTokens(maxCompletionTokens) {
-        return this.withSettings({ maxTokens: maxCompletionTokens });
-    }
-    withStopTokens(stopTokens) {
-        // use endSequences instead of stopSequences
-        // to exclude stop tokens from the generated text
-        return this.withSettings({ endSequences: stopTokens });
-    }
 }
 exports.CohereTextGenerationModel = CohereTextGenerationModel;
 const cohereTextGenerationResponseSchema = zod_1.z.object({
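The provider classes now translate the generic settings into their native API parameters inside callAPI; for Cohere, `stopSequences` is sent as `endSequences` (so stop tokens are excluded from the generated text) and `maxCompletionTokens` becomes `maxTokens`. From the caller's side only the generic names are used. A hedged sketch, assuming imports from the `modelfusion` root:

```ts
import { generateText, CohereTextGenerationModel } from "modelfusion";

const model = new CohereTextGenerationModel({
  model: "command-nightly",
  temperature: 0.7,
  maxCompletionTokens: 500, // forwarded to the Cohere API as maxTokens
  stopSequences: ["\n\n"],  // forwarded to the Cohere API as endSequences
});

const text = await generateText(model, "Write a short product description:\n\n");
```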
package/model-provider/cohere/CohereTextGenerationModel.d.ts
CHANGED
@@ -35,13 +35,11 @@ export interface CohereTextGenerationModelSettings extends TextGenerationModelSe
         throttle?: ThrottleFunction;
     };
     numGenerations?: number;
-    maxTokens?: number;
     temperature?: number;
     k?: number;
     p?: number;
     frequencyPenalty?: number;
     presencePenalty?: number;
-    endSequences?: string[];
     stopSequences?: string[];
     returnLikelihoods?: "GENERATION" | "ALL" | "NONE";
     logitBias?: Record<string, number>;
@@ -56,7 +54,7 @@ export interface CohereTextGenerationModelSettings extends TextGenerationModelSe
  * const model = new CohereTextGenerationModel({
  *   model: "command-nightly",
  *   temperature: 0.7,
- *
+ *   maxCompletionTokens: 500,
  * });
  *
  * const text = await generateText(
@@ -94,9 +92,6 @@ export declare class CohereTextGenerationModel extends AbstractModel<CohereTextG
     extractTextDelta(fullDelta: CohereTextGenerationDelta): string | undefined;
     withPromptFormat<INPUT_PROMPT>(promptFormat: PromptFormat<INPUT_PROMPT, string>): PromptFormatTextGenerationModel<INPUT_PROMPT, string, CohereTextGenerationResponse, CohereTextGenerationDelta, CohereTextGenerationModelSettings, this>;
     withSettings(additionalSettings: Partial<CohereTextGenerationModelSettings>): this;
-    get maxCompletionTokens(): number | undefined;
-    withMaxCompletionTokens(maxCompletionTokens: number): this;
-    withStopTokens(stopTokens: string[]): this;
 }
 declare const cohereTextGenerationResponseSchema: z.ZodObject<{
     id: z.ZodString;
package/model-provider/cohere/CohereTextGenerationModel.js
CHANGED
@@ -31,7 +31,7 @@ export const COHERE_TEXT_GENERATION_MODELS = {
  * const model = new CohereTextGenerationModel({
  *   model: "command-nightly",
  *   temperature: 0.7,
- *
+ *   maxCompletionTokens: 500,
  * });
  *
  * const text = await generateText(
@@ -85,13 +85,21 @@ export class CohereTextGenerationModel extends AbstractModel {
     }
     async callAPI(prompt, options) {
         const { run, settings, responseFormat } = options;
-        const
+        const combinedSettings = {
+            ...this.settings,
+            settings,
+        };
+        const callSettings = {
             apiKey: this.apiKey,
-
+            ...combinedSettings,
+            // use endSequences instead of stopSequences
+            // to exclude stop tokens from the generated text
+            endSequences: combinedSettings.stopSequences,
+            maxTokens: combinedSettings.maxCompletionTokens,
             abortSignal: run?.abortSignal,
             prompt,
             responseFormat,
-        }
+        };
         return callWithRetryAndThrottle({
             retry: this.settings.retry,
             throttle: this.settings.throttle,
@@ -118,24 +126,15 @@ export class CohereTextGenerationModel extends AbstractModel {
     }
     withPromptFormat(promptFormat) {
         return new PromptFormatTextGenerationModel({
-            model: this.
+            model: this.withSettings({
+                stopSequences: promptFormat.stopSequences,
+            }),
             promptFormat,
         });
     }
     withSettings(additionalSettings) {
         return new CohereTextGenerationModel(Object.assign({}, this.settings, additionalSettings));
     }
-    get maxCompletionTokens() {
-        return this.settings.maxTokens;
-    }
-    withMaxCompletionTokens(maxCompletionTokens) {
-        return this.withSettings({ maxTokens: maxCompletionTokens });
-    }
-    withStopTokens(stopTokens) {
-        // use endSequences instead of stopSequences
-        // to exclude stop tokens from the generated text
-        return this.withSettings({ endSequences: stopTokens });
-    }
 }
 const cohereTextGenerationResponseSchema = z.object({
     id: z.string(),
package/model-provider/huggingface/HuggingFaceTextGenerationModel.cjs
CHANGED
@@ -19,7 +19,7 @@ const PromptFormatTextGenerationModel_js_1 = require("../../prompt/PromptFormatT
  * const model = new HuggingFaceTextGenerationModel({
  *   model: "tiiuae/falcon-7b",
  *   temperature: 0.7,
- *
+ *   maxCompletionTokens: 500,
  *   retry: retryWithExponentialBackoff({ maxTries: 5 }),
  * });
  *
@@ -81,16 +81,21 @@ class HuggingFaceTextGenerationModel extends AbstractModel_js_1.AbstractModel {
     async callAPI(prompt, options) {
         const run = options?.run;
         const settings = options?.settings;
-        const
+        const combinedSettings = {
+            ...this.settings,
+            ...settings,
+        };
+        const callSettings = {
             apiKey: this.apiKey,
             options: {
                 useCache: true,
                 waitForModel: true,
             },
-
+            ...combinedSettings,
+            maxNewTokens: combinedSettings.maxCompletionTokens,
             abortSignal: run?.abortSignal,
             inputs: prompt,
-        }
+        };
         return (0, callWithRetryAndThrottle_js_1.callWithRetryAndThrottle)({
             retry: this.settings.retry,
             throttle: this.settings.throttle,
@@ -112,16 +117,6 @@ class HuggingFaceTextGenerationModel extends AbstractModel_js_1.AbstractModel {
     withSettings(additionalSettings) {
         return new HuggingFaceTextGenerationModel(Object.assign({}, this.settings, additionalSettings));
     }
-    get maxCompletionTokens() {
-        return this.settings.maxNewTokens;
-    }
-    withMaxCompletionTokens(maxCompletionTokens) {
-        return this.withSettings({ maxNewTokens: maxCompletionTokens });
-    }
-    withStopTokens() {
-        // stop tokens are not supported by the HuggingFace API
-        return this;
-    }
 }
 exports.HuggingFaceTextGenerationModel = HuggingFaceTextGenerationModel;
 const huggingFaceTextGenerationResponseSchema = zod_1.default.array(zod_1.default.object({
package/model-provider/huggingface/HuggingFaceTextGenerationModel.d.ts
CHANGED
@@ -16,7 +16,6 @@ export interface HuggingFaceTextGenerationModelSettings extends TextGenerationMo
     topP?: number;
     temperature?: number;
     repetitionPenalty?: number;
-    maxNewTokens?: number;
     maxTime?: number;
     numReturnSequences?: number;
     doSample?: boolean;
@@ -34,7 +33,7 @@ export interface HuggingFaceTextGenerationModelSettings extends TextGenerationMo
  * const model = new HuggingFaceTextGenerationModel({
  *   model: "tiiuae/falcon-7b",
  *   temperature: 0.7,
- *
+ *   maxCompletionTokens: 500,
  *   retry: retryWithExponentialBackoff({ maxTries: 5 }),
  * });
  *
@@ -60,9 +59,6 @@ export declare class HuggingFaceTextGenerationModel extends AbstractModel<Huggin
     extractTextDelta: undefined;
     withPromptFormat<INPUT_PROMPT>(promptFormat: PromptFormat<INPUT_PROMPT, string>): PromptFormatTextGenerationModel<INPUT_PROMPT, string, HuggingFaceTextGenerationResponse, undefined, HuggingFaceTextGenerationModelSettings, this>;
     withSettings(additionalSettings: Partial<HuggingFaceTextGenerationModelSettings>): this;
-    get maxCompletionTokens(): number | undefined;
-    withMaxCompletionTokens(maxCompletionTokens: number): this;
-    withStopTokens(): this;
 }
 declare const huggingFaceTextGenerationResponseSchema: z.ZodArray<z.ZodObject<{
     generated_text: z.ZodString;
package/model-provider/huggingface/HuggingFaceTextGenerationModel.js
CHANGED
@@ -13,7 +13,7 @@ import { PromptFormatTextGenerationModel } from "../../prompt/PromptFormatTextGe
  * const model = new HuggingFaceTextGenerationModel({
  *   model: "tiiuae/falcon-7b",
  *   temperature: 0.7,
- *
+ *   maxCompletionTokens: 500,
  *   retry: retryWithExponentialBackoff({ maxTries: 5 }),
  * });
  *
@@ -75,16 +75,21 @@ export class HuggingFaceTextGenerationModel extends AbstractModel {
     async callAPI(prompt, options) {
         const run = options?.run;
         const settings = options?.settings;
-        const
+        const combinedSettings = {
+            ...this.settings,
+            ...settings,
+        };
+        const callSettings = {
             apiKey: this.apiKey,
             options: {
                 useCache: true,
                 waitForModel: true,
             },
-
+            ...combinedSettings,
+            maxNewTokens: combinedSettings.maxCompletionTokens,
             abortSignal: run?.abortSignal,
             inputs: prompt,
-        }
+        };
         return callWithRetryAndThrottle({
             retry: this.settings.retry,
             throttle: this.settings.throttle,
@@ -106,16 +111,6 @@ export class HuggingFaceTextGenerationModel extends AbstractModel {
     withSettings(additionalSettings) {
         return new HuggingFaceTextGenerationModel(Object.assign({}, this.settings, additionalSettings));
     }
-    get maxCompletionTokens() {
-        return this.settings.maxNewTokens;
-    }
-    withMaxCompletionTokens(maxCompletionTokens) {
-        return this.withSettings({ maxNewTokens: maxCompletionTokens });
-    }
-    withStopTokens() {
-        // stop tokens are not supported by the HuggingFace API
-        return this;
-    }
 }
 const huggingFaceTextGenerationResponseSchema = z.array(z.object({
     generated_text: z.string(),
package/model-provider/llamacpp/LlamaCppTextGenerationModel.cjs
CHANGED
@@ -43,11 +43,18 @@ class LlamaCppTextGenerationModel extends AbstractModel_js_1.AbstractModel {
     }
     async callAPI(prompt, options) {
         const { run, settings, responseFormat } = options;
-        const
+        const combinedSettings = {
+            ...this.settings,
+            ...settings,
+        };
+        const callSettings = {
+            ...combinedSettings,
+            nPredict: combinedSettings.maxCompletionTokens,
+            stop: combinedSettings.stopSequences,
             abortSignal: run?.abortSignal,
             prompt,
             responseFormat,
-        }
+        };
         return (0, callWithRetryAndThrottle_js_1.callWithRetryAndThrottle)({
             retry: this.settings.retry,
             throttle: this.settings.throttle,
@@ -78,22 +85,15 @@ class LlamaCppTextGenerationModel extends AbstractModel_js_1.AbstractModel {
     }
     withPromptFormat(promptFormat) {
         return new PromptFormatTextGenerationModel_js_1.PromptFormatTextGenerationModel({
-            model: this.
+            model: this.withSettings({
+                stopSequences: promptFormat.stopSequences,
+            }),
             promptFormat,
         });
     }
     withSettings(additionalSettings) {
         return new LlamaCppTextGenerationModel(Object.assign({}, this.settings, additionalSettings));
     }
-    get maxCompletionTokens() {
-        return this.settings.nPredict;
-    }
-    withMaxCompletionTokens(maxCompletionTokens) {
-        return this.withSettings({ nPredict: maxCompletionTokens });
-    }
-    withStopTokens(stopTokens) {
-        return this.withSettings({ stop: stopTokens });
-    }
 }
 exports.LlamaCppTextGenerationModel = LlamaCppTextGenerationModel;
 const llamaCppTextGenerationResponseSchema = zod_1.default.object({
package/model-provider/llamacpp/LlamaCppTextGenerationModel.d.ts
CHANGED
@@ -25,9 +25,7 @@ export interface LlamaCppTextGenerationModelSettings<CONTEXT_WINDOW_SIZE extends
     temperature?: number;
     topK?: number;
     topP?: number;
-    nPredict?: number;
     nKeep?: number;
-    stop?: string[];
     tfsZ?: number;
     typicalP?: number;
     repeatPenalty?: number;
@@ -59,8 +57,8 @@ export declare class LlamaCppTextGenerationModel<CONTEXT_WINDOW_SIZE extends num
     model: string;
     stream: boolean;
     seed: number;
-    stop: string[];
     mirostat: number;
+    stop: string[];
     frequency_penalty: number;
     ignore_eos: boolean;
     logit_bias: number[];
@@ -104,9 +102,6 @@ export declare class LlamaCppTextGenerationModel<CONTEXT_WINDOW_SIZE extends num
     extractTextDelta(fullDelta: LlamaCppTextGenerationDelta): string | undefined;
     withPromptFormat<INPUT_PROMPT>(promptFormat: PromptFormat<INPUT_PROMPT, string>): PromptFormatTextGenerationModel<INPUT_PROMPT, string, LlamaCppTextGenerationResponse, LlamaCppTextGenerationDelta, LlamaCppTextGenerationModelSettings<CONTEXT_WINDOW_SIZE>, this>;
     withSettings(additionalSettings: Partial<LlamaCppTextGenerationModelSettings<CONTEXT_WINDOW_SIZE>>): this;
-    get maxCompletionTokens(): number | undefined;
-    withMaxCompletionTokens(maxCompletionTokens: number): this;
-    withStopTokens(stopTokens: string[]): this;
 }
 declare const llamaCppTextGenerationResponseSchema: z.ZodObject<{
     content: z.ZodString;
@@ -139,8 +134,8 @@ declare const llamaCppTextGenerationResponseSchema: z.ZodObject<{
     model: string;
     stream: boolean;
     seed: number;
-    stop: string[];
     mirostat: number;
+    stop: string[];
     frequency_penalty: number;
     ignore_eos: boolean;
     logit_bias: number[];
@@ -163,8 +158,8 @@ declare const llamaCppTextGenerationResponseSchema: z.ZodObject<{
     model: string;
     stream: boolean;
     seed: number;
-    stop: string[];
     mirostat: number;
+    stop: string[];
     frequency_penalty: number;
     ignore_eos: boolean;
     logit_bias: number[];
@@ -231,8 +226,8 @@ declare const llamaCppTextGenerationResponseSchema: z.ZodObject<{
     model: string;
     stream: boolean;
     seed: number;
-    stop: string[];
     mirostat: number;
+    stop: string[];
     frequency_penalty: number;
     ignore_eos: boolean;
     logit_bias: number[];
@@ -279,8 +274,8 @@ declare const llamaCppTextGenerationResponseSchema: z.ZodObject<{
     model: string;
     stream: boolean;
     seed: number;
-    stop: string[];
     mirostat: number;
+    stop: string[];
     frequency_penalty: number;
     ignore_eos: boolean;
     logit_bias: number[];
@@ -344,8 +339,8 @@ export declare const LlamaCppTextGenerationResponseFormat: {
     model: string;
     stream: boolean;
     seed: number;
-    stop: string[];
     mirostat: number;
+    stop: string[];
     frequency_penalty: number;
     ignore_eos: boolean;
     logit_bias: number[];
package/model-provider/llamacpp/LlamaCppTextGenerationModel.js
CHANGED
@@ -37,11 +37,18 @@ export class LlamaCppTextGenerationModel extends AbstractModel {
     }
     async callAPI(prompt, options) {
         const { run, settings, responseFormat } = options;
-        const
+        const combinedSettings = {
+            ...this.settings,
+            ...settings,
+        };
+        const callSettings = {
+            ...combinedSettings,
+            nPredict: combinedSettings.maxCompletionTokens,
+            stop: combinedSettings.stopSequences,
             abortSignal: run?.abortSignal,
             prompt,
             responseFormat,
-        }
+        };
         return callWithRetryAndThrottle({
             retry: this.settings.retry,
             throttle: this.settings.throttle,
@@ -72,22 +79,15 @@ export class LlamaCppTextGenerationModel extends AbstractModel {
     }
     withPromptFormat(promptFormat) {
         return new PromptFormatTextGenerationModel({
-            model: this.
+            model: this.withSettings({
+                stopSequences: promptFormat.stopSequences,
+            }),
             promptFormat,
         });
     }
     withSettings(additionalSettings) {
         return new LlamaCppTextGenerationModel(Object.assign({}, this.settings, additionalSettings));
     }
-    get maxCompletionTokens() {
-        return this.settings.nPredict;
-    }
-    withMaxCompletionTokens(maxCompletionTokens) {
-        return this.withSettings({ nPredict: maxCompletionTokens });
-    }
-    withStopTokens(stopTokens) {
-        return this.withSettings({ stop: stopTokens });
-    }
 }
 const llamaCppTextGenerationResponseSchema = z.object({
     content: z.string(),
package/model-provider/openai/OpenAITextGenerationModel.cjs
CHANGED
@@ -75,7 +75,7 @@ exports.calculateOpenAITextGenerationCostInMillicents = calculateOpenAITextGener
  * const model = new OpenAITextGenerationModel({
  *   model: "text-davinci-003",
  *   temperature: 0.7,
- *
+ *   maxCompletionTokens: 500,
  *   retry: retryWithExponentialBackoff({ maxTries: 5 }),
  * });
  *
@@ -124,14 +124,23 @@ class OpenAITextGenerationModel extends AbstractModel_js_1.AbstractModel {
     }
     async callAPI(prompt, options) {
         const { run, settings, responseFormat } = options;
-        const
+        const combinedSettings = {
+            ...this.settings,
+            ...settings,
+        };
+        const callSettings = {
             apiKey: this.apiKey,
             user: this.settings.isUserIdForwardingEnabled ? run?.userId : undefined,
-
+            // Copied settings:
+            ...combinedSettings,
+            // map to OpenAI API names:
+            stop: combinedSettings.stopSequences,
+            maxTokens: combinedSettings.maxCompletionTokens,
+            // other settings:
             abortSignal: run?.abortSignal,
             prompt,
             responseFormat,
-        }
+        };
         return (0, callWithRetryAndThrottle_js_1.callWithRetryAndThrottle)({
             retry: callSettings.retry,
             throttle: callSettings.throttle,
@@ -158,22 +167,15 @@ class OpenAITextGenerationModel extends AbstractModel_js_1.AbstractModel {
     }
     withPromptFormat(promptFormat) {
         return new PromptFormatTextGenerationModel_js_1.PromptFormatTextGenerationModel({
-            model: this.
+            model: this.withSettings({
+                stopSequences: promptFormat.stopSequences,
+            }),
             promptFormat,
         });
     }
     withSettings(additionalSettings) {
         return new OpenAITextGenerationModel(Object.assign({}, this.settings, additionalSettings));
     }
-    get maxCompletionTokens() {
-        return this.settings.maxTokens;
-    }
-    withMaxCompletionTokens(maxCompletionTokens) {
-        return this.withSettings({ maxTokens: maxCompletionTokens });
-    }
-    withStopTokens(stopTokens) {
-        return this.withSettings({ stop: stopTokens });
-    }
 }
 exports.OpenAITextGenerationModel = OpenAITextGenerationModel;
 const openAITextGenerationResponseSchema = zod_1.default.object({
package/model-provider/openai/OpenAITextGenerationModel.d.ts
CHANGED
@@ -72,13 +72,11 @@ export interface OpenAITextGenerationModelSettings extends TextGenerationModelSe
     throttle?: ThrottleFunction;
     isUserIdForwardingEnabled?: boolean;
     suffix?: string;
-    maxTokens?: number;
     temperature?: number;
     topP?: number;
     n?: number;
     logprobs?: number;
     echo?: boolean;
-    stop?: string | string[];
     presencePenalty?: number;
     frequencyPenalty?: number;
     bestOf?: number;
@@ -92,7 +90,7 @@ export interface OpenAITextGenerationModelSettings extends TextGenerationModelSe
  * const model = new OpenAITextGenerationModel({
  *   model: "text-davinci-003",
  *   temperature: 0.7,
- *
+ *   maxCompletionTokens: 500,
  *   retry: retryWithExponentialBackoff({ maxTries: 5 }),
  * });
  *
@@ -136,9 +134,6 @@ export declare class OpenAITextGenerationModel extends AbstractModel<OpenAITextG
     extractTextDelta(fullDelta: OpenAITextGenerationDelta): string | undefined;
     withPromptFormat<INPUT_PROMPT>(promptFormat: PromptFormat<INPUT_PROMPT, string>): PromptFormatTextGenerationModel<INPUT_PROMPT, string, OpenAITextGenerationResponse, OpenAITextGenerationDelta, OpenAITextGenerationModelSettings, this>;
     withSettings(additionalSettings: Partial<OpenAITextGenerationModelSettings>): this;
-    get maxCompletionTokens(): number | undefined;
-    withMaxCompletionTokens(maxCompletionTokens: number): this;
-    withStopTokens(stopTokens: string[]): this;
 }
 declare const openAITextGenerationResponseSchema: z.ZodObject<{
     id: z.ZodString;
package/model-provider/openai/OpenAITextGenerationModel.js
CHANGED
@@ -67,7 +67,7 @@ export const calculateOpenAITextGenerationCostInMillicents = ({ model, response,
  * const model = new OpenAITextGenerationModel({
  *   model: "text-davinci-003",
  *   temperature: 0.7,
- *
+ *   maxCompletionTokens: 500,
  *   retry: retryWithExponentialBackoff({ maxTries: 5 }),
  * });
  *
@@ -116,14 +116,23 @@ export class OpenAITextGenerationModel extends AbstractModel {
     }
     async callAPI(prompt, options) {
         const { run, settings, responseFormat } = options;
-        const
+        const combinedSettings = {
+            ...this.settings,
+            ...settings,
+        };
+        const callSettings = {
             apiKey: this.apiKey,
             user: this.settings.isUserIdForwardingEnabled ? run?.userId : undefined,
-
+            // Copied settings:
+            ...combinedSettings,
+            // map to OpenAI API names:
+            stop: combinedSettings.stopSequences,
+            maxTokens: combinedSettings.maxCompletionTokens,
+            // other settings:
             abortSignal: run?.abortSignal,
             prompt,
             responseFormat,
-        }
+        };
         return callWithRetryAndThrottle({
             retry: callSettings.retry,
             throttle: callSettings.throttle,
@@ -150,22 +159,15 @@ export class OpenAITextGenerationModel extends AbstractModel {
     }
     withPromptFormat(promptFormat) {
         return new PromptFormatTextGenerationModel({
-            model: this.
+            model: this.withSettings({
+                stopSequences: promptFormat.stopSequences,
+            }),
             promptFormat,
         });
     }
     withSettings(additionalSettings) {
         return new OpenAITextGenerationModel(Object.assign({}, this.settings, additionalSettings));
     }
-    get maxCompletionTokens() {
-        return this.settings.maxTokens;
-    }
-    withMaxCompletionTokens(maxCompletionTokens) {
-        return this.withSettings({ maxTokens: maxCompletionTokens });
-    }
-    withStopTokens(stopTokens) {
-        return this.withSettings({ stop: stopTokens });
-    }
 }
 const openAITextGenerationResponseSchema = z.object({
     id: z.string(),
package/model-provider/openai/chat/OpenAIChatModel.cjs
CHANGED
@@ -93,7 +93,7 @@ exports.calculateOpenAIChatCostInMillicents = calculateOpenAIChatCostInMillicent
  * const model = new OpenAIChatModel({
  *   model: "gpt-3.5-turbo",
  *   temperature: 0.7,
- *
+ *   maxCompletionTokens: 500,
  * });
  *
  * const text = await generateText([
@@ -150,14 +150,20 @@ class OpenAIChatModel extends AbstractModel_js_1.AbstractModel {
     }
     async callAPI(messages, options) {
         const { run, settings, responseFormat } = options;
-        const
+        const combinedSettings = {
+            ...this.settings,
+            ...settings,
+        };
+        const callSettings = {
             apiKey: this.apiKey,
             user: this.settings.isUserIdForwardingEnabled ? run?.userId : undefined,
-
+            ...combinedSettings,
+            stop: combinedSettings.stopSequences,
+            maxTokens: combinedSettings.maxCompletionTokens,
             abortSignal: run?.abortSignal,
             messages,
             responseFormat,
-        }
+        };
         return (0, callWithRetryAndThrottle_js_1.callWithRetryAndThrottle)({
             retry: callSettings.retry,
             throttle: callSettings.throttle,
@@ -207,22 +213,13 @@ class OpenAIChatModel extends AbstractModel_js_1.AbstractModel {
     }
     withPromptFormat(promptFormat) {
         return new PromptFormatTextGenerationModel_js_1.PromptFormatTextGenerationModel({
-            model: this.
+            model: this.withSettings({ stopSequences: promptFormat.stopSequences }),
             promptFormat,
         });
     }
     withSettings(additionalSettings) {
         return new OpenAIChatModel(Object.assign({}, this.settings, additionalSettings));
     }
-    get maxCompletionTokens() {
-        return this.settings.maxTokens;
-    }
-    withMaxCompletionTokens(maxCompletionTokens) {
-        return this.withSettings({ maxTokens: maxCompletionTokens });
-    }
-    withStopTokens(stopTokens) {
-        return this.withSettings({ stop: stopTokens });
-    }
 }
 exports.OpenAIChatModel = OpenAIChatModel;
 const openAIChatResponseSchema = zod_1.default.object({
package/model-provider/openai/chat/OpenAIChatModel.d.ts
CHANGED
@@ -95,7 +95,7 @@ export interface OpenAIChatCallSettings {
     presencePenalty?: number;
     frequencyPenalty?: number;
 }
-export interface OpenAIChatSettings extends TextGenerationModelSettings, OpenAIModelSettings, OpenAIChatCallSettings {
+export interface OpenAIChatSettings extends TextGenerationModelSettings, OpenAIModelSettings, Omit<OpenAIChatCallSettings, "stop" | "maxTokens"> {
     isUserIdForwardingEnabled?: boolean;
 }
 /**
@@ -107,7 +107,7 @@ export interface OpenAIChatSettings extends TextGenerationModelSettings, OpenAIM
  * const model = new OpenAIChatModel({
  *   model: "gpt-3.5-turbo",
  *   temperature: 0.7,
- *
+ *   maxCompletionTokens: 500,
  * });
  *
  * const text = await generateText([
@@ -172,9 +172,6 @@ export declare class OpenAIChatModel extends AbstractModel<OpenAIChatSettings> i
     extractJson(response: OpenAIChatResponse): unknown;
     withPromptFormat<INPUT_PROMPT>(promptFormat: PromptFormat<INPUT_PROMPT, OpenAIChatMessage[]>): PromptFormatTextGenerationModel<INPUT_PROMPT, OpenAIChatMessage[], OpenAIChatResponse, OpenAIChatDelta, OpenAIChatSettings, this>;
     withSettings(additionalSettings: Partial<OpenAIChatSettings>): this;
-    get maxCompletionTokens(): number | undefined;
-    withMaxCompletionTokens(maxCompletionTokens: number): this;
-    withStopTokens(stopTokens: string[]): this;
 }
 declare const openAIChatResponseSchema: z.ZodObject<{
     id: z.ZodString;
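OpenAIChatSettings now extends Omit<OpenAIChatCallSettings, "stop" | "maxTokens">, so the raw OpenAI parameter names are hidden from the settings surface and only the generic stopSequences / maxCompletionTokens remain; callAPI re-introduces stop and maxTokens when it builds the request. A small type-level sketch of the pattern (simplified, not the package's exact declarations):

```ts
// Raw call settings mirror the OpenAI API parameter names.
interface ChatCallSettings {
  temperature?: number;
  stop?: string | string[];
  maxTokens?: number;
}

// Generic settings expose the cross-provider names instead.
interface GenericTextSettings {
  stopSequences?: string[];
  maxCompletionTokens?: number;
}

// Omit removes the raw names so they cannot conflict with the generic ones.
type ChatSettings = GenericTextSettings & Omit<ChatCallSettings, "stop" | "maxTokens">;

const settings: ChatSettings = {
  temperature: 0.7,
  stopSequences: ["\n"],
  maxCompletionTokens: 500,
  // stop: ["\n"], // type error: 'stop' does not exist on ChatSettings
};
```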
package/model-provider/openai/chat/OpenAIChatModel.js
CHANGED
@@ -85,7 +85,7 @@ export const calculateOpenAIChatCostInMillicents = ({ model, response, }) => res
  * const model = new OpenAIChatModel({
  *   model: "gpt-3.5-turbo",
  *   temperature: 0.7,
- *
+ *   maxCompletionTokens: 500,
  * });
  *
  * const text = await generateText([
@@ -142,14 +142,20 @@ export class OpenAIChatModel extends AbstractModel {
     }
     async callAPI(messages, options) {
         const { run, settings, responseFormat } = options;
-        const
+        const combinedSettings = {
+            ...this.settings,
+            ...settings,
+        };
+        const callSettings = {
             apiKey: this.apiKey,
             user: this.settings.isUserIdForwardingEnabled ? run?.userId : undefined,
-
+            ...combinedSettings,
+            stop: combinedSettings.stopSequences,
+            maxTokens: combinedSettings.maxCompletionTokens,
             abortSignal: run?.abortSignal,
             messages,
             responseFormat,
-        }
+        };
         return callWithRetryAndThrottle({
             retry: callSettings.retry,
             throttle: callSettings.throttle,
@@ -199,22 +205,13 @@ export class OpenAIChatModel extends AbstractModel {
     }
     withPromptFormat(promptFormat) {
         return new PromptFormatTextGenerationModel({
-            model: this.
+            model: this.withSettings({ stopSequences: promptFormat.stopSequences }),
             promptFormat,
         });
     }
     withSettings(additionalSettings) {
         return new OpenAIChatModel(Object.assign({}, this.settings, additionalSettings));
     }
-    get maxCompletionTokens() {
-        return this.settings.maxTokens;
-    }
-    withMaxCompletionTokens(maxCompletionTokens) {
-        return this.withSettings({ maxTokens: maxCompletionTokens });
-    }
-    withStopTokens(stopTokens) {
-        return this.withSettings({ stop: stopTokens });
-    }
 }
 const openAIChatResponseSchema = z.object({
     id: z.string(),
package/package.json
CHANGED
@@ -1,7 +1,7 @@
 {
   "name": "modelfusion",
   "description": "Build AI applications, chatbots, and agents with JavaScript and TypeScript.",
-  "version": "0.14.0",
+  "version": "0.15.0",
   "author": "Lars Grammel",
   "license": "MIT",
   "keywords": [
@@ -67,11 +67,11 @@
     "eslint": "^8.45.0",
     "eslint-config-prettier": "9.0.0",
     "husky": "^8.0.3",
-    "lint-staged": "14.0.
+    "lint-staged": "14.0.1",
     "prettier": "3.0.2",
     "rimraf": "5.0.1",
     "typescript": "5.1.6",
-    "zod": "3.22.
+    "zod": "3.22.2",
     "zod-to-json-schema": "3.21.4"
   },
   "peerDependencies": {
package/prompt/AlpacaPromptFormat.cjs
CHANGED
@@ -12,7 +12,7 @@ const DEFAULT_SYSTEM_PROMPT_NO_INPUT = "Below is an instruction that describes a
  * @see https://github.com/tatsu-lab/stanford_alpaca#data-release
  */
 const AlpacaInstructionPromptFormat = () => ({
-
+    stopSequences: [],
     format: (instruction) => {
         let text = instruction.system ??
             (instruction.input != null
package/prompt/AlpacaPromptFormat.js
CHANGED
@@ -9,7 +9,7 @@ const DEFAULT_SYSTEM_PROMPT_NO_INPUT = "Below is an instruction that describes a
  * @see https://github.com/tatsu-lab/stanford_alpaca#data-release
  */
 export const AlpacaInstructionPromptFormat = () => ({
-
+    stopSequences: [],
     format: (instruction) => {
         let text = instruction.system ??
             (instruction.input != null
package/prompt/Llama2PromptFormat.cjs
CHANGED
@@ -15,7 +15,7 @@ const END_SYSTEM = "\n<</SYS>>\n\n";
  * @see https://www.philschmid.de/llama-2#how-to-prompt-llama-2-chat
  */
 const Llama2InstructionPromptFormat = () => ({
-
+    stopSequences: [END_SEGMENT],
     format: (instruction) => `${BEGIN_SEGMENT}${BEGIN_INSTRUCTION}${instruction.system != null
         ? ` ${BEGIN_SYSTEM}${instruction.system}${END_SYSTEM}`
         : ""} ${instruction.instruction}${instruction.input != null ? `\n\n${instruction.input}` : ""} ${END_INSTRUCTION}\n`,
@@ -54,6 +54,6 @@ const Llama2ChatPromptFormat = () => ({
     }
     return text;
   },
-
+  stopSequences: [END_SEGMENT],
 });
 exports.Llama2ChatPromptFormat = Llama2ChatPromptFormat;
package/prompt/Llama2PromptFormat.js
CHANGED
@@ -12,7 +12,7 @@ const END_SYSTEM = "\n<</SYS>>\n\n";
  * @see https://www.philschmid.de/llama-2#how-to-prompt-llama-2-chat
  */
 export const Llama2InstructionPromptFormat = () => ({
-
+    stopSequences: [END_SEGMENT],
     format: (instruction) => `${BEGIN_SEGMENT}${BEGIN_INSTRUCTION}${instruction.system != null
         ? ` ${BEGIN_SYSTEM}${instruction.system}${END_SYSTEM}`
         : ""} ${instruction.instruction}${instruction.input != null ? `\n\n${instruction.input}` : ""} ${END_INSTRUCTION}\n`,
@@ -50,5 +50,5 @@ export const Llama2ChatPromptFormat = () => ({
     }
     return text;
   },
-
+  stopSequences: [END_SEGMENT],
 });
package/prompt/OpenAIChatPromptFormat.cjs
CHANGED
@@ -26,7 +26,7 @@ const OpenAIChatInstructionPromptFormat = () => ({
     }
     return messages;
   },
-
+  stopSequences: [],
 });
 exports.OpenAIChatInstructionPromptFormat = OpenAIChatInstructionPromptFormat;
 /**
@@ -69,6 +69,6 @@ const OpenAIChatChatPromptFormat = () => ({
     }
     return messages;
   },
-
+  stopSequences: [],
 });
 exports.OpenAIChatChatPromptFormat = OpenAIChatChatPromptFormat;
package/prompt/OpenAIChatPromptFormat.js
CHANGED
@@ -23,7 +23,7 @@ export const OpenAIChatInstructionPromptFormat = () => ({
     }
     return messages;
   },
-
+  stopSequences: [],
 });
 /**
  * Formats a chat prompt as an OpenAI chat prompt.
@@ -65,5 +65,5 @@ export const OpenAIChatChatPromptFormat = () => ({
     }
     return messages;
   },
-
+  stopSequences: [],
 });
package/prompt/PromptFormat.d.ts
CHANGED
@@ -7,8 +7,8 @@ export interface PromptFormat<SOURCE_PROMPT, TARGET_PROMPT> {
     */
    format(sourcePrompt: SOURCE_PROMPT): TARGET_PROMPT;
    /**
-    * The
+    * The texts that should be used as default stop sequences.
     * This is e.g. important for chat formats.
     */
-
+    stopSequences: string[];
 }
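PromptFormat implementations must now provide a stopSequences array alongside format; withPromptFormat copies those stop sequences onto the wrapped model via withSettings, as the PromptFormatTextGenerationModel diff below shows. A minimal custom format sketch, assuming PromptFormat is exported from the `modelfusion` package root:

```ts
import { PromptFormat } from "modelfusion";

// Maps a plain question string to a "Q: ... A:" text prompt.
const QuestionAnswerPromptFormat = (): PromptFormat<string, string> => ({
  format: (question) => `Q: ${question}\nA:`,
  // stop before the model starts asking a new question of its own
  stopSequences: ["\nQ:"],
});
```

Such a format would be attached with `model.withPromptFormat(QuestionAnswerPromptFormat())`, and its stop sequences then flow into the model's settings automatically.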
package/prompt/PromptFormatTextGenerationModel.cjs
CHANGED
@@ -59,7 +59,9 @@ class PromptFormatTextGenerationModel {
     }
     withPromptFormat(promptFormat) {
         return new PromptFormatTextGenerationModel({
-            model: this.
+            model: this.withSettings({
+                stopSequences: promptFormat.stopSequences,
+            }),
             promptFormat,
         });
     }
@@ -69,20 +71,5 @@ class PromptFormatTextGenerationModel {
             promptFormat: this.promptFormat,
         });
     }
-    get maxCompletionTokens() {
-        return this.model.maxCompletionTokens;
-    }
-    withMaxCompletionTokens(maxCompletionTokens) {
-        return new PromptFormatTextGenerationModel({
-            model: this.model.withMaxCompletionTokens(maxCompletionTokens),
-            promptFormat: this.promptFormat,
-        });
-    }
-    withStopTokens(stopTokens) {
-        return new PromptFormatTextGenerationModel({
-            model: this.model.withStopTokens(stopTokens),
-            promptFormat: this.promptFormat,
-        });
-    }
 }
 exports.PromptFormatTextGenerationModel = PromptFormatTextGenerationModel;
package/prompt/PromptFormatTextGenerationModel.d.ts
CHANGED
@@ -20,7 +20,4 @@ export declare class PromptFormatTextGenerationModel<PROMPT, MODEL_PROMPT, RESPO
     get extractTextDelta(): MODEL["extractTextDelta"];
     withPromptFormat<INPUT_PROMPT>(promptFormat: PromptFormat<INPUT_PROMPT, PROMPT>): PromptFormatTextGenerationModel<INPUT_PROMPT, PROMPT, RESPONSE, FULL_DELTA, SETTINGS, this>;
     withSettings(additionalSettings: Partial<SETTINGS>): this;
-    get maxCompletionTokens(): MODEL["maxCompletionTokens"];
-    withMaxCompletionTokens(maxCompletionTokens: number): this;
-    withStopTokens(stopTokens: string[]): this;
 }
package/prompt/PromptFormatTextGenerationModel.js
CHANGED
@@ -56,7 +56,9 @@ export class PromptFormatTextGenerationModel {
     }
     withPromptFormat(promptFormat) {
         return new PromptFormatTextGenerationModel({
-            model: this.
+            model: this.withSettings({
+                stopSequences: promptFormat.stopSequences,
+            }),
             promptFormat,
         });
     }
@@ -66,19 +68,4 @@ export class PromptFormatTextGenerationModel {
             promptFormat: this.promptFormat,
         });
     }
-    get maxCompletionTokens() {
-        return this.model.maxCompletionTokens;
-    }
-    withMaxCompletionTokens(maxCompletionTokens) {
-        return new PromptFormatTextGenerationModel({
-            model: this.model.withMaxCompletionTokens(maxCompletionTokens),
-            promptFormat: this.promptFormat,
-        });
-    }
-    withStopTokens(stopTokens) {
-        return new PromptFormatTextGenerationModel({
-            model: this.model.withStopTokens(stopTokens),
-            promptFormat: this.promptFormat,
-        });
-    }
 }
package/prompt/TextPromptFormat.cjs
CHANGED
@@ -6,7 +6,7 @@ const validateChatPrompt_js_1 = require("./chat/validateChatPrompt.cjs");
 * Formats an instruction prompt as a basic text prompt.
 */
 const TextInstructionPromptFormat = () => ({
-
+    stopSequences: [],
     format: (instruction) => {
         let text = "";
         if (instruction.system != null) {
@@ -56,6 +56,6 @@ const TextChatPromptFormat = ({ user, ai }) => ({
         text += `${ai}:\n`;
         return text;
     },
-
+    stopSequences: [`\n${user}:`],
 });
 exports.TextChatPromptFormat = TextChatPromptFormat;
package/prompt/TextPromptFormat.js
CHANGED
@@ -3,7 +3,7 @@ import { validateChatPrompt } from "./chat/validateChatPrompt.js";
 * Formats an instruction prompt as a basic text prompt.
 */
 export const TextInstructionPromptFormat = () => ({
-
+    stopSequences: [],
     format: (instruction) => {
         let text = "";
         if (instruction.system != null) {
@@ -52,5 +52,5 @@ export const TextChatPromptFormat = ({ user, ai }) => ({
         text += `${ai}:\n`;
         return text;
     },
-
+    stopSequences: [`\n${user}:`],
 });
package/prompt/chat/trimChatPrompt.cjs
CHANGED
@@ -13,7 +13,7 @@ const validateChatPrompt_js_1 = require("./validateChatPrompt.cjs");
 * @see https://modelfusion.dev/guide/function/generate-text/prompt-format#limiting-the-chat-length
 */
 async function trimChatPrompt({ prompt, model, tokenLimit = model.contextWindowSize -
-    (model.maxCompletionTokens ?? model.contextWindowSize / 4), }) {
+    (model.settings.maxCompletionTokens ?? model.contextWindowSize / 4), }) {
     (0, validateChatPrompt_js_1.validateChatPrompt)(prompt);
     const startsWithSystemMessage = "system" in prompt[0];
     const systemMessage = startsWithSystemMessage ? [prompt[0]] : [];
package/prompt/chat/trimChatPrompt.js
CHANGED
@@ -10,7 +10,7 @@ import { validateChatPrompt } from "./validateChatPrompt.js";
 * @see https://modelfusion.dev/guide/function/generate-text/prompt-format#limiting-the-chat-length
 */
 export async function trimChatPrompt({ prompt, model, tokenLimit = model.contextWindowSize -
-    (model.maxCompletionTokens ?? model.contextWindowSize / 4), }) {
+    (model.settings.maxCompletionTokens ?? model.contextWindowSize / 4), }) {
    validateChatPrompt(prompt);
    const startsWithSystemMessage = "system" in prompt[0];
    const systemMessage = startsWithSystemMessage ? [prompt[0]] : [];
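trimChatPrompt's default tokenLimit now reads model.settings.maxCompletionTokens instead of the removed model.maxCompletionTokens getter. A hedged usage sketch; the chat-prompt message shape is assumed from the `"system" in prompt[0]` check above, and the import path assumes the package root:

```ts
import { trimChatPrompt, LlamaCppTextGenerationModel } from "modelfusion";

const model = new LlamaCppTextGenerationModel({
  contextWindowSize: 4096,
  maxCompletionTokens: 1000, // reserves room for the answer when trimming
});

const trimmedPrompt = await trimChatPrompt({
  model,
  prompt: [
    { system: "You are a helpful assistant." },
    { user: "Summarize the 0.15.0 changes." },
  ],
});
```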