modelfusion 0.98.0 → 0.100.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +13 -19
- package/composed-function/summarize/summarizeRecursivelyWithTextGenerationAndTokenSplitting.cjs +1 -1
- package/composed-function/summarize/summarizeRecursivelyWithTextGenerationAndTokenSplitting.js +1 -1
- package/guard/fixStructure.cjs +3 -3
- package/guard/fixStructure.d.ts +3 -3
- package/guard/fixStructure.js +3 -3
- package/model-function/Model.d.ts +2 -2
- package/model-function/generate-structure/generateStructure.d.ts +2 -2
- package/model-function/generate-structure/streamStructure.d.ts +1 -1
- package/model-function/generate-text/PromptTemplateTextGenerationModel.cjs +2 -2
- package/model-function/generate-text/PromptTemplateTextGenerationModel.d.ts +2 -2
- package/model-function/generate-text/PromptTemplateTextGenerationModel.js +2 -2
- package/model-function/generate-text/TextGenerationModel.d.ts +31 -5
- package/model-function/generate-text/generateText.cjs +10 -4
- package/model-function/generate-text/generateText.d.ts +1 -0
- package/model-function/generate-text/generateText.js +10 -4
- package/model-function/generate-text/prompt-template/trimChatPrompt.cjs +1 -1
- package/model-function/generate-text/prompt-template/trimChatPrompt.js +1 -1
- package/model-provider/anthropic/AnthropicTextGenerationModel.cjs +27 -31
- package/model-provider/anthropic/AnthropicTextGenerationModel.d.ts +2 -2
- package/model-provider/anthropic/AnthropicTextGenerationModel.js +27 -31
- package/model-provider/cohere/CohereFacade.cjs +1 -1
- package/model-provider/cohere/CohereFacade.d.ts +1 -1
- package/model-provider/cohere/CohereFacade.js +1 -1
- package/model-provider/cohere/CohereTextEmbeddingModel.d.ts +3 -3
- package/model-provider/cohere/CohereTextGenerationModel.cjs +34 -43
- package/model-provider/cohere/CohereTextGenerationModel.d.ts +3 -4
- package/model-provider/cohere/CohereTextGenerationModel.js +34 -43
- package/model-provider/huggingface/HuggingFaceFacade.cjs +1 -1
- package/model-provider/huggingface/HuggingFaceFacade.d.ts +1 -1
- package/model-provider/huggingface/HuggingFaceFacade.js +1 -1
- package/model-provider/huggingface/HuggingFaceTextGenerationModel.cjs +31 -41
- package/model-provider/huggingface/HuggingFaceTextGenerationModel.d.ts +3 -4
- package/model-provider/huggingface/HuggingFaceTextGenerationModel.js +31 -41
- package/model-provider/llamacpp/LlamaCppTextGenerationModel.cjs +4 -4
- package/model-provider/llamacpp/LlamaCppTextGenerationModel.d.ts +2 -2
- package/model-provider/llamacpp/LlamaCppTextGenerationModel.js +4 -4
- package/model-provider/mistral/{MistralTextGenerationModel.cjs → MistralChatModel.cjs} +18 -18
- package/model-provider/mistral/{MistralTextGenerationModel.d.ts → MistralChatModel.d.ts} +22 -21
- package/model-provider/mistral/{MistralTextGenerationModel.js → MistralChatModel.js} +16 -16
- package/model-provider/mistral/MistralFacade.cjs +5 -5
- package/model-provider/mistral/MistralFacade.d.ts +3 -2
- package/model-provider/mistral/MistralFacade.js +3 -3
- package/model-provider/mistral/MistralPromptTemplate.d.ts +4 -4
- package/model-provider/mistral/index.cjs +1 -1
- package/model-provider/mistral/index.d.ts +1 -1
- package/model-provider/mistral/index.js +1 -1
- package/model-provider/ollama/OllamaApiConfiguration.d.ts +6 -5
- package/model-provider/ollama/OllamaChatModel.cjs +303 -0
- package/model-provider/ollama/OllamaChatModel.d.ts +171 -0
- package/model-provider/ollama/OllamaChatModel.js +299 -0
- package/model-provider/ollama/OllamaChatPromptTemplate.cjs +76 -0
- package/model-provider/ollama/OllamaChatPromptTemplate.d.ts +20 -0
- package/model-provider/ollama/OllamaChatPromptTemplate.js +69 -0
- package/model-provider/ollama/{OllamaTextGenerationModel.cjs → OllamaCompletionModel.cjs} +17 -15
- package/model-provider/ollama/OllamaCompletionModel.d.ts +159 -0
- package/model-provider/ollama/{OllamaTextGenerationModel.js → OllamaCompletionModel.js} +15 -13
- package/model-provider/ollama/{OllamaTextGenerationModel.test.cjs → OllamaCompletionModel.test.cjs} +3 -3
- package/model-provider/ollama/{OllamaTextGenerationModel.test.js → OllamaCompletionModel.test.js} +3 -3
- package/model-provider/ollama/OllamaFacade.cjs +15 -5
- package/model-provider/ollama/OllamaFacade.d.ts +7 -2
- package/model-provider/ollama/OllamaFacade.js +11 -3
- package/model-provider/ollama/OllamaTextGenerationSettings.cjs +2 -0
- package/model-provider/ollama/OllamaTextGenerationSettings.d.ts +87 -0
- package/model-provider/ollama/OllamaTextGenerationSettings.js +1 -0
- package/model-provider/ollama/index.cjs +4 -1
- package/model-provider/ollama/index.d.ts +4 -1
- package/model-provider/ollama/index.js +4 -1
- package/model-provider/openai/OpenAICompletionModel.cjs +48 -53
- package/model-provider/openai/OpenAICompletionModel.d.ts +3 -6
- package/model-provider/openai/OpenAICompletionModel.js +48 -53
- package/model-provider/openai/OpenAIFacade.cjs +6 -4
- package/model-provider/openai/OpenAIFacade.d.ts +5 -3
- package/model-provider/openai/OpenAIFacade.js +4 -3
- package/model-provider/openai/chat/AbstractOpenAIChatModel.cjs +50 -54
- package/model-provider/openai/chat/AbstractOpenAIChatModel.d.ts +7 -28
- package/model-provider/openai/chat/AbstractOpenAIChatModel.js +50 -54
- package/model-provider/openai/chat/OpenAIChatFunctionCallStructureGenerationModel.d.ts +3 -3
- package/model-provider/openai/chat/OpenAIChatModel.cjs +4 -4
- package/model-provider/openai/chat/OpenAIChatModel.d.ts +3 -3
- package/model-provider/openai/chat/OpenAIChatModel.js +4 -4
- package/model-provider/openai/chat/OpenAIChatModel.test.cjs +1 -1
- package/model-provider/openai/chat/OpenAIChatModel.test.js +1 -1
- package/model-provider/openai/chat/OpenAIChatPromptTemplate.d.ts +5 -5
- package/model-provider/openai/chat/OpenAIChatPromptTemplate.js +1 -1
- package/model-provider/openai-compatible/OpenAICompatibleChatModel.cjs +2 -2
- package/model-provider/openai-compatible/OpenAICompatibleChatModel.js +2 -2
- package/model-provider/openai-compatible/OpenAICompatibleFacade.cjs +2 -2
- package/model-provider/openai-compatible/OpenAICompatibleFacade.d.ts +2 -2
- package/model-provider/openai-compatible/OpenAICompatibleFacade.js +2 -2
- package/package.json +1 -1
- package/model-provider/ollama/OllamaTextGenerationModel.d.ts +0 -230
- package/model-provider/ollama/{OllamaTextGenerationModel.test.d.ts → OllamaCompletionModel.test.d.ts} +0 -0
package/model-provider/ollama/OllamaCompletionModel.d.ts
ADDED
@@ -0,0 +1,159 @@
+import { z } from "zod";
+import { FunctionOptions } from "../../core/FunctionOptions.js";
+import { ApiConfiguration } from "../../core/api/ApiConfiguration.js";
+import { ResponseHandler } from "../../core/api/postToApi.js";
+import { AbstractModel } from "../../model-function/AbstractModel.js";
+import { Delta } from "../../model-function/Delta.js";
+import { PromptTemplateTextStreamingModel } from "../../model-function/generate-text/PromptTemplateTextStreamingModel.js";
+import { TextStreamingModel } from "../../model-function/generate-text/TextGenerationModel.js";
+import { TextGenerationPromptTemplate } from "../../model-function/generate-text/TextGenerationPromptTemplate.js";
+import { TextGenerationToolCallModel, ToolCallPromptTemplate } from "../../tool/generate-tool-call/TextGenerationToolCallModel.js";
+import { TextGenerationToolCallsOrGenerateTextModel, ToolCallsOrGenerateTextPromptTemplate } from "../../tool/generate-tool-calls-or-text/TextGenerationToolCallsOrGenerateTextModel.js";
+import { OllamaTextGenerationSettings } from "./OllamaTextGenerationSettings.js";
+export interface OllamaCompletionPrompt {
+    /**
+     * Text prompt.
+     */
+    prompt: string;
+    /**
+     Images. Supports base64-encoded `png` and `jpeg` images up to 100MB in size.
+     */
+    images?: Array<string>;
+}
+/**
+ * Text generation model that uses the Ollama completion API.
+ *
+ * @see https://github.com/jmorganca/ollama/blob/main/docs/api.md#generate-a-completion
+ */
+export interface OllamaCompletionModelSettings<CONTEXT_WINDOW_SIZE extends number | undefined> extends OllamaTextGenerationSettings {
+    api?: ApiConfiguration;
+    /**
+     * Specify the context window size of the model that you have loaded in your
+     * Ollama server. (Default: 2048)
+     */
+    contextWindowSize?: CONTEXT_WINDOW_SIZE;
+    /**
+     * When set to true, no formatting will be applied to the prompt and no context
+     * will be returned.
+     */
+    raw?: boolean;
+    system?: string;
+    context?: number[];
+}
+export declare class OllamaCompletionModel<CONTEXT_WINDOW_SIZE extends number | undefined> extends AbstractModel<OllamaCompletionModelSettings<CONTEXT_WINDOW_SIZE>> implements TextStreamingModel<OllamaCompletionPrompt, OllamaCompletionModelSettings<CONTEXT_WINDOW_SIZE>> {
+    constructor(settings: OllamaCompletionModelSettings<CONTEXT_WINDOW_SIZE>);
+    readonly provider = "ollama";
+    get modelName(): string;
+    readonly tokenizer: undefined;
+    readonly countPromptTokens: undefined;
+    get contextWindowSize(): CONTEXT_WINDOW_SIZE;
+    callAPI<RESPONSE>(prompt: OllamaCompletionPrompt, options: {
+        responseFormat: OllamaCompletionResponseFormatType<RESPONSE>;
+    } & FunctionOptions): Promise<RESPONSE>;
+    get settingsForEvent(): Partial<OllamaCompletionModelSettings<CONTEXT_WINDOW_SIZE>>;
+    doGenerateTexts(prompt: OllamaCompletionPrompt, options?: FunctionOptions): Promise<{
+        response: {
+            response: string;
+            model: string;
+            done: true;
+            created_at: string;
+            total_duration: number;
+            prompt_eval_count: number;
+            eval_count: number;
+            eval_duration: number;
+            load_duration?: number | undefined;
+            prompt_eval_duration?: number | undefined;
+            context?: number[] | undefined;
+        };
+        texts: string[];
+    }>;
+    doStreamText(prompt: OllamaCompletionPrompt, options?: FunctionOptions): Promise<AsyncIterable<Delta<string>>>;
+    asToolCallGenerationModel<INPUT_PROMPT>(promptTemplate: ToolCallPromptTemplate<INPUT_PROMPT, OllamaCompletionPrompt>): TextGenerationToolCallModel<INPUT_PROMPT, OllamaCompletionPrompt, this>;
+    asToolCallsOrTextGenerationModel<INPUT_PROMPT>(promptTemplate: ToolCallsOrGenerateTextPromptTemplate<INPUT_PROMPT, OllamaCompletionPrompt>): TextGenerationToolCallsOrGenerateTextModel<INPUT_PROMPT, OllamaCompletionPrompt, this>;
+    withTextPrompt(): PromptTemplateTextStreamingModel<string, OllamaCompletionPrompt, OllamaCompletionModelSettings<CONTEXT_WINDOW_SIZE>, this>;
+    withPromptTemplate<INPUT_PROMPT>(promptTemplate: TextGenerationPromptTemplate<INPUT_PROMPT, OllamaCompletionPrompt>): PromptTemplateTextStreamingModel<INPUT_PROMPT, OllamaCompletionPrompt, OllamaCompletionModelSettings<CONTEXT_WINDOW_SIZE>, this>;
+    withSettings(additionalSettings: Partial<OllamaCompletionModelSettings<CONTEXT_WINDOW_SIZE>>): this;
+}
+declare const ollamaCompletionResponseSchema: z.ZodObject<{
+    done: z.ZodLiteral<true>;
+    model: z.ZodString;
+    created_at: z.ZodString;
+    response: z.ZodString;
+    total_duration: z.ZodNumber;
+    load_duration: z.ZodOptional<z.ZodNumber>;
+    prompt_eval_count: z.ZodNumber;
+    prompt_eval_duration: z.ZodOptional<z.ZodNumber>;
+    eval_count: z.ZodNumber;
+    eval_duration: z.ZodNumber;
+    context: z.ZodOptional<z.ZodArray<z.ZodNumber, "many">>;
+}, "strip", z.ZodTypeAny, {
+    response: string;
+    model: string;
+    done: true;
+    created_at: string;
+    total_duration: number;
+    prompt_eval_count: number;
+    eval_count: number;
+    eval_duration: number;
+    load_duration?: number | undefined;
+    prompt_eval_duration?: number | undefined;
+    context?: number[] | undefined;
+}, {
+    response: string;
+    model: string;
+    done: true;
+    created_at: string;
+    total_duration: number;
+    prompt_eval_count: number;
+    eval_count: number;
+    eval_duration: number;
+    load_duration?: number | undefined;
+    prompt_eval_duration?: number | undefined;
+    context?: number[] | undefined;
+}>;
+export type OllamaCompletionResponse = z.infer<typeof ollamaCompletionResponseSchema>;
+export type OllamaCompletionDelta = {
+    content: string;
+    isComplete: boolean;
+    delta: string;
+};
+export type OllamaCompletionResponseFormatType<T> = {
+    stream: boolean;
+    handler: ResponseHandler<T>;
+};
+export declare const OllamaCompletionResponseFormat: {
+    /**
+     * Returns the response as a JSON object.
+     */
+    json: {
+        stream: false;
+        handler: ({ response, url, requestBodyValues }: {
+            url: string;
+            requestBodyValues: unknown;
+            response: Response;
+        }) => Promise<{
+            response: string;
+            model: string;
+            done: true;
+            created_at: string;
+            total_duration: number;
+            prompt_eval_count: number;
+            eval_count: number;
+            eval_duration: number;
+            load_duration?: number | undefined;
+            prompt_eval_duration?: number | undefined;
+            context?: number[] | undefined;
+        }>;
+    };
+    /**
+     * Returns an async iterable over the full deltas (all choices, including full current state at time of event)
+     * of the response stream.
+     */
+    deltaIterable: {
+        stream: true;
+        handler: ({ response }: {
+            response: Response;
+        }) => Promise<AsyncIterable<Delta<string>>>;
+    };
+};
+export {};
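For orientation, a minimal usage sketch of the renamed completion model through the `ollama` facade, assuming the top-level `modelfusion` exports and using `"mistral"` purely as an example model name (the positional `generateText(model, prompt)` call matches the updated tests further below):

```ts
import { generateText, ollama } from "modelfusion";

// Illustrative only: the model must already be available on the local Ollama server.
const text = await generateText(
  ollama
    .CompletionTextGenerator({
      model: "mistral",
      temperature: 0.7,
      maxGenerationTokens: 120, // maxGenerationTokens replaces the old maxCompletionTokens setting
    })
    .withTextPrompt(), // accept a plain string instead of an OllamaCompletionPrompt object
  "Why is the sky blue?"
);

console.log(text);
```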
package/model-provider/ollama/{OllamaTextGenerationModel.js → OllamaCompletionModel.js}
RENAMED
@@ -12,7 +12,7 @@ import { AsyncQueue } from "../../util/AsyncQueue.js";
 import { parseJsonStream } from "../../util/streaming/parseJsonStream.js";
 import { OllamaApiConfiguration } from "./OllamaApiConfiguration.js";
 import { failedOllamaCallResponseHandler } from "./OllamaError.js";
-export class OllamaTextGenerationModel extends AbstractModel {
+export class OllamaCompletionModel extends AbstractModel {
     constructor(settings) {
         super({ settings });
         Object.defineProperty(this, "provider", {
@@ -63,7 +63,7 @@ export class OllamaTextGenerationModel extends AbstractModel {
             num_ctx: this.settings.contextWindowSize,
             num_gpu: this.settings.numGpu,
             num_gqa: this.settings.numGqa,
-            num_predict: this.settings.
+            num_predict: this.settings.maxGenerationTokens,
             num_threads: this.settings.numThreads,
             repeat_last_n: this.settings.repeatLastN,
             repeat_penalty: this.settings.repeatPenalty,
@@ -87,7 +87,7 @@ export class OllamaTextGenerationModel extends AbstractModel {
     }
     get settingsForEvent() {
         const eventSettingProperties = [
-            "
+            "maxGenerationTokens",
             "stopSequences",
             "contextWindowSize",
             "temperature",
@@ -111,20 +111,20 @@ export class OllamaTextGenerationModel extends AbstractModel {
         ];
         return Object.fromEntries(Object.entries(this.settings).filter(([key]) => eventSettingProperties.includes(key)));
     }
-    async
+    async doGenerateTexts(prompt, options) {
         const response = await this.callAPI(prompt, {
             ...options,
-            responseFormat:
+            responseFormat: OllamaCompletionResponseFormat.json,
         });
         return {
             response,
-
+            texts: [response.response],
         };
     }
     doStreamText(prompt, options) {
         return this.callAPI(prompt, {
             ...options,
-            responseFormat:
+            responseFormat: OllamaCompletionResponseFormat.deltaIterable,
         });
     }
     asToolCallGenerationModel(promptTemplate) {
@@ -159,21 +159,23 @@ export class OllamaTextGenerationModel extends AbstractModel {
         });
     }
     withSettings(additionalSettings) {
-        return new
+        return new OllamaCompletionModel(Object.assign({}, this.settings, additionalSettings));
     }
 }
-const
+const ollamaCompletionResponseSchema = z.object({
     done: z.literal(true),
     model: z.string(),
+    created_at: z.string(),
     response: z.string(),
     total_duration: z.number(),
     load_duration: z.number().optional(),
     prompt_eval_count: z.number(),
+    prompt_eval_duration: z.number().optional(),
     eval_count: z.number(),
     eval_duration: z.number(),
     context: z.array(z.number()).optional(),
 });
-const
+const ollamaCompletionStreamSchema = new ZodSchema(z.discriminatedUnion("done", [
     z.object({
         done: z.literal(false),
         model: z.string(),
@@ -201,7 +203,7 @@ async function createOllamaFullDeltaIterableQueue(stream) {
     // process the stream asynchonously (no 'await' on purpose):
     parseJsonStream({
         stream,
-        schema:
+        schema: ollamaCompletionStreamSchema,
         process(event) {
             if (event.done === true) {
                 queue.push({
@@ -233,7 +235,7 @@ async function createOllamaFullDeltaIterableQueue(stream) {
     });
     return queue;
 }
-export const
+export const OllamaCompletionResponseFormat = {
     /**
      * Returns the response as a JSON object.
      */
@@ -244,7 +246,7 @@ export const OllamaTextGenerationResponseFormat = {
         const parsedResult = safeParseJSON({
             text: responseBody,
             schema: new ZodSchema(z.union([
-
+                ollamaCompletionResponseSchema,
                 z.object({
                     done: z.literal(false),
                     model: z.string(),
package/model-provider/ollama/{OllamaTextGenerationModel.test.cjs → OllamaCompletionModel.test.cjs}
RENAMED
@@ -7,7 +7,7 @@ const ApiCallError_js_1 = require("../../core/api/ApiCallError.cjs");
 const retryNever_js_1 = require("../../core/api/retryNever.cjs");
 const generateText_js_1 = require("../../model-function/generate-text/generateText.cjs");
 const OllamaApiConfiguration_js_1 = require("./OllamaApiConfiguration.cjs");
-const
+const OllamaCompletionModel_js_1 = require("./OllamaCompletionModel.cjs");
 // eslint-disable-next-line @typescript-eslint/no-explicit-any
 let responseBodyJson = {};
 const server = (0, node_1.setupServer)(msw_1.http.post("http://127.0.0.1:11434/api/generate", () => msw_1.HttpResponse.json(responseBodyJson)));
@@ -34,7 +34,7 @@ describe("generateText", () => {
            eval_count: 113,
            eval_duration: 1325948000,
        };
-        const result = await (0, generateText_js_1.generateText)(new
+        const result = await (0, generateText_js_1.generateText)(new OllamaCompletionModel_js_1.OllamaCompletionModel({
            model: "test-model",
        }).withTextPrompt(), "test prompt");
        expect(result).toEqual("test response");
@@ -47,7 +47,7 @@ describe("generateText", () => {
            done: false,
        };
        try {
-            await (0, generateText_js_1.generateText)(new
+            await (0, generateText_js_1.generateText)(new OllamaCompletionModel_js_1.OllamaCompletionModel({
                api: new OllamaApiConfiguration_js_1.OllamaApiConfiguration({
                    retry: (0, retryNever_js_1.retryNever)(),
                }),
package/model-provider/ollama/{OllamaTextGenerationModel.test.js → OllamaCompletionModel.test.js}
RENAMED
@@ -5,7 +5,7 @@ import { ApiCallError } from "../../core/api/ApiCallError.js";
 import { retryNever } from "../../core/api/retryNever.js";
 import { generateText } from "../../model-function/generate-text/generateText.js";
 import { OllamaApiConfiguration } from "./OllamaApiConfiguration.js";
-import {
+import { OllamaCompletionModel } from "./OllamaCompletionModel.js";
 // eslint-disable-next-line @typescript-eslint/no-explicit-any
 let responseBodyJson = {};
 const server = setupServer(http.post("http://127.0.0.1:11434/api/generate", () => HttpResponse.json(responseBodyJson)));
@@ -32,7 +32,7 @@ describe("generateText", () => {
            eval_count: 113,
            eval_duration: 1325948000,
        };
-        const result = await generateText(new
+        const result = await generateText(new OllamaCompletionModel({
            model: "test-model",
        }).withTextPrompt(), "test prompt");
        expect(result).toEqual("test response");
@@ -45,7 +45,7 @@ describe("generateText", () => {
            done: false,
        };
        try {
-            await generateText(new
+            await generateText(new OllamaCompletionModel({
                api: new OllamaApiConfiguration({
                    retry: retryNever(),
                }),
package/model-provider/ollama/OllamaFacade.cjs
CHANGED
@@ -1,12 +1,22 @@
 "use strict";
 Object.defineProperty(exports, "__esModule", { value: true });
-exports.TextEmbedder = exports.
+exports.TextEmbedder = exports.ChatTextGenerator = exports.CompletionTextGenerator = exports.Api = void 0;
+const OllamaChatModel_js_1 = require("./OllamaChatModel.cjs");
 const OllamaTextEmbeddingModel_js_1 = require("./OllamaTextEmbeddingModel.cjs");
-const
-
-
+const OllamaCompletionModel_js_1 = require("./OllamaCompletionModel.cjs");
+const OllamaApiConfiguration_js_1 = require("./OllamaApiConfiguration.cjs");
+function Api(settings) {
+    return new OllamaApiConfiguration_js_1.OllamaApiConfiguration(settings);
 }
-exports.
+exports.Api = Api;
+function CompletionTextGenerator(settings) {
+    return new OllamaCompletionModel_js_1.OllamaCompletionModel(settings);
+}
+exports.CompletionTextGenerator = CompletionTextGenerator;
+function ChatTextGenerator(settings) {
+    return new OllamaChatModel_js_1.OllamaChatModel(settings);
+}
+exports.ChatTextGenerator = ChatTextGenerator;
 function TextEmbedder(settings) {
     return new OllamaTextEmbeddingModel_js_1.OllamaTextEmbeddingModel(settings);
 }
package/model-provider/ollama/OllamaFacade.d.ts
CHANGED
@@ -1,4 +1,9 @@
+import { OllamaChatModel, OllamaChatModelSettings } from "./OllamaChatModel.js";
 import { OllamaTextEmbeddingModel, OllamaTextEmbeddingModelSettings } from "./OllamaTextEmbeddingModel.js";
-import {
-
+import { OllamaCompletionModel, OllamaCompletionModelSettings } from "./OllamaCompletionModel.js";
+import { OllamaApiConfiguration, OllamaApiConfigurationSettings } from "./OllamaApiConfiguration.js";
+export declare function Api(settings: OllamaApiConfigurationSettings): OllamaApiConfiguration;
+export declare function CompletionTextGenerator<CONTEXT_WINDOW_SIZE extends number>(settings: OllamaCompletionModelSettings<CONTEXT_WINDOW_SIZE>): OllamaCompletionModel<CONTEXT_WINDOW_SIZE>;
+export declare function ChatTextGenerator(settings: OllamaChatModelSettings): OllamaChatModel;
 export declare function TextEmbedder(settings: OllamaTextEmbeddingModelSettings): OllamaTextEmbeddingModel;
+export { OllamaChatMessage as ChatMessage, OllamaChatPrompt as ChatPrompt, } from "./OllamaChatModel.js";
package/model-provider/ollama/OllamaFacade.js
CHANGED
@@ -1,7 +1,15 @@
+import { OllamaChatModel } from "./OllamaChatModel.js";
 import { OllamaTextEmbeddingModel, } from "./OllamaTextEmbeddingModel.js";
-import {
-
-
+import { OllamaCompletionModel, } from "./OllamaCompletionModel.js";
+import { OllamaApiConfiguration, } from "./OllamaApiConfiguration.js";
+export function Api(settings) {
+    return new OllamaApiConfiguration(settings);
+}
+export function CompletionTextGenerator(settings) {
+    return new OllamaCompletionModel(settings);
+}
+export function ChatTextGenerator(settings) {
+    return new OllamaChatModel(settings);
 }
 export function TextEmbedder(settings) {
     return new OllamaTextEmbeddingModel(settings);
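The facade now exposes `Api`, `CompletionTextGenerator`, `ChatTextGenerator`, and `TextEmbedder`. A sketch of pointing the completion generator at a specific Ollama server via `ollama.Api`; the `baseUrl` field is assumed from `OllamaApiConfigurationSettings` and may need adjusting:

```ts
import { generateText, ollama } from "modelfusion";

// Assumed settings shape for ollama.Api; adjust to your server address.
const api = ollama.Api({ baseUrl: "http://127.0.0.1:11434" });

const model = ollama
  .CompletionTextGenerator({ api, model: "mistral", maxGenerationTokens: 200 })
  .withTextPrompt();

const answer = await generateText(model, "Summarize this diff in one sentence.");
```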
package/model-provider/ollama/OllamaTextGenerationSettings.d.ts
ADDED
@@ -0,0 +1,87 @@
+import { TextGenerationModelSettings } from "../../model-function/generate-text/TextGenerationModel.js";
+export interface OllamaTextGenerationSettings extends TextGenerationModelSettings {
+    /**
+     * The name of the model to use. For example, 'mistral'.
+     *
+     * @see https://ollama.ai/library
+     */
+    model: string;
+    /**
+     * The temperature of the model. Increasing the temperature will make the model
+     * answer more creatively. (Default: 0.8)
+     */
+    temperature?: number;
+    /**
+     * Enable Mirostat sampling for controlling perplexity.
+     * (default: 0, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0)
+     */
+    mirostat?: number;
+    /**
+     * Influences how quickly the algorithm responds to feedback from the generated text.
+     * A lower learning rate will result in slower adjustments,
+     * while a higher learning rate will make the algorithm more responsive. (Default: 0.1)
+     */
+    mirostatEta?: number;
+    /**
+     * Controls the balance between coherence and diversity of the output.
+     * A lower value will result in more focused and coherent text. (Default: 5.0)
+     */
+    mirostatTau?: number;
+    /**
+     * The number of GQA groups in the transformer layer. Required for some models,
+     * for example it is 8 for llama2:70b
+     */
+    numGqa?: number;
+    /**
+     * The number of layers to send to the GPU(s). On macOS it defaults to 1 to
+     * enable metal support, 0 to disable.
+     */
+    numGpu?: number;
+    /**
+     * Sets the number of threads to use during computation. By default, Ollama will
+     * detect this for optimal performance. It is recommended to set this value to the
+     * number of physical CPU cores your system has (as opposed to the logical number of cores).
+     */
+    numThreads?: number;
+    /**
+     * Sets how far back for the model to look back to prevent repetition.
+     * (Default: 64, 0 = disabled, -1 = num_ctx)
+     */
+    repeatLastN?: number;
+    /**
+     * Sets how strongly to penalize repetitions. A higher value (e.g., 1.5)
+     * will penalize repetitions more strongly, while a lower value (e.g., 0.9)
+     * will be more lenient. (Default: 1.1)
+     */
+    repeatPenalty?: number;
+    /**
+     * Sets the random number seed to use for generation. Setting this to a
+     * specific number will make the model generate the same text for the same prompt.
+     * (Default: 0)
+     */
+    seed?: number;
+    /**
+     * Tail free sampling is used to reduce the impact of less probable tokens
+     * from the output. A higher value (e.g., 2.0) will reduce the impact more,
+     * while a value of 1.0 disables this setting. (default: 1)
+     */
+    tfsZ?: number;
+    /**
+     * Reduces the probability of generating nonsense. A higher value (e.g. 100)
+     * will give more diverse answers, while a lower value (e.g. 10) will be more
+     * conservative. (Default: 40)
+     */
+    topK?: number;
+    /**
+     * Works together with top-k. A higher value (e.g., 0.95) will lead to more
+     * diverse text, while a lower value (e.g., 0.5) will generate more focused
+     * and conservative text. (Default: 0.9)
+     */
+    topP?: number;
+    /**
+     * The format to return a response in. Currently the only accepted value is 'json'.
+     * Leave undefined to return a string.
+     */
+    format?: "json";
+    template?: string;
+}
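The completion model settings shown earlier extend this interface, so these options are passed as plain fields on the settings object. A streaming sketch with a few of the documented sampling options; `streamText` usage is assumed from the modelfusion API of this release, and the values simply mirror the documented defaults:

```ts
import { ollama, streamText } from "modelfusion";

// Sketch only: "mistral" is an example model name on a local Ollama server.
const textStream = await streamText(
  ollama
    .CompletionTextGenerator({
      model: "mistral",
      temperature: 0.8,
      topK: 40,
      topP: 0.9,
      repeatPenalty: 1.1,
    })
    .withTextPrompt(),
  "Write one sentence about tail free sampling."
);

for await (const textPart of textStream) {
  process.stdout.write(textPart);
}
```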
package/model-provider/ollama/OllamaTextGenerationSettings.js
ADDED
@@ -0,0 +1 @@
+export {};
package/model-provider/ollama/index.cjs
CHANGED
@@ -28,6 +28,9 @@ var __importStar = (this && this.__importStar) || function (mod) {
 Object.defineProperty(exports, "__esModule", { value: true });
 exports.ollama = void 0;
 __exportStar(require("./OllamaApiConfiguration.cjs"), exports);
+__exportStar(require("./OllamaChatModel.cjs"), exports);
+__exportStar(require("./OllamaChatPromptTemplate.cjs"), exports);
+__exportStar(require("./OllamaCompletionModel.cjs"), exports);
 exports.ollama = __importStar(require("./OllamaFacade.cjs"));
 __exportStar(require("./OllamaTextEmbeddingModel.cjs"), exports);
-__exportStar(require("./
+__exportStar(require("./OllamaTextGenerationSettings.cjs"), exports);
package/model-provider/ollama/index.d.ts
CHANGED
@@ -1,5 +1,8 @@
 export * from "./OllamaApiConfiguration.js";
+export * from "./OllamaChatModel.js";
+export * from "./OllamaChatPromptTemplate.js";
+export * from "./OllamaCompletionModel.js";
 export { OllamaErrorData } from "./OllamaError.js";
 export * as ollama from "./OllamaFacade.js";
 export * from "./OllamaTextEmbeddingModel.js";
-export * from "./
+export * from "./OllamaTextGenerationSettings.js";
package/model-provider/ollama/index.js
CHANGED
@@ -1,4 +1,7 @@
 export * from "./OllamaApiConfiguration.js";
+export * from "./OllamaChatModel.js";
+export * from "./OllamaChatPromptTemplate.js";
+export * from "./OllamaCompletionModel.js";
 export * as ollama from "./OllamaFacade.js";
 export * from "./OllamaTextEmbeddingModel.js";
-export * from "./
+export * from "./OllamaTextGenerationSettings.js";
package/model-provider/openai/OpenAICompletionModel.cjs
CHANGED
@@ -137,7 +137,7 @@ exports.calculateOpenAICompletionCostInMillicents = calculateOpenAICompletionCos
  * const model = new OpenAICompletionModel({
  *   model: "gpt-3.5-turbo-instruct",
  *   temperature: 0.7,
- *
+ *   maxGenerationTokens: 500,
  *   retry: retryWithExponentialBackoff({ maxTries: 5 }),
  * });
  *
@@ -180,33 +180,60 @@ class OpenAICompletionModel extends AbstractModel_js_1.AbstractModel {
         return (0, countTokens_js_1.countTokens)(this.tokenizer, input);
     }
     async callAPI(prompt, options) {
-        const
-        const
-
-
-
-
-
-            maxTokens: this.settings.maxCompletionTokens,
-            // other settings:
-            abortSignal: run?.abortSignal,
-            prompt,
-            responseFormat,
-        };
+        const api = this.settings.api ?? new OpenAIApiConfiguration_js_1.OpenAIApiConfiguration();
+        const user = this.settings.isUserIdForwardingEnabled
+            ? options.run?.userId
+            : undefined;
+        const abortSignal = options.run?.abortSignal;
+        let { stopSequences } = this.settings;
+        const openaiResponseFormat = options.responseFormat;
         return (0, callWithRetryAndThrottle_js_1.callWithRetryAndThrottle)({
-            retry:
-            throttle:
-            call: async () =>
+            retry: api.retry,
+            throttle: api.throttle,
+            call: async () => {
+                // empty arrays are not allowed for stop:
+                if (stopSequences != null &&
+                    Array.isArray(stopSequences) &&
+                    stopSequences.length === 0) {
+                    stopSequences = undefined;
+                }
+                return (0, postToApi_js_1.postJsonToApi)({
+                    url: api.assembleUrl("/completions"),
+                    headers: api.headers,
+                    body: {
+                        stream: openaiResponseFormat.stream,
+                        model: this.settings.model,
+                        prompt,
+                        suffix: this.settings.suffix,
+                        max_tokens: this.settings.maxGenerationTokens,
+                        temperature: this.settings.temperature,
+                        top_p: this.settings.topP,
+                        n: this.settings.numberOfGenerations,
+                        logprobs: this.settings.logprobs,
+                        echo: this.settings.echo,
+                        stop: this.settings.stopSequences,
+                        seed: this.settings.seed,
+                        presence_penalty: this.settings.presencePenalty,
+                        frequency_penalty: this.settings.frequencyPenalty,
+                        best_of: this.settings.bestOf,
+                        logit_bias: this.settings.logitBias,
+                        user,
+                    },
+                    failedResponseHandler: OpenAIError_js_1.failedOpenAICallResponseHandler,
+                    successfulResponseHandler: openaiResponseFormat.handler,
+                    abortSignal,
+                });
+            },
         });
     }
     get settingsForEvent() {
         const eventSettingProperties = [
-            "
+            "maxGenerationTokens",
             "stopSequences",
+            "numberOfGenerations",
             "suffix",
             "temperature",
             "topP",
-            "n",
             "logprobs",
             "echo",
             "presencePenalty",
@@ -217,14 +244,14 @@ class OpenAICompletionModel extends AbstractModel_js_1.AbstractModel {
         ];
         return Object.fromEntries(Object.entries(this.settings).filter(([key]) => eventSettingProperties.includes(key)));
     }
-    async
+    async doGenerateTexts(prompt, options) {
         const response = await this.callAPI(prompt, {
             ...options,
             responseFormat: exports.OpenAITextResponseFormat.json,
         });
         return {
             response,
-
+            texts: response.choices.map((choice) => choice.text),
             usage: {
                 promptTokens: response.usage.prompt_tokens,
                 completionTokens: response.usage.completion_tokens,
@@ -287,38 +314,6 @@ const OpenAICompletionResponseSchema = zod_1.z.object({
         total_tokens: zod_1.z.number(),
     }),
 });
-async function callOpenAICompletionAPI({ api = new OpenAIApiConfiguration_js_1.OpenAIApiConfiguration(), abortSignal, responseFormat, model, prompt, suffix, maxTokens, temperature, topP, n, logprobs, echo, stop, presencePenalty, frequencyPenalty, bestOf, logitBias, seed, user, }) {
-    // empty arrays are not allowed for stop:
-    if (stop != null && Array.isArray(stop) && stop.length === 0) {
-        stop = undefined;
-    }
-    return (0, postToApi_js_1.postJsonToApi)({
-        url: api.assembleUrl("/completions"),
-        headers: api.headers,
-        body: {
-            stream: responseFormat.stream,
-            model,
-            prompt,
-            suffix,
-            max_tokens: maxTokens,
-            temperature,
-            top_p: topP,
-            n,
-            logprobs,
-            echo,
-            stop,
-            seed,
-            presence_penalty: presencePenalty,
-            frequency_penalty: frequencyPenalty,
-            best_of: bestOf,
-            logit_bias: logitBias,
-            user,
-        },
-        failedResponseHandler: OpenAIError_js_1.failedOpenAICallResponseHandler,
-        successfulResponseHandler: responseFormat.handler,
-        abortSignal,
-    });
-}
 exports.OpenAITextResponseFormat = {
     /**
      * Returns the response as a JSON object.
|