modelfusion 0.54.0 → 0.55.0
This diff shows the content of publicly released package versions as it appears in the supported public registries, and is provided for informational purposes only.
- package/README.md +69 -2
- package/browser/readEventSourceStream.cjs +1 -1
- package/browser/readEventSourceStream.js +1 -1
- package/index.cjs +0 -1
- package/index.d.ts +0 -1
- package/index.js +0 -1
- package/model-provider/anthropic/AnthropicTextGenerationModel.cjs +1 -1
- package/model-provider/anthropic/AnthropicTextGenerationModel.js +1 -1
- package/model-provider/cohere/CohereTextGenerationModel.cjs +31 -50
- package/model-provider/cohere/CohereTextGenerationModel.js +31 -50
- package/model-provider/index.cjs +1 -0
- package/model-provider/index.d.ts +1 -0
- package/model-provider/index.js +1 -0
- package/model-provider/llamacpp/LlamaCppTextGenerationModel.cjs +1 -1
- package/model-provider/llamacpp/LlamaCppTextGenerationModel.js +1 -1
- package/model-provider/ollama/OllamaApiConfiguration.cjs +15 -0
- package/model-provider/ollama/OllamaApiConfiguration.d.ts +10 -0
- package/model-provider/ollama/OllamaApiConfiguration.js +11 -0
- package/model-provider/ollama/OllamaError.cjs +29 -0
- package/model-provider/ollama/OllamaError.d.ts +22 -0
- package/model-provider/ollama/OllamaError.js +24 -0
- package/model-provider/ollama/OllamaTextGenerationModel.cjs +216 -0
- package/model-provider/ollama/OllamaTextGenerationModel.d.ts +134 -0
- package/model-provider/ollama/OllamaTextGenerationModel.js +212 -0
- package/model-provider/ollama/index.cjs +21 -0
- package/model-provider/ollama/index.d.ts +3 -0
- package/model-provider/ollama/index.js +3 -0
- package/model-provider/openai/OpenAICompletionModel.cjs +2 -2
- package/model-provider/openai/OpenAICompletionModel.js +2 -2
- package/model-provider/openai/chat/OpenAIChatStreamIterable.cjs +1 -1
- package/model-provider/openai/chat/OpenAIChatStreamIterable.js +1 -1
- package/package.json +1 -1
- package/util/index.cjs +1 -0
- package/util/index.d.ts +1 -0
- package/util/index.js +1 -0
- package/util/streaming/parseJsonStream.cjs +35 -0
- package/util/streaming/parseJsonStream.d.ts +6 -0
- package/util/streaming/parseJsonStream.js +31 -0
- /package/{event-source → util/streaming}/EventSourceParserStream.cjs +0 -0
- /package/{event-source → util/streaming}/EventSourceParserStream.d.ts +0 -0
- /package/{event-source → util/streaming}/EventSourceParserStream.js +0 -0
- /package/{event-source → util/streaming}/convertReadableStreamToAsyncIterable.cjs +0 -0
- /package/{event-source → util/streaming}/convertReadableStreamToAsyncIterable.d.ts +0 -0
- /package/{event-source → util/streaming}/convertReadableStreamToAsyncIterable.js +0 -0
- /package/{event-source → util/streaming}/createEventSourceStream.cjs +0 -0
- /package/{event-source → util/streaming}/createEventSourceStream.d.ts +0 -0
- /package/{event-source → util/streaming}/createEventSourceStream.js +0 -0
- /package/{event-source → util/streaming}/index.cjs +0 -0
- /package/{event-source → util/streaming}/index.d.ts +0 -0
- /package/{event-source → util/streaming}/index.js +0 -0
- /package/{event-source → util/streaming}/parseEventSourceStream.cjs +0 -0
- /package/{event-source → util/streaming}/parseEventSourceStream.d.ts +0 -0
- /package/{event-source → util/streaming}/parseEventSourceStream.js +0 -0
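
The rename entries above relocate the event-source streaming helpers to `util/streaming`. For consumers that deep-import these modules (most users import from the package root and are unaffected), a hypothetical before/after, assuming the package allows deep imports into its compiled output:

```ts
// 0.54.0 (old path, removed in this release):
// import { parseEventSourceStream } from "modelfusion/event-source/parseEventSourceStream.js";

// 0.55.0 (new path):
import { parseEventSourceStream } from "modelfusion/util/streaming/parseEventSourceStream.js";
```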
package/model-provider/ollama/OllamaTextGenerationModel.cjs
ADDED
@@ -0,0 +1,216 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.OllamaTextGenerationResponseFormat = exports.OllamaTextGenerationModel = void 0;
+const zod_1 = require("zod");
+const callWithRetryAndThrottle_js_1 = require("../../core/api/callWithRetryAndThrottle.cjs");
+const postToApi_js_1 = require("../../core/api/postToApi.cjs");
+const AbstractModel_js_1 = require("../../model-function/AbstractModel.cjs");
+const PromptFormatTextStreamingModel_js_1 = require("../../model-function/generate-text/PromptFormatTextStreamingModel.cjs");
+const AsyncQueue_js_1 = require("../../util/AsyncQueue.cjs");
+const parseJsonStream_js_1 = require("../../util/streaming/parseJsonStream.cjs");
+const OllamaApiConfiguration_js_1 = require("./OllamaApiConfiguration.cjs");
+const OllamaError_js_1 = require("./OllamaError.cjs");
+class OllamaTextGenerationModel extends AbstractModel_js_1.AbstractModel {
+    constructor(settings) {
+        super({ settings });
+        Object.defineProperty(this, "provider", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: "ollama"
+        });
+        Object.defineProperty(this, "tokenizer", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: undefined
+        });
+        Object.defineProperty(this, "countPromptTokens", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: undefined
+        });
+    }
+    get modelName() {
+        return this.settings.model;
+    }
+    get contextWindowSize() {
+        return this.settings.contextWindowSize;
+    }
+    async callAPI(prompt, options) {
+        return (0, callWithRetryAndThrottle_js_1.callWithRetryAndThrottle)({
+            retry: this.settings.api?.retry,
+            throttle: this.settings.api?.throttle,
+            call: async () => callOllamaTextGenerationAPI({
+                ...this.settings,
+                // other
+                abortSignal: options.run?.abortSignal,
+                prompt,
+                responseFormat: options.responseFormat,
+            }),
+        });
+    }
+    get settingsForEvent() {
+        const eventSettingProperties = [
+            // "maxCompletionTokens",
+            // "stopSequences",
+            "contextWindowSize",
+        ];
+        return Object.fromEntries(Object.entries(this.settings).filter(([key]) => eventSettingProperties.includes(key)));
+    }
+    async doGenerateText(prompt, options) {
+        const response = await this.callAPI(prompt, {
+            ...options,
+            responseFormat: exports.OllamaTextGenerationResponseFormat.json,
+        });
+        return {
+            response,
+            text: response.response,
+        };
+    }
+    doStreamText(prompt, options) {
+        return this.callAPI(prompt, {
+            ...options,
+            responseFormat: exports.OllamaTextGenerationResponseFormat.deltaIterable,
+        });
+    }
+    withPromptFormat(promptFormat) {
+        return new PromptFormatTextStreamingModel_js_1.PromptFormatTextStreamingModel({
+            model: this.withSettings({
+                stopSequences: [
+                    ...(this.settings.stopSequences ?? []),
+                    ...promptFormat.stopSequences,
+                ],
+            }),
+            promptFormat,
+        });
+    }
+    withSettings(additionalSettings) {
+        return new OllamaTextGenerationModel(Object.assign({}, this.settings, additionalSettings));
+    }
+}
+exports.OllamaTextGenerationModel = OllamaTextGenerationModel;
+const ollamaTextGenerationResponseSchema = zod_1.z.object({
+    done: zod_1.z.literal(true),
+    model: zod_1.z.string(),
+    response: zod_1.z.string(),
+    total_duration: zod_1.z.number(),
+    load_duration: zod_1.z.number(),
+    prompt_eval_count: zod_1.z.number(),
+    eval_count: zod_1.z.number(),
+    eval_duration: zod_1.z.number(),
+    context: zod_1.z.array(zod_1.z.number()),
+});
+const ollamaTextStreamingResponseSchema = zod_1.z.discriminatedUnion("done", [
+    zod_1.z.object({
+        done: zod_1.z.literal(false),
+        model: zod_1.z.string(),
+        created_at: zod_1.z.string(),
+        response: zod_1.z.string(),
+    }),
+    zod_1.z.object({
+        done: zod_1.z.literal(true),
+        model: zod_1.z.string(),
+        created_at: zod_1.z.string(),
+        total_duration: zod_1.z.number(),
+        load_duration: zod_1.z.number(),
+        sample_count: zod_1.z.number().optional(),
+        sample_duration: zod_1.z.number().optional(),
+        prompt_eval_count: zod_1.z.number(),
+        prompt_eval_duration: zod_1.z.number().optional(),
+        eval_count: zod_1.z.number(),
+        eval_duration: zod_1.z.number(),
+        context: zod_1.z.array(zod_1.z.number()),
+    }),
+]);
+async function callOllamaTextGenerationAPI({ api = new OllamaApiConfiguration_js_1.OllamaApiConfiguration(), abortSignal, responseFormat, prompt, model, contextWindowSize, maxCompletionTokens, mirostat, mirostat_eta, mirostat_tau, num_gpu, num_gqa, num_threads, repeat_last_n, repeat_penalty, seed, stopSequences, temperature, tfs_z, top_k, top_p, system, template, context, }) {
+    return (0, postToApi_js_1.postJsonToApi)({
+        url: api.assembleUrl(`/api/generate`),
+        headers: api.headers,
+        body: {
+            stream: responseFormat.stream,
+            model,
+            prompt,
+            options: {
+                mirostat,
+                mirostat_eta,
+                mirostat_tau,
+                num_ctx: contextWindowSize,
+                num_gpu,
+                num_gqa,
+                num_predict: maxCompletionTokens,
+                num_threads,
+                repeat_last_n,
+                repeat_penalty,
+                seed,
+                stop: stopSequences,
+                temperature,
+                tfs_z,
+                top_k,
+                top_p,
+            },
+            system,
+            template,
+            context,
+        },
+        failedResponseHandler: OllamaError_js_1.failedOllamaCallResponseHandler,
+        successfulResponseHandler: responseFormat.handler,
+        abortSignal,
+    });
+}
+async function createOllamaFullDeltaIterableQueue(stream) {
+    const queue = new AsyncQueue_js_1.AsyncQueue();
+    let accumulatedText = "";
+    // process the stream asynchronously (no 'await' on purpose):
+    (0, parseJsonStream_js_1.parseJsonStream)({
+        stream,
+        schema: ollamaTextStreamingResponseSchema,
+        process(event) {
+            if (event.done === true) {
+                queue.push({
+                    type: "delta",
+                    fullDelta: {
+                        content: accumulatedText,
+                        isComplete: true,
+                        delta: "",
+                    },
+                    valueDelta: "",
+                });
+            }
+            else {
+                accumulatedText += event.response;
+                queue.push({
+                    type: "delta",
+                    fullDelta: {
+                        content: accumulatedText,
+                        isComplete: false,
+                        delta: event.response,
+                    },
+                    valueDelta: event.response,
+                });
+            }
+        },
+        onDone() {
+            queue.close();
+        },
+    });
+    return queue;
+}
+exports.OllamaTextGenerationResponseFormat = {
+    /**
+     * Returns the response as a JSON object.
+     */
+    json: {
+        stream: false,
+        handler: (0, postToApi_js_1.createJsonResponseHandler)(ollamaTextGenerationResponseSchema),
+    },
+    /**
+     * Returns an async iterable over the full deltas (all choices, including full current state at time of event)
+     * of the response stream.
+     */
+    deltaIterable: {
+        stream: true,
+        handler: async ({ response }) => createOllamaFullDeltaIterableQueue(response.body),
+    },
+};
package/model-provider/ollama/OllamaTextGenerationModel.d.ts
ADDED
@@ -0,0 +1,134 @@
+import { z } from "zod";
+import { FunctionOptions } from "../../core/FunctionOptions.js";
+import { ApiConfiguration } from "../../core/api/ApiConfiguration.js";
+import { ResponseHandler } from "../../core/api/postToApi.js";
+import { AbstractModel } from "../../model-function/AbstractModel.js";
+import { Delta } from "../../model-function/Delta.js";
+import { PromptFormatTextStreamingModel } from "../../model-function/generate-text/PromptFormatTextStreamingModel.js";
+import { TextGenerationModelSettings, TextStreamingModel } from "../../model-function/generate-text/TextGenerationModel.js";
+import { TextGenerationPromptFormat } from "../../model-function/generate-text/TextGenerationPromptFormat.js";
+export interface OllamaTextGenerationModelSettings<CONTEXT_WINDOW_SIZE extends number | undefined> extends TextGenerationModelSettings {
+    api?: ApiConfiguration;
+    model: string;
+    temperature?: number;
+    /**
+     * Specify the context window size of the model that you have loaded in your
+     * Ollama server.
+     */
+    contextWindowSize?: CONTEXT_WINDOW_SIZE;
+    mirostat?: number;
+    mirostat_eta?: number;
+    mirostat_tau?: number;
+    num_gqa?: number;
+    num_gpu?: number;
+    num_threads?: number;
+    repeat_last_n?: number;
+    repeat_penalty?: number;
+    seed?: number;
+    tfs_z?: number;
+    top_k?: number;
+    top_p?: number;
+    system?: string;
+    template?: string;
+    context?: number[];
+}
+export declare class OllamaTextGenerationModel<CONTEXT_WINDOW_SIZE extends number | undefined> extends AbstractModel<OllamaTextGenerationModelSettings<CONTEXT_WINDOW_SIZE>> implements TextStreamingModel<string, OllamaTextGenerationModelSettings<CONTEXT_WINDOW_SIZE>> {
+    constructor(settings: OllamaTextGenerationModelSettings<CONTEXT_WINDOW_SIZE>);
+    readonly provider = "ollama";
+    get modelName(): string;
+    readonly tokenizer: undefined;
+    readonly countPromptTokens: undefined;
+    get contextWindowSize(): CONTEXT_WINDOW_SIZE;
+    callAPI<RESPONSE>(prompt: string, options: {
+        responseFormat: OllamaTextGenerationResponseFormatType<RESPONSE>;
+    } & FunctionOptions): Promise<RESPONSE>;
+    get settingsForEvent(): Partial<OllamaTextGenerationModelSettings<CONTEXT_WINDOW_SIZE>>;
+    doGenerateText(prompt: string, options?: FunctionOptions): Promise<{
+        response: {
+            response: string;
+            model: string;
+            done: true;
+            context: number[];
+            total_duration: number;
+            load_duration: number;
+            prompt_eval_count: number;
+            eval_count: number;
+            eval_duration: number;
+        };
+        text: string;
+    }>;
+    doStreamText(prompt: string, options?: FunctionOptions): Promise<AsyncIterable<Delta<string>>>;
+    withPromptFormat<INPUT_PROMPT>(promptFormat: TextGenerationPromptFormat<INPUT_PROMPT, string>): PromptFormatTextStreamingModel<INPUT_PROMPT, string, OllamaTextGenerationModelSettings<CONTEXT_WINDOW_SIZE>, this>;
+    withSettings(additionalSettings: Partial<OllamaTextGenerationModelSettings<CONTEXT_WINDOW_SIZE>>): this;
+}
+declare const ollamaTextGenerationResponseSchema: z.ZodObject<{
+    done: z.ZodLiteral<true>;
+    model: z.ZodString;
+    response: z.ZodString;
+    total_duration: z.ZodNumber;
+    load_duration: z.ZodNumber;
+    prompt_eval_count: z.ZodNumber;
+    eval_count: z.ZodNumber;
+    eval_duration: z.ZodNumber;
+    context: z.ZodArray<z.ZodNumber, "many">;
+}, "strip", z.ZodTypeAny, {
+    response: string;
+    model: string;
+    done: true;
+    context: number[];
+    total_duration: number;
+    load_duration: number;
+    prompt_eval_count: number;
+    eval_count: number;
+    eval_duration: number;
+}, {
+    response: string;
+    model: string;
+    done: true;
+    context: number[];
+    total_duration: number;
+    load_duration: number;
+    prompt_eval_count: number;
+    eval_count: number;
+    eval_duration: number;
+}>;
+export type OllamaTextGenerationResponse = z.infer<typeof ollamaTextGenerationResponseSchema>;
+export type OllamaTextGenerationDelta = {
+    content: string;
+    isComplete: boolean;
+    delta: string;
+};
+export type OllamaTextGenerationResponseFormatType<T> = {
+    stream: boolean;
+    handler: ResponseHandler<T>;
+};
+export declare const OllamaTextGenerationResponseFormat: {
+    /**
+     * Returns the response as a JSON object.
+     */
+    json: {
+        stream: false;
+        handler: ResponseHandler<{
+            response: string;
+            model: string;
+            done: true;
+            context: number[];
+            total_duration: number;
+            load_duration: number;
+            prompt_eval_count: number;
+            eval_count: number;
+            eval_duration: number;
+        }>;
+    };
+    /**
+     * Returns an async iterable over the full deltas (all choices, including full current state at time of event)
+     * of the response stream.
+     */
+    deltaIterable: {
+        stream: true;
+        handler: ({ response }: {
+            response: Response;
+        }) => Promise<AsyncIterable<Delta<string>>>;
+    };
+};
+export {};
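
The declarations above are the public surface of the new Ollama provider. A minimal usage sketch, assuming modelfusion's `generateText`/`streamText` helpers (not part of this diff) and a local Ollama server; `"llama2"` is a placeholder for whatever model that server has pulled:

```ts
import {
  OllamaTextGenerationModel,
  generateText,
  streamText,
} from "modelfusion";

async function main() {
  // `model` is the only required setting; see OllamaTextGenerationModelSettings above.
  const model = new OllamaTextGenerationModel({
    model: "llama2", // placeholder model name
    temperature: 0.7,
    contextWindowSize: 4096, // should match the context size of the loaded model
  });

  // Non-streaming: backed by OllamaTextGenerationResponseFormat.json.
  const text = await generateText(model, "Write a haiku about rivers.");
  console.log(text);

  // Streaming: backed by OllamaTextGenerationResponseFormat.deltaIterable.
  const textStream = await streamText(model, "Write a haiku about mountains.");
  for await (const delta of textStream) {
    process.stdout.write(delta);
  }
}

main().catch(console.error);
```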
package/model-provider/ollama/OllamaTextGenerationModel.js
ADDED
@@ -0,0 +1,212 @@
+import { z } from "zod";
+import { callWithRetryAndThrottle } from "../../core/api/callWithRetryAndThrottle.js";
+import { createJsonResponseHandler, postJsonToApi, } from "../../core/api/postToApi.js";
+import { AbstractModel } from "../../model-function/AbstractModel.js";
+import { PromptFormatTextStreamingModel } from "../../model-function/generate-text/PromptFormatTextStreamingModel.js";
+import { AsyncQueue } from "../../util/AsyncQueue.js";
+import { parseJsonStream } from "../../util/streaming/parseJsonStream.js";
+import { OllamaApiConfiguration } from "./OllamaApiConfiguration.js";
+import { failedOllamaCallResponseHandler } from "./OllamaError.js";
+export class OllamaTextGenerationModel extends AbstractModel {
+    constructor(settings) {
+        super({ settings });
+        Object.defineProperty(this, "provider", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: "ollama"
+        });
+        Object.defineProperty(this, "tokenizer", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: undefined
+        });
+        Object.defineProperty(this, "countPromptTokens", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: undefined
+        });
+    }
+    get modelName() {
+        return this.settings.model;
+    }
+    get contextWindowSize() {
+        return this.settings.contextWindowSize;
+    }
+    async callAPI(prompt, options) {
+        return callWithRetryAndThrottle({
+            retry: this.settings.api?.retry,
+            throttle: this.settings.api?.throttle,
+            call: async () => callOllamaTextGenerationAPI({
+                ...this.settings,
+                // other
+                abortSignal: options.run?.abortSignal,
+                prompt,
+                responseFormat: options.responseFormat,
+            }),
+        });
+    }
+    get settingsForEvent() {
+        const eventSettingProperties = [
+            // "maxCompletionTokens",
+            // "stopSequences",
+            "contextWindowSize",
+        ];
+        return Object.fromEntries(Object.entries(this.settings).filter(([key]) => eventSettingProperties.includes(key)));
+    }
+    async doGenerateText(prompt, options) {
+        const response = await this.callAPI(prompt, {
+            ...options,
+            responseFormat: OllamaTextGenerationResponseFormat.json,
+        });
+        return {
+            response,
+            text: response.response,
+        };
+    }
+    doStreamText(prompt, options) {
+        return this.callAPI(prompt, {
+            ...options,
+            responseFormat: OllamaTextGenerationResponseFormat.deltaIterable,
+        });
+    }
+    withPromptFormat(promptFormat) {
+        return new PromptFormatTextStreamingModel({
+            model: this.withSettings({
+                stopSequences: [
+                    ...(this.settings.stopSequences ?? []),
+                    ...promptFormat.stopSequences,
+                ],
+            }),
+            promptFormat,
+        });
+    }
+    withSettings(additionalSettings) {
+        return new OllamaTextGenerationModel(Object.assign({}, this.settings, additionalSettings));
+    }
+}
+const ollamaTextGenerationResponseSchema = z.object({
+    done: z.literal(true),
+    model: z.string(),
+    response: z.string(),
+    total_duration: z.number(),
+    load_duration: z.number(),
+    prompt_eval_count: z.number(),
+    eval_count: z.number(),
+    eval_duration: z.number(),
+    context: z.array(z.number()),
+});
+const ollamaTextStreamingResponseSchema = z.discriminatedUnion("done", [
+    z.object({
+        done: z.literal(false),
+        model: z.string(),
+        created_at: z.string(),
+        response: z.string(),
+    }),
+    z.object({
+        done: z.literal(true),
+        model: z.string(),
+        created_at: z.string(),
+        total_duration: z.number(),
+        load_duration: z.number(),
+        sample_count: z.number().optional(),
+        sample_duration: z.number().optional(),
+        prompt_eval_count: z.number(),
+        prompt_eval_duration: z.number().optional(),
+        eval_count: z.number(),
+        eval_duration: z.number(),
+        context: z.array(z.number()),
+    }),
+]);
+async function callOllamaTextGenerationAPI({ api = new OllamaApiConfiguration(), abortSignal, responseFormat, prompt, model, contextWindowSize, maxCompletionTokens, mirostat, mirostat_eta, mirostat_tau, num_gpu, num_gqa, num_threads, repeat_last_n, repeat_penalty, seed, stopSequences, temperature, tfs_z, top_k, top_p, system, template, context, }) {
+    return postJsonToApi({
+        url: api.assembleUrl(`/api/generate`),
+        headers: api.headers,
+        body: {
+            stream: responseFormat.stream,
+            model,
+            prompt,
+            options: {
+                mirostat,
+                mirostat_eta,
+                mirostat_tau,
+                num_ctx: contextWindowSize,
+                num_gpu,
+                num_gqa,
+                num_predict: maxCompletionTokens,
+                num_threads,
+                repeat_last_n,
+                repeat_penalty,
+                seed,
+                stop: stopSequences,
+                temperature,
+                tfs_z,
+                top_k,
+                top_p,
+            },
+            system,
+            template,
+            context,
+        },
+        failedResponseHandler: failedOllamaCallResponseHandler,
+        successfulResponseHandler: responseFormat.handler,
+        abortSignal,
+    });
+}
+async function createOllamaFullDeltaIterableQueue(stream) {
+    const queue = new AsyncQueue();
+    let accumulatedText = "";
+    // process the stream asynchronously (no 'await' on purpose):
+    parseJsonStream({
+        stream,
+        schema: ollamaTextStreamingResponseSchema,
+        process(event) {
+            if (event.done === true) {
+                queue.push({
+                    type: "delta",
+                    fullDelta: {
+                        content: accumulatedText,
+                        isComplete: true,
+                        delta: "",
+                    },
+                    valueDelta: "",
+                });
+            }
+            else {
+                accumulatedText += event.response;
+                queue.push({
+                    type: "delta",
+                    fullDelta: {
+                        content: accumulatedText,
+                        isComplete: false,
+                        delta: event.response,
+                    },
+                    valueDelta: event.response,
+                });
+            }
+        },
+        onDone() {
+            queue.close();
+        },
+    });
+    return queue;
+}
+export const OllamaTextGenerationResponseFormat = {
+    /**
+     * Returns the response as a JSON object.
+     */
+    json: {
+        stream: false,
+        handler: createJsonResponseHandler(ollamaTextGenerationResponseSchema),
+    },
+    /**
+     * Returns an async iterable over the full deltas (all choices, including full current state at time of event)
+     * of the response stream.
+     */
+    deltaIterable: {
+        stream: true,
+        handler: async ({ response }) => createOllamaFullDeltaIterableQueue(response.body),
+    },
+};
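
`parseJsonStream` (`util/streaming/parseJsonStream.*` in the file list) is also new in this release, but its body is not shown here. From the `createOllamaFullDeltaIterableQueue` call site above, its contract appears to be: read newline-delimited JSON from a `ReadableStream`, validate each line against a Zod schema, and report results through callbacks. A sketch of a standalone call; the signature is inferred from the call site, not confirmed by this diff:

```ts
import { z } from "zod";
// Hypothetical deep import; the helper is also re-exported via util/streaming/index.
import { parseJsonStream } from "modelfusion/util/streaming/parseJsonStream.js";

// Simplified stand-in schema; the Ollama model above uses a discriminated union.
const eventSchema = z.object({
  done: z.boolean(),
  response: z.string().optional(),
});

declare const body: ReadableStream<Uint8Array>; // e.g. the body of a fetch() response

parseJsonStream({
  stream: body, // newline-delimited JSON, as emitted by Ollama's /api/generate
  schema: eventSchema, // each parsed line is validated against this schema
  process(event) {
    // invoked once per parsed JSON line
    if (!event.done) process.stdout.write(event.response ?? "");
  },
  onDone() {
    // invoked once the stream is exhausted
  },
});
```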
package/model-provider/ollama/index.cjs
ADDED
@@ -0,0 +1,21 @@
+"use strict";
+var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
+    if (k2 === undefined) k2 = k;
+    var desc = Object.getOwnPropertyDescriptor(m, k);
+    if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
+      desc = { enumerable: true, get: function() { return m[k]; } };
+    }
+    Object.defineProperty(o, k2, desc);
+}) : (function(o, m, k, k2) {
+    if (k2 === undefined) k2 = k;
+    o[k2] = m[k];
+}));
+var __exportStar = (this && this.__exportStar) || function(m, exports) {
+    for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
+};
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.OllamaError = void 0;
+__exportStar(require("./OllamaApiConfiguration.cjs"), exports);
+var OllamaError_js_1 = require("./OllamaError.cjs");
+Object.defineProperty(exports, "OllamaError", { enumerable: true, get: function () { return OllamaError_js_1.OllamaError; } });
+__exportStar(require("./OllamaTextGenerationModel.cjs"), exports);
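
The barrel re-exports everything from `OllamaApiConfiguration` and `OllamaTextGenerationModel`, plus the `OllamaError` class by name. Given that `model-provider/index.*` gains one line in this release (presumably re-exporting this barrel), the new provider should be reachable from the package root:

```ts
import {
  OllamaApiConfiguration,
  OllamaError,
  OllamaTextGenerationModel,
} from "modelfusion";
```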
package/model-provider/openai/OpenAICompletionModel.cjs
CHANGED
@@ -4,13 +4,13 @@ exports.OpenAITextResponseFormat = exports.OpenAICompletionModel = exports.calcu
 const zod_1 = require("zod");
 const callWithRetryAndThrottle_js_1 = require("../../core/api/callWithRetryAndThrottle.cjs");
 const postToApi_js_1 = require("../../core/api/postToApi.cjs");
-const AsyncQueue_js_1 = require("../../util/AsyncQueue.cjs");
-const parseEventSourceStream_js_1 = require("../../event-source/parseEventSourceStream.cjs");
 const AbstractModel_js_1 = require("../../model-function/AbstractModel.cjs");
 const PromptFormatTextStreamingModel_js_1 = require("../../model-function/generate-text/PromptFormatTextStreamingModel.cjs");
 const TextPromptFormat_js_1 = require("../../model-function/generate-text/TextPromptFormat.cjs");
 const countTokens_js_1 = require("../../model-function/tokenize-text/countTokens.cjs");
+const AsyncQueue_js_1 = require("../../util/AsyncQueue.cjs");
 const parseJSON_js_1 = require("../../util/parseJSON.cjs");
+const parseEventSourceStream_js_1 = require("../../util/streaming/parseEventSourceStream.cjs");
 const OpenAIApiConfiguration_js_1 = require("./OpenAIApiConfiguration.cjs");
 const OpenAIError_js_1 = require("./OpenAIError.cjs");
 const TikTokenTokenizer_js_1 = require("./TikTokenTokenizer.cjs");
package/model-provider/openai/OpenAICompletionModel.js
CHANGED
@@ -1,13 +1,13 @@
 import { z } from "zod";
 import { callWithRetryAndThrottle } from "../../core/api/callWithRetryAndThrottle.js";
 import { createJsonResponseHandler, postJsonToApi, } from "../../core/api/postToApi.js";
-import { AsyncQueue } from "../../util/AsyncQueue.js";
-import { parseEventSourceStream } from "../../event-source/parseEventSourceStream.js";
 import { AbstractModel } from "../../model-function/AbstractModel.js";
 import { PromptFormatTextStreamingModel } from "../../model-function/generate-text/PromptFormatTextStreamingModel.js";
 import { mapChatPromptToTextFormat, mapInstructionPromptToTextFormat, } from "../../model-function/generate-text/TextPromptFormat.js";
 import { countTokens } from "../../model-function/tokenize-text/countTokens.js";
+import { AsyncQueue } from "../../util/AsyncQueue.js";
 import { parseJsonWithZod } from "../../util/parseJSON.js";
+import { parseEventSourceStream } from "../../util/streaming/parseEventSourceStream.js";
 import { OpenAIApiConfiguration } from "./OpenAIApiConfiguration.js";
 import { failedOpenAICallResponseHandler } from "./OpenAIError.js";
 import { TikTokenTokenizer } from "./TikTokenTokenizer.js";
package/model-provider/openai/chat/OpenAIChatStreamIterable.cjs
CHANGED
@@ -3,7 +3,7 @@ Object.defineProperty(exports, "__esModule", { value: true });
 exports.createOpenAIChatDeltaIterableQueue = void 0;
 const zod_1 = require("zod");
 const AsyncQueue_js_1 = require("../../../util/AsyncQueue.cjs");
-const parseEventSourceStream_js_1 = require("../../../event-source/parseEventSourceStream.cjs");
+const parseEventSourceStream_js_1 = require("../../../util/streaming/parseEventSourceStream.cjs");
 const parseJSON_js_1 = require("../../../util/parseJSON.cjs");
 const chatResponseStreamEventSchema = zod_1.z.object({
     choices: zod_1.z.array(zod_1.z.object({
package/model-provider/openai/chat/OpenAIChatStreamIterable.js
CHANGED
@@ -1,6 +1,6 @@
 import { z } from "zod";
 import { AsyncQueue } from "../../../util/AsyncQueue.js";
-import { parseEventSourceStream } from "../../../event-source/parseEventSourceStream.js";
+import { parseEventSourceStream } from "../../../util/streaming/parseEventSourceStream.js";
 import { safeParseJsonWithZod } from "../../../util/parseJSON.js";
 const chatResponseStreamEventSchema = z.object({
     choices: z.array(z.object({
package/package.json
CHANGED
package/util/index.cjs
CHANGED
@@ -20,3 +20,4 @@ __exportStar(require("./cosineSimilarity.cjs"), exports);
 __exportStar(require("./delay.cjs"), exports);
 __exportStar(require("./getAudioFileExtension.cjs"), exports);
 __exportStar(require("./parseJSON.cjs"), exports);
+__exportStar(require("./streaming/index.cjs"), exports);
package/util/index.d.ts
CHANGED
package/util/index.js
CHANGED