modelfusion 0.65.1 → 0.67.0

This diff shows the content of publicly available package versions as published to a supported public registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in that registry.
@@ -3,8 +3,11 @@ Object.defineProperty(exports, "__esModule", { value: true });
  exports.streamText = void 0;
  const executeStreamCall_js_1 = require("../executeStreamCall.cjs");
  async function streamText(model, prompt, options) {
+ const shouldTrimWhitespace = model.settings.trimWhitespace ?? true;
  let accumulatedText = "";
  let lastFullDelta;
+ let isFirstDelta = true;
+ let trailingWhitespace = "";
  const fullResponse = await (0, executeStreamCall_js_1.executeStreamCall)({
  functionType: "stream-text",
  input: prompt,
@@ -13,8 +16,25 @@ async function streamText(model, prompt, options) {
  startStream: async (options) => model.doStreamText(prompt, options),
  processDelta: (delta) => {
  lastFullDelta = delta.fullDelta;
- const textDelta = delta.valueDelta;
+ let textDelta = delta.valueDelta;
  if (textDelta != null && textDelta.length > 0) {
+ if (shouldTrimWhitespace) {
+ if (isFirstDelta) {
+ // remove leading whitespace:
+ textDelta = textDelta.trimStart();
+ }
+ else {
+ // restore trailing whitespace from previous chunk:
+ textDelta = trailingWhitespace + textDelta;
+ }
+ // trim trailing whitespace and store it for the next chunk:
+ const trailingWhitespaceMatch = textDelta.match(/\s+$/);
+ trailingWhitespace = trailingWhitespaceMatch
+ ? trailingWhitespaceMatch[0]
+ : "";
+ textDelta = textDelta.trimEnd();
+ }
+ isFirstDelta = false;
  accumulatedText += textDelta;
  return textDelta;
  }
@@ -1,7 +1,10 @@
  import { executeStreamCall } from "../executeStreamCall.js";
  export async function streamText(model, prompt, options) {
+ const shouldTrimWhitespace = model.settings.trimWhitespace ?? true;
  let accumulatedText = "";
  let lastFullDelta;
+ let isFirstDelta = true;
+ let trailingWhitespace = "";
  const fullResponse = await executeStreamCall({
  functionType: "stream-text",
  input: prompt,
@@ -10,8 +13,25 @@ export async function streamText(model, prompt, options) {
  startStream: async (options) => model.doStreamText(prompt, options),
  processDelta: (delta) => {
  lastFullDelta = delta.fullDelta;
- const textDelta = delta.valueDelta;
+ let textDelta = delta.valueDelta;
  if (textDelta != null && textDelta.length > 0) {
+ if (shouldTrimWhitespace) {
+ if (isFirstDelta) {
+ // remove leading whitespace:
+ textDelta = textDelta.trimStart();
+ }
+ else {
+ // restore trailing whitespace from previous chunk:
+ textDelta = trailingWhitespace + textDelta;
+ }
+ // trim trailing whitespace and store it for the next chunk:
+ const trailingWhitespaceMatch = textDelta.match(/\s+$/);
+ trailingWhitespace = trailingWhitespaceMatch
+ ? trailingWhitespaceMatch[0]
+ : "";
+ textDelta = textDelta.trimEnd();
+ }
+ isFirstDelta = false;
  accumulatedText += textDelta;
  return textDelta;
  }
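The two hunks above (CJS and ESM builds of `streamText`) add whitespace trimming: the first delta is `trimStart()`-ed, and trailing whitespace is held back and only re-emitted once a later chunk arrives, so the accumulated text never ends in stray whitespace. The behavior is read from `model.settings.trimWhitespace` and defaults to on. A minimal usage sketch, assuming the setting can be passed through the model constructor and that `streamText` yields the text deltas as an async iterable (neither is shown in this diff):

```ts
import { streamText, OpenAICompletionModel } from "modelfusion";

// trimWhitespace defaults to true; pass false to receive the raw deltas.
// (Passing it via the constructor is an assumption; the diff only reads
// model.settings.trimWhitespace.)
const model = new OpenAICompletionModel({
  model: "gpt-3.5-turbo-instruct", // assumed model name for illustration
  trimWhitespace: false,
});

const textStream = await streamText(model, "Write a haiku about streaming:");

for await (const textDelta of textStream) {
  process.stdout.write(textDelta);
}
```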
@@ -72,6 +72,8 @@ class OllamaTextGenerationModel extends AbstractModel_js_1.AbstractModel {
  "system",
  "template",
  "context",
+ "format",
+ "raw",
  ];
  return Object.fromEntries(Object.entries(this.settings).filter(([key]) => eventSettingProperties.includes(key)));
  }
@@ -140,7 +142,7 @@ const ollamaTextStreamingResponseSchema = zod_1.z.discriminatedUnion("done", [
  context: zod_1.z.array(zod_1.z.number()),
  }),
  ]);
- async function callOllamaTextGenerationAPI({ api = new OllamaApiConfiguration_js_1.OllamaApiConfiguration(), abortSignal, responseFormat, prompt, model, contextWindowSize, maxCompletionTokens, mirostat, mirostat_eta, mirostat_tau, num_gpu, num_gqa, num_threads, repeat_last_n, repeat_penalty, seed, stopSequences, temperature, tfs_z, top_k, top_p, system, template, context, }) {
+ async function callOllamaTextGenerationAPI({ api = new OllamaApiConfiguration_js_1.OllamaApiConfiguration(), abortSignal, responseFormat, prompt, model, format, contextWindowSize, maxCompletionTokens, mirostat, mirostat_eta, mirostat_tau, num_gpu, num_gqa, num_threads, repeat_last_n, repeat_penalty, seed, stopSequences, temperature, tfs_z, top_k, top_p, system, template, context, raw, }) {
  return (0, postToApi_js_1.postJsonToApi)({
  url: api.assembleUrl(`/api/generate`),
  headers: api.headers,
@@ -148,6 +150,7 @@ async function callOllamaTextGenerationAPI({ api = new OllamaApiConfiguration_js
  stream: responseFormat.stream,
  model,
  prompt,
+ format,
  options: {
  mirostat,
  mirostat_eta,
@@ -169,6 +172,7 @@ async function callOllamaTextGenerationAPI({ api = new OllamaApiConfiguration_js
  system,
  template,
  context,
+ raw,
  },
  failedResponseHandler: OllamaError_js_1.failedOllamaCallResponseHandler,
  successfulResponseHandler: responseFormat.handler,
@@ -7,27 +7,104 @@ import { Delta } from "../../model-function/Delta.js";
  import { PromptFormatTextStreamingModel } from "../../model-function/generate-text/PromptFormatTextStreamingModel.js";
  import { TextGenerationModelSettings, TextStreamingModel } from "../../model-function/generate-text/TextGenerationModel.js";
  import { TextGenerationPromptFormat } from "../../model-function/generate-text/TextGenerationPromptFormat.js";
+ /**
+ * @see https://github.com/jmorganca/ollama/blob/main/docs/api.md#generate-a-completion
+ */
  export interface OllamaTextGenerationModelSettings<CONTEXT_WINDOW_SIZE extends number | undefined> extends TextGenerationModelSettings {
  api?: ApiConfiguration;
+ /**
+ * The name of the model to use. For example, 'mistral'.
+ *
+ * @see https://ollama.ai/library
+ */
  model: string;
+ /**
+ * The temperature of the model. Increasing the temperature will make the model
+ * answer more creatively. (Default: 0.8)
+ */
  temperature?: number;
  /**
  * Specify the context window size of the model that you have loaded in your
- * Ollama server.
+ * Ollama server. (Default: 2048)
  */
  contextWindowSize?: CONTEXT_WINDOW_SIZE;
+ /**
+ * Enable Mirostat sampling for controlling perplexity.
+ * (default: 0, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0)
+ */
  mirostat?: number;
+ /**
+ * Influences how quickly the algorithm responds to feedback from the generated text.
+ * A lower learning rate will result in slower adjustments,
+ * while a higher learning rate will make the algorithm more responsive. (Default: 0.1)
+ */
  mirostat_eta?: number;
+ /**
+ * Controls the balance between coherence and diversity of the output.
+ * A lower value will result in more focused and coherent text. (Default: 5.0)
+ */
  mirostat_tau?: number;
+ /**
+ * The number of GQA groups in the transformer layer. Required for some models,
+ * for example it is 8 for llama2:70b
+ */
  num_gqa?: number;
+ /**
+ * The number of layers to send to the GPU(s). On macOS it defaults to 1 to
+ * enable metal support, 0 to disable.
+ */
  num_gpu?: number;
+ /**
+ * Sets the number of threads to use during computation. By default, Ollama will
+ * detect this for optimal performance. It is recommended to set this value to the
+ * number of physical CPU cores your system has (as opposed to the logical number of cores).
+ */
  num_threads?: number;
+ /**
+ * Sets how far back for the model to look back to prevent repetition.
+ * (Default: 64, 0 = disabled, -1 = num_ctx)
+ */
  repeat_last_n?: number;
+ /**
+ * Sets how strongly to penalize repetitions. A higher value (e.g., 1.5)
+ * will penalize repetitions more strongly, while a lower value (e.g., 0.9)
+ * will be more lenient. (Default: 1.1)
+ */
  repeat_penalty?: number;
+ /**
+ * Sets the random number seed to use for generation. Setting this to a
+ * specific number will make the model generate the same text for the same prompt.
+ * (Default: 0)
+ */
  seed?: number;
+ /**
+ * Tail free sampling is used to reduce the impact of less probable tokens
+ * from the output. A higher value (e.g., 2.0) will reduce the impact more,
+ * while a value of 1.0 disables this setting. (default: 1)
+ */
  tfs_z?: number;
+ /**
+ * Reduces the probability of generating nonsense. A higher value (e.g. 100)
+ * will give more diverse answers, while a lower value (e.g. 10) will be more
+ * conservative. (Default: 40)
+ */
  top_k?: number;
+ /**
+ * Works together with top-k. A higher value (e.g., 0.95) will lead to more
+ * diverse text, while a lower value (e.g., 0.5) will generate more focused
+ * and conservative text. (Default: 0.9)
+ */
  top_p?: number;
+ /**
+ * When set to true, no formatting will be applied to the prompt and no context
+ * will be returned.
+ */
+ raw?: boolean;
+ /**
+ * The format to return a response in. Currently the only accepted value is 'json'.
+ * Leave undefined to return a string.
+ */
+ format?: "json";
  system?: string;
  template?: string;
  context?: number[];
@@ -69,6 +69,8 @@ export class OllamaTextGenerationModel extends AbstractModel {
  "system",
  "template",
  "context",
+ "format",
+ "raw",
  ];
  return Object.fromEntries(Object.entries(this.settings).filter(([key]) => eventSettingProperties.includes(key)));
  }
@@ -136,7 +138,7 @@ const ollamaTextStreamingResponseSchema = z.discriminatedUnion("done", [
  context: z.array(z.number()),
  }),
  ]);
- async function callOllamaTextGenerationAPI({ api = new OllamaApiConfiguration(), abortSignal, responseFormat, prompt, model, contextWindowSize, maxCompletionTokens, mirostat, mirostat_eta, mirostat_tau, num_gpu, num_gqa, num_threads, repeat_last_n, repeat_penalty, seed, stopSequences, temperature, tfs_z, top_k, top_p, system, template, context, }) {
+ async function callOllamaTextGenerationAPI({ api = new OllamaApiConfiguration(), abortSignal, responseFormat, prompt, model, format, contextWindowSize, maxCompletionTokens, mirostat, mirostat_eta, mirostat_tau, num_gpu, num_gqa, num_threads, repeat_last_n, repeat_penalty, seed, stopSequences, temperature, tfs_z, top_k, top_p, system, template, context, raw, }) {
  return postJsonToApi({
  url: api.assembleUrl(`/api/generate`),
  headers: api.headers,
@@ -144,6 +146,7 @@ async function callOllamaTextGenerationAPI({ api = new OllamaApiConfiguration(),
  stream: responseFormat.stream,
  model,
  prompt,
+ format,
  options: {
  mirostat,
  mirostat_eta,
@@ -165,6 +168,7 @@ async function callOllamaTextGenerationAPI({ api = new OllamaApiConfiguration(),
  system,
  template,
  context,
+ raw,
  },
  failedResponseHandler: failedOllamaCallResponseHandler,
  successfulResponseHandler: responseFormat.handler,
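The Ollama model now exposes and forwards two new `/api/generate` parameters: `format` (only accepted value is "json") and `raw` (send the prompt without templating and return no context); both are documented in the updated settings typings above. A usage sketch, assuming the settings object is passed to the `OllamaTextGenerationModel` constructor and that `generateText` is the non-streaming counterpart of `streamText` (neither appears in this diff):

```ts
import { generateText, OllamaTextGenerationModel } from "modelfusion";

const model = new OllamaTextGenerationModel({
  model: "mistral",
  format: "json", // ask Ollama to emit valid JSON
  raw: true, // send the prompt as-is, without the model's prompt template
  temperature: 0,
});

const text = await generateText(
  model,
  "Reply with a JSON object that lists three colors:"
);

console.log(JSON.parse(text));
```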
@@ -212,6 +212,7 @@ class OpenAICompletionModel extends AbstractModel_js_1.AbstractModel {
  "frequencyPenalty",
  "bestOf",
  "logitBias",
+ "seed",
  ];
  return Object.fromEntries(Object.entries(this.settings).filter(([key]) => eventSettingProperties.includes(key)));
  }
@@ -266,22 +267,26 @@ class OpenAICompletionModel extends AbstractModel_js_1.AbstractModel {
  exports.OpenAICompletionModel = OpenAICompletionModel;
  const OpenAICompletionResponseSchema = zod_1.z.object({
  id: zod_1.z.string(),
- object: zod_1.z.literal("text_completion"),
- created: zod_1.z.number(),
- model: zod_1.z.string(),
  choices: zod_1.z.array(zod_1.z.object({
- text: zod_1.z.string(),
+ finish_reason: zod_1.z
+ .enum(["stop", "length", "content_filter"])
+ .optional()
+ .nullable(),
  index: zod_1.z.number(),
  logprobs: zod_1.z.nullable(zod_1.z.any()),
- finish_reason: zod_1.z.string(),
+ text: zod_1.z.string(),
  })),
+ created: zod_1.z.number(),
+ model: zod_1.z.string(),
+ system_fingerprint: zod_1.z.string().optional(),
+ object: zod_1.z.literal("text_completion"),
  usage: zod_1.z.object({
  prompt_tokens: zod_1.z.number(),
  completion_tokens: zod_1.z.number(),
  total_tokens: zod_1.z.number(),
  }),
  });
- async function callOpenAICompletionAPI({ api = new OpenAIApiConfiguration_js_1.OpenAIApiConfiguration(), abortSignal, responseFormat, model, prompt, suffix, maxTokens, temperature, topP, n, logprobs, echo, stop, presencePenalty, frequencyPenalty, bestOf, logitBias, user, }) {
+ async function callOpenAICompletionAPI({ api = new OpenAIApiConfiguration_js_1.OpenAIApiConfiguration(), abortSignal, responseFormat, model, prompt, suffix, maxTokens, temperature, topP, n, logprobs, echo, stop, presencePenalty, frequencyPenalty, bestOf, logitBias, seed, user, }) {
  // empty arrays are not allowed for stop:
  if (stop != null && Array.isArray(stop) && stop.length === 0) {
  stop = undefined;
@@ -301,6 +306,7 @@ async function callOpenAICompletionAPI({ api = new OpenAIApiConfiguration_js_1.O
  logprobs,
  echo,
  stop,
+ seed,
  presence_penalty: presencePenalty,
  frequency_penalty: frequencyPenalty,
  best_of: bestOf,
@@ -332,13 +338,17 @@ exports.OpenAITextResponseFormat = {
  const textResponseStreamEventSchema = zod_1.z.object({
  choices: zod_1.z.array(zod_1.z.object({
  text: zod_1.z.string(),
- finish_reason: zod_1.z.enum(["stop", "length"]).nullable(),
+ finish_reason: zod_1.z
+ .enum(["stop", "length", "content_filter"])
+ .optional()
+ .nullable(),
  index: zod_1.z.number(),
  })),
  created: zod_1.z.number(),
  id: zod_1.z.string(),
  model: zod_1.z.string(),
- object: zod_1.z.string(),
+ system_fingerprint: zod_1.z.string().optional(),
+ object: zod_1.z.literal("text_completion"),
  });
  async function createOpenAITextFullDeltaIterableQueue(stream) {
  const queue = new AsyncQueue_js_1.AsyncQueue();
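Besides the new `seed` pass-through, the completion response schemas are aligned with the current OpenAI API: `finish_reason` becomes an optional, nullable enum that includes "content_filter", and the optional `system_fingerprint` field is accepted. Code that inspects `finish_reason` should therefore treat it as possibly missing; a minimal sketch against the parsed response shape defined above:

```ts
// Assumes `response` has already been validated against OpenAICompletionResponseSchema.
function wasTruncated(response: {
  choices: { finish_reason?: "stop" | "length" | "content_filter" | null }[];
}): boolean {
  // finish_reason may be absent or null, so compare explicitly against "length".
  return response.choices.some((choice) => choice.finish_reason === "length");
}
```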
@@ -112,6 +112,7 @@ export interface OpenAICompletionCallSettings {
  frequencyPenalty?: number;
  bestOf?: number;
  logitBias?: Record<number, number>;
+ seed?: number | null;
  }
  export interface OpenAICompletionModelSettings extends TextGenerationModelSettings, Omit<OpenAICompletionCallSettings, "stop" | "maxTokens"> {
  isUserIdForwardingEnabled?: boolean;
@@ -157,11 +158,12 @@ export declare class OpenAICompletionModel extends AbstractModel<OpenAICompletio
  id: string;
  choices: {
  text: string;
- finish_reason: string;
  index: number;
+ finish_reason?: "length" | "stop" | "content_filter" | null | undefined;
  logprobs?: any;
  }[];
  created: number;
+ system_fingerprint?: string | undefined;
  };
  text: string;
  usage: {
@@ -187,25 +189,26 @@ export declare class OpenAICompletionModel extends AbstractModel<OpenAICompletio
  }
  declare const OpenAICompletionResponseSchema: z.ZodObject<{
  id: z.ZodString;
- object: z.ZodLiteral<"text_completion">;
- created: z.ZodNumber;
- model: z.ZodString;
  choices: z.ZodArray<z.ZodObject<{
- text: z.ZodString;
+ finish_reason: z.ZodNullable<z.ZodOptional<z.ZodEnum<["stop", "length", "content_filter"]>>>;
  index: z.ZodNumber;
  logprobs: z.ZodNullable<z.ZodAny>;
- finish_reason: z.ZodString;
+ text: z.ZodString;
  }, "strip", z.ZodTypeAny, {
  text: string;
- finish_reason: string;
  index: number;
+ finish_reason?: "length" | "stop" | "content_filter" | null | undefined;
  logprobs?: any;
  }, {
  text: string;
- finish_reason: string;
  index: number;
+ finish_reason?: "length" | "stop" | "content_filter" | null | undefined;
  logprobs?: any;
  }>, "many">;
+ created: z.ZodNumber;
+ model: z.ZodString;
+ system_fingerprint: z.ZodOptional<z.ZodString>;
+ object: z.ZodLiteral<"text_completion">;
  usage: z.ZodObject<{
  prompt_tokens: z.ZodNumber;
  completion_tokens: z.ZodNumber;
@@ -230,11 +233,12 @@ declare const OpenAICompletionResponseSchema: z.ZodObject<{
  id: string;
  choices: {
  text: string;
- finish_reason: string;
  index: number;
+ finish_reason?: "length" | "stop" | "content_filter" | null | undefined;
  logprobs?: any;
  }[];
  created: number;
+ system_fingerprint?: string | undefined;
  }, {
  object: "text_completion";
  usage: {
@@ -246,11 +250,12 @@ declare const OpenAICompletionResponseSchema: z.ZodObject<{
  id: string;
  choices: {
  text: string;
- finish_reason: string;
  index: number;
+ finish_reason?: "length" | "stop" | "content_filter" | null | undefined;
  logprobs?: any;
  }[];
  created: number;
+ system_fingerprint?: string | undefined;
  }>;
  export type OpenAICompletionResponse = z.infer<typeof OpenAICompletionResponseSchema>;
  export type OpenAITextResponseFormatType<T> = {
@@ -274,11 +279,12 @@ export declare const OpenAITextResponseFormat: {
  id: string;
  choices: {
  text: string;
- finish_reason: string;
  index: number;
+ finish_reason?: "length" | "stop" | "content_filter" | null | undefined;
  logprobs?: any;
  }[];
  created: number;
+ system_fingerprint?: string | undefined;
  }>;
  };
  /**
@@ -206,6 +206,7 @@ export class OpenAICompletionModel extends AbstractModel {
  "frequencyPenalty",
  "bestOf",
  "logitBias",
+ "seed",
  ];
  return Object.fromEntries(Object.entries(this.settings).filter(([key]) => eventSettingProperties.includes(key)));
  }
@@ -259,22 +260,26 @@ export class OpenAICompletionModel extends AbstractModel {
  }
  const OpenAICompletionResponseSchema = z.object({
  id: z.string(),
- object: z.literal("text_completion"),
- created: z.number(),
- model: z.string(),
  choices: z.array(z.object({
- text: z.string(),
+ finish_reason: z
+ .enum(["stop", "length", "content_filter"])
+ .optional()
+ .nullable(),
  index: z.number(),
  logprobs: z.nullable(z.any()),
- finish_reason: z.string(),
+ text: z.string(),
  })),
+ created: z.number(),
+ model: z.string(),
+ system_fingerprint: z.string().optional(),
+ object: z.literal("text_completion"),
  usage: z.object({
  prompt_tokens: z.number(),
  completion_tokens: z.number(),
  total_tokens: z.number(),
  }),
  });
- async function callOpenAICompletionAPI({ api = new OpenAIApiConfiguration(), abortSignal, responseFormat, model, prompt, suffix, maxTokens, temperature, topP, n, logprobs, echo, stop, presencePenalty, frequencyPenalty, bestOf, logitBias, user, }) {
+ async function callOpenAICompletionAPI({ api = new OpenAIApiConfiguration(), abortSignal, responseFormat, model, prompt, suffix, maxTokens, temperature, topP, n, logprobs, echo, stop, presencePenalty, frequencyPenalty, bestOf, logitBias, seed, user, }) {
  // empty arrays are not allowed for stop:
  if (stop != null && Array.isArray(stop) && stop.length === 0) {
  stop = undefined;
@@ -294,6 +299,7 @@ async function callOpenAICompletionAPI({ api = new OpenAIApiConfiguration(), abo
  logprobs,
  echo,
  stop,
+ seed,
  presence_penalty: presencePenalty,
  frequency_penalty: frequencyPenalty,
  best_of: bestOf,
@@ -325,13 +331,17 @@ export const OpenAITextResponseFormat = {
  const textResponseStreamEventSchema = z.object({
  choices: z.array(z.object({
  text: z.string(),
- finish_reason: z.enum(["stop", "length"]).nullable(),
+ finish_reason: z
+ .enum(["stop", "length", "content_filter"])
+ .optional()
+ .nullable(),
  index: z.number(),
  })),
  created: z.number(),
  id: z.string(),
  model: z.string(),
- object: z.string(),
+ system_fingerprint: z.string().optional(),
+ object: z.literal("text_completion"),
  });
  async function createOpenAITextFullDeltaIterableQueue(stream) {
  const queue = new AsyncQueue();
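The completion model (both builds, plus the typings above) gains a `seed` call setting typed `number | null`, which is forwarded unchanged to the OpenAI completions API and also logged via `eventSettingProperties`. A hedged sketch of using it for best-effort reproducible sampling, assuming the setting is passed through the constructor like the other call settings and using `generateText` (not shown in this diff) for a single completion:

```ts
import { generateText, OpenAICompletionModel } from "modelfusion";

const model = new OpenAICompletionModel({
  model: "gpt-3.5-turbo-instruct", // assumed completion model name
  temperature: 0.7,
  seed: 42, // best-effort determinism; OpenAI does not guarantee identical outputs
});

// Repeating this call with the same prompt and seed should usually return the same text.
const text = await generateText(model, "Name a classic sorting algorithm:");
console.log(text);
```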
@@ -39,6 +39,8 @@ exports.OPENAI_CHAT_MODELS = {
  contextWindowSize: 8192,
  promptTokenCostInMillicents: 3,
  completionTokenCostInMillicents: 6,
+ fineTunedPromptTokenCostInMillicents: null,
+ fineTunedCompletionTokenCostInMillicents: null,
  },
  "gpt-4-1106-preview": {
  contextWindowSize: 128000,
@@ -115,7 +117,7 @@ function getOpenAIChatModelInformation(model) {
  // Extract the base model from the fine-tuned model:
  // eslint-disable-next-line @typescript-eslint/no-unused-vars
  const [_, baseModel, ___, ____, _____] = model.split(":");
- if (["gpt-3.5-turbo", "gpt-3.5-turbo-0613"].includes(baseModel)) {
+ if (["gpt-3.5-turbo", "gpt-3.5-turbo-0613", "gpt-4-0613"].includes(baseModel)) {
  const baseModelInformation = exports.OPENAI_CHAT_MODELS[baseModel];
  return {
  baseModel: baseModel,
@@ -133,11 +135,14 @@ const isOpenAIChatModel = (model) => model in exports.OPENAI_CHAT_MODELS ||
  model.startsWith("ft:gpt-3.5-turbo:");
  exports.isOpenAIChatModel = isOpenAIChatModel;
  const calculateOpenAIChatCostInMillicents = ({ model, response, }) => {
- const modelInformation = getOpenAIChatModelInformation(model);
- return (response.usage.prompt_tokens *
- modelInformation.promptTokenCostInMillicents +
- response.usage.completion_tokens *
- modelInformation.completionTokenCostInMillicents);
+ const { promptTokenCostInMillicents, completionTokenCostInMillicents } = getOpenAIChatModelInformation(model);
+ // null: when cost is unknown, e.g. for fine-tuned models where the price is not yet known
+ if (promptTokenCostInMillicents == null ||
+ completionTokenCostInMillicents == null) {
+ return null;
+ }
+ return (response.usage.prompt_tokens * promptTokenCostInMillicents +
+ response.usage.completion_tokens * completionTokenCostInMillicents);
  };
  exports.calculateOpenAIChatCostInMillicents = calculateOpenAIChatCostInMillicents;
  /**
@@ -234,6 +239,8 @@ class OpenAIChatModel extends AbstractModel_js_1.AbstractModel {
  "presencePenalty",
  "frequencyPenalty",
  "logitBias",
+ "seed",
+ "responseFormat",
  ];
  return Object.fromEntries(Object.entries(this.settings).filter(([key]) => eventSettingProperties.includes(key)));
  }
@@ -401,13 +408,16 @@ const openAIChatResponseSchema = zod_1.z.object({
  }),
  index: zod_1.z.number(),
  logprobs: zod_1.z.nullable(zod_1.z.any()),
- finish_reason: zod_1.z.enum([
+ finish_reason: zod_1.z
+ .enum([
  "stop",
  "length",
  "tool_calls",
  "content_filter",
  "function_call",
- ]),
+ ])
+ .optional()
+ .nullable(),
  })),
  created: zod_1.z.number(),
  model: zod_1.z.string(),
@@ -27,6 +27,8 @@ export declare const OPENAI_CHAT_MODELS: {
  contextWindowSize: number;
  promptTokenCostInMillicents: number;
  completionTokenCostInMillicents: number;
+ fineTunedPromptTokenCostInMillicents: null;
+ fineTunedCompletionTokenCostInMillicents: null;
  };
  "gpt-4-1106-preview": {
  contextWindowSize: number;
@@ -92,10 +94,10 @@ export declare function getOpenAIChatModelInformation(model: OpenAIChatModelType
  baseModel: OpenAIChatBaseModelType;
  isFineTuned: boolean;
  contextWindowSize: number;
- promptTokenCostInMillicents: number;
- completionTokenCostInMillicents: number;
+ promptTokenCostInMillicents: number | null;
+ completionTokenCostInMillicents: number | null;
  };
- type FineTuneableOpenAIChatModelType = `gpt-3.5-turbo` | `gpt-3.5-turbo-0613`;
+ type FineTuneableOpenAIChatModelType = `gpt-3.5-turbo` | `gpt-3.5-turbo-0613` | `gpt-4-0613`;
  type FineTunedOpenAIChatModelType = `ft:${FineTuneableOpenAIChatModelType}:${string}:${string}:${string}`;
  export type OpenAIChatBaseModelType = keyof typeof OPENAI_CHAT_MODELS;
  export type OpenAIChatModelType = OpenAIChatBaseModelType | FineTunedOpenAIChatModelType;
@@ -103,7 +105,7 @@ export declare const isOpenAIChatModel: (model: string) => model is OpenAIChatMo
  export declare const calculateOpenAIChatCostInMillicents: ({ model, response, }: {
  model: OpenAIChatModelType;
  response: OpenAIChatResponse;
- }) => number;
+ }) => number | null;
  export interface OpenAIChatCallSettings {
  api?: ApiConfiguration;
  model: OpenAIChatModelType;
@@ -193,9 +195,9 @@ export declare class OpenAIChatModel extends AbstractModel<OpenAIChatSettings> i
  arguments: string;
  } | undefined;
  };
- finish_reason: "length" | "stop" | "function_call" | "tool_calls" | "content_filter";
  index: number;
  logprobs?: any;
+ finish_reason?: "length" | "stop" | "function_call" | "tool_calls" | "content_filter" | null | undefined;
  }[];
  created: number;
  system_fingerprint?: string | undefined;
@@ -234,9 +236,9 @@ export declare class OpenAIChatModel extends AbstractModel<OpenAIChatSettings> i
  arguments: string;
  } | undefined;
  };
- finish_reason: "length" | "stop" | "function_call" | "tool_calls" | "content_filter";
  index: number;
  logprobs?: any;
+ finish_reason?: "length" | "stop" | "function_call" | "tool_calls" | "content_filter" | null | undefined;
  }[];
  created: number;
  system_fingerprint?: string | undefined;
@@ -269,9 +271,9 @@ export declare class OpenAIChatModel extends AbstractModel<OpenAIChatSettings> i
  arguments: string;
  } | undefined;
  };
- finish_reason: "length" | "stop" | "function_call" | "tool_calls" | "content_filter";
  index: number;
  logprobs?: any;
+ finish_reason?: "length" | "stop" | "function_call" | "tool_calls" | "content_filter" | null | undefined;
  }[];
  created: number;
  system_fingerprint?: string | undefined;
@@ -306,9 +308,9 @@ export declare class OpenAIChatModel extends AbstractModel<OpenAIChatSettings> i
  arguments: string;
  } | undefined;
  };
- finish_reason: "length" | "stop" | "function_call" | "tool_calls" | "content_filter";
  index: number;
  logprobs?: any;
+ finish_reason?: "length" | "stop" | "function_call" | "tool_calls" | "content_filter" | null | undefined;
  }[];
  created: number;
  system_fingerprint?: string | undefined;
@@ -374,7 +376,7 @@ declare const openAIChatResponseSchema: z.ZodObject<{
  }>;
  index: z.ZodNumber;
  logprobs: z.ZodNullable<z.ZodAny>;
- finish_reason: z.ZodEnum<["stop", "length", "tool_calls", "content_filter", "function_call"]>;
+ finish_reason: z.ZodNullable<z.ZodOptional<z.ZodEnum<["stop", "length", "tool_calls", "content_filter", "function_call"]>>>;
  }, "strip", z.ZodTypeAny, {
  message: {
  content: string | null;
@@ -384,9 +386,9 @@ declare const openAIChatResponseSchema: z.ZodObject<{
  arguments: string;
  } | undefined;
  };
- finish_reason: "length" | "stop" | "function_call" | "tool_calls" | "content_filter";
  index: number;
  logprobs?: any;
+ finish_reason?: "length" | "stop" | "function_call" | "tool_calls" | "content_filter" | null | undefined;
  }, {
  message: {
  content: string | null;
@@ -396,9 +398,9 @@ declare const openAIChatResponseSchema: z.ZodObject<{
  arguments: string;
  } | undefined;
  };
- finish_reason: "length" | "stop" | "function_call" | "tool_calls" | "content_filter";
  index: number;
  logprobs?: any;
+ finish_reason?: "length" | "stop" | "function_call" | "tool_calls" | "content_filter" | null | undefined;
  }>, "many">;
  created: z.ZodNumber;
  model: z.ZodString;
@@ -435,9 +437,9 @@ declare const openAIChatResponseSchema: z.ZodObject<{
  arguments: string;
  } | undefined;
  };
- finish_reason: "length" | "stop" | "function_call" | "tool_calls" | "content_filter";
  index: number;
  logprobs?: any;
+ finish_reason?: "length" | "stop" | "function_call" | "tool_calls" | "content_filter" | null | undefined;
  }[];
  created: number;
  system_fingerprint?: string | undefined;
@@ -459,9 +461,9 @@ declare const openAIChatResponseSchema: z.ZodObject<{
  arguments: string;
  } | undefined;
  };
- finish_reason: "length" | "stop" | "function_call" | "tool_calls" | "content_filter";
  index: number;
  logprobs?: any;
+ finish_reason?: "length" | "stop" | "function_call" | "tool_calls" | "content_filter" | null | undefined;
  }[];
  created: number;
  system_fingerprint?: string | undefined;
@@ -495,9 +497,9 @@ export declare const OpenAIChatResponseFormat: {
  arguments: string;
  } | undefined;
  };
- finish_reason: "length" | "stop" | "function_call" | "tool_calls" | "content_filter";
  index: number;
  logprobs?: any;
+ finish_reason?: "length" | "stop" | "function_call" | "tool_calls" | "content_filter" | null | undefined;
  }[];
  created: number;
  system_fingerprint?: string | undefined;
@@ -33,6 +33,8 @@ export const OPENAI_CHAT_MODELS = {
  contextWindowSize: 8192,
  promptTokenCostInMillicents: 3,
  completionTokenCostInMillicents: 6,
+ fineTunedPromptTokenCostInMillicents: null,
+ fineTunedCompletionTokenCostInMillicents: null,
  },
  "gpt-4-1106-preview": {
  contextWindowSize: 128000,
@@ -109,7 +111,7 @@ export function getOpenAIChatModelInformation(model) {
  // Extract the base model from the fine-tuned model:
  // eslint-disable-next-line @typescript-eslint/no-unused-vars
  const [_, baseModel, ___, ____, _____] = model.split(":");
- if (["gpt-3.5-turbo", "gpt-3.5-turbo-0613"].includes(baseModel)) {
+ if (["gpt-3.5-turbo", "gpt-3.5-turbo-0613", "gpt-4-0613"].includes(baseModel)) {
  const baseModelInformation = OPENAI_CHAT_MODELS[baseModel];
  return {
  baseModel: baseModel,
@@ -125,11 +127,14 @@ export const isOpenAIChatModel = (model) => model in OPENAI_CHAT_MODELS ||
  model.startsWith("ft:gpt-3.5-turbo-0613:") ||
  model.startsWith("ft:gpt-3.5-turbo:");
  export const calculateOpenAIChatCostInMillicents = ({ model, response, }) => {
- const modelInformation = getOpenAIChatModelInformation(model);
- return (response.usage.prompt_tokens *
- modelInformation.promptTokenCostInMillicents +
- response.usage.completion_tokens *
- modelInformation.completionTokenCostInMillicents);
+ const { promptTokenCostInMillicents, completionTokenCostInMillicents } = getOpenAIChatModelInformation(model);
+ // null: when cost is unknown, e.g. for fine-tuned models where the price is not yet known
+ if (promptTokenCostInMillicents == null ||
+ completionTokenCostInMillicents == null) {
+ return null;
+ }
+ return (response.usage.prompt_tokens * promptTokenCostInMillicents +
+ response.usage.completion_tokens * completionTokenCostInMillicents);
  };
  /**
  * Create a text generation model that calls the OpenAI chat completion API.
@@ -225,6 +230,8 @@ export class OpenAIChatModel extends AbstractModel {
  "presencePenalty",
  "frequencyPenalty",
  "logitBias",
+ "seed",
+ "responseFormat",
  ];
  return Object.fromEntries(Object.entries(this.settings).filter(([key]) => eventSettingProperties.includes(key)));
  }
@@ -391,13 +398,16 @@ const openAIChatResponseSchema = z.object({
  }),
  index: z.number(),
  logprobs: z.nullable(z.any()),
- finish_reason: z.enum([
+ finish_reason: z
+ .enum([
  "stop",
  "length",
  "tool_calls",
  "content_filter",
  "function_call",
- ]),
+ ])
+ .optional()
+ .nullable(),
  })),
  created: z.number(),
  model: z.string(),
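The chat model changes mirror the typings above: fine-tuned `gpt-4-0613` models are now recognized, per-model cost entries can carry `null` fine-tuning prices, and `calculateOpenAIChatCostInMillicents` returns `null` when per-token pricing is unknown, so callers need a null check. A small sketch, assuming the helper and the `OpenAIChatModelType`/`OpenAIChatResponse` types are re-exported from the package root:

```ts
import {
  calculateOpenAIChatCostInMillicents,
  type OpenAIChatModelType,
  type OpenAIChatResponse,
} from "modelfusion";

function formatCost(model: OpenAIChatModelType, response: OpenAIChatResponse): string {
  const costInMillicents = calculateOpenAIChatCostInMillicents({ model, response });

  // null means pricing is unknown, e.g. for fine-tuned gpt-4-0613 models.
  return costInMillicents == null
    ? "cost unknown"
    : `$${(costInMillicents / 100_000).toFixed(4)}`; // 100,000 millicents = 1 dollar
}
```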
package/package.json CHANGED
@@ -1,7 +1,7 @@
  {
  "name": "modelfusion",
  "description": "Build multimodal applications, chatbots, and agents with JavaScript and TypeScript.",
- "version": "0.65.1",
+ "version": "0.67.0",
  "author": "Lars Grammel",
  "license": "MIT",
  "keywords": [
@@ -8,9 +8,9 @@ function parseJsonStream({ schema, stream, process, onDone, }) {
  }
  return (async () => {
  try {
- let unprocessedText = "";
  const reader = new ReadableStreamDefaultReader(stream);
  const utf8Decoder = new TextDecoder("utf-8");
+ let unprocessedText = "";
  // eslint-disable-next-line no-constant-condition
  while (true) {
  const { value: chunk, done } = await reader.read();
@@ -18,8 +18,8 @@ function parseJsonStream({ schema, stream, process, onDone, }) {
  break;
  }
  unprocessedText += utf8Decoder.decode(chunk, { stream: true });
- const processableLines = unprocessedText.split(/\r\n|\n|\r/g);
- unprocessedText = processableLines.pop() || "";
+ const processableLines = unprocessedText.split("\n");
+ unprocessedText = processableLines.pop() ?? "";
  processableLines.forEach(processLine);
  }
  // processing remaining text:
@@ -5,9 +5,9 @@ export function parseJsonStream({ schema, stream, process, onDone, }) {
  }
  return (async () => {
  try {
- let unprocessedText = "";
  const reader = new ReadableStreamDefaultReader(stream);
  const utf8Decoder = new TextDecoder("utf-8");
+ let unprocessedText = "";
  // eslint-disable-next-line no-constant-condition
  while (true) {
  const { value: chunk, done } = await reader.read();
@@ -15,8 +15,8 @@ export function parseJsonStream({ schema, stream, process, onDone, }) {
  break;
  }
  unprocessedText += utf8Decoder.decode(chunk, { stream: true });
- const processableLines = unprocessedText.split(/\r\n|\n|\r/g);
- unprocessedText = processableLines.pop() || "";
+ const processableLines = unprocessedText.split("\n");
+ unprocessedText = processableLines.pop() ?? "";
  processableLines.forEach(processLine);
  }
  // processing remaining text:
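In `parseJsonStream`, the `unprocessedText` declaration simply moves below the reader/decoder setup, and line splitting changes from the `/\r\n|\n|\r/g` regex to a plain `"\n"` split with `??` for the leftover remainder. The underlying buffering pattern is unchanged: split the decoded text on newlines, process every complete line, and keep the last (possibly partial) line for the next chunk. A standalone sketch of that pattern, assuming newline-delimited JSON input:

```ts
// Minimal illustration of the line-buffering pattern used by parseJsonStream:
// process complete newline-delimited JSON lines, keep the partial tail.
function createLineBuffer(onLine: (line: string) => void) {
  let unprocessedText = "";

  return (chunk: string) => {
    unprocessedText += chunk;

    const processableLines = unprocessedText.split("\n");
    unprocessedText = processableLines.pop() ?? ""; // last element may be a partial line

    // skip empty lines (e.g. produced by consecutive newlines):
    processableLines.filter((line) => line.length > 0).forEach(onLine);
  };
}

// Usage: feed decoded chunks as they arrive from the stream.
const push = createLineBuffer((line) => console.log(JSON.parse(line)));
push('{"a":1}\n{"b"');
push(':2}\n');
// logs: { a: 1 } then { b: 2 }
```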