@aj-archipelago/cortex 1.3.22 → 1.3.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -380,7 +380,7 @@ class Claude3VertexPlugin extends OpenAIVisionPlugin {
380
380
  cortexRequest.params = {}; // query params
381
381
  cortexRequest.stream = stream;
382
382
  cortexRequest.urlSuffix = cortexRequest.stream
383
- ? ":streamRawPredict"
383
+ ? ":streamRawPredict?alt=sse"
384
384
  : ":rawPredict";
385
385
 
386
386
  const gcpAuthTokenHelper = this.config.get("gcpAuthTokenHelper");
@@ -392,33 +392,59 @@ class Claude3VertexPlugin extends OpenAIVisionPlugin {
392
392
 
393
393
  processStreamEvent(event, requestProgress) {
394
394
  const eventData = JSON.parse(event.data);
395
+ const baseOpenAIResponse = {
396
+ id: eventData.message?.id || `chatcmpl-${Date.now()}`,
397
+ object: "chat.completion.chunk",
398
+ created: Math.floor(Date.now() / 1000),
399
+ model: this.modelName,
400
+ choices: [{
401
+ index: 0,
402
+ delta: {},
403
+ finish_reason: null
404
+ }]
405
+ };
406
+
395
407
  switch (eventData.type) {
396
408
  case "message_start":
397
- requestProgress.data = JSON.stringify(eventData.message);
398
- break;
399
- case "content_block_start":
400
- break;
401
- case "ping":
409
+ // Initial message with role
410
+ baseOpenAIResponse.choices[0].delta = {
411
+ role: "assistant",
412
+ content: ""
413
+ };
414
+ requestProgress.data = JSON.stringify(baseOpenAIResponse);
402
415
  break;
416
+
403
417
  case "content_block_delta":
404
418
  if (eventData.delta.type === "text_delta") {
405
- requestProgress.data = JSON.stringify(eventData.delta.text);
419
+ baseOpenAIResponse.choices[0].delta = {
420
+ content: eventData.delta.text
421
+ };
422
+ requestProgress.data = JSON.stringify(baseOpenAIResponse);
406
423
  }
407
424
  break;
408
- case "content_block_stop":
409
- break;
410
- case "message_delta":
411
- break;
425
+
412
426
  case "message_stop":
413
- requestProgress.data = "[DONE]";
427
+ baseOpenAIResponse.choices[0].delta = {};
428
+ baseOpenAIResponse.choices[0].finish_reason = "stop";
429
+ requestProgress.data = JSON.stringify(baseOpenAIResponse);
414
430
  requestProgress.progress = 1;
415
431
  break;
432
+
416
433
  case "error":
417
- requestProgress.data = `\n\n*** ${
418
- eventData.error.message || eventData.error
419
- } ***`;
434
+ baseOpenAIResponse.choices[0].delta = {
435
+ content: `\n\n*** ${eventData.error.message || eventData.error} ***`
436
+ };
437
+ baseOpenAIResponse.choices[0].finish_reason = "error";
438
+ requestProgress.data = JSON.stringify(baseOpenAIResponse);
420
439
  requestProgress.progress = 1;
421
440
  break;
441
+
442
+ // Ignore other event types as they don't map to OpenAI format
443
+ case "content_block_start":
444
+ case "content_block_stop":
445
+ case "message_delta":
446
+ case "ping":
447
+ break;
422
448
  }
423
449
 
424
450
  return requestProgress;
@@ -56,7 +56,11 @@ class Gemini15ChatPlugin extends ModelPlugin {
56
56
  const { role, author, content } = message;
57
57
 
58
58
  if (role === 'system') {
59
- systemParts.push({ text: content });
59
+ if (Array.isArray(content)) {
60
+ content.forEach(item => systemParts.push({ text: item }));
61
+ } else {
62
+ systemParts.push({ text: content });
63
+ }
60
64
  return;
61
65
  }
62
66
 
@@ -169,6 +173,91 @@ class Gemini15ChatPlugin extends ModelPlugin {
169
173
  return this.executeRequest(cortexRequest);
170
174
  }
171
175
 
176
+ processStreamEvent(event, requestProgress) {
177
+ const eventData = JSON.parse(event.data);
178
+
179
+ // Initialize requestProgress if needed
180
+ requestProgress = requestProgress || {};
181
+ requestProgress.data = requestProgress.data || null;
182
+
183
+ // Create a helper function to generate message chunks
184
+ const createChunk = (delta) => ({
185
+ id: eventData.responseId || `chatcmpl-${Date.now()}`,
186
+ object: "chat.completion.chunk",
187
+ created: Math.floor(Date.now() / 1000),
188
+ model: this.modelName,
189
+ choices: [{
190
+ index: 0,
191
+ delta,
192
+ finish_reason: null
193
+ }]
194
+ });
195
+
196
+ // Handle content chunks - do this first before handling any finish conditions
197
+ if (eventData.candidates?.[0]?.content?.parts?.[0]?.text) {
198
+ if (!requestProgress.started) {
199
+ // First chunk - send role
200
+ requestProgress.data = JSON.stringify(createChunk({ role: "assistant" }));
201
+ requestProgress.started = true;
202
+
203
+ // Immediately follow up with the first content chunk
204
+ requestProgress.data = JSON.stringify(createChunk({
205
+ content: eventData.candidates[0].content.parts[0].text
206
+ }));
207
+ } else {
208
+ // Send content chunk
209
+ requestProgress.data = JSON.stringify(createChunk({
210
+ content: eventData.candidates[0].content.parts[0].text
211
+ }));
212
+ }
213
+
214
+ // If this message also has STOP, mark it for completion but don't overwrite the content
215
+ if (eventData.candidates[0].finishReason === "STOP") {
216
+ requestProgress.progress = 1;
217
+ }
218
+ } else if (eventData.candidates?.[0]?.finishReason === "STOP") {
219
+ // Only send DONE if there was no content in this message
220
+ requestProgress.data = '[DONE]';
221
+ requestProgress.progress = 1;
222
+ }
223
+
224
+ // Handle safety blocks
225
+ if (eventData.candidates?.[0]?.safetyRatings?.some(rating => rating.blocked)) {
226
+ requestProgress.data = JSON.stringify({
227
+ id: eventData.responseId || `chatcmpl-${Date.now()}`,
228
+ object: "chat.completion.chunk",
229
+ created: Math.floor(Date.now() / 1000),
230
+ model: this.modelName,
231
+ choices: [{
232
+ index: 0,
233
+ delta: { content: "\n\n*** Response blocked due to safety ratings ***" },
234
+ finish_reason: "content_filter"
235
+ }]
236
+ });
237
+ requestProgress.progress = 1;
238
+ return requestProgress;
239
+ }
240
+
241
+ // Handle prompt feedback blocks
242
+ if (eventData.promptFeedback?.blockReason) {
243
+ requestProgress.data = JSON.stringify({
244
+ id: eventData.responseId || `chatcmpl-${Date.now()}`,
245
+ object: "chat.completion.chunk",
246
+ created: Math.floor(Date.now() / 1000),
247
+ model: this.modelName,
248
+ choices: [{
249
+ index: 0,
250
+ delta: { content: `\n\n*** Response blocked: ${eventData.promptFeedback.blockReason} ***` },
251
+ finish_reason: "content_filter"
252
+ }]
253
+ });
254
+ requestProgress.progress = 1;
255
+ return requestProgress;
256
+ }
257
+
258
+ return requestProgress;
259
+ }
260
+
172
261
  // Override the logging function to display the messages and responses
173
262
  logRequestData(data, responseData, prompt) {
174
263
  const messages = data && data.contents;
@@ -24,19 +24,24 @@ class Gemini15VisionPlugin extends Gemini15ChatPlugin {
24
24
  const { role, author, content } = message;
25
25
 
26
26
  if (role === 'system') {
27
- systemParts.push({ text: content });
27
+ if (Array.isArray(content)) {
28
+ content.forEach(item => systemParts.push({ text: item }));
29
+ } else {
30
+ systemParts.push({ text: content });
31
+ }
28
32
  return;
29
33
  }
30
34
 
31
35
  // Convert content to Gemini format, trying to maintain compatibility
32
36
  const convertPartToGemini = (inputPart) => {
33
37
  try {
38
+ // First try to parse as JSON if it's a string
34
39
  const part = typeof inputPart === 'string' ? JSON.parse(inputPart) : inputPart;
35
40
  const {type, text, image_url, gcs} = part;
36
41
  let fileUrl = gcs || image_url?.url;
37
42
 
38
43
  if (typeof part === 'string') {
39
- return { text: text };
44
+ return { text: inputPart };
40
45
  } else if (type === 'text') {
41
46
  return { text: text };
42
47
  } else if (type === 'image_url') {
@@ -77,7 +82,8 @@ class Gemini15VisionPlugin extends Gemini15ChatPlugin {
77
82
  return null;
78
83
  }
79
84
  } catch (e) {
80
- // this space intentionally left blank
85
+ // If JSON parsing fails or any other error, treat as plain text
86
+ return inputPart ? { text: inputPart } : null;
81
87
  }
82
88
  return inputPart ? { text: inputPart } : null;
83
89
  };
@@ -381,14 +381,17 @@ class ModelPlugin {
381
381
 
382
382
  // finish reason can be in different places in the message
383
383
  const finishReason = parsedMessage?.choices?.[0]?.finish_reason || parsedMessage?.candidates?.[0]?.finishReason;
384
- if (finishReason?.toLowerCase() === 'stop') {
385
- requestProgress.progress = 1;
386
- } else {
387
- if (finishReason?.toLowerCase() === 'safety') {
388
- const safetyRatings = JSON.stringify(parsedMessage?.candidates?.[0]?.safetyRatings) || '';
389
- logger.warn(`Request ${this.requestId} was blocked by the safety filter. ${safetyRatings}`);
390
- requestProgress.data = `\n\nResponse blocked by safety filter: ${safetyRatings}`;
391
- requestProgress.progress = 1;
384
+ if (finishReason) {
385
+ switch (finishReason.toLowerCase()) {
386
+ case 'safety':
387
+ const safetyRatings = JSON.stringify(parsedMessage?.candidates?.[0]?.safetyRatings) || '';
388
+ logger.warn(`Request ${this.requestId} was blocked by the safety filter. ${safetyRatings}`);
389
+ requestProgress.data = `\n\nResponse blocked by safety filter: ${safetyRatings}`;
390
+ requestProgress.progress = 1;
391
+ break;
392
+ default:
393
+ requestProgress.progress = 1;
394
+ break;
392
395
  }
393
396
  }
394
397
  }
@@ -0,0 +1,158 @@
1
+ import ModelPlugin from './modelPlugin.js';
2
+ import logger from '../../lib/logger.js';
3
+ import { Transform } from 'stream';
4
+
5
/**
 * Model plugin that sends chat requests to Ollama and adapts its
 * newline-delimited JSON (NDJSON) output for the rest of the pipeline.
 */
class OllamaChatPlugin extends ModelPlugin {

    /**
     * Creates a Transform that re-frames NDJSON as SSE `data:` events.
     *
     * Network chunks do not respect line boundaries, so the trailing partial
     * line of every chunk is kept and prepended to the next one. Bytes are
     * buffered (rather than decoded strings) because the newline byte never
     * occurs inside a multi-byte UTF-8 sequence, which makes splitting on it
     * safe even when a character straddles two chunks.
     *
     * @returns {Transform} Stream that emits one `data: <json>\n\n` per line.
     */
    static createNdjsonToSseTransform() {
        const NEWLINE = 0x0a;
        let pending = Buffer.alloc(0);

        const emitLine = (stream, lineBytes) => {
            const line = lineBytes.toString('utf8');
            if (line.trim()) {
                // Format as SSE data
                stream.push(`data: ${line}\n\n`);
            }
        };

        return new Transform({
            decodeStrings: false, // accept strings as-is; converted below
            transform(chunk, encoding, callback) {
                try {
                    const incoming = typeof chunk === 'string'
                        ? Buffer.from(chunk, Buffer.isEncoding(encoding) ? encoding : 'utf8')
                        : chunk;
                    pending = pending.length ? Buffer.concat([pending, incoming]) : incoming;

                    let newlineAt = pending.indexOf(NEWLINE);
                    while (newlineAt !== -1) {
                        emitLine(this, pending.subarray(0, newlineAt));
                        pending = pending.subarray(newlineAt + 1);
                        newlineAt = pending.indexOf(NEWLINE);
                    }
                    callback();
                } catch (err) {
                    callback(err);
                }
            },
            flush(callback) {
                try {
                    // The final line may arrive without a terminating newline.
                    emitLine(this, pending);
                    pending = Buffer.alloc(0);
                    callback();
                } catch (err) {
                    callback(err);
                }
            }
        });
    }

    /**
     * Builds the request payload for a chat call.
     *
     * @param {string} text - Input text passed to the prompt compiler.
     * @param {object} parameters - Reads `ollamaModel` and `stream`.
     * @param {object} prompt - Prompt definition passed to getCompiledPrompt.
     * @returns {{data: object, params: object}} Request body and query params.
     */
    getRequestParameters(text, parameters, prompt) {
        const { modelPromptMessages } = this.getCompiledPrompt(text, parameters, prompt);
        return {
            data: {
                model: parameters.ollamaModel,
                messages: modelPromptMessages,
                stream: parameters.stream
            },
            params: {}
        };
    }

    /**
     * Logs a summary of the request messages and, for non-streaming calls,
     * of the response. Appends the request JSON to prompt.debugInfo when that
     * field is already non-empty.
     *
     * @param {object} data - Request body (`stream`, `messages`, `model`).
     * @param {*} responseData - Raw response; ignored when streaming.
     * @param {object} [prompt] - Prompt whose debugInfo may be extended.
     */
    logRequestData(data, responseData, prompt) {
        const { stream, messages, model } = data;

        if (messages && messages.length > 0) {
            logger.info(`[ollama chat request sent to model ${model} containing ${messages.length} messages]`);
            let totalLength = 0;
            let totalUnits;
            messages.forEach((message, index) => {
                const { content } = message;
                const { length, units } = this.getLength(content);
                const preview = this.shortenContent(content);

                logger.verbose(
                    `message ${index + 1}: role: ${message.role}, ${units}: ${length}, content: "${preview}"`
                );
                totalLength += length;
                totalUnits = units;
            });
            logger.info(`[chat request contained ${totalLength} ${totalUnits}]`);
        }

        if (stream) {
            logger.info(`[response received as an SSE stream]`);
        } else if (responseData) {
            const responseText = this.parseResponse(responseData);
            const { length, units } = this.getLength(responseText);
            logger.info(`[response received containing ${length} ${units}]`);
            logger.verbose(`${this.shortenContent(responseText)}`);
        }

        if (prompt && prompt.debugInfo) {
            prompt.debugInfo += `\n${JSON.stringify(data)}`;
        }
    }

    /**
     * Concatenates `message.content` from every NDJSON line of a string
     * response. Non-string input is returned unchanged.
     *
     * JSON.parse already decodes every escape sequence (\n, \", \\, \u003c,
     * ...), so the content is used exactly as parsed. Unescaping it a second
     * time would turn a literal backslash followed by "n" in the model output
     * (common in generated code) into a real newline.
     *
     * @param {*} data - Raw response.
     * @returns {*} The combined text, or `data` itself when it is not a string.
     */
    parseResponse(data) {
        // If data is not a string (e.g. streaming), return as is
        if (typeof data !== 'string') {
            return data;
        }

        let fullResponse = '';

        for (const line of data.split('\n')) {
            if (!line.trim()) {
                continue;
            }
            try {
                const content = JSON.parse(line)?.message?.content;
                if (typeof content === 'string') {
                    fullResponse += content;
                }
            } catch (err) {
                // Best effort: a line that is not valid JSON is skipped.
                continue;
            }
        }

        return fullResponse;
    }

    /**
     * Applies one stream event to requestProgress.
     *
     * Non-empty `message.content` is stored JSON-encoded on
     * requestProgress.data. An event with `done` set replaces the data with
     * '[DONE]' and sets progress to 1.
     *
     * @param {{data: string}} event - Event whose `data` is one JSON line.
     * @param {object} requestProgress - Progress accumulator, mutated in place.
     * @returns {object} requestProgress (unchanged when the event is unusable).
     */
    processStreamEvent(event, requestProgress) {
        try {
            const data = JSON.parse(event.data);

            // Content is used exactly as parsed; see parseResponse for why no
            // additional unescaping is applied.
            const content = data?.message?.content;
            if (typeof content === 'string' && content) {
                requestProgress.data = JSON.stringify(content);
            }

            // Check if this is the final message
            if (data?.done) {
                requestProgress.data = '[DONE]';
                requestProgress.progress = 1;
            }

            return requestProgress;
        } catch (err) {
            // Keep the stream alive, but leave a trace instead of failing silently.
            logger.warn(`Ignoring unprocessable Ollama chat stream event: ${err.message}`);
            return requestProgress;
        }
    }

    /**
     * Executes the request. When streaming, the NDJSON response stream is
     * piped through an NDJSON-to-SSE transform and that transform is returned.
     *
     * @param {string} text - Input text.
     * @param {object} parameters - Request parameters; `stream` selects mode.
     * @param {object} prompt - Prompt definition.
     * @param {object} cortexRequest - Request object; `data` and `params` are
     *     merged with the values from getRequestParameters.
     * @returns {Promise<*>} The transformed stream, or the executeRequest result.
     */
    async execute(text, parameters, prompt, cortexRequest) {
        const requestParameters = this.getRequestParameters(text, parameters, prompt);
        cortexRequest.data = { ...(cortexRequest.data || {}), ...requestParameters.data };
        cortexRequest.params = { ...(cortexRequest.params || {}), ...requestParameters.params };

        if (!parameters.stream) {
            return this.executeRequest(cortexRequest);
        }

        // For Ollama streaming, transform NDJSON to SSE format
        const response = await this.executeRequest(cortexRequest);
        const transformer = OllamaChatPlugin.createNdjsonToSseTransform();

        // pipe() does not forward source errors, so surface them on the
        // stream the caller actually holds.
        response.once('error', (err) => transformer.destroy(err));
        response.pipe(transformer);

        return transformer;
    }
}
157
+
158
+ export default OllamaChatPlugin;
@@ -0,0 +1,147 @@
1
+ import ModelPlugin from './modelPlugin.js';
2
+ import logger from '../../lib/logger.js';
3
+ import { Transform } from 'stream';
4
+
5
/**
 * Model plugin that sends completion requests to Ollama and adapts its
 * newline-delimited JSON (NDJSON) output for the rest of the pipeline.
 */
class OllamaCompletionPlugin extends ModelPlugin {

    /**
     * Creates a Transform that re-frames NDJSON as SSE `data:` events.
     *
     * Network chunks do not respect line boundaries, so the trailing partial
     * line of every chunk is kept and prepended to the next one. Bytes are
     * buffered (rather than decoded strings) because the newline byte never
     * occurs inside a multi-byte UTF-8 sequence, which makes splitting on it
     * safe even when a character straddles two chunks.
     *
     * @returns {Transform} Stream that emits one `data: <json>\n\n` per line.
     */
    static createNdjsonToSseTransform() {
        const NEWLINE = 0x0a;
        let pending = Buffer.alloc(0);

        const emitLine = (stream, lineBytes) => {
            const line = lineBytes.toString('utf8');
            if (line.trim()) {
                // Format as SSE data
                stream.push(`data: ${line}\n\n`);
            }
        };

        return new Transform({
            decodeStrings: false, // accept strings as-is; converted below
            transform(chunk, encoding, callback) {
                try {
                    const incoming = typeof chunk === 'string'
                        ? Buffer.from(chunk, Buffer.isEncoding(encoding) ? encoding : 'utf8')
                        : chunk;
                    pending = pending.length ? Buffer.concat([pending, incoming]) : incoming;

                    let newlineAt = pending.indexOf(NEWLINE);
                    while (newlineAt !== -1) {
                        emitLine(this, pending.subarray(0, newlineAt));
                        pending = pending.subarray(newlineAt + 1);
                        newlineAt = pending.indexOf(NEWLINE);
                    }
                    callback();
                } catch (err) {
                    callback(err);
                }
            },
            flush(callback) {
                try {
                    // The final line may arrive without a terminating newline.
                    emitLine(this, pending);
                    pending = Buffer.alloc(0);
                    callback();
                } catch (err) {
                    callback(err);
                }
            }
        });
    }

    /**
     * Builds the request payload for a completion call.
     *
     * @param {string} text - Prompt text sent as-is.
     * @param {object} parameters - Reads `ollamaModel` and `stream`.
     * @param {object} prompt - Unused here; kept for the plugin interface.
     * @returns {{data: object, params: object}} Request body and query params.
     */
    getRequestParameters(text, parameters, prompt) {
        return {
            data: {
                model: parameters.ollamaModel,
                prompt: text,
                stream: parameters.stream
            },
            params: {}
        };
    }

    /**
     * Logs a summary of the prompt and, for non-streaming calls, of the
     * response. Appends the request JSON to prompt.debugInfo when that field
     * is already non-empty.
     *
     * @param {object} data - Request body (`stream`, `prompt`, `model`).
     * @param {*} responseData - Raw response; ignored when streaming.
     * @param {object} [prompt] - Prompt whose debugInfo may be extended.
     */
    logRequestData(data, responseData, prompt) {
        const { stream, prompt: promptText, model } = data;

        if (promptText) {
            logger.info(`[ollama completion request sent to model ${model}]`);
            const { length, units } = this.getLength(promptText);
            const preview = this.shortenContent(promptText);
            logger.verbose(`prompt ${units}: ${length}, content: "${preview}"`);
            logger.info(`[completion request contained ${length} ${units}]`);
        }

        if (stream) {
            logger.info(`[response received as an SSE stream]`);
        } else if (responseData) {
            const responseText = this.parseResponse(responseData);
            const { length, units } = this.getLength(responseText);
            logger.info(`[response received containing ${length} ${units}]`);
            logger.verbose(`${this.shortenContent(responseText)}`);
        }

        if (prompt && prompt.debugInfo) {
            prompt.debugInfo += `\n${JSON.stringify(data)}`;
        }
    }

    /**
     * Concatenates the `response` field from every NDJSON line of a string
     * response. Non-string input is returned unchanged.
     *
     * JSON.parse already decodes every escape sequence (\n, \", \\, \u003c,
     * ...), so the text is used exactly as parsed. Unescaping it a second
     * time would turn a literal backslash followed by "n" in the model output
     * (common in generated code) into a real newline.
     *
     * @param {*} data - Raw response.
     * @returns {*} The combined text, or `data` itself when it is not a string.
     */
    parseResponse(data) {
        // If data is not a string (e.g. streaming), return as is
        if (typeof data !== 'string') {
            return data;
        }

        let fullResponse = '';

        for (const line of data.split('\n')) {
            if (!line.trim()) {
                continue;
            }
            try {
                const content = JSON.parse(line)?.response;
                if (typeof content === 'string') {
                    fullResponse += content;
                }
            } catch (err) {
                // Best effort: a line that is not valid JSON is skipped.
                continue;
            }
        }

        return fullResponse;
    }

    /**
     * Applies one stream event to requestProgress.
     *
     * A non-empty `response` field is stored JSON-encoded on
     * requestProgress.data. An event with `done` set replaces the data with
     * '[DONE]' and sets progress to 1.
     *
     * @param {{data: string}} event - Event whose `data` is one JSON line.
     * @param {object} requestProgress - Progress accumulator, mutated in place.
     * @returns {object} requestProgress (unchanged when the event is unusable).
     */
    processStreamEvent(event, requestProgress) {
        try {
            const data = JSON.parse(event.data);

            // Text is used exactly as parsed; see parseResponse for why no
            // additional unescaping is applied.
            const content = data?.response;
            if (typeof content === 'string' && content) {
                requestProgress.data = JSON.stringify(content);
            }

            // Check if this is the final message
            if (data?.done) {
                requestProgress.data = '[DONE]';
                requestProgress.progress = 1;
            }

            return requestProgress;
        } catch (err) {
            // Keep the stream alive, but leave a trace instead of failing silently.
            logger.warn(`Ignoring unprocessable Ollama completion stream event: ${err.message}`);
            return requestProgress;
        }
    }

    /**
     * Executes the request. When streaming, the NDJSON response stream is
     * piped through an NDJSON-to-SSE transform and that transform is returned.
     *
     * @param {string} text - Prompt text.
     * @param {object} parameters - Request parameters; `stream` selects mode.
     * @param {object} prompt - Prompt definition.
     * @param {object} cortexRequest - Request object; `data` and `params` are
     *     merged with the values from getRequestParameters.
     * @returns {Promise<*>} The transformed stream, or the executeRequest result.
     */
    async execute(text, parameters, prompt, cortexRequest) {
        const requestParameters = this.getRequestParameters(text, parameters, prompt);
        cortexRequest.data = { ...(cortexRequest.data || {}), ...requestParameters.data };
        cortexRequest.params = { ...(cortexRequest.params || {}), ...requestParameters.params };

        if (!parameters.stream) {
            return this.executeRequest(cortexRequest);
        }

        // For Ollama streaming, transform NDJSON to SSE format
        const response = await this.executeRequest(cortexRequest);
        const transformer = OllamaCompletionPlugin.createNdjsonToSseTransform();

        // pipe() does not forward source errors, so surface them on the
        // stream the caller actually holds.
        response.once('error', (err) => transformer.destroy(err));
        response.pipe(transformer);

        return transformer;
    }
}
146
+
147
+ export default OllamaCompletionPlugin;
package/server/rest.js CHANGED
@@ -6,6 +6,22 @@ import { requestState } from './requestState.js';
6
6
  import { v4 as uuidv4 } from 'uuid';
7
7
  import logger from '../lib/logger.js';
8
8
  import { getSingleTokenChunks } from './chunker.js';
9
+ import axios from 'axios';
10
+
11
/**
 * Fetches the models installed on an Ollama server and maps them to
 * OpenAI-style model descriptors whose ids are prefixed with `ollama-`.
 *
 * Never rejects: any failure is logged and an empty list is returned so the
 * caller can still serve its other models.
 *
 * @param {string} ollamaUrl - Base URL of the Ollama server.
 * @param {number} [timeoutMs=5000] - Request timeout in milliseconds. Without
 *     one, an unreachable host would keep the calling request open
 *     indefinitely.
 * @returns {Promise<Array<{id: string, object: string, owned_by: string, permission: string}>>}
 */
const getOllamaModels = async (ollamaUrl, timeoutMs = 5000) => {
    try {
        // Tolerate a configured URL that ends in "/" so the path is not "//api/tags".
        const baseUrl = String(ollamaUrl).replace(/\/+$/, '');
        const response = await axios.get(`${baseUrl}/api/tags`, { timeout: timeoutMs });

        const models = response.data?.models;
        if (!Array.isArray(models)) {
            logger.error('Error fetching Ollama models: response did not contain a models array');
            return [];
        }

        return models.map((model) => ({
            id: `ollama-${model.name}`,
            object: 'model',
            owned_by: 'ollama',
            permission: ''
        }));
    } catch (error) {
        logger.error(`Error fetching Ollama models: ${error.message}`);
        return [];
    }
};
9
25
 
10
26
  const chunkTextIntoTokens = (() => {
11
27
  let partialToken = '';
@@ -282,7 +298,14 @@ function buildRestEndpoints(pathways, app, server, config) {
282
298
  // Create OpenAI compatible endpoints
283
299
  app.post('/v1/completions', async (req, res) => {
284
300
  const modelName = req.body.model || 'gpt-3.5-turbo';
285
- const pathwayName = openAICompletionModels[modelName] || openAICompletionModels['*'];
301
+ let pathwayName;
302
+
303
+ if (modelName.startsWith('ollama-')) {
304
+ pathwayName = 'sys_ollama_completion';
305
+ req.body.ollamaModel = modelName.replace('ollama-', '');
306
+ } else {
307
+ pathwayName = openAICompletionModels[modelName] || openAICompletionModels['*'];
308
+ }
286
309
 
287
310
  if (!pathwayName) {
288
311
  res.status(404).json({
@@ -318,7 +341,6 @@ function buildRestEndpoints(pathways, app, server, config) {
318
341
  if (Boolean(req.body.stream)) {
319
342
  jsonResponse.id = `cmpl-${resultText}`;
320
343
  jsonResponse.choices[0].finish_reason = null;
321
- //jsonResponse.object = "text_completion.chunk";
322
344
 
323
345
  processIncomingStream(resultText, res, jsonResponse, pathway);
324
346
  } else {
@@ -330,7 +352,14 @@ function buildRestEndpoints(pathways, app, server, config) {
330
352
 
331
353
  app.post('/v1/chat/completions', async (req, res) => {
332
354
  const modelName = req.body.model || 'gpt-3.5-turbo';
333
- const pathwayName = openAIChatModels[modelName] || openAIChatModels['*'];
355
+ let pathwayName;
356
+
357
+ if (modelName.startsWith('ollama-')) {
358
+ pathwayName = 'sys_ollama_chat';
359
+ req.body.ollamaModel = modelName.replace('ollama-', '');
360
+ } else {
361
+ pathwayName = openAIChatModels[modelName] || openAIChatModels['*'];
362
+ }
334
363
 
335
364
  if (!pathwayName) {
336
365
  res.status(404).json({
@@ -385,8 +414,11 @@ function buildRestEndpoints(pathways, app, server, config) {
385
414
  app.get('/v1/models', async (req, res) => {
386
415
  const openAIModels = { ...openAIChatModels, ...openAICompletionModels };
387
416
  const defaultModelId = 'gpt-3.5-turbo';
417
+ let models = [];
388
418
 
389
- const models = Object.entries(openAIModels)
419
+ // Get standard OpenAI-compatible models, filtering out our internal pathway models
420
+ models = Object.entries(openAIModels)
421
+ .filter(([modelId]) => !['ollama-chat', 'ollama-completion'].includes(modelId))
390
422
  .map(([modelId]) => {
391
423
  if (modelId.includes('*')) {
392
424
  modelId = defaultModelId;
@@ -397,7 +429,16 @@ function buildRestEndpoints(pathways, app, server, config) {
397
429
  owned_by: 'openai',
398
430
  permission: '',
399
431
  };
400
- })
432
+ });
433
+
434
+ // Get Ollama models if configured
435
+ if (config.get('ollamaUrl')) {
436
+ const ollamaModels = await getOllamaModels(config.get('ollamaUrl'));
437
+ models = [...models, ...ollamaModels];
438
+ }
439
+
440
+ // Filter out duplicates and sort
441
+ models = models
401
442
  .filter((model, index, self) => {
402
443
  return index === self.findIndex((m) => m.id === model.id);
403
444
  })