@aj-archipelago/cortex 1.3.22 → 1.3.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +64 -0
- package/config.js +26 -1
- package/helper-apps/cortex-realtime-voice-server/src/realtime/client.ts +9 -4
- package/helper-apps/cortex-realtime-voice-server/src/realtime/realtimeTypes.ts +1 -0
- package/lib/util.js +4 -24
- package/package.json +5 -2
- package/pathways/system/rest_streaming/sys_ollama_chat.js +21 -0
- package/pathways/system/rest_streaming/sys_ollama_completion.js +14 -0
- package/pathways/transcribe_gemini.js +181 -53
- package/server/modelExecutor.js +8 -0
- package/server/pathwayResolver.js +6 -1
- package/server/plugins/claude3VertexPlugin.js +41 -15
- package/server/plugins/gemini15ChatPlugin.js +90 -1
- package/server/plugins/gemini15VisionPlugin.js +9 -3
- package/server/plugins/modelPlugin.js +11 -8
- package/server/plugins/ollamaChatPlugin.js +158 -0
- package/server/plugins/ollamaCompletionPlugin.js +147 -0
- package/server/rest.js +46 -5
- package/tests/multimodal_conversion.test.js +169 -0
- package/tests/transcribe_gemini.test.js +217 -0
|
@@ -380,7 +380,7 @@ class Claude3VertexPlugin extends OpenAIVisionPlugin {
|
|
|
380
380
|
cortexRequest.params = {}; // query params
|
|
381
381
|
cortexRequest.stream = stream;
|
|
382
382
|
cortexRequest.urlSuffix = cortexRequest.stream
|
|
383
|
-
? ":streamRawPredict"
|
|
383
|
+
? ":streamRawPredict?alt=sse"
|
|
384
384
|
: ":rawPredict";
|
|
385
385
|
|
|
386
386
|
const gcpAuthTokenHelper = this.config.get("gcpAuthTokenHelper");
|
|
@@ -392,33 +392,59 @@ class Claude3VertexPlugin extends OpenAIVisionPlugin {
|
|
|
392
392
|
|
|
393
393
|
processStreamEvent(event, requestProgress) {
|
|
394
394
|
const eventData = JSON.parse(event.data);
|
|
395
|
+
const baseOpenAIResponse = {
|
|
396
|
+
id: eventData.message?.id || `chatcmpl-${Date.now()}`,
|
|
397
|
+
object: "chat.completion.chunk",
|
|
398
|
+
created: Math.floor(Date.now() / 1000),
|
|
399
|
+
model: this.modelName,
|
|
400
|
+
choices: [{
|
|
401
|
+
index: 0,
|
|
402
|
+
delta: {},
|
|
403
|
+
finish_reason: null
|
|
404
|
+
}]
|
|
405
|
+
};
|
|
406
|
+
|
|
395
407
|
switch (eventData.type) {
|
|
396
408
|
case "message_start":
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
409
|
+
// Initial message with role
|
|
410
|
+
baseOpenAIResponse.choices[0].delta = {
|
|
411
|
+
role: "assistant",
|
|
412
|
+
content: ""
|
|
413
|
+
};
|
|
414
|
+
requestProgress.data = JSON.stringify(baseOpenAIResponse);
|
|
402
415
|
break;
|
|
416
|
+
|
|
403
417
|
case "content_block_delta":
|
|
404
418
|
if (eventData.delta.type === "text_delta") {
|
|
405
|
-
|
|
419
|
+
baseOpenAIResponse.choices[0].delta = {
|
|
420
|
+
content: eventData.delta.text
|
|
421
|
+
};
|
|
422
|
+
requestProgress.data = JSON.stringify(baseOpenAIResponse);
|
|
406
423
|
}
|
|
407
424
|
break;
|
|
408
|
-
|
|
409
|
-
break;
|
|
410
|
-
case "message_delta":
|
|
411
|
-
break;
|
|
425
|
+
|
|
412
426
|
case "message_stop":
|
|
413
|
-
|
|
427
|
+
baseOpenAIResponse.choices[0].delta = {};
|
|
428
|
+
baseOpenAIResponse.choices[0].finish_reason = "stop";
|
|
429
|
+
requestProgress.data = JSON.stringify(baseOpenAIResponse);
|
|
414
430
|
requestProgress.progress = 1;
|
|
415
431
|
break;
|
|
432
|
+
|
|
416
433
|
case "error":
|
|
417
|
-
|
|
418
|
-
eventData.error.message || eventData.error
|
|
419
|
-
}
|
|
434
|
+
baseOpenAIResponse.choices[0].delta = {
|
|
435
|
+
content: `\n\n*** ${eventData.error.message || eventData.error} ***`
|
|
436
|
+
};
|
|
437
|
+
baseOpenAIResponse.choices[0].finish_reason = "error";
|
|
438
|
+
requestProgress.data = JSON.stringify(baseOpenAIResponse);
|
|
420
439
|
requestProgress.progress = 1;
|
|
421
440
|
break;
|
|
441
|
+
|
|
442
|
+
// Ignore other event types as they don't map to OpenAI format
|
|
443
|
+
case "content_block_start":
|
|
444
|
+
case "content_block_stop":
|
|
445
|
+
case "message_delta":
|
|
446
|
+
case "ping":
|
|
447
|
+
break;
|
|
422
448
|
}
|
|
423
449
|
|
|
424
450
|
return requestProgress;
|
|
@@ -56,7 +56,11 @@ class Gemini15ChatPlugin extends ModelPlugin {
|
|
|
56
56
|
const { role, author, content } = message;
|
|
57
57
|
|
|
58
58
|
if (role === 'system') {
|
|
59
|
-
|
|
59
|
+
if (Array.isArray(content)) {
|
|
60
|
+
content.forEach(item => systemParts.push({ text: item }));
|
|
61
|
+
} else {
|
|
62
|
+
systemParts.push({ text: content });
|
|
63
|
+
}
|
|
60
64
|
return;
|
|
61
65
|
}
|
|
62
66
|
|
|
@@ -169,6 +173,91 @@ class Gemini15ChatPlugin extends ModelPlugin {
|
|
|
169
173
|
return this.executeRequest(cortexRequest);
|
|
170
174
|
}
|
|
171
175
|
|
|
176
|
+
processStreamEvent(event, requestProgress) {
|
|
177
|
+
const eventData = JSON.parse(event.data);
|
|
178
|
+
|
|
179
|
+
// Initialize requestProgress if needed
|
|
180
|
+
requestProgress = requestProgress || {};
|
|
181
|
+
requestProgress.data = requestProgress.data || null;
|
|
182
|
+
|
|
183
|
+
// Create a helper function to generate message chunks
|
|
184
|
+
const createChunk = (delta) => ({
|
|
185
|
+
id: eventData.responseId || `chatcmpl-${Date.now()}`,
|
|
186
|
+
object: "chat.completion.chunk",
|
|
187
|
+
created: Math.floor(Date.now() / 1000),
|
|
188
|
+
model: this.modelName,
|
|
189
|
+
choices: [{
|
|
190
|
+
index: 0,
|
|
191
|
+
delta,
|
|
192
|
+
finish_reason: null
|
|
193
|
+
}]
|
|
194
|
+
});
|
|
195
|
+
|
|
196
|
+
// Handle content chunks - do this first before handling any finish conditions
|
|
197
|
+
if (eventData.candidates?.[0]?.content?.parts?.[0]?.text) {
|
|
198
|
+
if (!requestProgress.started) {
|
|
199
|
+
// First chunk - send role
|
|
200
|
+
requestProgress.data = JSON.stringify(createChunk({ role: "assistant" }));
|
|
201
|
+
requestProgress.started = true;
|
|
202
|
+
|
|
203
|
+
// Immediately follow up with the first content chunk
|
|
204
|
+
requestProgress.data = JSON.stringify(createChunk({
|
|
205
|
+
content: eventData.candidates[0].content.parts[0].text
|
|
206
|
+
}));
|
|
207
|
+
} else {
|
|
208
|
+
// Send content chunk
|
|
209
|
+
requestProgress.data = JSON.stringify(createChunk({
|
|
210
|
+
content: eventData.candidates[0].content.parts[0].text
|
|
211
|
+
}));
|
|
212
|
+
}
|
|
213
|
+
|
|
214
|
+
// If this message also has STOP, mark it for completion but don't overwrite the content
|
|
215
|
+
if (eventData.candidates[0].finishReason === "STOP") {
|
|
216
|
+
requestProgress.progress = 1;
|
|
217
|
+
}
|
|
218
|
+
} else if (eventData.candidates?.[0]?.finishReason === "STOP") {
|
|
219
|
+
// Only send DONE if there was no content in this message
|
|
220
|
+
requestProgress.data = '[DONE]';
|
|
221
|
+
requestProgress.progress = 1;
|
|
222
|
+
}
|
|
223
|
+
|
|
224
|
+
// Handle safety blocks
|
|
225
|
+
if (eventData.candidates?.[0]?.safetyRatings?.some(rating => rating.blocked)) {
|
|
226
|
+
requestProgress.data = JSON.stringify({
|
|
227
|
+
id: eventData.responseId || `chatcmpl-${Date.now()}`,
|
|
228
|
+
object: "chat.completion.chunk",
|
|
229
|
+
created: Math.floor(Date.now() / 1000),
|
|
230
|
+
model: this.modelName,
|
|
231
|
+
choices: [{
|
|
232
|
+
index: 0,
|
|
233
|
+
delta: { content: "\n\n*** Response blocked due to safety ratings ***" },
|
|
234
|
+
finish_reason: "content_filter"
|
|
235
|
+
}]
|
|
236
|
+
});
|
|
237
|
+
requestProgress.progress = 1;
|
|
238
|
+
return requestProgress;
|
|
239
|
+
}
|
|
240
|
+
|
|
241
|
+
// Handle prompt feedback blocks
|
|
242
|
+
if (eventData.promptFeedback?.blockReason) {
|
|
243
|
+
requestProgress.data = JSON.stringify({
|
|
244
|
+
id: eventData.responseId || `chatcmpl-${Date.now()}`,
|
|
245
|
+
object: "chat.completion.chunk",
|
|
246
|
+
created: Math.floor(Date.now() / 1000),
|
|
247
|
+
model: this.modelName,
|
|
248
|
+
choices: [{
|
|
249
|
+
index: 0,
|
|
250
|
+
delta: { content: `\n\n*** Response blocked: ${eventData.promptFeedback.blockReason} ***` },
|
|
251
|
+
finish_reason: "content_filter"
|
|
252
|
+
}]
|
|
253
|
+
});
|
|
254
|
+
requestProgress.progress = 1;
|
|
255
|
+
return requestProgress;
|
|
256
|
+
}
|
|
257
|
+
|
|
258
|
+
return requestProgress;
|
|
259
|
+
}
|
|
260
|
+
|
|
172
261
|
// Override the logging function to display the messages and responses
|
|
173
262
|
logRequestData(data, responseData, prompt) {
|
|
174
263
|
const messages = data && data.contents;
|
|
@@ -24,19 +24,24 @@ class Gemini15VisionPlugin extends Gemini15ChatPlugin {
|
|
|
24
24
|
const { role, author, content } = message;
|
|
25
25
|
|
|
26
26
|
if (role === 'system') {
|
|
27
|
-
|
|
27
|
+
if (Array.isArray(content)) {
|
|
28
|
+
content.forEach(item => systemParts.push({ text: item }));
|
|
29
|
+
} else {
|
|
30
|
+
systemParts.push({ text: content });
|
|
31
|
+
}
|
|
28
32
|
return;
|
|
29
33
|
}
|
|
30
34
|
|
|
31
35
|
// Convert content to Gemini format, trying to maintain compatibility
|
|
32
36
|
const convertPartToGemini = (inputPart) => {
|
|
33
37
|
try {
|
|
38
|
+
// First try to parse as JSON if it's a string
|
|
34
39
|
const part = typeof inputPart === 'string' ? JSON.parse(inputPart) : inputPart;
|
|
35
40
|
const {type, text, image_url, gcs} = part;
|
|
36
41
|
let fileUrl = gcs || image_url?.url;
|
|
37
42
|
|
|
38
43
|
if (typeof part === 'string') {
|
|
39
|
-
return { text:
|
|
44
|
+
return { text: inputPart };
|
|
40
45
|
} else if (type === 'text') {
|
|
41
46
|
return { text: text };
|
|
42
47
|
} else if (type === 'image_url') {
|
|
@@ -77,7 +82,8 @@ class Gemini15VisionPlugin extends Gemini15ChatPlugin {
|
|
|
77
82
|
return null;
|
|
78
83
|
}
|
|
79
84
|
} catch (e) {
|
|
80
|
-
//
|
|
85
|
+
// If JSON parsing fails or any other error, treat as plain text
|
|
86
|
+
return inputPart ? { text: inputPart } : null;
|
|
81
87
|
}
|
|
82
88
|
return inputPart ? { text: inputPart } : null;
|
|
83
89
|
};
|
|
@@ -381,14 +381,17 @@ class ModelPlugin {
|
|
|
381
381
|
|
|
382
382
|
// finish reason can be in different places in the message
|
|
383
383
|
const finishReason = parsedMessage?.choices?.[0]?.finish_reason || parsedMessage?.candidates?.[0]?.finishReason;
|
|
384
|
-
if (finishReason
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
384
|
+
if (finishReason) {
|
|
385
|
+
switch (finishReason.toLowerCase()) {
|
|
386
|
+
case 'safety':
|
|
387
|
+
const safetyRatings = JSON.stringify(parsedMessage?.candidates?.[0]?.safetyRatings) || '';
|
|
388
|
+
logger.warn(`Request ${this.requestId} was blocked by the safety filter. ${safetyRatings}`);
|
|
389
|
+
requestProgress.data = `\n\nResponse blocked by safety filter: ${safetyRatings}`;
|
|
390
|
+
requestProgress.progress = 1;
|
|
391
|
+
break;
|
|
392
|
+
default:
|
|
393
|
+
requestProgress.progress = 1;
|
|
394
|
+
break;
|
|
392
395
|
}
|
|
393
396
|
}
|
|
394
397
|
}
|
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
import ModelPlugin from './modelPlugin.js';
|
|
2
|
+
import logger from '../../lib/logger.js';
|
|
3
|
+
import { Transform } from 'stream';
|
|
4
|
+
|
|
5
|
+
class OllamaChatPlugin extends ModelPlugin {
|
|
6
|
+
|
|
7
|
+
getRequestParameters(text, parameters, prompt) {
|
|
8
|
+
const { modelPromptMessages } = this.getCompiledPrompt(text, parameters, prompt);
|
|
9
|
+
return {
|
|
10
|
+
data: {
|
|
11
|
+
model: parameters.ollamaModel,
|
|
12
|
+
messages: modelPromptMessages,
|
|
13
|
+
stream: parameters.stream
|
|
14
|
+
},
|
|
15
|
+
params: {}
|
|
16
|
+
};
|
|
17
|
+
}
|
|
18
|
+
|
|
19
|
+
logRequestData(data, responseData, prompt) {
|
|
20
|
+
const { stream, messages, model } = data;
|
|
21
|
+
|
|
22
|
+
if (messages && messages.length > 0) {
|
|
23
|
+
logger.info(`[ollama chat request sent to model ${model} containing ${messages.length} messages]`);
|
|
24
|
+
let totalLength = 0;
|
|
25
|
+
let totalUnits;
|
|
26
|
+
messages.forEach((message, index) => {
|
|
27
|
+
const content = message.content;
|
|
28
|
+
const { length, units } = this.getLength(content);
|
|
29
|
+
const preview = this.shortenContent(content);
|
|
30
|
+
|
|
31
|
+
logger.verbose(
|
|
32
|
+
`message ${index + 1}: role: ${message.role}, ${units}: ${length}, content: "${preview}"`
|
|
33
|
+
);
|
|
34
|
+
totalLength += length;
|
|
35
|
+
totalUnits = units;
|
|
36
|
+
});
|
|
37
|
+
logger.info(`[chat request contained ${totalLength} ${totalUnits}]`);
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
if (stream) {
|
|
41
|
+
logger.info(`[response received as an SSE stream]`);
|
|
42
|
+
} else if (responseData) {
|
|
43
|
+
const responseText = this.parseResponse(responseData);
|
|
44
|
+
const { length, units } = this.getLength(responseText);
|
|
45
|
+
logger.info(`[response received containing ${length} ${units}]`);
|
|
46
|
+
logger.verbose(`${this.shortenContent(responseText)}`);
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
prompt &&
|
|
50
|
+
prompt.debugInfo &&
|
|
51
|
+
(prompt.debugInfo += `\n${JSON.stringify(data)}`);
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
parseResponse(data) {
|
|
55
|
+
// If data is not a string (e.g. streaming), return as is
|
|
56
|
+
if (typeof data !== 'string') {
|
|
57
|
+
return data;
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
// Split into lines and filter empty ones
|
|
61
|
+
const lines = data.split('\n').filter(line => line.trim());
|
|
62
|
+
|
|
63
|
+
let fullResponse = '';
|
|
64
|
+
|
|
65
|
+
for (const line of lines) {
|
|
66
|
+
try {
|
|
67
|
+
const jsonObj = JSON.parse(line);
|
|
68
|
+
|
|
69
|
+
if (jsonObj.message && jsonObj.message.content) {
|
|
70
|
+
// Unescape special sequences
|
|
71
|
+
const content = jsonObj.message.content
|
|
72
|
+
.replace(/\\n/g, '\n')
|
|
73
|
+
.replace(/\\"/g, '"')
|
|
74
|
+
.replace(/\\\\/g, '\\')
|
|
75
|
+
.replace(/\\u003c/g, '<')
|
|
76
|
+
.replace(/\\u003e/g, '>');
|
|
77
|
+
|
|
78
|
+
fullResponse += content;
|
|
79
|
+
}
|
|
80
|
+
} catch (err) {
|
|
81
|
+
// If we can't parse the line as JSON, just skip it
|
|
82
|
+
continue;
|
|
83
|
+
}
|
|
84
|
+
}
|
|
85
|
+
|
|
86
|
+
return fullResponse;
|
|
87
|
+
}
|
|
88
|
+
|
|
89
|
+
processStreamEvent(event, requestProgress) {
|
|
90
|
+
try {
|
|
91
|
+
const data = JSON.parse(event.data);
|
|
92
|
+
|
|
93
|
+
// Handle the streaming response
|
|
94
|
+
if (data.message?.content) {
|
|
95
|
+
// Unescape special sequences in the content
|
|
96
|
+
const content = data.message.content
|
|
97
|
+
.replace(/\\n/g, '\n')
|
|
98
|
+
.replace(/\\"/g, '"')
|
|
99
|
+
.replace(/\\\\/g, '\\')
|
|
100
|
+
.replace(/\\u003c/g, '<')
|
|
101
|
+
.replace(/\\u003e/g, '>');
|
|
102
|
+
|
|
103
|
+
requestProgress.data = JSON.stringify(content);
|
|
104
|
+
}
|
|
105
|
+
|
|
106
|
+
// Check if this is the final message
|
|
107
|
+
if (data.done) {
|
|
108
|
+
requestProgress.data = '[DONE]';
|
|
109
|
+
requestProgress.progress = 1;
|
|
110
|
+
}
|
|
111
|
+
|
|
112
|
+
return requestProgress;
|
|
113
|
+
} catch (err) {
|
|
114
|
+
// If we can't parse the event data, return the progress as is
|
|
115
|
+
return requestProgress;
|
|
116
|
+
}
|
|
117
|
+
}
|
|
118
|
+
|
|
119
|
+
async execute(text, parameters, prompt, cortexRequest) {
|
|
120
|
+
const requestParameters = this.getRequestParameters(text, parameters, prompt);
|
|
121
|
+
cortexRequest.data = { ...(cortexRequest.data || {}), ...requestParameters.data };
|
|
122
|
+
cortexRequest.params = { ...(cortexRequest.params || {}), ...requestParameters.params };
|
|
123
|
+
|
|
124
|
+
// For Ollama streaming, transform NDJSON to SSE format
|
|
125
|
+
if (parameters.stream) {
|
|
126
|
+
const response = await this.executeRequest(cortexRequest);
|
|
127
|
+
|
|
128
|
+
// Create a transform stream that converts NDJSON to SSE format
|
|
129
|
+
const transformer = new Transform({
|
|
130
|
+
decodeStrings: false, // Keep as string
|
|
131
|
+
transform(chunk, encoding, callback) {
|
|
132
|
+
try {
|
|
133
|
+
const lines = chunk.toString().split('\n');
|
|
134
|
+
for (const line of lines) {
|
|
135
|
+
if (line.trim()) {
|
|
136
|
+
// Format as SSE data
|
|
137
|
+
this.push(`data: ${line}\n\n`);
|
|
138
|
+
}
|
|
139
|
+
}
|
|
140
|
+
callback();
|
|
141
|
+
} catch (err) {
|
|
142
|
+
callback(err);
|
|
143
|
+
}
|
|
144
|
+
}
|
|
145
|
+
});
|
|
146
|
+
|
|
147
|
+
// Pipe the response through our transformer
|
|
148
|
+
response.pipe(transformer);
|
|
149
|
+
|
|
150
|
+
// Return the transformed stream
|
|
151
|
+
return transformer;
|
|
152
|
+
}
|
|
153
|
+
|
|
154
|
+
return this.executeRequest(cortexRequest);
|
|
155
|
+
}
|
|
156
|
+
}
|
|
157
|
+
|
|
158
|
+
export default OllamaChatPlugin;
|
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
import ModelPlugin from './modelPlugin.js';
|
|
2
|
+
import logger from '../../lib/logger.js';
|
|
3
|
+
import { Transform } from 'stream';
|
|
4
|
+
|
|
5
|
+
class OllamaCompletionPlugin extends ModelPlugin {
|
|
6
|
+
|
|
7
|
+
getRequestParameters(text, parameters, prompt) {
|
|
8
|
+
return {
|
|
9
|
+
data: {
|
|
10
|
+
model: parameters.ollamaModel,
|
|
11
|
+
prompt: text,
|
|
12
|
+
stream: parameters.stream
|
|
13
|
+
},
|
|
14
|
+
params: {}
|
|
15
|
+
};
|
|
16
|
+
}
|
|
17
|
+
|
|
18
|
+
logRequestData(data, responseData, prompt) {
|
|
19
|
+
const { stream, prompt: promptText, model } = data;
|
|
20
|
+
|
|
21
|
+
if (promptText) {
|
|
22
|
+
logger.info(`[ollama completion request sent to model ${model}]`);
|
|
23
|
+
const { length, units } = this.getLength(promptText);
|
|
24
|
+
const preview = this.shortenContent(promptText);
|
|
25
|
+
logger.verbose(`prompt ${units}: ${length}, content: "${preview}"`);
|
|
26
|
+
logger.info(`[completion request contained ${length} ${units}]`);
|
|
27
|
+
}
|
|
28
|
+
|
|
29
|
+
if (stream) {
|
|
30
|
+
logger.info(`[response received as an SSE stream]`);
|
|
31
|
+
} else if (responseData) {
|
|
32
|
+
const responseText = this.parseResponse(responseData);
|
|
33
|
+
const { length, units } = this.getLength(responseText);
|
|
34
|
+
logger.info(`[response received containing ${length} ${units}]`);
|
|
35
|
+
logger.verbose(`${this.shortenContent(responseText)}`);
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
prompt &&
|
|
39
|
+
prompt.debugInfo &&
|
|
40
|
+
(prompt.debugInfo += `\n${JSON.stringify(data)}`);
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
parseResponse(data) {
|
|
44
|
+
// If data is not a string (e.g. streaming), return as is
|
|
45
|
+
if (typeof data !== 'string') {
|
|
46
|
+
return data;
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
// Split into lines and filter empty ones
|
|
50
|
+
const lines = data.split('\n').filter(line => line.trim());
|
|
51
|
+
|
|
52
|
+
let fullResponse = '';
|
|
53
|
+
|
|
54
|
+
for (const line of lines) {
|
|
55
|
+
try {
|
|
56
|
+
const jsonObj = JSON.parse(line);
|
|
57
|
+
|
|
58
|
+
if (jsonObj.response) {
|
|
59
|
+
// Unescape special sequences
|
|
60
|
+
const content = jsonObj.response
|
|
61
|
+
.replace(/\\n/g, '\n')
|
|
62
|
+
.replace(/\\"/g, '"')
|
|
63
|
+
.replace(/\\\\/g, '\\')
|
|
64
|
+
.replace(/\\u003c/g, '<')
|
|
65
|
+
.replace(/\\u003e/g, '>');
|
|
66
|
+
|
|
67
|
+
fullResponse += content;
|
|
68
|
+
}
|
|
69
|
+
} catch (err) {
|
|
70
|
+
// If we can't parse the line as JSON, just skip it
|
|
71
|
+
continue;
|
|
72
|
+
}
|
|
73
|
+
}
|
|
74
|
+
|
|
75
|
+
return fullResponse;
|
|
76
|
+
}
|
|
77
|
+
|
|
78
|
+
processStreamEvent(event, requestProgress) {
|
|
79
|
+
try {
|
|
80
|
+
const data = JSON.parse(event.data);
|
|
81
|
+
|
|
82
|
+
// Handle the streaming response
|
|
83
|
+
if (data.response) {
|
|
84
|
+
// Unescape special sequences in the content
|
|
85
|
+
const content = data.response
|
|
86
|
+
.replace(/\\n/g, '\n')
|
|
87
|
+
.replace(/\\"/g, '"')
|
|
88
|
+
.replace(/\\\\/g, '\\')
|
|
89
|
+
.replace(/\\u003c/g, '<')
|
|
90
|
+
.replace(/\\u003e/g, '>');
|
|
91
|
+
|
|
92
|
+
requestProgress.data = JSON.stringify(content);
|
|
93
|
+
}
|
|
94
|
+
|
|
95
|
+
// Check if this is the final message
|
|
96
|
+
if (data.done) {
|
|
97
|
+
requestProgress.data = '[DONE]';
|
|
98
|
+
requestProgress.progress = 1;
|
|
99
|
+
}
|
|
100
|
+
|
|
101
|
+
return requestProgress;
|
|
102
|
+
} catch (err) {
|
|
103
|
+
// If we can't parse the event data, return the progress as is
|
|
104
|
+
return requestProgress;
|
|
105
|
+
}
|
|
106
|
+
}
|
|
107
|
+
|
|
108
|
+
async execute(text, parameters, prompt, cortexRequest) {
|
|
109
|
+
const requestParameters = this.getRequestParameters(text, parameters, prompt);
|
|
110
|
+
cortexRequest.data = { ...(cortexRequest.data || {}), ...requestParameters.data };
|
|
111
|
+
cortexRequest.params = { ...(cortexRequest.params || {}), ...requestParameters.params };
|
|
112
|
+
|
|
113
|
+
// For Ollama streaming, transform NDJSON to SSE format
|
|
114
|
+
if (parameters.stream) {
|
|
115
|
+
const response = await this.executeRequest(cortexRequest);
|
|
116
|
+
|
|
117
|
+
// Create a transform stream that converts NDJSON to SSE format
|
|
118
|
+
const transformer = new Transform({
|
|
119
|
+
decodeStrings: false, // Keep as string
|
|
120
|
+
transform(chunk, encoding, callback) {
|
|
121
|
+
try {
|
|
122
|
+
const lines = chunk.toString().split('\n');
|
|
123
|
+
for (const line of lines) {
|
|
124
|
+
if (line.trim()) {
|
|
125
|
+
// Format as SSE data
|
|
126
|
+
this.push(`data: ${line}\n\n`);
|
|
127
|
+
}
|
|
128
|
+
}
|
|
129
|
+
callback();
|
|
130
|
+
} catch (err) {
|
|
131
|
+
callback(err);
|
|
132
|
+
}
|
|
133
|
+
}
|
|
134
|
+
});
|
|
135
|
+
|
|
136
|
+
// Pipe the response through our transformer
|
|
137
|
+
response.pipe(transformer);
|
|
138
|
+
|
|
139
|
+
// Return the transformed stream
|
|
140
|
+
return transformer;
|
|
141
|
+
}
|
|
142
|
+
|
|
143
|
+
return this.executeRequest(cortexRequest);
|
|
144
|
+
}
|
|
145
|
+
}
|
|
146
|
+
|
|
147
|
+
export default OllamaCompletionPlugin;
|
package/server/rest.js
CHANGED
|
@@ -6,6 +6,22 @@ import { requestState } from './requestState.js';
|
|
|
6
6
|
import { v4 as uuidv4 } from 'uuid';
|
|
7
7
|
import logger from '../lib/logger.js';
|
|
8
8
|
import { getSingleTokenChunks } from './chunker.js';
|
|
9
|
+
import axios from 'axios';
|
|
10
|
+
|
|
11
|
+
const getOllamaModels = async (ollamaUrl) => {
|
|
12
|
+
try {
|
|
13
|
+
const response = await axios.get(`${ollamaUrl}/api/tags`);
|
|
14
|
+
return response.data.models.map(model => ({
|
|
15
|
+
id: `ollama-${model.name}`,
|
|
16
|
+
object: 'model',
|
|
17
|
+
owned_by: 'ollama',
|
|
18
|
+
permission: ''
|
|
19
|
+
}));
|
|
20
|
+
} catch (error) {
|
|
21
|
+
logger.error(`Error fetching Ollama models: ${error.message}`);
|
|
22
|
+
return [];
|
|
23
|
+
}
|
|
24
|
+
};
|
|
9
25
|
|
|
10
26
|
const chunkTextIntoTokens = (() => {
|
|
11
27
|
let partialToken = '';
|
|
@@ -282,7 +298,14 @@ function buildRestEndpoints(pathways, app, server, config) {
|
|
|
282
298
|
// Create OpenAI compatible endpoints
|
|
283
299
|
app.post('/v1/completions', async (req, res) => {
|
|
284
300
|
const modelName = req.body.model || 'gpt-3.5-turbo';
|
|
285
|
-
|
|
301
|
+
let pathwayName;
|
|
302
|
+
|
|
303
|
+
if (modelName.startsWith('ollama-')) {
|
|
304
|
+
pathwayName = 'sys_ollama_completion';
|
|
305
|
+
req.body.ollamaModel = modelName.replace('ollama-', '');
|
|
306
|
+
} else {
|
|
307
|
+
pathwayName = openAICompletionModels[modelName] || openAICompletionModels['*'];
|
|
308
|
+
}
|
|
286
309
|
|
|
287
310
|
if (!pathwayName) {
|
|
288
311
|
res.status(404).json({
|
|
@@ -318,7 +341,6 @@ function buildRestEndpoints(pathways, app, server, config) {
|
|
|
318
341
|
if (Boolean(req.body.stream)) {
|
|
319
342
|
jsonResponse.id = `cmpl-${resultText}`;
|
|
320
343
|
jsonResponse.choices[0].finish_reason = null;
|
|
321
|
-
//jsonResponse.object = "text_completion.chunk";
|
|
322
344
|
|
|
323
345
|
processIncomingStream(resultText, res, jsonResponse, pathway);
|
|
324
346
|
} else {
|
|
@@ -330,7 +352,14 @@ function buildRestEndpoints(pathways, app, server, config) {
|
|
|
330
352
|
|
|
331
353
|
app.post('/v1/chat/completions', async (req, res) => {
|
|
332
354
|
const modelName = req.body.model || 'gpt-3.5-turbo';
|
|
333
|
-
|
|
355
|
+
let pathwayName;
|
|
356
|
+
|
|
357
|
+
if (modelName.startsWith('ollama-')) {
|
|
358
|
+
pathwayName = 'sys_ollama_chat';
|
|
359
|
+
req.body.ollamaModel = modelName.replace('ollama-', '');
|
|
360
|
+
} else {
|
|
361
|
+
pathwayName = openAIChatModels[modelName] || openAIChatModels['*'];
|
|
362
|
+
}
|
|
334
363
|
|
|
335
364
|
if (!pathwayName) {
|
|
336
365
|
res.status(404).json({
|
|
@@ -385,8 +414,11 @@ function buildRestEndpoints(pathways, app, server, config) {
|
|
|
385
414
|
app.get('/v1/models', async (req, res) => {
|
|
386
415
|
const openAIModels = { ...openAIChatModels, ...openAICompletionModels };
|
|
387
416
|
const defaultModelId = 'gpt-3.5-turbo';
|
|
417
|
+
let models = [];
|
|
388
418
|
|
|
389
|
-
|
|
419
|
+
// Get standard OpenAI-compatible models, filtering out our internal pathway models
|
|
420
|
+
models = Object.entries(openAIModels)
|
|
421
|
+
.filter(([modelId]) => !['ollama-chat', 'ollama-completion'].includes(modelId))
|
|
390
422
|
.map(([modelId]) => {
|
|
391
423
|
if (modelId.includes('*')) {
|
|
392
424
|
modelId = defaultModelId;
|
|
@@ -397,7 +429,16 @@ function buildRestEndpoints(pathways, app, server, config) {
|
|
|
397
429
|
owned_by: 'openai',
|
|
398
430
|
permission: '',
|
|
399
431
|
};
|
|
400
|
-
})
|
|
432
|
+
});
|
|
433
|
+
|
|
434
|
+
// Get Ollama models if configured
|
|
435
|
+
if (config.get('ollamaUrl')) {
|
|
436
|
+
const ollamaModels = await getOllamaModels(config.get('ollamaUrl'));
|
|
437
|
+
models = [...models, ...ollamaModels];
|
|
438
|
+
}
|
|
439
|
+
|
|
440
|
+
// Filter out duplicates and sort
|
|
441
|
+
models = models
|
|
401
442
|
.filter((model, index, self) => {
|
|
402
443
|
return index === self.findIndex((m) => m.id === model.id);
|
|
403
444
|
})
|