@aj-archipelago/cortex 1.3.22 → 1.3.24
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +64 -0
- package/config.js +26 -1
- package/helper-apps/cortex-realtime-voice-server/src/realtime/client.ts +9 -4
- package/helper-apps/cortex-realtime-voice-server/src/realtime/realtimeTypes.ts +1 -0
- package/lib/util.js +4 -24
- package/package.json +5 -2
- package/pathways/system/entity/sys_generator_memory.js +3 -3
- package/pathways/system/rest_streaming/sys_ollama_chat.js +21 -0
- package/pathways/system/rest_streaming/sys_ollama_completion.js +14 -0
- package/pathways/system/rest_streaming/sys_openai_chat.js +2 -2
- package/pathways/transcribe_gemini.js +181 -53
- package/server/modelExecutor.js +8 -0
- package/server/pathwayResolver.js +15 -6
- package/server/plugins/claude3VertexPlugin.js +51 -16
- package/server/plugins/gemini15ChatPlugin.js +94 -1
- package/server/plugins/gemini15VisionPlugin.js +9 -3
- package/server/plugins/modelPlugin.js +11 -8
- package/server/plugins/ollamaChatPlugin.js +158 -0
- package/server/plugins/ollamaCompletionPlugin.js +147 -0
- package/server/rest.js +46 -5
- package/tests/multimodal_conversion.test.js +169 -0
- package/tests/openai_api.test.js +43 -23
- package/tests/streaming.test.js +197 -0
- package/tests/transcribe_gemini.test.js +217 -0
package/server/pathwayResolver.js

@@ -79,6 +79,13 @@ class PathwayResolver {
         let streamErrorOccurred = false;
         let responseData = null;

+        const publishNestedRequestProgress = (requestProgress) => {
+            if (requestProgress.progress === 1 && this.rootRequestId) {
+                delete requestProgress.progress;
+            }
+            publishRequestProgress(requestProgress);
+        }
+
         try {
             responseData = await this.executePathway(args);
         }
@@ -89,8 +96,13 @@ class PathwayResolver {
                 progress: 1,
                 data: '[DONE]',
             });
+        } else {
+            publishRequestProgress({
+                requestId: this.rootRequestId || this.requestId,
+                progress: 1,
+                data: error.message || error.toString(),
+            });
         }
-        return;
     }

     // If the response is a string, it's a regular long running response
@@ -100,7 +112,7 @@ class PathwayResolver {

         // some models don't support progress updates
         if (!modelTypesExcludedFromProgressUpdates.includes(this.model.type)) {
-            await publishRequestProgress({
+            await publishNestedRequestProgress({
                 requestId: this.rootRequestId || this.requestId,
                 progress: Math.min(completedCount,totalCount) / totalCount,
                 data: JSON.stringify(responseData),
@@ -139,10 +151,7 @@ class PathwayResolver {

         try {
             if (!streamEnded && requestProgress.data) {
-
-                logger.debug(`Publishing stream message to requestId ${this.requestId}: ${requestProgress.data}`);
-                publishRequestProgress(requestProgress);
-            }
+                publishNestedRequestProgress(requestProgress);
                 streamEnded = requestProgress.progress === 1;
             }
         } catch (error) {
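The pathwayResolver.js changes route all progress publishing through a wrapper so that a nested pathway (one carrying a `rootRequestId`) never emits `progress: 1` itself; only the root request may close the stream. A minimal standalone sketch of that behavior (the stub publisher and the ID value below are invented for illustration):

    // Hypothetical stand-in for the real publisher utility
    const publishRequestProgress = (update) => console.log('published:', update);
    const rootRequestId = 'root-123'; // set when the resolver runs as a nested request

    function publishNestedRequestProgress(requestProgress) {
        // Strip the completion marker from nested requests so the
        // root request's stream stays open until the root finishes.
        if (requestProgress.progress === 1 && rootRequestId) {
            delete requestProgress.progress;
        }
        publishRequestProgress(requestProgress);
    }

    publishNestedRequestProgress({ requestId: rootRequestId, progress: 1, data: '[DONE]' });
    // published: { requestId: 'root-123', data: '[DONE]' }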
package/server/plugins/claude3VertexPlugin.js

@@ -136,7 +136,16 @@ class Claude3VertexPlugin extends OpenAIVisionPlugin {
         // Extract system messages
         const systemMessages = messagesCopy.filter(message => message.role === "system");
         if (systemMessages.length > 0) {
-            system = systemMessages.map(message => message.content).join("\n");
+            system = systemMessages.map(message => {
+                if (Array.isArray(message.content)) {
+                    // For content arrays, extract text content and join
+                    return message.content
+                        .filter(item => item.type === 'text')
+                        .map(item => item.text)
+                        .join("\n");
+                }
+                return message.content;
+            }).join("\n");
         }

         // Filter out system messages and empty messages
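This makes the Vertex Anthropic plugin tolerant of OpenAI-style multimodal system messages, where `content` is an array of typed parts rather than a string. A small runnable illustration of the new flattening (the sample messages are invented for this note):

    const systemMessages = [
        { role: 'system', content: 'You are helpful.' },
        { role: 'system', content: [
            { type: 'text', text: 'Answer briefly.' },
            { type: 'image_url', image_url: { url: 'https://example.com/x.png' } } // non-text parts are dropped
        ]}
    ];

    const system = systemMessages.map(message => {
        if (Array.isArray(message.content)) {
            return message.content
                .filter(item => item.type === 'text')
                .map(item => item.text)
                .join('\n');
        }
        return message.content;
    }).join('\n');

    console.log(system); // "You are helpful.\nAnswer briefly."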
@@ -380,7 +389,7 @@ class Claude3VertexPlugin extends OpenAIVisionPlugin {
         cortexRequest.params = {}; // query params
         cortexRequest.stream = stream;
         cortexRequest.urlSuffix = cortexRequest.stream
-            ? ":streamRawPredict"
+            ? ":streamRawPredict?alt=sse"
             : ":rawPredict";

         const gcpAuthTokenHelper = this.config.get("gcpAuthTokenHelper");
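Appending `?alt=sse` asks the Vertex AI streaming endpoint to frame its response as server-sent events rather than a raw JSON stream, which matches the SSE parsing in `processStreamEvent` below. Assuming the usual Vertex publisher-model URL layout, the final streaming request looks roughly like this (project, region, and model are placeholders, not values from the package):

    POST https://{region}-aiplatform.googleapis.com/v1/projects/{project}/locations/{region}/publishers/anthropic/models/{model}:streamRawPredict?alt=sse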
@@ -392,33 +401,59 @@ class Claude3VertexPlugin extends OpenAIVisionPlugin {

     processStreamEvent(event, requestProgress) {
         const eventData = JSON.parse(event.data);
+        const baseOpenAIResponse = {
+            id: eventData.message?.id || `chatcmpl-${Date.now()}`,
+            object: "chat.completion.chunk",
+            created: Math.floor(Date.now() / 1000),
+            model: this.modelName,
+            choices: [{
+                index: 0,
+                delta: {},
+                finish_reason: null
+            }]
+        };
+
         switch (eventData.type) {
             case "message_start":
-
-
-
-
-
+                // Initial message with role
+                baseOpenAIResponse.choices[0].delta = {
+                    role: "assistant",
+                    content: ""
+                };
+                requestProgress.data = JSON.stringify(baseOpenAIResponse);
                 break;
+
             case "content_block_delta":
                 if (eventData.delta.type === "text_delta") {
-
+                    baseOpenAIResponse.choices[0].delta = {
+                        content: eventData.delta.text
+                    };
+                    requestProgress.data = JSON.stringify(baseOpenAIResponse);
                 }
                 break;
-
-                break;
-            case "message_delta":
-                break;
+
             case "message_stop":
-
+                baseOpenAIResponse.choices[0].delta = {};
+                baseOpenAIResponse.choices[0].finish_reason = "stop";
+                requestProgress.data = JSON.stringify(baseOpenAIResponse);
                 requestProgress.progress = 1;
                 break;
+
             case "error":
-
-                eventData.error.message || eventData.error
-            }
+                baseOpenAIResponse.choices[0].delta = {
+                    content: `\n\n*** ${eventData.error.message || eventData.error} ***`
+                };
+                baseOpenAIResponse.choices[0].finish_reason = "error";
+                requestProgress.data = JSON.stringify(baseOpenAIResponse);
                 requestProgress.progress = 1;
                 break;
+
+            // Ignore other event types as they don't map to OpenAI format
+            case "content_block_start":
+            case "content_block_stop":
+            case "message_delta":
+            case "ping":
+                break;
         }

         return requestProgress;
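With this change the plugin re-emits Anthropic's SSE event types as OpenAI-style `chat.completion.chunk` payloads. A hand-written example of the mapping for a text delta (both payloads are invented for this note; the output is approximate):

    Sample Anthropic event:
    { "type": "content_block_delta", "delta": { "type": "text_delta", "text": "Hello" } }

    Approximate chunk written to requestProgress.data:
    { "id": "chatcmpl-1700000000000", "object": "chat.completion.chunk",
      "created": 1700000000, "model": "<modelName>",
      "choices": [ { "index": 0, "delta": { "content": "Hello" }, "finish_reason": null } ] }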
package/server/plugins/gemini15ChatPlugin.js

@@ -56,7 +56,11 @@ class Gemini15ChatPlugin extends ModelPlugin {
         const { role, author, content } = message;

         if (role === 'system') {
-            systemParts.push({ text: content });
+            if (Array.isArray(content)) {
+                content.forEach(item => systemParts.push({ text: item }));
+            } else {
+                systemParts.push({ text: content });
+            }
             return;
         }

@@ -169,6 +173,95 @@ class Gemini15ChatPlugin extends ModelPlugin {
         return this.executeRequest(cortexRequest);
     }

+    processStreamEvent(event, requestProgress) {
+        const eventData = JSON.parse(event.data);
+
+        // Initialize requestProgress if needed
+        requestProgress = requestProgress || {};
+        requestProgress.data = requestProgress.data || null;
+
+        // Create a helper function to generate message chunks
+        const createChunk = (delta) => ({
+            id: eventData.responseId || `chatcmpl-${Date.now()}`,
+            object: "chat.completion.chunk",
+            created: Math.floor(Date.now() / 1000),
+            model: this.modelName,
+            choices: [{
+                index: 0,
+                delta,
+                finish_reason: null
+            }]
+        });
+
+        // Handle content chunks - do this first before handling any finish conditions
+        if (eventData.candidates?.[0]?.content?.parts?.[0]?.text) {
+            if (!requestProgress.started) {
+                // First chunk - send role
+                requestProgress.data = JSON.stringify(createChunk({ role: "assistant" }));
+                requestProgress.started = true;
+
+                // Immediately follow up with the first content chunk
+                requestProgress.data = JSON.stringify(createChunk({
+                    content: eventData.candidates[0].content.parts[0].text
+                }));
+            } else {
+                // Send content chunk
+                requestProgress.data = JSON.stringify(createChunk({
+                    content: eventData.candidates[0].content.parts[0].text
+                }));
+            }
+
+            // If this message also has STOP, mark it for completion but don't overwrite the content
+            if (eventData.candidates[0].finishReason === "STOP") {
+                // Send the content first
+                requestProgress.data = JSON.stringify(createChunk({
+                    content: eventData.candidates[0].content.parts[0].text
+                }));
+                requestProgress.progress = 1;
+            }
+        } else if (eventData.candidates?.[0]?.finishReason === "STOP") {
+            // Only send DONE if there was no content in this message
+            requestProgress.data = '[DONE]';
+            requestProgress.progress = 1;
+        }
+
+        // Handle safety blocks
+        if (eventData.candidates?.[0]?.safetyRatings?.some(rating => rating.blocked)) {
+            requestProgress.data = JSON.stringify({
+                id: eventData.responseId || `chatcmpl-${Date.now()}`,
+                object: "chat.completion.chunk",
+                created: Math.floor(Date.now() / 1000),
+                model: this.modelName,
+                choices: [{
+                    index: 0,
+                    delta: { content: "\n\n*** Response blocked due to safety ratings ***" },
+                    finish_reason: "content_filter"
+                }]
+            });
+            requestProgress.progress = 1;
+            return requestProgress;
+        }
+
+        // Handle prompt feedback blocks
+        if (eventData.promptFeedback?.blockReason) {
+            requestProgress.data = JSON.stringify({
+                id: eventData.responseId || `chatcmpl-${Date.now()}`,
+                object: "chat.completion.chunk",
+                created: Math.floor(Date.now() / 1000),
+                model: this.modelName,
+                choices: [{
+                    index: 0,
+                    delta: { content: `\n\n*** Response blocked: ${eventData.promptFeedback.blockReason} ***` },
+                    finish_reason: "content_filter"
+                }]
+            });
+            requestProgress.progress = 1;
+            return requestProgress;
+        }
+
+        return requestProgress;
+    }
+
     // Override the logging function to display the messages and responses
     logRequestData(data, responseData, prompt) {
         const messages = data && data.contents;
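The new Gemini handler performs the same OpenAI-chunk translation for `streamGenerateContent` SSE events, including the case where Gemini delivers content and `finishReason: "STOP"` in a single event. Sample payload and outcome (values invented for this note):

    Sample Gemini event:
    { "responseId": "resp-1",
      "candidates": [ { "content": { "parts": [ { "text": "Hi there" } ] },
                        "finishReason": "STOP" } ] }

    Outcome: requestProgress.data holds a chunk whose delta is { "content": "Hi there" },
    and requestProgress.progress is set to 1, so the content is published before the
    stream is closed rather than being replaced by a bare '[DONE]'.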
package/server/plugins/gemini15VisionPlugin.js

@@ -24,19 +24,24 @@ class Gemini15VisionPlugin extends Gemini15ChatPlugin {
         const { role, author, content } = message;

         if (role === 'system') {
-            systemParts.push({ text: content });
+            if (Array.isArray(content)) {
+                content.forEach(item => systemParts.push({ text: item }));
+            } else {
+                systemParts.push({ text: content });
+            }
             return;
         }

         // Convert content to Gemini format, trying to maintain compatibility
         const convertPartToGemini = (inputPart) => {
             try {
+                // First try to parse as JSON if it's a string
                 const part = typeof inputPart === 'string' ? JSON.parse(inputPart) : inputPart;
                 const {type, text, image_url, gcs} = part;
                 let fileUrl = gcs || image_url?.url;

                 if (typeof part === 'string') {
-                    return { text: part };
+                    return { text: inputPart };
                 } else if (type === 'text') {
                     return { text: text };
                 } else if (type === 'image_url') {
@@ -77,7 +82,8 @@ class Gemini15VisionPlugin extends Gemini15ChatPlugin {
                 return null;
             }
         } catch (e) {
-            //
+            // If JSON parsing fails or any other error, treat as plain text
+            return inputPart ? { text: inputPart } : null;
         }
         return inputPart ? { text: inputPart } : null;
     };
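The widened catch in `convertPartToGemini` means a bare string that fails `JSON.parse` now degrades gracefully to a text part instead of being lost. A minimal standalone sketch of just that fallback path (simplified; the real function also handles `image_url` and `gcs` parts):

    const convertPart = (inputPart) => {
        try {
            const part = typeof inputPart === 'string' ? JSON.parse(inputPart) : inputPart;
            if (part.type === 'text') return { text: part.text };
            return null; // other part types elided in this sketch
        } catch (e) {
            // JSON.parse failed: treat the raw string as plain text
            return inputPart ? { text: inputPart } : null;
        }
    };

    console.log(convertPart('hello'));                       // { text: 'hello' }
    console.log(convertPart('{"type":"text","text":"hi"}')); // { text: 'hi' }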
package/server/plugins/modelPlugin.js

@@ -381,14 +381,17 @@ class ModelPlugin {

         // finish reason can be in different places in the message
         const finishReason = parsedMessage?.choices?.[0]?.finish_reason || parsedMessage?.candidates?.[0]?.finishReason;
-        if (finishReason
-
-
-
-
-
-
-
+        if (finishReason) {
+            switch (finishReason.toLowerCase()) {
+                case 'safety':
+                    const safetyRatings = JSON.stringify(parsedMessage?.candidates?.[0]?.safetyRatings) || '';
+                    logger.warn(`Request ${this.requestId} was blocked by the safety filter. ${safetyRatings}`);
+                    requestProgress.data = `\n\nResponse blocked by safety filter: ${safetyRatings}`;
+                    requestProgress.progress = 1;
+                    break;
+                default:
+                    requestProgress.progress = 1;
+                    break;
             }
         }
     }
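The finish-reason handling in modelPlugin.js now special-cases Gemini's safety blocks while still terminating the stream for every other finish reason. A hand-written message that would take the `'safety'` branch (sample data invented for this note):

    const parsedMessage = {
        candidates: [{
            finishReason: 'SAFETY',
            safetyRatings: [{ category: 'HARM_CATEGORY_HARASSMENT', blocked: true }]
        }]
    };
    const finishReason = parsedMessage?.choices?.[0]?.finish_reason
        || parsedMessage?.candidates?.[0]?.finishReason; // 'SAFETY'
    // 'safety' case: log a warning, surface the ratings in the stream, set progress = 1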
package/server/plugins/ollamaChatPlugin.js (new file)

@@ -0,0 +1,158 @@
+import ModelPlugin from './modelPlugin.js';
+import logger from '../../lib/logger.js';
+import { Transform } from 'stream';
+
+class OllamaChatPlugin extends ModelPlugin {
+
+    getRequestParameters(text, parameters, prompt) {
+        const { modelPromptMessages } = this.getCompiledPrompt(text, parameters, prompt);
+        return {
+            data: {
+                model: parameters.ollamaModel,
+                messages: modelPromptMessages,
+                stream: parameters.stream
+            },
+            params: {}
+        };
+    }
+
+    logRequestData(data, responseData, prompt) {
+        const { stream, messages, model } = data;
+
+        if (messages && messages.length > 0) {
+            logger.info(`[ollama chat request sent to model ${model} containing ${messages.length} messages]`);
+            let totalLength = 0;
+            let totalUnits;
+            messages.forEach((message, index) => {
+                const content = message.content;
+                const { length, units } = this.getLength(content);
+                const preview = this.shortenContent(content);
+
+                logger.verbose(
+                    `message ${index + 1}: role: ${message.role}, ${units}: ${length}, content: "${preview}"`
+                );
+                totalLength += length;
+                totalUnits = units;
+            });
+            logger.info(`[chat request contained ${totalLength} ${totalUnits}]`);
+        }
+
+        if (stream) {
+            logger.info(`[response received as an SSE stream]`);
+        } else if (responseData) {
+            const responseText = this.parseResponse(responseData);
+            const { length, units } = this.getLength(responseText);
+            logger.info(`[response received containing ${length} ${units}]`);
+            logger.verbose(`${this.shortenContent(responseText)}`);
+        }
+
+        prompt &&
+            prompt.debugInfo &&
+            (prompt.debugInfo += `\n${JSON.stringify(data)}`);
+    }
+
+    parseResponse(data) {
+        // If data is not a string (e.g. streaming), return as is
+        if (typeof data !== 'string') {
+            return data;
+        }
+
+        // Split into lines and filter empty ones
+        const lines = data.split('\n').filter(line => line.trim());
+
+        let fullResponse = '';
+
+        for (const line of lines) {
+            try {
+                const jsonObj = JSON.parse(line);
+
+                if (jsonObj.message && jsonObj.message.content) {
+                    // Unescape special sequences
+                    const content = jsonObj.message.content
+                        .replace(/\\n/g, '\n')
+                        .replace(/\\"/g, '"')
+                        .replace(/\\\\/g, '\\')
+                        .replace(/\\u003c/g, '<')
+                        .replace(/\\u003e/g, '>');
+
+                    fullResponse += content;
+                }
+            } catch (err) {
+                // If we can't parse the line as JSON, just skip it
+                continue;
+            }
+        }
+
+        return fullResponse;
+    }
+
+    processStreamEvent(event, requestProgress) {
+        try {
+            const data = JSON.parse(event.data);
+
+            // Handle the streaming response
+            if (data.message?.content) {
+                // Unescape special sequences in the content
+                const content = data.message.content
+                    .replace(/\\n/g, '\n')
+                    .replace(/\\"/g, '"')
+                    .replace(/\\\\/g, '\\')
+                    .replace(/\\u003c/g, '<')
+                    .replace(/\\u003e/g, '>');
+
+                requestProgress.data = JSON.stringify(content);
+            }
+
+            // Check if this is the final message
+            if (data.done) {
+                requestProgress.data = '[DONE]';
+                requestProgress.progress = 1;
+            }
+
+            return requestProgress;
+        } catch (err) {
+            // If we can't parse the event data, return the progress as is
+            return requestProgress;
+        }
+    }
+
+    async execute(text, parameters, prompt, cortexRequest) {
+        const requestParameters = this.getRequestParameters(text, parameters, prompt);
+        cortexRequest.data = { ...(cortexRequest.data || {}), ...requestParameters.data };
+        cortexRequest.params = { ...(cortexRequest.params || {}), ...requestParameters.params };
+
+        // For Ollama streaming, transform NDJSON to SSE format
+        if (parameters.stream) {
+            const response = await this.executeRequest(cortexRequest);
+
+            // Create a transform stream that converts NDJSON to SSE format
+            const transformer = new Transform({
+                decodeStrings: false, // Keep as string
+                transform(chunk, encoding, callback) {
+                    try {
+                        const lines = chunk.toString().split('\n');
+                        for (const line of lines) {
+                            if (line.trim()) {
+                                // Format as SSE data
+                                this.push(`data: ${line}\n\n`);
+                            }
+                        }
+                        callback();
+                    } catch (err) {
+                        callback(err);
+                    }
+                }
+            });
+
+            // Pipe the response through our transformer
+            response.pipe(transformer);
+
+            // Return the transformed stream
+            return transformer;
+        }
+
+        return this.executeRequest(cortexRequest);
+    }
+}
+
+export default OllamaChatPlugin;
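Ollama streams NDJSON (one JSON object per line) rather than SSE, so the plugin wraps the response in a Transform that reframes each line as an SSE `data:` event for the rest of the streaming stack. A runnable standalone demo of that reframing (the NDJSON lines are invented; run with Node.js):

    import { Transform } from 'stream';

    const transformer = new Transform({
        decodeStrings: false,
        transform(chunk, encoding, callback) {
            for (const line of chunk.toString().split('\n')) {
                if (line.trim()) {
                    this.push(`data: ${line}\n\n`); // one NDJSON line -> one SSE event
                }
            }
            callback();
        }
    });

    transformer.on('data', (d) => process.stdout.write(d));
    transformer.write('{"message":{"content":"Hel"},"done":false}\n');
    transformer.write('{"message":{"content":"lo"},"done":true}\n');
    // data: {"message":{"content":"Hel"},"done":false}
    //
    // data: {"message":{"content":"lo"},"done":true}

Like the plugin code itself, this sketch assumes each chunk arrives on whole-line boundaries.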
package/server/plugins/ollamaCompletionPlugin.js (new file)

@@ -0,0 +1,147 @@
+import ModelPlugin from './modelPlugin.js';
+import logger from '../../lib/logger.js';
+import { Transform } from 'stream';
+
+class OllamaCompletionPlugin extends ModelPlugin {
+
+    getRequestParameters(text, parameters, prompt) {
+        return {
+            data: {
+                model: parameters.ollamaModel,
+                prompt: text,
+                stream: parameters.stream
+            },
+            params: {}
+        };
+    }
+
+    logRequestData(data, responseData, prompt) {
+        const { stream, prompt: promptText, model } = data;
+
+        if (promptText) {
+            logger.info(`[ollama completion request sent to model ${model}]`);
+            const { length, units } = this.getLength(promptText);
+            const preview = this.shortenContent(promptText);
+            logger.verbose(`prompt ${units}: ${length}, content: "${preview}"`);
+            logger.info(`[completion request contained ${length} ${units}]`);
+        }
+
+        if (stream) {
+            logger.info(`[response received as an SSE stream]`);
+        } else if (responseData) {
+            const responseText = this.parseResponse(responseData);
+            const { length, units } = this.getLength(responseText);
+            logger.info(`[response received containing ${length} ${units}]`);
+            logger.verbose(`${this.shortenContent(responseText)}`);
+        }
+
+        prompt &&
+            prompt.debugInfo &&
+            (prompt.debugInfo += `\n${JSON.stringify(data)}`);
+    }
+
+    parseResponse(data) {
+        // If data is not a string (e.g. streaming), return as is
+        if (typeof data !== 'string') {
+            return data;
+        }
+
+        // Split into lines and filter empty ones
+        const lines = data.split('\n').filter(line => line.trim());
+
+        let fullResponse = '';
+
+        for (const line of lines) {
+            try {
+                const jsonObj = JSON.parse(line);
+
+                if (jsonObj.response) {
+                    // Unescape special sequences
+                    const content = jsonObj.response
+                        .replace(/\\n/g, '\n')
+                        .replace(/\\"/g, '"')
+                        .replace(/\\\\/g, '\\')
+                        .replace(/\\u003c/g, '<')
+                        .replace(/\\u003e/g, '>');
+
+                    fullResponse += content;
+                }
+            } catch (err) {
+                // If we can't parse the line as JSON, just skip it
+                continue;
+            }
+        }
+
+        return fullResponse;
+    }
+
+    processStreamEvent(event, requestProgress) {
+        try {
+            const data = JSON.parse(event.data);
+
+            // Handle the streaming response
+            if (data.response) {
+                // Unescape special sequences in the content
+                const content = data.response
+                    .replace(/\\n/g, '\n')
+                    .replace(/\\"/g, '"')
+                    .replace(/\\\\/g, '\\')
+                    .replace(/\\u003c/g, '<')
+                    .replace(/\\u003e/g, '>');
+
+                requestProgress.data = JSON.stringify(content);
+            }
+
+            // Check if this is the final message
+            if (data.done) {
+                requestProgress.data = '[DONE]';
+                requestProgress.progress = 1;
+            }
+
+            return requestProgress;
+        } catch (err) {
+            // If we can't parse the event data, return the progress as is
+            return requestProgress;
+        }
+    }
+
+    async execute(text, parameters, prompt, cortexRequest) {
+        const requestParameters = this.getRequestParameters(text, parameters, prompt);
+        cortexRequest.data = { ...(cortexRequest.data || {}), ...requestParameters.data };
+        cortexRequest.params = { ...(cortexRequest.params || {}), ...requestParameters.params };
+
+        // For Ollama streaming, transform NDJSON to SSE format
+        if (parameters.stream) {
+            const response = await this.executeRequest(cortexRequest);
+
+            // Create a transform stream that converts NDJSON to SSE format
+            const transformer = new Transform({
+                decodeStrings: false, // Keep as string
+                transform(chunk, encoding, callback) {
+                    try {
+                        const lines = chunk.toString().split('\n');
+                        for (const line of lines) {
+                            if (line.trim()) {
+                                // Format as SSE data
+                                this.push(`data: ${line}\n\n`);
+                            }
+                        }
+                        callback();
+                    } catch (err) {
+                        callback(err);
+                    }
+                }
+            });
+
+            // Pipe the response through our transformer
+            response.pipe(transformer);
+
+            // Return the transformed stream
+            return transformer;
+        }
+
+        return this.executeRequest(cortexRequest);
+    }
+}
+
+export default OllamaCompletionPlugin;
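The completion plugin mirrors the chat plugin, reading the NDJSON `response` field instead of `message.content`. A small runnable sketch of how `parseResponse` concatenates a non-streaming NDJSON body (sample lines invented for this note):

    const body = [
        '{"response":"Once upon","done":false}',
        '{"response":" a time","done":true}'
    ].join('\n');

    let fullResponse = '';
    for (const line of body.split('\n').filter(l => l.trim())) {
        try {
            const obj = JSON.parse(line);
            if (obj.response) fullResponse += obj.response;
        } catch { /* skip lines that aren't valid JSON */ }
    }
    console.log(fullResponse); // "Once upon a time"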