@aj-archipelago/cortex 1.3.22 → 1.3.24

This diff compares publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between versions exactly as they appear in their public registries.
@@ -79,6 +79,13 @@ class PathwayResolver {
     let streamErrorOccurred = false;
     let responseData = null;
 
+    const publishNestedRequestProgress = (requestProgress) => {
+        if (requestProgress.progress === 1 && this.rootRequestId) {
+            delete requestProgress.progress;
+        }
+        publishRequestProgress(requestProgress);
+    }
+
     try {
         responseData = await this.executePathway(args);
     }
@@ -89,8 +96,13 @@ class PathwayResolver {
                 progress: 1,
                 data: '[DONE]',
             });
+        } else {
+            publishRequestProgress({
+                requestId: this.rootRequestId || this.requestId,
+                progress: 1,
+                data: error.message || error.toString(),
+            });
         }
-        return;
     }
 
     // If the response is a string, it's a regular long running response
@@ -100,7 +112,7 @@ class PathwayResolver {
 
     // some models don't support progress updates
     if (!modelTypesExcludedFromProgressUpdates.includes(this.model.type)) {
-        await publishRequestProgress({
+        await publishNestedRequestProgress({
             requestId: this.rootRequestId || this.requestId,
             progress: Math.min(completedCount,totalCount) / totalCount,
             data: JSON.stringify(responseData),
@@ -139,10 +151,7 @@ class PathwayResolver {
 
     try {
         if (!streamEnded && requestProgress.data) {
-            if (!(this.rootRequestId && requestProgress.progress === 1)) {
-                logger.debug(`Publishing stream message to requestId ${this.requestId}: ${requestProgress.data}`);
-                publishRequestProgress(requestProgress);
-            }
+            publishNestedRequestProgress(requestProgress);
            streamEnded = requestProgress.progress === 1;
        }
    } catch (error) {
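A minimal standalone sketch of what the new publishNestedRequestProgress helper does (publishRequestProgress is stubbed here; in Cortex it publishes to the pathway's subscribers):

    // Stub standing in for Cortex's real publisher, for illustration only.
    const publishRequestProgress = (p) => console.log(JSON.stringify(p));

    // Mirrors the helper in the diff: for nested requests (rootRequestId set),
    // the final progress: 1 marker is stripped so only the root request
    // signals completion to the client.
    const makePublisher = (rootRequestId) => (requestProgress) => {
        if (requestProgress.progress === 1 && rootRequestId) {
            delete requestProgress.progress;
        }
        publishRequestProgress(requestProgress);
    };

    makePublisher(null)({ requestId: 'a', progress: 1, data: '[DONE]' });
    // {"requestId":"a","progress":1,"data":"[DONE]"}
    makePublisher('root-1')({ requestId: 'a', progress: 1, data: '[DONE]' });
    // {"requestId":"a","data":"[DONE]"} (completion left to the root request)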
@@ -136,7 +136,16 @@ class Claude3VertexPlugin extends OpenAIVisionPlugin {
     // Extract system messages
     const systemMessages = messagesCopy.filter(message => message.role === "system");
     if (systemMessages.length > 0) {
-        system = systemMessages.map(message => message.content).join("\n");
+        system = systemMessages.map(message => {
+            if (Array.isArray(message.content)) {
+                // For content arrays, extract text content and join
+                return message.content
+                    .filter(item => item.type === 'text')
+                    .map(item => item.text)
+                    .join("\n");
+            }
+            return message.content;
+        }).join("\n");
     }
 
     // Filter out system messages and empty messages
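A hedged sketch of the flattening this enables, assuming OpenAI-style message shapes (sample data, not from the package):

    const systemMessages = [
        { role: 'system', content: 'You are terse.' },
        {
            role: 'system',
            content: [
                { type: 'text', text: 'Answer in English.' },
                { type: 'image_url', image_url: { url: 'https://example.com/x.png' } },
            ],
        },
    ];

    // Same map/filter/join as the diff: text items are kept, other part types dropped.
    const system = systemMessages.map(message => {
        if (Array.isArray(message.content)) {
            return message.content
                .filter(item => item.type === 'text')
                .map(item => item.text)
                .join('\n');
        }
        return message.content;
    }).join('\n');

    console.log(system); // "You are terse.\nAnswer in English."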
@@ -380,7 +389,7 @@ class Claude3VertexPlugin extends OpenAIVisionPlugin {
     cortexRequest.params = {}; // query params
     cortexRequest.stream = stream;
     cortexRequest.urlSuffix = cortexRequest.stream
-        ? ":streamRawPredict"
+        ? ":streamRawPredict?alt=sse"
         : ":rawPredict";
 
     const gcpAuthTokenHelper = this.config.get("gcpAuthTokenHelper");
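As I read it, appending alt=sse asks the Vertex AI endpoint to frame the streaming body as server-sent events (one `data: ...` event per chunk) rather than a raw JSON stream, which matches the SSE parsing in processStreamEvent below; a minimal sketch of the suffix selection:

    // Sketch only; the base publisher/model URL is assembled elsewhere in the plugin.
    const urlSuffix = stream
        ? ':streamRawPredict?alt=sse'   // SSE-framed streaming
        : ':rawPredict';                // single JSON response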
@@ -392,33 +401,59 @@ class Claude3VertexPlugin extends OpenAIVisionPlugin {
 
 processStreamEvent(event, requestProgress) {
     const eventData = JSON.parse(event.data);
+    const baseOpenAIResponse = {
+        id: eventData.message?.id || `chatcmpl-${Date.now()}`,
+        object: "chat.completion.chunk",
+        created: Math.floor(Date.now() / 1000),
+        model: this.modelName,
+        choices: [{
+            index: 0,
+            delta: {},
+            finish_reason: null
+        }]
+    };
+
     switch (eventData.type) {
         case "message_start":
-            requestProgress.data = JSON.stringify(eventData.message);
-            break;
-        case "content_block_start":
-            break;
-        case "ping":
+            // Initial message with role
+            baseOpenAIResponse.choices[0].delta = {
+                role: "assistant",
+                content: ""
+            };
+            requestProgress.data = JSON.stringify(baseOpenAIResponse);
             break;
+
         case "content_block_delta":
             if (eventData.delta.type === "text_delta") {
-                requestProgress.data = JSON.stringify(eventData.delta.text);
+                baseOpenAIResponse.choices[0].delta = {
+                    content: eventData.delta.text
+                };
+                requestProgress.data = JSON.stringify(baseOpenAIResponse);
             }
             break;
-        case "content_block_stop":
-            break;
-        case "message_delta":
-            break;
+
         case "message_stop":
-            requestProgress.data = "[DONE]";
+            baseOpenAIResponse.choices[0].delta = {};
+            baseOpenAIResponse.choices[0].finish_reason = "stop";
+            requestProgress.data = JSON.stringify(baseOpenAIResponse);
             requestProgress.progress = 1;
             break;
+
         case "error":
-            requestProgress.data = `\n\n*** ${
-                eventData.error.message || eventData.error
-            } ***`;
+            baseOpenAIResponse.choices[0].delta = {
+                content: `\n\n*** ${eventData.error.message || eventData.error} ***`
+            };
+            baseOpenAIResponse.choices[0].finish_reason = "error";
+            requestProgress.data = JSON.stringify(baseOpenAIResponse);
             requestProgress.progress = 1;
             break;
+
+        // Ignore other event types as they don't map to OpenAI format
+        case "content_block_start":
+        case "content_block_stop":
+        case "message_delta":
+        case "ping":
+            break;
     }
 
     return requestProgress;
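For illustration, an Anthropic-style content_block_delta event (values made up) and the OpenAI-style chunk the rewritten handler would place in requestProgress.data:

    // Hypothetical input event, shaped per the switch above:
    const event = {
        data: JSON.stringify({
            type: 'content_block_delta',
            delta: { type: 'text_delta', text: 'Hello' },
        }),
    };

    // requestProgress.data would then hold a chunk like:
    // {
    //   "id": "chatcmpl-<timestamp>",
    //   "object": "chat.completion.chunk",
    //   "created": <unix seconds>,
    //   "model": "<this.modelName>",
    //   "choices": [{ "index": 0, "delta": { "content": "Hello" }, "finish_reason": null }]
    // }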
@@ -56,7 +56,11 @@ class Gemini15ChatPlugin extends ModelPlugin {
     const { role, author, content } = message;
 
     if (role === 'system') {
-        systemParts.push({ text: content });
+        if (Array.isArray(content)) {
+            content.forEach(item => systemParts.push({ text: item }));
+        } else {
+            systemParts.push({ text: content });
+        }
         return;
     }
 
@@ -169,6 +173,95 @@ class Gemini15ChatPlugin extends ModelPlugin {
     return this.executeRequest(cortexRequest);
 }
 
+processStreamEvent(event, requestProgress) {
+    const eventData = JSON.parse(event.data);
+
+    // Initialize requestProgress if needed
+    requestProgress = requestProgress || {};
+    requestProgress.data = requestProgress.data || null;
+
+    // Create a helper function to generate message chunks
+    const createChunk = (delta) => ({
+        id: eventData.responseId || `chatcmpl-${Date.now()}`,
+        object: "chat.completion.chunk",
+        created: Math.floor(Date.now() / 1000),
+        model: this.modelName,
+        choices: [{
+            index: 0,
+            delta,
+            finish_reason: null
+        }]
+    });
+
+    // Handle content chunks - do this first before handling any finish conditions
+    if (eventData.candidates?.[0]?.content?.parts?.[0]?.text) {
+        if (!requestProgress.started) {
+            // First chunk - send role
+            requestProgress.data = JSON.stringify(createChunk({ role: "assistant" }));
+            requestProgress.started = true;
+
+            // Immediately follow up with the first content chunk
+            requestProgress.data = JSON.stringify(createChunk({
+                content: eventData.candidates[0].content.parts[0].text
+            }));
+        } else {
+            // Send content chunk
+            requestProgress.data = JSON.stringify(createChunk({
+                content: eventData.candidates[0].content.parts[0].text
+            }));
+        }
+
+        // If this message also has STOP, mark it for completion but don't overwrite the content
+        if (eventData.candidates[0].finishReason === "STOP") {
+            // Send the content first
+            requestProgress.data = JSON.stringify(createChunk({
+                content: eventData.candidates[0].content.parts[0].text
+            }));
+            requestProgress.progress = 1;
+        }
+    } else if (eventData.candidates?.[0]?.finishReason === "STOP") {
+        // Only send DONE if there was no content in this message
+        requestProgress.data = '[DONE]';
+        requestProgress.progress = 1;
+    }
+
+    // Handle safety blocks
+    if (eventData.candidates?.[0]?.safetyRatings?.some(rating => rating.blocked)) {
+        requestProgress.data = JSON.stringify({
+            id: eventData.responseId || `chatcmpl-${Date.now()}`,
+            object: "chat.completion.chunk",
+            created: Math.floor(Date.now() / 1000),
+            model: this.modelName,
+            choices: [{
+                index: 0,
+                delta: { content: "\n\n*** Response blocked due to safety ratings ***" },
+                finish_reason: "content_filter"
+            }]
+        });
+        requestProgress.progress = 1;
+        return requestProgress;
+    }
+
+    // Handle prompt feedback blocks
+    if (eventData.promptFeedback?.blockReason) {
+        requestProgress.data = JSON.stringify({
+            id: eventData.responseId || `chatcmpl-${Date.now()}`,
+            object: "chat.completion.chunk",
+            created: Math.floor(Date.now() / 1000),
+            model: this.modelName,
+            choices: [{
+                index: 0,
+                delta: { content: `\n\n*** Response blocked: ${eventData.promptFeedback.blockReason} ***` },
+                finish_reason: "content_filter"
+            }]
+        });
+        requestProgress.progress = 1;
+        return requestProgress;
+    }
+
+    return requestProgress;
+}
+
 // Override the logging function to display the messages and responses
 logRequestData(data, responseData, prompt) {
     const messages = data && data.contents;
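A hedged sketch of driving the new Gemini handler with illustrative stream events (plugin stands for a configured Gemini15ChatPlugin instance; the values are made up):

    const contentEvent = {
        data: JSON.stringify({
            responseId: 'resp-1',
            candidates: [{ content: { parts: [{ text: 'Hi' }] } }],
        }),
    };
    const stopEvent = {
        data: JSON.stringify({ candidates: [{ finishReason: 'STOP' }] }),
    };

    // plugin.processStreamEvent(contentEvent, {}) -> data is an OpenAI-style
    //   chunk with delta { content: 'Hi' }, and started is set on first call.
    // plugin.processStreamEvent(stopEvent, {})    -> data '[DONE]', progress 1.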
@@ -24,19 +24,24 @@ class Gemini15VisionPlugin extends Gemini15ChatPlugin {
     const { role, author, content } = message;
 
     if (role === 'system') {
-        systemParts.push({ text: content });
+        if (Array.isArray(content)) {
+            content.forEach(item => systemParts.push({ text: item }));
+        } else {
+            systemParts.push({ text: content });
+        }
         return;
     }
 
     // Convert content to Gemini format, trying to maintain compatibility
     const convertPartToGemini = (inputPart) => {
         try {
+            // First try to parse as JSON if it's a string
             const part = typeof inputPart === 'string' ? JSON.parse(inputPart) : inputPart;
             const {type, text, image_url, gcs} = part;
             let fileUrl = gcs || image_url?.url;
 
             if (typeof part === 'string') {
-                return { text: text };
+                return { text: inputPart };
             } else if (type === 'text') {
                 return { text: text };
             } else if (type === 'image_url') {
@@ -77,7 +82,8 @@ class Gemini15VisionPlugin extends Gemini15ChatPlugin {
             return null;
         }
     } catch (e) {
-        // this space intentionally left blank
+        // If JSON parsing fails or any other error, treat as plain text
+        return inputPart ? { text: inputPart } : null;
     }
     return inputPart ? { text: inputPart } : null;
 };
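Roughly, the revised catch path means string parts that fail JSON.parse are now kept as plain text instead of being dropped; a minimal re-implementation of just the text cases for illustration (the image and gcs branches from the full plugin are omitted):

    const convertTextPartToGemini = (inputPart) => {
        try {
            const part = typeof inputPart === 'string' ? JSON.parse(inputPart) : inputPart;
            if (part?.type === 'text') return { text: part.text };
            return inputPart ? { text: inputPart } : null;
        } catch (e) {
            // Non-JSON strings land here and are treated as plain text.
            return inputPart ? { text: inputPart } : null;
        }
    };

    console.log(convertTextPartToGemini('{"type":"text","text":"hi"}')); // { text: 'hi' }
    console.log(convertTextPartToGemini('just some text'));              // { text: 'just some text' }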
@@ -381,14 +381,17 @@ class ModelPlugin {
 
     // finish reason can be in different places in the message
     const finishReason = parsedMessage?.choices?.[0]?.finish_reason || parsedMessage?.candidates?.[0]?.finishReason;
-    if (finishReason?.toLowerCase() === 'stop') {
-        requestProgress.progress = 1;
-    } else {
-        if (finishReason?.toLowerCase() === 'safety') {
-            const safetyRatings = JSON.stringify(parsedMessage?.candidates?.[0]?.safetyRatings) || '';
-            logger.warn(`Request ${this.requestId} was blocked by the safety filter. ${safetyRatings}`);
-            requestProgress.data = `\n\nResponse blocked by safety filter: ${safetyRatings}`;
-            requestProgress.progress = 1;
+    if (finishReason) {
+        switch (finishReason.toLowerCase()) {
+            case 'safety':
+                const safetyRatings = JSON.stringify(parsedMessage?.candidates?.[0]?.safetyRatings) || '';
+                logger.warn(`Request ${this.requestId} was blocked by the safety filter. ${safetyRatings}`);
+                requestProgress.data = `\n\nResponse blocked by safety filter: ${safetyRatings}`;
+                requestProgress.progress = 1;
+                break;
+            default:
+                requestProgress.progress = 1;
+                break;
         }
     }
 }
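For context, a small sketch of the two response shapes the finish-reason lookup covers (sample objects, not real model output):

    const openAIMsg = { choices: [{ finish_reason: 'stop' }] };
    const geminiMsg = { candidates: [{ finishReason: 'SAFETY', safetyRatings: [] }] };

    const finishReasonOf = (m) =>
        m?.choices?.[0]?.finish_reason || m?.candidates?.[0]?.finishReason;

    console.log(finishReasonOf(openAIMsg)); // 'stop'   -> default case: progress = 1
    console.log(finishReasonOf(geminiMsg)); // 'SAFETY' -> 'safety' case after toLowerCase()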
@@ -0,0 +1,158 @@
+import ModelPlugin from './modelPlugin.js';
+import logger from '../../lib/logger.js';
+import { Transform } from 'stream';
+
+class OllamaChatPlugin extends ModelPlugin {
+
+    getRequestParameters(text, parameters, prompt) {
+        const { modelPromptMessages } = this.getCompiledPrompt(text, parameters, prompt);
+        return {
+            data: {
+                model: parameters.ollamaModel,
+                messages: modelPromptMessages,
+                stream: parameters.stream
+            },
+            params: {}
+        };
+    }
+
+    logRequestData(data, responseData, prompt) {
+        const { stream, messages, model } = data;
+
+        if (messages && messages.length > 0) {
+            logger.info(`[ollama chat request sent to model ${model} containing ${messages.length} messages]`);
+            let totalLength = 0;
+            let totalUnits;
+            messages.forEach((message, index) => {
+                const content = message.content;
+                const { length, units } = this.getLength(content);
+                const preview = this.shortenContent(content);
+
+                logger.verbose(
+                    `message ${index + 1}: role: ${message.role}, ${units}: ${length}, content: "${preview}"`
+                );
+                totalLength += length;
+                totalUnits = units;
+            });
+            logger.info(`[chat request contained ${totalLength} ${totalUnits}]`);
+        }
+
+        if (stream) {
+            logger.info(`[response received as an SSE stream]`);
+        } else if (responseData) {
+            const responseText = this.parseResponse(responseData);
+            const { length, units } = this.getLength(responseText);
+            logger.info(`[response received containing ${length} ${units}]`);
+            logger.verbose(`${this.shortenContent(responseText)}`);
+        }
+
+        prompt &&
+            prompt.debugInfo &&
+            (prompt.debugInfo += `\n${JSON.stringify(data)}`);
+    }
+
+    parseResponse(data) {
+        // If data is not a string (e.g. streaming), return as is
+        if (typeof data !== 'string') {
+            return data;
+        }
+
+        // Split into lines and filter empty ones
+        const lines = data.split('\n').filter(line => line.trim());
+
+        let fullResponse = '';
+
+        for (const line of lines) {
+            try {
+                const jsonObj = JSON.parse(line);
+
+                if (jsonObj.message && jsonObj.message.content) {
+                    // Unescape special sequences
+                    const content = jsonObj.message.content
+                        .replace(/\\n/g, '\n')
+                        .replace(/\\"/g, '"')
+                        .replace(/\\\\/g, '\\')
+                        .replace(/\\u003c/g, '<')
+                        .replace(/\\u003e/g, '>');
+
+                    fullResponse += content;
+                }
+            } catch (err) {
+                // If we can't parse the line as JSON, just skip it
+                continue;
+            }
+        }
+
+        return fullResponse;
+    }
+
+    processStreamEvent(event, requestProgress) {
+        try {
+            const data = JSON.parse(event.data);
+
+            // Handle the streaming response
+            if (data.message?.content) {
+                // Unescape special sequences in the content
+                const content = data.message.content
+                    .replace(/\\n/g, '\n')
+                    .replace(/\\"/g, '"')
+                    .replace(/\\\\/g, '\\')
+                    .replace(/\\u003c/g, '<')
+                    .replace(/\\u003e/g, '>');
+
+                requestProgress.data = JSON.stringify(content);
+            }
+
+            // Check if this is the final message
+            if (data.done) {
+                requestProgress.data = '[DONE]';
+                requestProgress.progress = 1;
+            }
+
+            return requestProgress;
+        } catch (err) {
+            // If we can't parse the event data, return the progress as is
+            return requestProgress;
+        }
+    }
+
+    async execute(text, parameters, prompt, cortexRequest) {
+        const requestParameters = this.getRequestParameters(text, parameters, prompt);
+        cortexRequest.data = { ...(cortexRequest.data || {}), ...requestParameters.data };
+        cortexRequest.params = { ...(cortexRequest.params || {}), ...requestParameters.params };
+
+        // For Ollama streaming, transform NDJSON to SSE format
+        if (parameters.stream) {
+            const response = await this.executeRequest(cortexRequest);
+
+            // Create a transform stream that converts NDJSON to SSE format
+            const transformer = new Transform({
+                decodeStrings: false, // Keep as string
+                transform(chunk, encoding, callback) {
+                    try {
+                        const lines = chunk.toString().split('\n');
+                        for (const line of lines) {
+                            if (line.trim()) {
+                                // Format as SSE data
+                                this.push(`data: ${line}\n\n`);
+                            }
+                        }
+                        callback();
+                    } catch (err) {
+                        callback(err);
+                    }
+                }
+            });
+
+            // Pipe the response through our transformer
+            response.pipe(transformer);
+
+            // Return the transformed stream
+            return transformer;
+        }
+
+        return this.executeRequest(cortexRequest);
+    }
+}
+
+export default OllamaChatPlugin;
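The NDJSON-to-SSE transform in execute() can be exercised in isolation; a sketch feeding it one sample Ollama chat line (sample data only):

    import { Transform } from 'stream';

    // Same transform logic as the plugin: each non-empty NDJSON line becomes
    // one SSE `data:` event.
    const toSSE = new Transform({
        decodeStrings: false,
        transform(chunk, encoding, callback) {
            for (const line of chunk.toString().split('\n')) {
                if (line.trim()) this.push(`data: ${line}\n\n`);
            }
            callback();
        },
    });

    toSSE.on('data', (d) => process.stdout.write(d.toString()));
    toSSE.write('{"message":{"role":"assistant","content":"Hi"},"done":false}\n');
    // prints: data: {"message":{"role":"assistant","content":"Hi"},"done":false}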
@@ -0,0 +1,147 @@
+import ModelPlugin from './modelPlugin.js';
+import logger from '../../lib/logger.js';
+import { Transform } from 'stream';
+
+class OllamaCompletionPlugin extends ModelPlugin {
+
+    getRequestParameters(text, parameters, prompt) {
+        return {
+            data: {
+                model: parameters.ollamaModel,
+                prompt: text,
+                stream: parameters.stream
+            },
+            params: {}
+        };
+    }
+
+    logRequestData(data, responseData, prompt) {
+        const { stream, prompt: promptText, model } = data;
+
+        if (promptText) {
+            logger.info(`[ollama completion request sent to model ${model}]`);
+            const { length, units } = this.getLength(promptText);
+            const preview = this.shortenContent(promptText);
+            logger.verbose(`prompt ${units}: ${length}, content: "${preview}"`);
+            logger.info(`[completion request contained ${length} ${units}]`);
+        }
+
+        if (stream) {
+            logger.info(`[response received as an SSE stream]`);
+        } else if (responseData) {
+            const responseText = this.parseResponse(responseData);
+            const { length, units } = this.getLength(responseText);
+            logger.info(`[response received containing ${length} ${units}]`);
+            logger.verbose(`${this.shortenContent(responseText)}`);
+        }
+
+        prompt &&
+            prompt.debugInfo &&
+            (prompt.debugInfo += `\n${JSON.stringify(data)}`);
+    }
+
+    parseResponse(data) {
+        // If data is not a string (e.g. streaming), return as is
+        if (typeof data !== 'string') {
+            return data;
+        }
+
+        // Split into lines and filter empty ones
+        const lines = data.split('\n').filter(line => line.trim());
+
+        let fullResponse = '';
+
+        for (const line of lines) {
+            try {
+                const jsonObj = JSON.parse(line);
+
+                if (jsonObj.response) {
+                    // Unescape special sequences
+                    const content = jsonObj.response
+                        .replace(/\\n/g, '\n')
+                        .replace(/\\"/g, '"')
+                        .replace(/\\\\/g, '\\')
+                        .replace(/\\u003c/g, '<')
+                        .replace(/\\u003e/g, '>');
+
+                    fullResponse += content;
+                }
+            } catch (err) {
+                // If we can't parse the line as JSON, just skip it
+                continue;
+            }
+        }
+
+        return fullResponse;
+    }
+
+    processStreamEvent(event, requestProgress) {
+        try {
+            const data = JSON.parse(event.data);
+
+            // Handle the streaming response
+            if (data.response) {
+                // Unescape special sequences in the content
+                const content = data.response
+                    .replace(/\\n/g, '\n')
+                    .replace(/\\"/g, '"')
+                    .replace(/\\\\/g, '\\')
+                    .replace(/\\u003c/g, '<')
+                    .replace(/\\u003e/g, '>');
+
+                requestProgress.data = JSON.stringify(content);
+            }
+
+            // Check if this is the final message
+            if (data.done) {
+                requestProgress.data = '[DONE]';
+                requestProgress.progress = 1;
+            }
+
+            return requestProgress;
+        } catch (err) {
+            // If we can't parse the event data, return the progress as is
+            return requestProgress;
+        }
+    }
+
+    async execute(text, parameters, prompt, cortexRequest) {
+        const requestParameters = this.getRequestParameters(text, parameters, prompt);
+        cortexRequest.data = { ...(cortexRequest.data || {}), ...requestParameters.data };
+        cortexRequest.params = { ...(cortexRequest.params || {}), ...requestParameters.params };
+
+        // For Ollama streaming, transform NDJSON to SSE format
+        if (parameters.stream) {
+            const response = await this.executeRequest(cortexRequest);
+
+            // Create a transform stream that converts NDJSON to SSE format
+            const transformer = new Transform({
+                decodeStrings: false, // Keep as string
+                transform(chunk, encoding, callback) {
+                    try {
+                        const lines = chunk.toString().split('\n');
+                        for (const line of lines) {
+                            if (line.trim()) {
+                                // Format as SSE data
+                                this.push(`data: ${line}\n\n`);
+                            }
+                        }
+                        callback();
+                    } catch (err) {
+                        callback(err);
+                    }
+                }
+            });
+
+            // Pipe the response through our transformer
+            response.pipe(transformer);
+
+            // Return the transformed stream
+            return transformer;
+        }
+
+        return this.executeRequest(cortexRequest);
+    }
+}
+
+export default OllamaCompletionPlugin;
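Likewise, the non-streaming parse path can be checked standalone; a sketch with a two-line NDJSON completion body (sample data, not real Ollama output):

    // Mirrors parseResponse: each NDJSON line contributes its `response`
    // field to the accumulated completion text; unparseable lines are skipped.
    const body = [
        '{"model":"llama3","response":"Hello, ","done":false}',
        '{"model":"llama3","response":"world.","done":true}',
    ].join('\n');

    let fullResponse = '';
    for (const line of body.split('\n').filter(l => l.trim())) {
        try {
            const obj = JSON.parse(line);
            if (obj.response) fullResponse += obj.response;
        } catch (err) {
            continue; // skip non-JSON lines
        }
    }
    console.log(fullResponse); // "Hello, world."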