@aj-archipelago/cortex 1.1.4 → 1.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/config.js +3 -3
- package/helper-apps/cortex-whisper-wrapper/app.py +6 -1
- package/lib/cortexRequest.js +11 -1
- package/lib/encodeCache.js +38 -0
- package/lib/fastLruCache.js +82 -0
- package/lib/pathwayTools.js +1 -1
- package/lib/requestExecutor.js +71 -68
- package/lib/requestMonitor.js +19 -9
- package/package.json +3 -1
- package/pathways/basePathway.js +5 -3
- package/pathways/bias.js +1 -1
- package/pathways/cognitive_insert.js +1 -1
- package/server/chunker.js +1 -1
- package/server/graphql.js +2 -0
- package/server/modelExecutor.js +8 -0
- package/server/pathwayResolver.js +26 -8
- package/server/plugins/azureCognitivePlugin.js +11 -6
- package/server/plugins/azureTranslatePlugin.js +0 -2
- package/server/plugins/geminiChatPlugin.js +192 -0
- package/server/plugins/geminiVisionPlugin.js +102 -0
- package/server/plugins/localModelPlugin.js +1 -1
- package/server/plugins/modelPlugin.js +24 -19
- package/server/plugins/openAiChatPlugin.js +11 -12
- package/server/plugins/openAiCompletionPlugin.js +6 -7
- package/server/plugins/openAiEmbeddingsPlugin.js +3 -1
- package/server/plugins/openAiWhisperPlugin.js +3 -0
- package/server/plugins/palmChatPlugin.js +8 -11
- package/server/plugins/palmCompletionPlugin.js +4 -7
- package/server/rest.js +11 -5
- package/tests/chunkfunction.test.js +1 -2
- package/tests/encodeCache.test.js +92 -0
- package/tests/fastLruCache.test.js +29 -0
- package/tests/requestMonitor.test.js +3 -3
- package/tests/truncateMessages.test.js +1 -1
|
@@ -2,7 +2,7 @@ import { ModelExecutor } from './modelExecutor.js';
|
|
|
2
2
|
import { modelEndpoints } from '../lib/requestExecutor.js';
|
|
3
3
|
// eslint-disable-next-line import/no-extraneous-dependencies
|
|
4
4
|
import { v4 as uuidv4 } from 'uuid';
|
|
5
|
-
import { encode } from '
|
|
5
|
+
import { encode } from '../lib/encodeCache.js';
|
|
6
6
|
import { getFirstNToken, getLastNToken, getSemanticChunks } from './chunker.js';
|
|
7
7
|
import { PathwayResponseParser } from './pathwayResponseParser.js';
|
|
8
8
|
import { Prompt } from './prompt.js';
|
|
@@ -98,8 +98,9 @@ class PathwayResolver {
|
|
|
98
98
|
const incomingMessage = responseData;
|
|
99
99
|
|
|
100
100
|
let messageBuffer = '';
|
|
101
|
+
let streamEnded = false;
|
|
101
102
|
|
|
102
|
-
const
|
|
103
|
+
const processStreamSSE = (data) => {
|
|
103
104
|
try {
|
|
104
105
|
//logger.info(`\n\nReceived stream data for requestId ${this.requestId}: ${data.toString()}`);
|
|
105
106
|
let events = data.toString().split('\n');
|
|
@@ -132,18 +133,35 @@ class PathwayResolver {
|
|
|
132
133
|
return;
|
|
133
134
|
}
|
|
134
135
|
|
|
136
|
+
// error can be in different places in the message
|
|
135
137
|
const streamError = parsedMessage?.error || parsedMessage?.choices?.[0]?.delta?.content?.error || parsedMessage?.choices?.[0]?.text?.error;
|
|
136
138
|
if (streamError) {
|
|
137
139
|
streamErrorOccurred = true;
|
|
138
140
|
logger.error(`Stream error: ${streamError.message}`);
|
|
139
|
-
incomingMessage.off('data',
|
|
141
|
+
incomingMessage.off('data', processStreamSSE);
|
|
140
142
|
return;
|
|
141
143
|
}
|
|
144
|
+
|
|
145
|
+
// finish reason can be in different places in the message
|
|
146
|
+
const finishReason = parsedMessage?.choices?.[0]?.finish_reason || parsedMessage?.candidates?.[0]?.finishReason;
|
|
147
|
+
if (finishReason?.toLowerCase() === 'stop') {
|
|
148
|
+
requestProgress.progress = 1;
|
|
149
|
+
} else {
|
|
150
|
+
if (finishReason?.toLowerCase() === 'safety') {
|
|
151
|
+
const safetyRatings = JSON.stringify(parsedMessage?.candidates?.[0]?.safetyRatings) || '';
|
|
152
|
+
logger.warn(`Request ${this.requestId} was blocked by the safety filter. ${safetyRatings}`);
|
|
153
|
+
requestProgress.data = `\n\nResponse blocked by safety filter: ${safetyRatings}`;
|
|
154
|
+
requestProgress.progress = 1;
|
|
155
|
+
}
|
|
156
|
+
}
|
|
142
157
|
}
|
|
143
158
|
|
|
144
159
|
try {
|
|
145
|
-
|
|
146
|
-
|
|
160
|
+
if (!streamEnded) {
|
|
161
|
+
//logger.info(`Publishing stream message to requestId ${this.requestId}: ${message}`);
|
|
162
|
+
publishRequestProgress(requestProgress);
|
|
163
|
+
streamEnded = requestProgress.progress === 1;
|
|
164
|
+
}
|
|
147
165
|
} catch (error) {
|
|
148
166
|
logger.error(`Could not publish the stream message: "${messageBuffer}", ${error}`);
|
|
149
167
|
}
|
|
@@ -156,7 +174,7 @@ class PathwayResolver {
|
|
|
156
174
|
|
|
157
175
|
if (incomingMessage) {
|
|
158
176
|
await new Promise((resolve, reject) => {
|
|
159
|
-
incomingMessage.on('data',
|
|
177
|
+
incomingMessage.on('data', processStreamSSE);
|
|
160
178
|
incomingMessage.on('end', resolve);
|
|
161
179
|
incomingMessage.on('error', reject);
|
|
162
180
|
});
|
|
@@ -321,7 +339,7 @@ class PathwayResolver {
|
|
|
321
339
|
const data = await Promise.all(chunks.map(chunk =>
|
|
322
340
|
this.applyPromptsSerially(chunk, parameters)));
|
|
323
341
|
// Join the chunks with newlines
|
|
324
|
-
return data.join("\n\n");
|
|
342
|
+
return data.join(this.pathway.joinChunksWith || "\n\n");
|
|
325
343
|
} else {
|
|
326
344
|
// Apply prompts one by one, serially, across all chunks
|
|
327
345
|
// This is the default processing mode and will make previousResult available at the object level
|
|
@@ -355,7 +373,7 @@ class PathwayResolver {
|
|
|
355
373
|
if (result.length === 1) {
|
|
356
374
|
result = result[0];
|
|
357
375
|
} else if (!currentParameters.stream) {
|
|
358
|
-
result = result.join("\n\n");
|
|
376
|
+
result = result.join(this.pathway.joinChunksWith || "\n\n");
|
|
359
377
|
}
|
|
360
378
|
}
|
|
361
379
|
|
|
@@ -6,6 +6,7 @@ import path from 'path';
|
|
|
6
6
|
import { config } from '../../config.js';
|
|
7
7
|
import { axios } from '../../lib/requestExecutor.js';
|
|
8
8
|
import logger from '../../lib/logger.js';
|
|
9
|
+
import { getSemanticChunks } from '../chunker.js';
|
|
9
10
|
|
|
10
11
|
const API_URL = config.get('whisperMediaApiUrl');
|
|
11
12
|
|
|
@@ -37,7 +38,8 @@ class AzureCognitivePlugin extends ModelPlugin {
|
|
|
37
38
|
const data = {};
|
|
38
39
|
|
|
39
40
|
if (mode == 'delete') {
|
|
40
|
-
|
|
41
|
+
let searchUrl = this.ensureMode(this.requestUrl(text), 'search');
|
|
42
|
+
searchUrl = this.ensureIndex(searchUrl, indexName);
|
|
41
43
|
let searchQuery = `owner:${savedContextId}`;
|
|
42
44
|
|
|
43
45
|
if (docId) {
|
|
@@ -155,6 +157,7 @@ class AzureCognitivePlugin extends ModelPlugin {
|
|
|
155
157
|
const headers = cortexRequest.headers;
|
|
156
158
|
|
|
157
159
|
const { file } = parameters;
|
|
160
|
+
const fileData = { value: [] };
|
|
158
161
|
if(file){
|
|
159
162
|
let url = file;
|
|
160
163
|
//if not txt file, use helper app to convert to txt
|
|
@@ -177,11 +180,13 @@ class AzureCognitivePlugin extends ModelPlugin {
|
|
|
177
180
|
throw Error(`No data can be extracted out of file!`);
|
|
178
181
|
}
|
|
179
182
|
|
|
180
|
-
|
|
181
|
-
|
|
183
|
+
const chunkTokenLength = this.promptParameters.inputChunkSize || 1000;
|
|
184
|
+
const chunks = getSemanticChunks(data, chunkTokenLength);
|
|
182
185
|
|
|
183
|
-
|
|
184
|
-
|
|
186
|
+
for (const text of chunks) {
|
|
187
|
+
const { data: singleData } = await this.getRequestParameters(text, parameters, prompt, mode, indexName, savedContextId, cortexRequest)
|
|
188
|
+
fileData.value.push(singleData.value[0]);
|
|
189
|
+
}
|
|
185
190
|
}
|
|
186
191
|
|
|
187
192
|
const { data, params } = await this.getRequestParameters(text, parameters, prompt, mode, indexName, savedContextId, cortexRequest);
|
|
@@ -195,7 +200,7 @@ class AzureCognitivePlugin extends ModelPlugin {
|
|
|
195
200
|
|
|
196
201
|
// execute the request
|
|
197
202
|
cortexRequest.url = url;
|
|
198
|
-
cortexRequest.data = data;
|
|
203
|
+
cortexRequest.data = (mode === 'index' && fileData.value.length>0) ? fileData : data;
|
|
199
204
|
cortexRequest.params = params;
|
|
200
205
|
cortexRequest.headers = headers;
|
|
201
206
|
const result = await this.executeRequest(cortexRequest);
|
|
@@ -45,8 +45,6 @@ class AzureTranslatePlugin extends ModelPlugin {
|
|
|
45
45
|
|
|
46
46
|
// Override the logging function to display the request and response
|
|
47
47
|
logRequestData(data, responseData, prompt) {
|
|
48
|
-
this.logAIRequestFinished();
|
|
49
|
-
|
|
50
48
|
const modelInput = data[0].Text;
|
|
51
49
|
|
|
52
50
|
logger.debug(`${modelInput}`);
|
|
@@ -0,0 +1,192 @@
|
|
|
1
|
+
// geminiChatPlugin.js
|
|
2
|
+
import ModelPlugin from './modelPlugin.js';
|
|
3
|
+
import logger from '../../lib/logger.js';
|
|
4
|
+
|
|
5
|
+
/**
 * Merges an array of Gemini response chunks into a single text result.
 *
 * Only the first candidate of each chunk is considered, and only the first
 * part of that candidate's content is appended to the output.
 *
 * @param {Array<object>} data - Response chunks, each optionally carrying a `candidates` array.
 * @returns {{ mergedResult: (string|null), safetyRatings: Array<object> }}
 *   The concatenated text (null when nothing was produced) and an empty
 *   ratings list, or - when any chunk reports a blocked rating - a fixed
 *   "blocked" message together with that chunk's safety ratings.
 */
const mergeResults = (data) => {
    let output = '';

    for (const chunk of data) {
        const candidate = chunk?.candidates?.[0];
        if (!candidate) {
            continue;
        }

        // If it was blocked, return the blocked message.
        // Chunks are not guaranteed to carry safetyRatings, so only inspect them when present.
        const ratings = candidate.safetyRatings;
        if (Array.isArray(ratings) && ratings.some(rating => rating.blocked)) {
            return {
                mergedResult: 'The response was blocked because the input or response potentially violates policies. Try rephrasing the prompt or adjusting the parameter settings.',
                safetyRatings: ratings,
            };
        }

        // Append the text of the first part of the first candidate.
        // A chunk may have no content (or no text part); skip it instead of
        // throwing or appending the literal string "undefined".
        const text = candidate.content?.parts?.[0]?.text;
        if (typeof text === 'string') {
            output += text;
        }
    }

    return { mergedResult: output || null, safetyRatings: [] };
};
|
|
28
|
+
|
|
29
|
+
/**
 * Model plugin that sends chat requests to the Gemini API.
 *
 * Accepts messages in OpenAI, PaLM or native Gemini format, builds the Gemini
 * request body, and merges array responses into a single string.
 */
class GeminiChatPlugin extends ModelPlugin {
    constructor(pathway, model) {
        super(pathway, model);
    }

    /**
     * Converts either OpenAI ({ role, content }) or PaLM ({ author, content })
     * messages to the Gemini ({ role, parts }) format. Messages whose first
     * entry already has a `parts` property are passed through unchanged.
     *
     * @param {Array<object>} messages - Messages in any of the supported formats.
     * @returns {{ modifiedMessages: Array<object> }} The converted messages.
     */
    convertMessagesToGemini(messages) {
        let modifiedMessages = [];
        let lastAuthor = '';

        // Gemini has no 'assistant' role; its counterpart is 'model'
        const toGeminiRole = (name) => (name === 'assistant' ? 'model' : name);

        // Check if the messages are already in the Gemini format
        if (messages[0] && Object.prototype.hasOwnProperty.call(messages[0], 'parts')) {
            modifiedMessages = messages;
        } else {
            messages.forEach(message => {
                const { role, author, content } = message;

                // Right now Gemini API has no direct translation for system messages,
                // but they work fine as parts of user messages
                if (role === 'system') {
                    modifiedMessages.push({
                        role: 'user',
                        parts: [{ text: content }],
                    });
                    lastAuthor = 'user';
                    return;
                }

                // Aggregate consecutive author messages, appending the content
                if ((role === lastAuthor || author === lastAuthor) && modifiedMessages.length > 0) {
                    modifiedMessages[modifiedMessages.length - 1].parts.push({ text: content });
                }

                // Push messages that are role: 'user' or 'assistant', changing 'assistant' to 'model'
                else if (role === 'user' || role === 'assistant' || author) {
                    modifiedMessages.push({
                        role: toGeminiRole(author || role),
                        parts: [{ text: content }],
                    });
                    // Track the incoming (unmapped) name so the aggregation check above
                    // keeps comparing like with like
                    lastAuthor = author || role;
                }
            });
        }

        // Keep an odd number of messages: when the count is even, drop the oldest one
        // (presumably so alternating user/model turns end on a user turn - confirm
        // against the Gemini API)
        if (modifiedMessages.length % 2 === 0) {
            modifiedMessages = modifiedMessages.slice(1);
        }

        return {
            modifiedMessages,
        };
    }

    /**
     * Builds the request body specific to the Gemini API.
     *
     * @param {string} text - Input text passed to the prompt compiler.
     * @param {object} parameters - Request parameters (`topP` and `topK` are read here).
     * @param {object} prompt - Prompt passed to the prompt compiler.
     * @param {object} [cortexRequest] - Request whose `pathway` may supply
     *   `geminiSafetySettings`, `geminiTools` and `max_tokens`.
     * @returns {object} Body with `contents`, `generationConfig`, `safety_settings` and `tools`.
     * @throws {Error} When `max_tokens` is negative.
     */
    getRequestParameters(text, parameters, prompt, cortexRequest) {
        const { modelPromptText, modelPromptMessages, tokenLength } = this.getCompiledPrompt(text, parameters, prompt);
        // Tolerate a missing request or a request without a pathway
        const { geminiSafetySettings, geminiTools, max_tokens } = cortexRequest?.pathway ?? {};

        // Define the model's max token length
        const modelTargetTokenLength = this.getModelMaxTokenLength() * this.getPromptTokenRatio();

        const geminiMessages = this.convertMessagesToGemini(modelPromptMessages || [{ "role": "user", "parts": [{ "text": modelPromptText }]}]);

        let requestMessages = geminiMessages.modifiedMessages;

        // Check if the token length exceeds the model's max token length
        if (tokenLength > modelTargetTokenLength) {
            // Remove older messages until the token length is within the model's limit
            requestMessages = this.truncateMessagesToTargetLength(requestMessages, modelTargetTokenLength);
        }

        if (max_tokens < 0) {
            throw new Error(`Prompt is too long to successfully call the model at ${tokenLength} tokens. The model will not be called.`);
        }

        const requestParameters = {
            contents: requestMessages,
            generationConfig: {
                // ?? rather than || so that an explicit temperature of 0 is honored
                temperature: this.temperature ?? 0.7,
                maxOutputTokens: max_tokens || this.getModelMaxReturnTokens(),
                topP: parameters.topP || 0.95,
                topK: parameters.topK || 40,
            },
            safety_settings: geminiSafetySettings || undefined,
            tools: geminiTools || undefined
        };

        return requestParameters;
    }

    /**
     * Parses the response from the Chat API.
     *
     * @param {*} data - Raw response; arrays are treated as a list of response chunks.
     * @returns {*} `data` itself when it is not an array, otherwise the merged text or null.
     */
    parseResponse(data) {
        // If data is not an array, return it directly
        if (!Array.isArray(data)) {
            return data;
        }

        return mergeResults(data).mergedResult || null;
    }

    /**
     * Executes the request against the Chat API.
     *
     * Fills in the body, query params, stream flag, URL and Authorization
     * header on `cortexRequest`, then delegates to `executeRequest`.
     *
     * @param {string} text
     * @param {object} parameters - `stream` selects the SSE response mode.
     * @param {object} prompt
     * @param {object} cortexRequest - Mutated in place.
     * @returns {Promise<*>} Whatever `executeRequest` resolves to.
     */
    async execute(text, parameters, prompt, cortexRequest) {
        const requestParameters = this.getRequestParameters(text, parameters, prompt, cortexRequest);
        const { stream } = parameters;

        cortexRequest.data = { ...(cortexRequest.data || {}), ...requestParameters };
        cortexRequest.params = {}; // query params
        cortexRequest.stream = stream;

        // Request SSE output when streaming; use '&' if the URL already has a query string
        const baseUrl = cortexRequest.url;
        const separator = String(baseUrl).includes('?') ? '&' : '?';
        cortexRequest.url = cortexRequest.stream ? `${baseUrl}${separator}alt=sse` : baseUrl;

        const gcpAuthTokenHelper = this.config.get('gcpAuthTokenHelper');
        const authToken = await gcpAuthTokenHelper.getAccessToken();
        cortexRequest.headers.Authorization = `Bearer ${authToken}`;

        return this.executeRequest(cortexRequest);
    }

    /**
     * Overrides the logging function to display the messages and responses.
     *
     * @param {object} data - Request body; `data.contents` holds the Gemini messages.
     * @param {*} responseData - Array of response chunks, or anything else for a stream.
     * @param {object} [prompt] - When it has `debugInfo`, the request body is appended to it.
     */
    logRequestData(data, responseData, prompt) {
        const messages = data && data.contents;

        if (messages && messages.length > 1) {
            logger.info(`[chat request contains ${messages.length} messages]`);
            messages.forEach((message, index) => {
                // Only text parts contribute to the preview; a message without parts logs as empty
                const messageContent = (message.parts || []).reduce((acc, part) => {
                    if (part.text) {
                        return acc + part.text;
                    }
                    return acc;
                } , '');
                const words = messageContent.split(" ");
                const { length, units } = this.getLength(messageContent);
                const preview = words.length < 41 ? messageContent : words.slice(0, 20).join(" ") + " ... " + words.slice(-20).join(" ");

                logger.debug(`message ${index + 1}: role: ${message.role}, ${units}: ${length}, content: "${preview}"`);
            });
        } else if (messages && messages.length === 1) {
            logger.debug(`${messages[0].parts?.[0]?.text}`);
        }

        // check if responseData is an array
        if (!Array.isArray(responseData)) {
            logger.info(`[response received as an SSE stream]`);
        } else {
            const { mergedResult, safetyRatings } = mergeResults(responseData);
            if (safetyRatings?.length) {
                logger.warn(`!!! response was blocked because the input or response potentially violates policies`);
                logger.debug(`Safety Ratings: ${JSON.stringify(safetyRatings, null, 2)}`);
            }
            // mergedResult is null when the response had no text; measure an empty string instead
            const { length, units } = this.getLength(mergedResult ?? '');
            logger.info(`[response received containing ${length} ${units}]`);
            logger.debug(`${mergedResult}`);
        }

        if (prompt && prompt.debugInfo) {
            prompt.debugInfo += `\n${JSON.stringify(data)}`;
        }
    }

}
|
|
191
|
+
|
|
192
|
+
export default GeminiChatPlugin;
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
import GeminiChatPlugin from './geminiChatPlugin.js';
|
|
2
|
+
import mime from 'mime-types';
|
|
3
|
+
import logger from '../../lib/logger.js';
|
|
4
|
+
|
|
5
|
+
/**
 * Gemini plugin variant that also accepts multimodal (text + image) messages.
 */
class GeminiVisionPlugin extends GeminiChatPlugin {

    /**
     * Overrides convertMessagesToGemini to handle multimodal vision messages.
     * Operates on messages in Gemini native format (passed through unchanged)
     * or in OpenAI's format (converted to the Gemini format).
     *
     * @param {Array<object>} messages - Messages whose `content` is either a
     *   string ("chat" format) or an array of parts ("vision" format).
     * @returns {{ modifiedMessages: Array<object> }} The converted messages.
     */
    convertMessagesToGemini(messages) {
        let modifiedMessages = [];
        let lastAuthor = '';

        // Gemini only supports user: and model: roles, so 'assistant' becomes 'model'
        const toGeminiRole = (name) => (name === 'assistant' ? 'model' : name);

        // Convert one content part to Gemini format, trying to maintain compatibility.
        // A part may be a JSON string, a plain string, or an already-parsed object.
        const convertPartToGemini = (partString) => {
            try {
                const part = typeof partString === 'string' ? JSON.parse(partString) : partString;
                if (typeof part === 'string') {
                    return { text: part };
                } else if (part.type === 'text') {
                    return { text: part.text };
                } else if (part.type === 'image_url') {
                    const imageUrl = part.image_url.url;
                    if (imageUrl.startsWith('gs://')) {
                        return {
                            fileData: {
                                mimeType: mime.lookup(imageUrl),
                                fileUri: imageUrl
                            }
                        };
                    } else {
                        // Use the MIME type declared by a data URL (data:<mime>;base64,...)
                        // and fall back to JPEG when none is present
                        const declaredMime = /^data:([^;,]+)[;,]/.exec(imageUrl);
                        return {
                            inlineData: {
                                mimeType: declaredMime ? declaredMime[1] : 'image/jpeg',
                                data: imageUrl.split('base64,')[1]
                            }
                        };
                    }
                }
            } catch (e) {
                logger.warn(`Unable to parse part - including as string: ${partString}`);
            }
            // Unrecognized parts are included as text; objects are serialized so text stays a string
            return { text: (typeof partString === 'object' && partString !== null) ? JSON.stringify(partString) : partString };
        };

        // Check if the messages are already in the Gemini format
        if (messages[0] && Object.prototype.hasOwnProperty.call(messages[0], 'parts')) {
            modifiedMessages = messages;
        } else {
            messages.forEach(message => {
                const { role, author, content } = message;

                // Right now Gemini API has no direct translation for system messages,
                // so we send them as a user: role message
                if (role === 'system') {
                    modifiedMessages.push({
                        role: 'user',
                        parts: [{ text: content }],
                    });
                    lastAuthor = 'user';
                    return;
                }

                const addPartToMessages = (geminiPart) => {
                    // Gemini requires alternating user: and model: messages,
                    // so consecutive parts from the same author are merged
                    if ((role === lastAuthor || author === lastAuthor) && modifiedMessages.length > 0) {
                        modifiedMessages[modifiedMessages.length - 1].parts.push(geminiPart);
                    }
                    // Gemini only supports user: and model: roles
                    else if (role === 'user' || role === 'assistant' || author) {
                        modifiedMessages.push({
                            role: toGeminiRole(author || role),
                            parts: [geminiPart],
                        });
                        // Track the incoming (unmapped) name so the merge check above
                        // keeps comparing like with like
                        lastAuthor = author || role;
                    }
                };

                // Content can either be in the "vision" format (array) or in the "chat" format (string)
                if (Array.isArray(content)) {
                    content.forEach(part => {
                        addPartToMessages(convertPartToGemini(part));
                    });
                }
                else {
                    addPartToMessages(convertPartToGemini(content));
                }
            });
        }

        // Keep an odd number of messages: when the count is even, drop the oldest one
        // (presumably so alternating user/model turns end on a user turn - confirm
        // against the Gemini API)
        if (modifiedMessages.length % 2 === 0) {
            modifiedMessages = modifiedMessages.slice(1);
        }

        return {
            modifiedMessages,
        };
    }

}
|
|
101
|
+
|
|
102
|
+
export default GeminiVisionPlugin;
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
// localModelPlugin.js
|
|
2
2
|
import ModelPlugin from './modelPlugin.js';
|
|
3
3
|
import { execFileSync } from 'child_process';
|
|
4
|
-
import { encode } from '
|
|
4
|
+
import { encode } from '../../lib/encodeCache.js';
|
|
5
5
|
import logger from '../../lib/logger.js';
|
|
6
6
|
|
|
7
7
|
class LocalModelPlugin extends ModelPlugin {
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
// ModelPlugin.js
|
|
2
2
|
import HandleBars from '../../lib/handleBars.js';
|
|
3
3
|
import { executeRequest } from '../../lib/requestExecutor.js';
|
|
4
|
-
import { encode } from '
|
|
4
|
+
import { encode } from '../../lib/encodeCache.js';
|
|
5
5
|
import { getFirstNToken } from '../chunker.js';
|
|
6
6
|
import logger, { obscureUrlParams } from '../../lib/logger.js';
|
|
7
7
|
import { config } from '../../config.js';
|
|
@@ -32,7 +32,6 @@ class ModelPlugin {
|
|
|
32
32
|
}
|
|
33
33
|
|
|
34
34
|
this.requestCount = 0;
|
|
35
|
-
this.lastRequestStartTime = new Date();
|
|
36
35
|
}
|
|
37
36
|
|
|
38
37
|
truncateMessagesToTargetLength(messages, targetTokenLength) {
|
|
@@ -221,7 +220,6 @@ class ModelPlugin {
|
|
|
221
220
|
// Default simple logging
|
|
222
221
|
logRequestStart() {
|
|
223
222
|
this.requestCount++;
|
|
224
|
-
this.lastRequestStartTime = new Date();
|
|
225
223
|
const logMessage = `>>> [${this.requestId}: ${this.pathwayName}.${this.requestCount}] request`;
|
|
226
224
|
const header = '>'.repeat(logMessage.length);
|
|
227
225
|
logger.info(`${header}`);
|
|
@@ -229,28 +227,32 @@ class ModelPlugin {
|
|
|
229
227
|
logger.info(`>>> Making API request to ${obscureUrlParams(this.url)}`);
|
|
230
228
|
}
|
|
231
229
|
|
|
232
|
-
logAIRequestFinished() {
|
|
233
|
-
const
|
|
234
|
-
const timeElapsed = (currentTime - this.lastRequestStartTime) / 1000;
|
|
235
|
-
const logMessage = `<<< [${this.requestId}: ${this.pathwayName}] response - complete in ${timeElapsed}s - data:`;
|
|
230
|
+
logAIRequestFinished(requestDuration) {
|
|
231
|
+
const logMessage = `<<< [${this.requestId}: ${this.pathwayName}] response - complete in ${requestDuration}ms - data:`;
|
|
236
232
|
const header = '<'.repeat(logMessage.length);
|
|
237
233
|
logger.info(`${header}`);
|
|
238
234
|
logger.info(`${logMessage}`);
|
|
239
235
|
}
|
|
240
236
|
|
|
237
|
+
getLength(data) {
|
|
238
|
+
const isProd = config.get('env') === 'production';
|
|
239
|
+
const length = isProd ? data.length : encode(data).length;
|
|
240
|
+
const units = isProd ? 'characters' : 'tokens';
|
|
241
|
+
return {length, units};
|
|
242
|
+
}
|
|
243
|
+
|
|
241
244
|
logRequestData(data, responseData, prompt) {
|
|
242
|
-
this.logAIRequestFinished();
|
|
243
245
|
const modelInput = data.prompt || (data.messages && data.messages[0].content) || (data.length > 0 && data[0].Text) || null;
|
|
244
246
|
|
|
245
247
|
if (modelInput) {
|
|
246
|
-
const
|
|
247
|
-
logger.info(`[request sent containing ${
|
|
248
|
+
const { length, units } = this.getLength(modelInput);
|
|
249
|
+
logger.info(`[request sent containing ${length} ${units}]`);
|
|
248
250
|
logger.debug(`${modelInput}`);
|
|
249
251
|
}
|
|
250
252
|
|
|
251
|
-
const responseText = JSON.stringify(
|
|
252
|
-
const
|
|
253
|
-
logger.info(`[response received containing ${
|
|
253
|
+
const responseText = JSON.stringify(responseData);
|
|
254
|
+
const { length, units } = this.getLength(responseText);
|
|
255
|
+
logger.info(`[response received containing ${length} ${units}]`);
|
|
254
256
|
logger.debug(`${responseText}`);
|
|
255
257
|
|
|
256
258
|
prompt && prompt.debugInfo && (prompt.debugInfo += `\n${JSON.stringify(data)}`);
|
|
@@ -267,15 +269,18 @@ class ModelPlugin {
|
|
|
267
269
|
cortexRequest.cache = config.get('enableCache') && (pathway.enableCache || pathway.temperature == 0);
|
|
268
270
|
this.logRequestStart();
|
|
269
271
|
|
|
270
|
-
const responseData = await executeRequest(cortexRequest);
|
|
272
|
+
const { data: responseData, duration: requestDuration } = await executeRequest(cortexRequest);
|
|
271
273
|
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
throw
|
|
274
|
+
const errorData = Array.isArray(responseData) ? responseData[0] : responseData;
|
|
275
|
+
if (errorData && errorData.error) {
|
|
276
|
+
throw new Error(`Server error: ${JSON.stringify(errorData.error)}`);
|
|
275
277
|
}
|
|
276
278
|
|
|
277
|
-
this.
|
|
278
|
-
|
|
279
|
+
this.logAIRequestFinished(requestDuration);
|
|
280
|
+
const parsedData = this.parseResponse(responseData);
|
|
281
|
+
this.logRequestData(data, parsedData, prompt);
|
|
282
|
+
|
|
283
|
+
return parsedData;
|
|
279
284
|
} catch (error) {
|
|
280
285
|
// Log the error and continue
|
|
281
286
|
logger.error(error.message || error);
|
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
// OpenAIChatPlugin.js
|
|
2
2
|
import ModelPlugin from './modelPlugin.js';
|
|
3
|
-
import { encode } from 'gpt-3-encoder';
|
|
4
3
|
import logger from '../../lib/logger.js';
|
|
5
4
|
|
|
6
5
|
class OpenAIChatPlugin extends ModelPlugin {
|
|
@@ -105,28 +104,28 @@ class OpenAIChatPlugin extends ModelPlugin {
|
|
|
105
104
|
|
|
106
105
|
// Override the logging function to display the messages and responses
|
|
107
106
|
logRequestData(data, responseData, prompt) {
|
|
108
|
-
this.logAIRequestFinished();
|
|
109
|
-
|
|
110
107
|
const { stream, messages } = data;
|
|
111
108
|
if (messages && messages.length > 1) {
|
|
112
109
|
logger.info(`[chat request sent containing ${messages.length} messages]`);
|
|
113
|
-
let
|
|
110
|
+
let totalLength = 0;
|
|
111
|
+
let totalUnits;
|
|
114
112
|
messages.forEach((message, index) => {
|
|
115
113
|
//message.content string or array
|
|
116
114
|
const content = Array.isArray(message.content) ? message.content.map(item => JSON.stringify(item)).join(', ') : message.content;
|
|
117
115
|
const words = content.split(" ");
|
|
118
|
-
const
|
|
116
|
+
const { length, units } = this.getLength(content);
|
|
119
117
|
const preview = words.length < 41 ? content : words.slice(0, 20).join(" ") + " ... " + words.slice(-20).join(" ");
|
|
120
118
|
|
|
121
|
-
logger.debug(`
|
|
122
|
-
|
|
119
|
+
logger.debug(`message ${index + 1}: role: ${message.role}, ${units}: ${length}, content: "${preview}"`);
|
|
120
|
+
totalLength += length;
|
|
121
|
+
totalUnits = units;
|
|
123
122
|
});
|
|
124
|
-
logger.info(`[chat request contained ${
|
|
123
|
+
logger.info(`[chat request contained ${totalLength} ${totalUnits}]`);
|
|
125
124
|
} else {
|
|
126
125
|
const message = messages[0];
|
|
127
126
|
const content = Array.isArray(message.content) ? message.content.map(item => JSON.stringify(item)).join(', ') : message.content;
|
|
128
|
-
const
|
|
129
|
-
logger.info(`[request sent containing ${
|
|
127
|
+
const { length, units } = this.getLength(content);
|
|
128
|
+
logger.info(`[request sent containing ${length} ${units}]`);
|
|
130
129
|
logger.debug(`${content}`);
|
|
131
130
|
}
|
|
132
131
|
|
|
@@ -134,8 +133,8 @@ class OpenAIChatPlugin extends ModelPlugin {
|
|
|
134
133
|
logger.info(`[response received as an SSE stream]`);
|
|
135
134
|
} else {
|
|
136
135
|
const responseText = this.parseResponse(responseData);
|
|
137
|
-
const
|
|
138
|
-
logger.info(`[response received containing ${
|
|
136
|
+
const { length, units } = this.getLength(responseText);
|
|
137
|
+
logger.info(`[response received containing ${length} ${units}]`);
|
|
139
138
|
logger.debug(`${responseText}`);
|
|
140
139
|
}
|
|
141
140
|
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
// OpenAICompletionPlugin.js
|
|
2
2
|
|
|
3
3
|
import ModelPlugin from './modelPlugin.js';
|
|
4
|
-
import { encode } from '
|
|
4
|
+
import { encode } from '../../lib/encodeCache.js';
|
|
5
5
|
import logger from '../../lib/logger.js';
|
|
6
6
|
|
|
7
7
|
// Helper function to truncate the prompt if it is too long
|
|
@@ -104,21 +104,20 @@ class OpenAICompletionPlugin extends ModelPlugin {
|
|
|
104
104
|
|
|
105
105
|
// Override the logging function to log the prompt and response
|
|
106
106
|
logRequestData(data, responseData, prompt) {
|
|
107
|
-
this.logAIRequestFinished();
|
|
108
|
-
|
|
109
107
|
const stream = data.stream;
|
|
110
108
|
const modelInput = data.prompt;
|
|
111
109
|
|
|
112
|
-
const
|
|
113
|
-
|
|
110
|
+
const { length, units } = this.getLength(modelInput);
|
|
111
|
+
|
|
112
|
+
logger.info(`[request sent containing ${length} ${units}]`);
|
|
114
113
|
logger.debug(`${modelInput}`);
|
|
115
114
|
|
|
116
115
|
if (stream) {
|
|
117
116
|
logger.info(`[response received as an SSE stream]`);
|
|
118
117
|
} else {
|
|
119
118
|
const responseText = this.parseResponse(responseData);
|
|
120
|
-
const
|
|
121
|
-
logger.info(`[response received containing ${
|
|
119
|
+
const { length, units } = this.getLength(responseText);
|
|
120
|
+
logger.info(`[response received containing ${length} ${units}]`);
|
|
122
121
|
logger.debug(`${responseText}`);
|
|
123
122
|
}
|
|
124
123
|
|
|
@@ -7,11 +7,13 @@ class OpenAiEmbeddingsPlugin extends ModelPlugin {
|
|
|
7
7
|
}
|
|
8
8
|
|
|
9
9
|
getRequestParameters(text, parameters, prompt) {
|
|
10
|
-
const combinedParameters = { ...this.promptParameters, ...parameters };
|
|
10
|
+
const combinedParameters = { ...this.promptParameters, ...this.model.params, ...parameters };
|
|
11
11
|
const { modelPromptText } = this.getCompiledPrompt(text, combinedParameters, prompt);
|
|
12
|
+
const { model } = combinedParameters;
|
|
12
13
|
const requestParameters = {
|
|
13
14
|
data: {
|
|
14
15
|
input: combinedParameters?.input?.length ? combinedParameters.input : modelPromptText || text,
|
|
16
|
+
model
|
|
15
17
|
}
|
|
16
18
|
};
|
|
17
19
|
return requestParameters;
|
|
@@ -201,6 +201,9 @@ class OpenAIWhisperPlugin extends ModelPlugin {
|
|
|
201
201
|
const processTS = async (uri) => {
|
|
202
202
|
try {
|
|
203
203
|
const tsparams = { fileurl:uri };
|
|
204
|
+
|
|
205
|
+
const { language } = parameters;
|
|
206
|
+
if(language) tsparams.language = language;
|
|
204
207
|
if(highlightWords) tsparams.highlight_words = highlightWords ? "True" : "False";
|
|
205
208
|
if(maxLineWidth) tsparams.max_line_width = maxLineWidth;
|
|
206
209
|
if(maxLineCount) tsparams.max_line_count = maxLineCount;
|