@aj-archipelago/cortex 1.1.4 → 1.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,7 +2,7 @@ import { ModelExecutor } from './modelExecutor.js';
  import { modelEndpoints } from '../lib/requestExecutor.js';
  // eslint-disable-next-line import/no-extraneous-dependencies
  import { v4 as uuidv4 } from 'uuid';
- import { encode } from 'gpt-3-encoder';
+ import { encode } from '../lib/encodeCache.js';
  import { getFirstNToken, getLastNToken, getSemanticChunks } from './chunker.js';
  import { PathwayResponseParser } from './pathwayResponseParser.js';
  import { Prompt } from './prompt.js';
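Note: several files in this release swap the direct gpt-3-encoder import for a shared ../lib/encodeCache.js module. That module is not shown in this diff; a minimal sketch of what such a wrapper could look like, assuming it simply memoizes gpt-3-encoder's encode output (the cache size and eviction policy here are illustrative, not the released implementation):

// encodeCache.js - hypothetical sketch, not part of this diff
import { encode as rawEncode } from 'gpt-3-encoder';

// Memoize token arrays so repeated length checks of the same string are not re-encoded
const cache = new Map();
const MAX_CACHE_ENTRIES = 1000;

const encode = (text) => {
    if (cache.has(text)) {
        return cache.get(text);
    }
    const tokens = rawEncode(text);
    if (cache.size >= MAX_CACHE_ENTRIES) {
        // drop the oldest entry to bound memory use
        cache.delete(cache.keys().next().value);
    }
    cache.set(text, tokens);
    return tokens;
};

export { encode };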
@@ -98,8 +98,9 @@ class PathwayResolver {
  const incomingMessage = responseData;

  let messageBuffer = '';
+ let streamEnded = false;

- const processData = (data) => {
+ const processStreamSSE = (data) => {
  try {
  //logger.info(`\n\nReceived stream data for requestId ${this.requestId}: ${data.toString()}`);
  let events = data.toString().split('\n');
@@ -132,18 +133,35 @@ class PathwayResolver {
  return;
  }

+ // error can be in different places in the message
  const streamError = parsedMessage?.error || parsedMessage?.choices?.[0]?.delta?.content?.error || parsedMessage?.choices?.[0]?.text?.error;
  if (streamError) {
  streamErrorOccurred = true;
  logger.error(`Stream error: ${streamError.message}`);
- incomingMessage.off('data', processData); // Stop listening to 'data'
+ incomingMessage.off('data', processStreamSSE);
  return;
  }
+
+ // finish reason can be in different places in the message
+ const finishReason = parsedMessage?.choices?.[0]?.finish_reason || parsedMessage?.candidates?.[0]?.finishReason;
+ if (finishReason?.toLowerCase() === 'stop') {
+ requestProgress.progress = 1;
+ } else {
+ if (finishReason?.toLowerCase() === 'safety') {
+ const safetyRatings = JSON.stringify(parsedMessage?.candidates?.[0]?.safetyRatings) || '';
+ logger.warn(`Request ${this.requestId} was blocked by the safety filter. ${safetyRatings}`);
+ requestProgress.data = `\n\nResponse blocked by safety filter: ${safetyRatings}`;
+ requestProgress.progress = 1;
+ }
+ }
  }

  try {
- //logger.info(`Publishing stream message to requestId ${this.requestId}: ${message}`);
- publishRequestProgress(requestProgress);
+ if (!streamEnded) {
+ //logger.info(`Publishing stream message to requestId ${this.requestId}: ${message}`);
+ publishRequestProgress(requestProgress);
+ streamEnded = requestProgress.progress === 1;
+ }
  } catch (error) {
  logger.error(`Could not publish the stream message: "${messageBuffer}", ${error}`);
  }
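The new finish-reason handling above probes two shapes because OpenAI-style and Gemini-style stream events report completion in different places; abbreviated examples of the payloads those optional chains are matching:

// OpenAI-style SSE event: finish reason lives on choices[0]
// data: {"choices":[{"delta":{"content":""},"finish_reason":"stop"}]}

// Gemini-style SSE event: finish reason lives on candidates[0]
// data: {"candidates":[{"content":{"parts":[{"text":""}]},"finishReason":"STOP","safetyRatings":[]}]}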
@@ -156,7 +174,7 @@ class PathwayResolver {

  if (incomingMessage) {
  await new Promise((resolve, reject) => {
- incomingMessage.on('data', processData);
+ incomingMessage.on('data', processStreamSSE);
  incomingMessage.on('end', resolve);
  incomingMessage.on('error', reject);
  });
@@ -321,7 +339,7 @@ class PathwayResolver {
  const data = await Promise.all(chunks.map(chunk =>
  this.applyPromptsSerially(chunk, parameters)));
  // Join the chunks with newlines
- return data.join("\n\n");
+ return data.join(this.pathway.joinChunksWith || "\n\n");
  } else {
  // Apply prompts one by one, serially, across all chunks
  // This is the default processing mode and will make previousResult available at the object level
@@ -355,7 +373,7 @@ class PathwayResolver {
  if (result.length === 1) {
  result = result[0];
  } else if (!currentParameters.stream) {
- result = result.join("\n\n");
+ result = result.join(this.pathway.joinChunksWith || "\n\n");
  }
  }

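Both join sites above now honor an optional joinChunksWith property on the pathway, falling back to the previous "\n\n" separator. A hypothetical pathway definition opting into a custom separator (only the joinChunksWith property comes from this diff; the rest of the shape is illustrative):

// summarize.js - hypothetical pathway definition
export default {
    prompt: `Summarize the following text:\n\n{{text}}`,
    // chunk results are joined with this separator instead of the default "\n\n"
    joinChunksWith: '\n---\n',
};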
@@ -6,6 +6,7 @@ import path from 'path';
  import { config } from '../../config.js';
  import { axios } from '../../lib/requestExecutor.js';
  import logger from '../../lib/logger.js';
+ import { getSemanticChunks } from '../chunker.js';

  const API_URL = config.get('whisperMediaApiUrl');

@@ -37,7 +38,8 @@ class AzureCognitivePlugin extends ModelPlugin {
  const data = {};

  if (mode == 'delete') {
- const searchUrl = this.ensureMode(this.requestUrl(text), 'search');
+ let searchUrl = this.ensureMode(this.requestUrl(text), 'search');
+ searchUrl = this.ensureIndex(searchUrl, indexName);
  let searchQuery = `owner:${savedContextId}`;

  if (docId) {
@@ -155,6 +157,7 @@ class AzureCognitivePlugin extends ModelPlugin {
  const headers = cortexRequest.headers;

  const { file } = parameters;
+ const fileData = { value: [] };
  if(file){
  let url = file;
  //if not txt file, use helper app to convert to txt
@@ -177,11 +180,13 @@ class AzureCognitivePlugin extends ModelPlugin {
  throw Error(`No data can be extracted out of file!`);
  }

- return await callPathway('cognitive_insert', {...parameters, file:null, text:data });
- }
+ const chunkTokenLength = this.promptParameters.inputChunkSize || 1000;
+ const chunks = getSemanticChunks(data, chunkTokenLength);

- if (mode === 'index' && (!text || !text.trim()) ){
- return; // nothing to index
+ for (const text of chunks) {
+ const { data: singleData } = await this.getRequestParameters(text, parameters, prompt, mode, indexName, savedContextId, cortexRequest)
+ fileData.value.push(singleData.value[0]);
+ }
  }

  const { data, params } = await this.getRequestParameters(text, parameters, prompt, mode, indexName, savedContextId, cortexRequest);
@@ -195,7 +200,7 @@ class AzureCognitivePlugin extends ModelPlugin {

  // execute the request
  cortexRequest.url = url;
- cortexRequest.data = data;
+ cortexRequest.data = (mode === 'index' && fileData.value.length>0) ? fileData : data;
  cortexRequest.params = params;
  cortexRequest.headers = headers;
  const result = await this.executeRequest(cortexRequest);
@@ -45,8 +45,6 @@ class AzureTranslatePlugin extends ModelPlugin {

  // Override the logging function to display the request and response
  logRequestData(data, responseData, prompt) {
- this.logAIRequestFinished();
-
  const modelInput = data[0].Text;

  logger.debug(`${modelInput}`);
@@ -0,0 +1,192 @@
+ // geminiChatPlugin.js
+ import ModelPlugin from './modelPlugin.js';
+ import logger from '../../lib/logger.js';
+
+ const mergeResults = (data) => {
+ let output = '';
+ let safetyRatings = [];
+
+ for (let chunk of data) {
+ const { candidates } = chunk;
+ if (!candidates || !candidates.length) {
+ continue;
+ }
+
+ // If it was blocked, return the blocked message
+ if (candidates[0].safetyRatings.some(rating => rating.blocked)) {
+ safetyRatings = candidates[0].safetyRatings;
+ return {mergedResult: 'The response was blocked because the input or response potentially violates policies. Try rephrasing the prompt or adjusting the parameter settings.', safetyRatings: safetyRatings};
+ }
+
+ // Append the content of the first part of the first candidate to the output
+ const message = candidates[0].content.parts[0].text;
+ output += message;
+ }
+
+ return {mergedResult: output || null, safetyRatings: safetyRatings};
+ };
+
+ class GeminiChatPlugin extends ModelPlugin {
+ constructor(pathway, model) {
+ super(pathway, model);
+ }
+
+ // This code converts either OpenAI or PaLM messages to the Gemini messages format
+ convertMessagesToGemini(messages) {
+ let modifiedMessages = [];
+ let lastAuthor = '';
+
+ // Check if the messages are already in the Gemini format
+ if (messages[0] && Object.prototype.hasOwnProperty.call(messages[0], 'parts')) {
+ modifiedMessages = messages;
+ } else {
+ messages.forEach(message => {
+ const { role, author, content } = message;
+
+ // Right now Gemini API has no direct translation for system messages,
+ // but they work fine as parts of user messages
+ if (role === 'system') {
+ modifiedMessages.push({
+ role: 'user',
+ parts: [{ text: content }],
+ });
+ lastAuthor = 'user';
+ return;
+ }
+
+ // Aggregate consecutive author messages, appending the content
+ if ((role === lastAuthor || author === lastAuthor) && modifiedMessages.length > 0) {
+ modifiedMessages[modifiedMessages.length - 1].parts.push({ text: content });
+ }
+
+ // Push messages that are role: 'user' or 'assistant', changing 'assistant' to 'model'
+ else if (role === 'user' || role === 'assistant' || author) {
+ modifiedMessages.push({
+ role: author || role,
+ parts: [{ text: content }],
+ });
+ lastAuthor = author || role;
+ }
+ });
+ }
+
+ // Gemini requires an even number of messages
+ if (modifiedMessages.length % 2 === 0) {
+ modifiedMessages = modifiedMessages.slice(1);
+ }
+
+ return {
+ modifiedMessages,
+ };
+ }
+
+ // Set up parameters specific to the Gemini API
+ getRequestParameters(text, parameters, prompt, cortexRequest) {
+ const { modelPromptText, modelPromptMessages, tokenLength } = this.getCompiledPrompt(text, parameters, prompt);
+ const { geminiSafetySettings, geminiTools, max_tokens } = cortexRequest ? cortexRequest.pathway : {};
+
+ // Define the model's max token length
+ const modelTargetTokenLength = this.getModelMaxTokenLength() * this.getPromptTokenRatio();
+
+ const geminiMessages = this.convertMessagesToGemini(modelPromptMessages || [{ "role": "user", "parts": [{ "text": modelPromptText }]}]);
+
+ let requestMessages = geminiMessages.modifiedMessages;
+
+ // Check if the token length exceeds the model's max token length
+ if (tokenLength > modelTargetTokenLength) {
+ // Remove older messages until the token length is within the model's limit
+ requestMessages = this.truncateMessagesToTargetLength(requestMessages, modelTargetTokenLength);
+ }
+
+ if (max_tokens < 0) {
+ throw new Error(`Prompt is too long to successfully call the model at ${tokenLength} tokens. The model will not be called.`);
+ }
+
+ const requestParameters = {
+ contents: requestMessages,
+ generationConfig: {
+ temperature: this.temperature || 0.7,
+ maxOutputTokens: max_tokens || this.getModelMaxReturnTokens(),
+ topP: parameters.topP || 0.95,
+ topK: parameters.topK || 40,
+ },
+ safety_settings: geminiSafetySettings || undefined,
+ tools: geminiTools || undefined
+ };
+
+ return requestParameters;
+ }
+
+ // Parse the response from the new Chat API
+ parseResponse(data) {
+ // If data is not an array, return it directly
+ if (!Array.isArray(data)) {
+ return data;
+ }
+
+ return mergeResults(data).mergedResult || null;
+
+ }
+
+ // Execute the request to the new Chat API
+ async execute(text, parameters, prompt, cortexRequest) {
+ const requestParameters = this.getRequestParameters(text, parameters, prompt, cortexRequest);
+ const { stream } = parameters;
+
+ cortexRequest.data = { ...(cortexRequest.data || {}), ...requestParameters };
+ cortexRequest.params = {}; // query params
+ cortexRequest.stream = stream;
+ cortexRequest.url = cortexRequest.stream ? `${cortexRequest.url}?alt=sse` : cortexRequest.url;
+
+ const gcpAuthTokenHelper = this.config.get('gcpAuthTokenHelper');
+ const authToken = await gcpAuthTokenHelper.getAccessToken();
+ cortexRequest.headers.Authorization = `Bearer ${authToken}`;
+
+ return this.executeRequest(cortexRequest);
+ }
+
+ // Override the logging function to display the messages and responses
+ logRequestData(data, responseData, prompt) {
+ const messages = data && data.contents;
+
+ if (messages && messages.length > 1) {
+ logger.info(`[chat request contains ${messages.length} messages]`);
+ messages.forEach((message, index) => {
+ const messageContent = message.parts.reduce((acc, part) => {
+ if (part.text) {
+ return acc + part.text;
+ }
+ return acc;
+ } , '');
+ const words = messageContent.split(" ");
+ const { length, units } = this.getLength(messageContent);
+ const preview = words.length < 41 ? messageContent : words.slice(0, 20).join(" ") + " ... " + words.slice(-20).join(" ");
+
+ logger.debug(`message ${index + 1}: role: ${message.role}, ${units}: ${length}, content: "${preview}"`);
+ });
+ } else if (messages && messages.length === 1) {
+ logger.debug(`${messages[0].parts[0].text}`);
+ }
+
+ // check if responseData is an array
+ if (!Array.isArray(responseData)) {
+ logger.info(`[response received as an SSE stream]`);
+ } else {
+ const { mergedResult, safetyRatings } = mergeResults(responseData);
+ if (safetyRatings?.length) {
+ logger.warn(`!!! response was blocked because the input or response potentially violates policies`);
+ logger.debug(`Safety Ratings: ${JSON.stringify(safetyRatings, null, 2)}`);
+ }
+ const { length, units } = this.getLength(mergedResult);
+ logger.info(`[response received containing ${length} ${units}]`);
+ logger.debug(`${mergedResult}`);
+ }
+
+ if (prompt && prompt.debugInfo) {
+ prompt.debugInfo += `\n${JSON.stringify(data)}`;
+ }
+ }
+
+ }
+
+ export default GeminiChatPlugin;
@@ -0,0 +1,102 @@
+ import GeminiChatPlugin from './geminiChatPlugin.js';
+ import mime from 'mime-types';
+ import logger from '../../lib/logger.js';
+
+ class GeminiVisionPlugin extends GeminiChatPlugin {
+
+ // Override the convertMessagesToGemini method to handle multimodal vision messages
+ // This function can operate on messages in Gemini native format or in OpenAI's format
+ // It will convert the messages to the Gemini format
+ convertMessagesToGemini(messages) {
+ let modifiedMessages = [];
+ let lastAuthor = '';
+
+ // Check if the messages are already in the Gemini format
+ if (messages[0] && Object.prototype.hasOwnProperty.call(messages[0], 'parts')) {
+ modifiedMessages = messages;
+ } else {
+ messages.forEach(message => {
+ const { role, author, content } = message;
+
+ // Right now Gemini API has no direct translation for system messages,
+ // so we insert them as parts of the first user: role message
+ if (role === 'system') {
+ modifiedMessages.push({
+ role: 'user',
+ parts: [{ text: content }],
+ });
+ lastAuthor = 'user';
+ return;
+ }
+
+ // Convert content to Gemini format, trying to maintain compatibility
+ const convertPartToGemini = (partString) => {
+ try {
+ const part = JSON.parse(partString);
+ if (typeof part === 'string') {
+ return { text: part };
+ } else if (part.type === 'text') {
+ return { text: part.text };
+ } else if (part.type === 'image_url') {
+ if (part.image_url.url.startsWith('gs://')) {
+ return {
+ fileData: {
+ mimeType: mime.lookup(part.image_url.url),
+ fileUri: part.image_url.url
+ }
+ };
+ } else {
+ return {
+ inlineData: {
+ mimeType: 'image/jpeg', // fixed for now as there's no MIME type in the request
+ data: part.image_url.url.split('base64,')[1]
+ }
+ };
+ }
+ }
+ } catch (e) {
+ logger.warn(`Unable to parse part - including as string: ${partString}`);
+ }
+ return { text: partString };
+ };
+
+ const addPartToMessages = (geminiPart) => {
+ // Gemini requires alternating user: and model: messages
+ if ((role === lastAuthor || author === lastAuthor) && modifiedMessages.length > 0) {
+ modifiedMessages[modifiedMessages.length - 1].parts.push(geminiPart);
+ }
+ // Gemini only supports user: and model: roles
+ else if (role === 'user' || role === 'assistant' || author) {
+ modifiedMessages.push({
+ role: author || role,
+ parts: [geminiPart],
+ });
+ lastAuthor = author || role;
+ }
+ };
+
+ // Content can either be in the "vision" format (array) or in the "chat" format (string)
+ if (Array.isArray(content)) {
+ content.forEach(part => {
+ addPartToMessages(convertPartToGemini(part));
+ });
+ }
+ else {
+ addPartToMessages(convertPartToGemini(content));
+ }
+ });
+ }
+
+ // Gemini requires an even number of messages
+ if (modifiedMessages.length % 2 === 0) {
+ modifiedMessages = modifiedMessages.slice(1);
+ }
+
+ return {
+ modifiedMessages,
+ };
+ }
+
+ }
+
+ export default GeminiVisionPlugin;
@@ -1,7 +1,7 @@
  // localModelPlugin.js
  import ModelPlugin from './modelPlugin.js';
  import { execFileSync } from 'child_process';
- import { encode } from 'gpt-3-encoder';
+ import { encode } from '../../lib/encodeCache.js';
  import logger from '../../lib/logger.js';

  class LocalModelPlugin extends ModelPlugin {
@@ -1,7 +1,7 @@
  // ModelPlugin.js
  import HandleBars from '../../lib/handleBars.js';
  import { executeRequest } from '../../lib/requestExecutor.js';
- import { encode } from 'gpt-3-encoder';
+ import { encode } from '../../lib/encodeCache.js';
  import { getFirstNToken } from '../chunker.js';
  import logger, { obscureUrlParams } from '../../lib/logger.js';
  import { config } from '../../config.js';
@@ -32,7 +32,6 @@ class ModelPlugin {
  }

  this.requestCount = 0;
- this.lastRequestStartTime = new Date();
  }

  truncateMessagesToTargetLength(messages, targetTokenLength) {
@@ -221,7 +220,6 @@ class ModelPlugin {
  // Default simple logging
  logRequestStart() {
  this.requestCount++;
- this.lastRequestStartTime = new Date();
  const logMessage = `>>> [${this.requestId}: ${this.pathwayName}.${this.requestCount}] request`;
  const header = '>'.repeat(logMessage.length);
  logger.info(`${header}`);
@@ -229,28 +227,32 @@
  logger.info(`>>> Making API request to ${obscureUrlParams(this.url)}`);
  }

- logAIRequestFinished() {
- const currentTime = new Date();
- const timeElapsed = (currentTime - this.lastRequestStartTime) / 1000;
- const logMessage = `<<< [${this.requestId}: ${this.pathwayName}] response - complete in ${timeElapsed}s - data:`;
+ logAIRequestFinished(requestDuration) {
+ const logMessage = `<<< [${this.requestId}: ${this.pathwayName}] response - complete in ${requestDuration}ms - data:`;
  const header = '<'.repeat(logMessage.length);
  logger.info(`${header}`);
  logger.info(`${logMessage}`);
  }

+ getLength(data) {
+ const isProd = config.get('env') === 'production';
+ const length = isProd ? data.length : encode(data).length;
+ const units = isProd ? 'characters' : 'tokens';
+ return {length, units};
+ }
+
  logRequestData(data, responseData, prompt) {
- this.logAIRequestFinished();
  const modelInput = data.prompt || (data.messages && data.messages[0].content) || (data.length > 0 && data[0].Text) || null;

  if (modelInput) {
- const inputTokens = encode(modelInput).length;
- logger.info(`[request sent containing ${inputTokens} tokens]`);
+ const { length, units } = this.getLength(modelInput);
+ logger.info(`[request sent containing ${length} ${units}]`);
  logger.debug(`${modelInput}`);
  }

- const responseText = JSON.stringify(this.parseResponse(responseData));
- const responseTokens = encode(responseText).length;
- logger.info(`[response received containing ${responseTokens} tokens]`);
+ const responseText = JSON.stringify(responseData);
+ const { length, units } = this.getLength(responseText);
+ logger.info(`[response received containing ${length} ${units}]`);
  logger.debug(`${responseText}`);

  prompt && prompt.debugInfo && (prompt.debugInfo += `\n${JSON.stringify(data)}`);
@@ -267,15 +269,18 @@ class ModelPlugin {
  cortexRequest.cache = config.get('enableCache') && (pathway.enableCache || pathway.temperature == 0);
  this.logRequestStart();

- const responseData = await executeRequest(cortexRequest);
+ const { data: responseData, duration: requestDuration } = await executeRequest(cortexRequest);

- if (responseData.error) {
- logger.error(`An error was returned from the server: ${JSON.stringify(responseData.error)}`);
- throw responseData;
+ const errorData = Array.isArray(responseData) ? responseData[0] : responseData;
+ if (errorData && errorData.error) {
+ throw new Error(`Server error: ${JSON.stringify(errorData.error)}`);
  }

- this.logRequestData(data, responseData, prompt);
- return this.parseResponse(responseData);
+ this.logAIRequestFinished(requestDuration);
+ const parsedData = this.parseResponse(responseData);
+ this.logRequestData(data, parsedData, prompt);
+
+ return parsedData;
  } catch (error) {
  // Log the error and continue
  logger.error(error.message || error);
@@ -1,6 +1,5 @@
  // OpenAIChatPlugin.js
  import ModelPlugin from './modelPlugin.js';
- import { encode } from 'gpt-3-encoder';
  import logger from '../../lib/logger.js';

  class OpenAIChatPlugin extends ModelPlugin {
@@ -105,28 +104,28 @@ class OpenAIChatPlugin extends ModelPlugin {

  // Override the logging function to display the messages and responses
  logRequestData(data, responseData, prompt) {
- this.logAIRequestFinished();
-
  const { stream, messages } = data;
  if (messages && messages.length > 1) {
  logger.info(`[chat request sent containing ${messages.length} messages]`);
- let totalTokens = 0;
+ let totalLength = 0;
+ let totalUnits;
  messages.forEach((message, index) => {
  //message.content string or array
  const content = Array.isArray(message.content) ? message.content.map(item => JSON.stringify(item)).join(', ') : message.content;
  const words = content.split(" ");
- const tokenCount = encode(content).length;
+ const { length, units } = this.getLength(content);
  const preview = words.length < 41 ? content : words.slice(0, 20).join(" ") + " ... " + words.slice(-20).join(" ");

- logger.debug(`Message ${index + 1}: Role: ${message.role}, Tokens: ${tokenCount}, Content: "${preview}"`);
- totalTokens += tokenCount;
+ logger.debug(`message ${index + 1}: role: ${message.role}, ${units}: ${length}, content: "${preview}"`);
+ totalLength += length;
+ totalUnits = units;
  });
- logger.info(`[chat request contained ${totalTokens} tokens]`);
+ logger.info(`[chat request contained ${totalLength} ${totalUnits}]`);
  } else {
  const message = messages[0];
  const content = Array.isArray(message.content) ? message.content.map(item => JSON.stringify(item)).join(', ') : message.content;
- const tokenCount = encode(content).length;
- logger.info(`[request sent containing ${tokenCount} tokens]`);
+ const { length, units } = this.getLength(content);
+ logger.info(`[request sent containing ${length} ${units}]`);
  logger.debug(`${content}`);
  }

@@ -134,8 +133,8 @@ class OpenAIChatPlugin extends ModelPlugin {
  logger.info(`[response received as an SSE stream]`);
  } else {
  const responseText = this.parseResponse(responseData);
- const responseTokens = encode(responseText).length;
- logger.info(`[response received containing ${responseTokens} tokens]`);
+ const { length, units } = this.getLength(responseText);
+ logger.info(`[response received containing ${length} ${units}]`);
  logger.debug(`${responseText}`);
  }

@@ -1,7 +1,7 @@
  // OpenAICompletionPlugin.js

  import ModelPlugin from './modelPlugin.js';
- import { encode } from 'gpt-3-encoder';
+ import { encode } from '../../lib/encodeCache.js';
  import logger from '../../lib/logger.js';

  // Helper function to truncate the prompt if it is too long
@@ -104,21 +104,20 @@ class OpenAICompletionPlugin extends ModelPlugin {

  // Override the logging function to log the prompt and response
  logRequestData(data, responseData, prompt) {
- this.logAIRequestFinished();
-
  const stream = data.stream;
  const modelInput = data.prompt;

- const modelInputTokens = encode(modelInput).length;
- logger.info(`[request sent containing ${modelInputTokens} tokens]`);
+ const { length, units } = this.getLength(modelInput);
+
+ logger.info(`[request sent containing ${length} ${units}]`);
  logger.debug(`${modelInput}`);

  if (stream) {
  logger.info(`[response received as an SSE stream]`);
  } else {
  const responseText = this.parseResponse(responseData);
- const responseTokens = encode(responseText).length;
- logger.info(`[response received containing ${responseTokens} tokens]`);
+ const { length, units } = this.getLength(responseText);
+ logger.info(`[response received containing ${length} ${units}]`);
  logger.debug(`${responseText}`);
  }

@@ -7,11 +7,13 @@ class OpenAiEmbeddingsPlugin extends ModelPlugin {
  }

  getRequestParameters(text, parameters, prompt) {
- const combinedParameters = { ...this.promptParameters, ...parameters };
+ const combinedParameters = { ...this.promptParameters, ...this.model.params, ...parameters };
  const { modelPromptText } = this.getCompiledPrompt(text, combinedParameters, prompt);
+ const { model } = combinedParameters;
  const requestParameters = {
  data: {
  input: combinedParameters?.input?.length ? combinedParameters.input : modelPromptText || text,
+ model
  }
  };
  return requestParameters;
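The embeddings plugin now folds this.model.params into the combined parameters and forwards a model field in the request body. Assuming the model definition in the Cortex config can carry a params block (inferred from this.model.params, not confirmed by this diff), a hypothetical entry might look like:

// hypothetical model entry in the Cortex models config - key, type string, and model name are illustrative only
models: {
    'oai-embeddings': {
        type: 'OPENAI-EMBEDDINGS',
        url: 'https://api.openai.com/v1/embeddings',
        params: {
            model: 'text-embedding-ada-002'
        }
    }
}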
@@ -201,6 +201,9 @@ class OpenAIWhisperPlugin extends ModelPlugin {
  const processTS = async (uri) => {
  try {
  const tsparams = { fileurl:uri };
+
+ const { language } = parameters;
+ if(language) tsparams.language = language;
  if(highlightWords) tsparams.highlight_words = highlightWords ? "True" : "False";
  if(maxLineWidth) tsparams.max_line_width = maxLineWidth;
  if(maxLineCount) tsparams.max_line_count = maxLineCount;