@aj-archipelago/cortex 1.1.3 → 1.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. package/.eslintignore +3 -3
  2. package/README.md +17 -4
  3. package/config.js +45 -9
  4. package/{helper_apps/CortexFileHandler → helper-apps/cortex-file-handler}/Dockerfile +1 -1
  5. package/{helper_apps/CortexFileHandler → helper-apps/cortex-file-handler}/fileChunker.js +4 -1
  6. package/{helper_apps/CortexFileHandler → helper-apps/cortex-file-handler}/package-lock.json +25 -216
  7. package/{helper_apps/CortexFileHandler → helper-apps/cortex-file-handler}/package.json +2 -2
  8. package/helper-apps/cortex-whisper-wrapper/.dockerignore +27 -0
  9. package/helper-apps/cortex-whisper-wrapper/Dockerfile +32 -0
  10. package/helper-apps/cortex-whisper-wrapper/app.py +104 -0
  11. package/helper-apps/cortex-whisper-wrapper/docker-compose.debug.yml +12 -0
  12. package/helper-apps/cortex-whisper-wrapper/docker-compose.yml +10 -0
  13. package/helper-apps/cortex-whisper-wrapper/models/.gitkeep +0 -0
  14. package/helper-apps/cortex-whisper-wrapper/requirements.txt +5 -0
  15. package/lib/cortexRequest.js +117 -0
  16. package/lib/pathwayTools.js +2 -1
  17. package/lib/redisSubscription.js +2 -2
  18. package/lib/requestExecutor.js +360 -0
  19. package/lib/requestMonitor.js +131 -28
  20. package/package.json +2 -1
  21. package/pathways/summary.js +3 -3
  22. package/server/graphql.js +6 -6
  23. package/server/{pathwayPrompter.js → modelExecutor.js} +24 -21
  24. package/server/pathwayResolver.js +22 -17
  25. package/server/plugins/azureCognitivePlugin.js +25 -20
  26. package/server/plugins/azureTranslatePlugin.js +6 -10
  27. package/server/plugins/cohereGeneratePlugin.js +5 -12
  28. package/server/plugins/cohereSummarizePlugin.js +5 -12
  29. package/server/plugins/localModelPlugin.js +3 -3
  30. package/server/plugins/modelPlugin.js +18 -12
  31. package/server/plugins/openAiChatExtensionPlugin.js +5 -5
  32. package/server/plugins/openAiChatPlugin.js +8 -10
  33. package/server/plugins/openAiCompletionPlugin.js +9 -12
  34. package/server/plugins/openAiDallE3Plugin.js +14 -31
  35. package/server/plugins/openAiEmbeddingsPlugin.js +6 -9
  36. package/server/plugins/openAiImagePlugin.js +19 -15
  37. package/server/plugins/openAiWhisperPlugin.js +168 -100
  38. package/server/plugins/palmChatPlugin.js +9 -10
  39. package/server/plugins/palmCodeCompletionPlugin.js +2 -2
  40. package/server/plugins/palmCompletionPlugin.js +11 -12
  41. package/server/resolver.js +2 -2
  42. package/server/rest.js +1 -1
  43. package/tests/config.test.js +1 -1
  44. package/tests/mocks.js +5 -0
  45. package/tests/modelPlugin.test.js +3 -10
  46. package/tests/openAiChatPlugin.test.js +9 -8
  47. package/tests/openai_api.test.js +3 -3
  48. package/tests/palmChatPlugin.test.js +1 -1
  49. package/tests/palmCompletionPlugin.test.js +1 -1
  50. package/tests/pathwayResolver.test.js +2 -1
  51. package/tests/requestMonitor.test.js +94 -0
  52. package/tests/{requestDurationEstimator.test.js → requestMonitorDurationEstimator.test.js} +21 -17
  53. package/tests/truncateMessages.test.js +1 -1
  54. package/lib/request.js +0 -259
  55. package/lib/requestDurationEstimator.js +0 -90
  56. /package/{helper_apps/CortexFileHandler → helper-apps/cortex-file-handler}/blobHandler.js +0 -0
  57. /package/{helper_apps/CortexFileHandler → helper-apps/cortex-file-handler}/docHelper.js +0 -0
  58. /package/{helper_apps/CortexFileHandler → helper-apps/cortex-file-handler}/function.json +0 -0
  59. /package/{helper_apps/CortexFileHandler → helper-apps/cortex-file-handler}/helper.js +0 -0
  60. /package/{helper_apps/CortexFileHandler → helper-apps/cortex-file-handler}/index.js +0 -0
  61. /package/{helper_apps/CortexFileHandler → helper-apps/cortex-file-handler}/localFileHandler.js +0 -0
  62. /package/{helper_apps/CortexFileHandler → helper-apps/cortex-file-handler}/redis.js +0 -0
  63. /package/{helper_apps/CortexFileHandler → helper-apps/cortex-file-handler}/start.js +0 -0
@@ -1,43 +1,146 @@
1
+ import { v4 as uuidv4 } from 'uuid';
2
+ // eslint-disable-next-line import/no-extraneous-dependencies
3
+ import { Deque } from '@datastructures-js/deque';
4
+
1
5
  class RequestMonitor {
2
- constructor() {
3
- this.callCount = 0;
6
+ constructor( callsToKeep = 10 ) {
7
+ this.callCount = new Deque();
8
+ this.peakCallRate = 0;
9
+ this.error429Count = new Deque();
10
+ this.errorCount = new Deque();
11
+ this.startTime = new Date();
12
+ this.callStartTimes = new Map();
13
+ this.callDurations = new Deque();
14
+ this.healthy = true;
15
+ this.ageOutTime = 5 * 60 * 1000; // 5 minutes
16
+ this.callsToKeep = callsToKeep;
17
+ }
18
+
19
+ get isHealthy() {
20
+ return this.healthy;
21
+ }
22
+
23
+ removeOldCallStats(dq, timeProperty) {
24
+ const currentTime = new Date();
25
+ while (!dq.isEmpty() && currentTime - (timeProperty ? dq.front()[timeProperty] : dq.front()) > this.ageOutTime) {
26
+ dq.popFront();
27
+ }
28
+ }
29
+
30
+ maintain() {
31
+ this.removeOldCallStats(this.callCount);
32
+ if (this.callCount.size() === 0) {
4
33
  this.peakCallRate = 0;
5
- this.error429Count = 0;
6
- this.startTime = new Date();
7
34
  }
35
+ this.removeOldCallStats(this.callDurations, 'endTime');
36
+ this.removeOldCallStats(this.error429Count);
37
+ this.removeOldCallStats(this.errorCount);
8
38
 
9
- incrementCallCount() {
10
- this.callCount++;
11
- if (this.getCallRate() > this.peakCallRate) {
12
- this.peakCallRate = this.getCallRate();
39
+ if (this.getErrorRate() > 0.3) {
40
+ this.healthy = false;
41
+ } else {
42
+ this.healthy = true;
43
+ }
44
+ }
45
+
46
+ startCall() {
47
+ const callId = uuidv4();
48
+ const currentTime = new Date();
49
+ this.callStartTimes.set(callId, currentTime);
50
+ this.callCount.pushBack(currentTime);
51
+ this.maintain();
52
+ return callId;
53
+ }
54
+
55
+ endCall(callId) {
56
+ const endTime = new Date();
57
+ const startTime = this.callStartTimes.get(callId);
58
+
59
+ if (startTime) {
60
+ this.callStartTimes.delete(callId);
61
+ const callDuration = endTime - startTime;
62
+ this.callDurations.pushBack({endTime, callDuration});
63
+
64
+ // Keep at most callsToKeep entries in callDurations
65
+ while (this.callDurations.size() > this.callsToKeep) {
66
+ this.callDurations.popFront();
13
67
  }
14
68
  }
15
-
16
- incrementError429Count() {
17
- this.error429Count++;
69
+
70
+ const callRate = this.getCallRate();
71
+ if (callRate > this.peakCallRate) {
72
+ this.peakCallRate = callRate;
18
73
  }
19
-
20
- getCallRate() {
21
- const currentTime = new Date();
22
- const timeElapsed = (currentTime - this.startTime) / 1000; // time elapsed in seconds
23
- return timeElapsed < 1 ? this.callCount : this.callCount / timeElapsed;
74
+
75
+ this.maintain();
76
+ }
77
+
78
+ getAverageCallDuration() {
79
+ this.maintain();
80
+ if (this.callDurations.size() === 0) {
81
+ return 0;
24
82
  }
83
+ const sum = this.callDurations.toArray().reduce((a, b) => a + b.callDuration, 0);
84
+ return sum / this.callDurations.size();
85
+ }
86
+
87
+ incrementError429Count() {
88
+ this.error429Count.pushBack(new Date());
89
+ this.maintain();
90
+ }
91
+
92
+ incrementErrorCount() {
93
+ this.errorCount.pushBack(new Date());
94
+ this.maintain();
95
+ }
96
+
97
+ getCallRate() {
98
+ this.maintain();
99
+ const currentTime = new Date();
100
+ const timeElapsed = (currentTime - this.callCount.front()) / 1000; // time elapsed in seconds
101
+ return timeElapsed < 1 ? this.callCount.size() : this.callCount.size() / timeElapsed;
102
+ }
103
+
104
+ getPeakCallRate() {
105
+ this.maintain();
106
+ return this.peakCallRate;
107
+ }
25
108
 
26
- getPeakCallRate() {
27
- return this.peakCallRate;
109
+ getError429Rate() {
110
+ return this.callCount.size() ? this.error429Count.size() / this.callCount.size() : 0;
111
+ }
112
+
113
+ getErrorRate() {
114
+ return this.callCount.size() ? this.errorCount.size() / this.callCount.size() : 0;
115
+ }
116
+
117
+ calculatePercentComplete(callId) {
118
+ if (!this.callDurations.size()) {
119
+ return 0;
28
120
  }
29
121
 
30
- getError429Rate() {
31
- return this.error429Count / this.callCount;
32
- }
122
+ const currentTime = new Date();
123
+ const duration = currentTime - this.callStartTimes.get(callId);
124
+ const average = this.getAverageCallDuration();
125
+ let percentComplete = duration / average;
33
126
 
34
- reset() {
35
- this.callCount = 0;
36
- this.error429Count = 0;
37
- this.peakCallRate = 0;
38
- this.startTime = new Date();
127
+ if (percentComplete > 0.8) {
128
+ percentComplete = 0.8;
39
129
  }
130
+
131
+ return percentComplete;
132
+ }
133
+
134
+ reset() {
135
+ this.callCount.clear();
136
+ this.peakCallRate = 0;
137
+ this.error429Count.clear();
138
+ this.errorCount.clear();
139
+ this.startTime = new Date();
140
+ this.callStartTimes = new Map();
141
+ this.callDurations.clear();
142
+ this.healthy = true;
40
143
  }
144
+ }
41
145
 
42
- export default RequestMonitor;
43
-
146
+ export default RequestMonitor;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@aj-archipelago/cortex",
3
- "version": "1.1.3",
3
+ "version": "1.1.4",
4
4
  "description": "Cortex is a GraphQL API for AI. It provides a simple, extensible interface for using AI services from OpenAI, Azure and others.",
5
5
  "private": false,
6
6
  "repository": {
@@ -32,6 +32,7 @@
32
32
  "@apollo/server": "^4.7.3",
33
33
  "@apollo/server-plugin-response-cache": "^4.1.2",
34
34
  "@apollo/utils.keyvadapter": "^3.0.0",
35
+ "@datastructures-js/deque": "^1.0.4",
35
36
  "@graphql-tools/schema": "^9.0.12",
36
37
  "@keyv/redis": "^2.5.4",
37
38
  "axios": "^1.3.4",
@@ -17,12 +17,12 @@ export default {
17
17
 
18
18
  // Custom resolver to generate summaries by reprompting if they are too long or too short.
19
19
  resolver: async (parent, args, contextValue, _info) => {
20
- const { config, pathway, requestState } = contextValue;
20
+ const { config, pathway } = contextValue;
21
21
  const originalTargetLength = args.targetLength;
22
22
 
23
23
  // If targetLength is not provided, execute the prompt once and return the result.
24
24
  if (originalTargetLength === 0 || originalTargetLength === null) {
25
- let pathwayResolver = new PathwayResolver({ config, pathway, args, requestState });
25
+ let pathwayResolver = new PathwayResolver({ config, pathway, args });
26
26
  return await pathwayResolver.resolve(args);
27
27
  }
28
28
 
@@ -37,7 +37,7 @@ export default {
37
37
 
38
38
  const MAX_ITERATIONS = 5;
39
39
  let summary = '';
40
- let pathwayResolver = new PathwayResolver({ config, pathway, args, requestState });
40
+ let pathwayResolver = new PathwayResolver({ config, pathway, args });
41
41
 
42
42
  // Modify the prompt to be words-based instead of characters-based.
43
43
  pathwayResolver.pathwayPrompt = `Write a summary of all of the text below. If the text is in a language other than english, make sure the summary is written in the same language. Your summary should be ${targetWords} words in length.\n\nText:\n\n{{{text}}}\n\nSummary:\n\n`
package/server/graphql.js CHANGED
@@ -16,7 +16,7 @@ import cors from 'cors';
16
16
  import { KeyvAdapter } from '@apollo/utils.keyvadapter';
17
17
  import responseCachePlugin from '@apollo/server-plugin-response-cache';
18
18
  import subscriptions from './subscriptions.js';
19
- import { buildLimiters } from '../lib/request.js';
19
+ import { buildModelEndpoints } from '../lib/requestExecutor.js';
20
20
  import { cancelRequestResolver } from './resolver.js';
21
21
  import { buildPathways, buildModels } from '../config.js';
22
22
  import { requestState } from './requestState.js';
@@ -116,8 +116,8 @@ const build = async (config) => {
116
116
  await buildPathways(config);
117
117
  buildModels(config);
118
118
 
119
- // build api limiters
120
- buildLimiters(config);
119
+ // build model API endpoints and limiters
120
+ buildModelEndpoints(config);
121
121
 
122
122
  //build api
123
123
  const pathways = config.get('pathways');
@@ -176,8 +176,8 @@ const build = async (config) => {
176
176
  });
177
177
 
178
178
  // If CORTEX_API_KEY is set, we roll our own auth middleware - usually not used if you're being fronted by a proxy
179
- const cortexApiKey = config.get('cortexApiKey');
180
- if (cortexApiKey) {
179
+ const cortexApiKeys = config.get('cortexApiKeys');
180
+ if (cortexApiKeys && Array.isArray(cortexApiKeys)) {
181
181
  app.use((req, res, next) => {
182
182
  let providedApiKey = req.headers['cortex-api-key'] || req.query['cortex-api-key'];
183
183
  if (!providedApiKey) {
@@ -185,7 +185,7 @@ const build = async (config) => {
185
185
  providedApiKey = providedApiKey?.startsWith('Bearer ') ? providedApiKey.slice(7) : providedApiKey;
186
186
  }
187
187
 
188
- if (cortexApiKey && cortexApiKey !== providedApiKey) {
188
+ if (!cortexApiKeys.includes(providedApiKey)) {
189
189
  if (req.baseUrl === '/graphql' || req.headers['content-type'] === 'application/graphql') {
190
190
  res.status(401)
191
191
  .set('WWW-Authenticate', 'Cortex-Api-Key')
@@ -1,4 +1,6 @@
1
- // PathwayPrompter.js
1
+ // ModelExecutor.js
2
+ import CortexRequest from '../lib/cortexRequest.js';
3
+
2
4
  import OpenAIChatPlugin from './plugins/openAiChatPlugin.js';
3
5
  import OpenAICompletionPlugin from './plugins/openAiCompletionPlugin.js';
4
6
  import AzureTranslatePlugin from './plugins/azureTranslatePlugin.js';
@@ -16,59 +18,59 @@ import OpenAIImagePlugin from './plugins/openAiImagePlugin.js';
16
18
  import OpenAIDallE3Plugin from './plugins/openAiDallE3Plugin.js';
17
19
  import OpenAIVisionPlugin from './plugins/openAiVisionPlugin.js';
18
20
 
19
- class PathwayPrompter {
20
- constructor(config, pathway, modelName, model) {
21
+ class ModelExecutor {
22
+ constructor(pathway, model) {
21
23
 
22
24
  let plugin;
23
25
 
24
26
  switch (model.type) {
25
27
  case 'OPENAI-CHAT':
26
- plugin = new OpenAIChatPlugin(config, pathway, modelName, model);
28
+ plugin = new OpenAIChatPlugin(pathway, model);
27
29
  break;
28
30
  case 'OPENAI-DALLE2':
29
- plugin = new OpenAIImagePlugin(config, pathway, modelName, model);
31
+ plugin = new OpenAIImagePlugin(pathway, model);
30
32
  break;
31
33
  case 'OPENAI-DALLE3':
32
- plugin = new OpenAIDallE3Plugin(config, pathway, modelName, model);
34
+ plugin = new OpenAIDallE3Plugin(pathway, model);
33
35
  break;
34
36
  case 'OPENAI-CHAT-EXTENSION':
35
- plugin = new OpenAIChatExtensionPlugin(config, pathway, modelName, model);
37
+ plugin = new OpenAIChatExtensionPlugin(pathway, model);
36
38
  break;
37
39
  case 'AZURE-TRANSLATE':
38
- plugin = new AzureTranslatePlugin(config, pathway, modelName, model);
40
+ plugin = new AzureTranslatePlugin(pathway, model);
39
41
  break;
40
42
  case 'AZURE-COGNITIVE':
41
- plugin = new AzureCognitivePlugin(config, pathway, modelName, model);
43
+ plugin = new AzureCognitivePlugin(pathway, model);
42
44
  break;
43
45
  case 'OPENAI-EMBEDDINGS':
44
- plugin = new OpenAiEmbeddingsPlugin(config, pathway, modelName, model);
46
+ plugin = new OpenAiEmbeddingsPlugin(pathway, model);
45
47
  break;
46
48
  case 'OPENAI-COMPLETION':
47
- plugin = new OpenAICompletionPlugin(config, pathway, modelName, model);
49
+ plugin = new OpenAICompletionPlugin(pathway, model);
48
50
  break;
49
51
  case 'OPENAI-WHISPER':
50
- plugin = new OpenAIWhisperPlugin(config, pathway, modelName, model);
52
+ plugin = new OpenAIWhisperPlugin(pathway, model);
51
53
  break;
52
54
  case 'LOCAL-CPP-MODEL':
53
- plugin = new LocalModelPlugin(config, pathway, modelName, model);
55
+ plugin = new LocalModelPlugin(pathway, model);
54
56
  break;
55
57
  case 'PALM-CHAT':
56
- plugin = new PalmChatPlugin(config, pathway, modelName, model);
58
+ plugin = new PalmChatPlugin(pathway, model);
57
59
  break;
58
60
  case 'PALM-COMPLETION':
59
- plugin = new PalmCompletionPlugin(config, pathway, modelName, model);
61
+ plugin = new PalmCompletionPlugin(pathway, model);
60
62
  break;
61
63
  case 'PALM-CODE-COMPLETION':
62
- plugin = new PalmCodeCompletionPlugin(config, pathway, modelName, model);
64
+ plugin = new PalmCodeCompletionPlugin(pathway, model);
63
65
  break;
64
66
  case 'COHERE-GENERATE':
65
- plugin = new CohereGeneratePlugin(config, pathway, modelName, model);
67
+ plugin = new CohereGeneratePlugin(pathway, model);
66
68
  break;
67
69
  case 'COHERE-SUMMARIZE':
68
- plugin = new CohereSummarizePlugin(config, pathway, modelName, model);
70
+ plugin = new CohereSummarizePlugin(pathway, model);
69
71
  break;
70
72
  case 'OPENAI-VISION':
71
- plugin = new OpenAIVisionPlugin(config, pathway, modelName, model);
73
+ plugin = new OpenAIVisionPlugin(pathway, model);
72
74
  break;
73
75
  default:
74
76
  throw new Error(`Unsupported model type: ${model.type}`);
@@ -78,10 +80,11 @@ class PathwayPrompter {
78
80
  }
79
81
 
80
82
  async execute(text, parameters, prompt, pathwayResolver) {
81
- return await this.plugin.execute(text, parameters, prompt, pathwayResolver);
83
+ const cortexRequest = new CortexRequest({ pathwayResolver });
84
+ return await this.plugin.execute(text, parameters, prompt, cortexRequest);
82
85
  }
83
86
  }
84
87
 
85
88
  export {
86
- PathwayPrompter
89
+ ModelExecutor
87
90
  };
@@ -1,4 +1,5 @@
1
- import { PathwayPrompter } from './pathwayPrompter.js';
1
+ import { ModelExecutor } from './modelExecutor.js';
2
+ import { modelEndpoints } from '../lib/requestExecutor.js';
2
3
  // eslint-disable-next-line import/no-extraneous-dependencies
3
4
  import { v4 as uuidv4 } from 'uuid';
4
5
  import { encode } from 'gpt-3-encoder';
@@ -14,7 +15,9 @@ import logger from '../lib/logger.js';
14
15
  const modelTypesExcludedFromProgressUpdates = ['OPENAI-DALLE2', 'OPENAI-DALLE3'];
15
16
 
16
17
  class PathwayResolver {
17
- constructor({ config, pathway, args }) {
18
+ // Optional endpoints override parameter is for testing purposes
19
+ constructor({ config, pathway, args, endpoints }) {
20
+ this.endpoints = endpoints || modelEndpoints;
18
21
  this.config = config;
19
22
  this.pathway = pathway;
20
23
  this.args = args;
@@ -28,8 +31,8 @@ class PathwayResolver {
28
31
  args?.model,
29
32
  pathway.inputParameters?.model,
30
33
  config.get('defaultModelName')
31
- ].find(modelName => modelName && Object.prototype.hasOwnProperty.call(config.get('models'), modelName));
32
- this.model = config.get('models')[this.modelName];
34
+ ].find(modelName => modelName && Object.prototype.hasOwnProperty.call(this.endpoints, modelName));
35
+ this.model = this.endpoints[this.modelName];
33
36
 
34
37
  if (!this.model) {
35
38
  throw new Error(`Model ${this.modelName} not found in config`);
@@ -47,7 +50,7 @@ class PathwayResolver {
47
50
 
48
51
  this.previousResult = '';
49
52
  this.prompts = [];
50
- this.pathwayPrompter = new PathwayPrompter(this.config, this.pathway, this.modelName, this.model);
53
+ this.modelExecutor = new ModelExecutor(this.pathway, this.model);
51
54
 
52
55
  Object.defineProperty(this, 'pathwayPrompt', {
53
56
  get() {
@@ -255,7 +258,7 @@ class PathwayResolver {
255
258
  }
256
259
 
257
260
  truncate(str, n) {
258
- if (this.pathwayPrompter.plugin.promptParameters.truncateFromFront) {
261
+ if (this.modelExecutor.plugin.promptParameters.truncateFromFront) {
259
262
  return getFirstNToken(str, n);
260
263
  }
261
264
  return getLastNToken(str, n);
@@ -263,7 +266,7 @@ class PathwayResolver {
263
266
 
264
267
  async summarizeIfEnabled({ text, ...parameters }) {
265
268
  if (this.pathway.useInputSummarization) {
266
- return await callPathway(this.config, 'summary', { ...this.args, ...parameters, targetLength: 0});
269
+ return await callPathway('summary', { ...this.args, ...parameters, targetLength: 0});
267
270
  }
268
271
  return text;
269
272
  }
@@ -271,15 +274,15 @@ class PathwayResolver {
271
274
  // Calculate the maximum token length for a chunk
272
275
  getChunkMaxTokenLength() {
273
276
  // find the longest prompt
274
- const maxPromptTokenLength = Math.max(...this.prompts.map((promptData) => this.pathwayPrompter.plugin.getCompiledPrompt('', this.args, promptData).tokenLength));
277
+ const maxPromptTokenLength = Math.max(...this.prompts.map((promptData) => this.modelExecutor.plugin.getCompiledPrompt('', this.args, promptData).tokenLength));
275
278
 
276
279
  // find out if any prompts use both text input and previous result
277
280
  const hasBothProperties = this.prompts.some(prompt => prompt.usesTextInput && prompt.usesPreviousResult);
278
281
 
279
282
  // the token ratio is the ratio of the total prompt to the result text - both have to be included
280
283
  // in computing the max token length
281
- const promptRatio = this.pathwayPrompter.plugin.getPromptTokenRatio();
282
- let chunkMaxTokenLength = promptRatio * this.pathwayPrompter.plugin.getModelMaxTokenLength() - maxPromptTokenLength - 1;
284
+ const promptRatio = this.modelExecutor.plugin.getPromptTokenRatio();
285
+ let chunkMaxTokenLength = promptRatio * this.modelExecutor.plugin.getModelMaxTokenLength() - maxPromptTokenLength - 1;
283
286
 
284
287
  // if we have to deal with prompts that have both text input
285
288
  // and previous result, we need to split the maxChunkToken in half
@@ -386,20 +389,22 @@ class PathwayResolver {
386
389
 
387
390
  // If this text is empty, skip applying the prompt as it will likely be a nonsensical result
388
391
  if (!/^\s*$/.test(text) || parameters?.file || parameters?.inputVector || this?.modelName.includes('cognitive')) {
389
- result = await this.pathwayPrompter.execute(text, { ...parameters, ...this.savedContext }, prompt, this);
392
+ result = await this.modelExecutor.execute(text, { ...parameters, ...this.savedContext }, prompt, this);
390
393
  } else {
391
394
  result = text;
392
395
  }
393
396
 
394
397
  requestState[this.requestId].completedCount++;
395
398
 
396
- const { completedCount, totalCount } = requestState[this.requestId];
399
+ if (parameters.async) {
400
+ const { completedCount, totalCount } = requestState[this.requestId];
397
401
 
398
- if (completedCount < totalCount) {
399
- await publishRequestProgress({
400
- requestId: this.requestId,
401
- progress: completedCount / totalCount,
402
- });
402
+ if (completedCount < totalCount) {
403
+ await publishRequestProgress({
404
+ requestId: this.requestId,
405
+ progress: completedCount / totalCount,
406
+ });
407
+ }
403
408
  }
404
409
 
405
410
  if (prompt.saveResultTo) {
@@ -4,7 +4,7 @@ import ModelPlugin from './modelPlugin.js';
4
4
  import { v4 as uuidv4 } from 'uuid';
5
5
  import path from 'path';
6
6
  import { config } from '../../config.js';
7
- import { axios } from '../../lib/request.js';
7
+ import { axios } from '../../lib/requestExecutor.js';
8
8
  import logger from '../../lib/logger.js';
9
9
 
10
10
  const API_URL = config.get('whisperMediaApiUrl');
@@ -14,8 +14,8 @@ const TOP = 1000;
14
14
  let DIRECT_FILE_EXTENSIONS = [".txt", ".json", ".csv", ".md", ".xml", ".js", ".html", ".css"];
15
15
 
16
16
  class AzureCognitivePlugin extends ModelPlugin {
17
- constructor(config, pathway, modelName, model) {
18
- super(config, pathway, modelName, model);
17
+ constructor(pathway, model) {
18
+ super(pathway, model);
19
19
  }
20
20
 
21
21
  async getInputVector (text) {
@@ -23,14 +23,14 @@ class AzureCognitivePlugin extends ModelPlugin {
23
23
  if(!text || !text.trim()){
24
24
  return;
25
25
  }
26
- return JSON.parse(await callPathway(this.config, 'embeddings', { text }))[0];
26
+ return JSON.parse(await callPathway('embeddings', { text }))[0];
27
27
  }catch(err){
28
28
  logger.error(`Error in calculating input vector for text: ${text}, error: ${err}`);
29
29
  }
30
30
  }
31
31
 
32
32
  // Set up parameters specific to the Azure Cognitive API
33
- async getRequestParameters(text, parameters, prompt, mode, indexName, savedContextId, {headers, requestId, pathway, _url}) {
33
+ async getRequestParameters(text, parameters, prompt, mode, indexName, savedContextId, cortexRequest) {
34
34
  const combinedParameters = { ...this.promptParameters, ...parameters };
35
35
  const { modelPromptText } = this.getCompiledPrompt(text, combinedParameters, prompt);
36
36
  const { inputVector, calculateInputVector, privateData, filter, docId } = combinedParameters;
@@ -44,13 +44,15 @@ class AzureCognitivePlugin extends ModelPlugin {
44
44
  searchQuery += ` AND docId:'${docId}'`;
45
45
  }
46
46
 
47
- const docsToDelete = JSON.parse(await this.executeRequest(searchUrl,
48
- { search: searchQuery,
49
- "searchMode": "all",
50
- "queryType": "full",
51
- select: 'id', top: TOP
52
- },
53
- {}, headers, prompt, requestId, pathway));
47
+ cortexRequest.url = searchUrl;
48
+ cortexRequest.data =
49
+ { search: searchQuery,
50
+ "searchMode": "all",
51
+ "queryType": "full",
52
+ select: 'id', top: TOP
53
+ };
54
+
55
+ const docsToDelete = JSON.parse(await this.executeRequest(cortexRequest));
54
56
 
55
57
  const value = docsToDelete.value.map(({id}) => ({
56
58
  id,
@@ -144,13 +146,13 @@ class AzureCognitivePlugin extends ModelPlugin {
144
146
  }
145
147
 
146
148
  // Execute the request to the Azure Cognitive API
147
- async execute(text, parameters, prompt, pathwayResolver) {
148
- const { requestId, pathway, savedContextId, savedContext } = pathwayResolver;
149
+ async execute(text, parameters, prompt, cortexRequest) {
150
+ const { requestId, savedContextId, savedContext } = cortexRequest.pathwayResolver;
149
151
  const mode = this.promptParameters.mode || 'search';
150
152
  let url = this.ensureMode(this.requestUrl(text), mode == 'delete' ? 'index' : mode);
151
153
  const indexName = parameters.indexName || 'indexcortex';
152
154
  url = this.ensureIndex(url, indexName);
153
- const headers = this.model.headers;
155
+ const headers = cortexRequest.headers;
154
156
 
155
157
  const { file } = parameters;
156
158
  if(file){
@@ -175,15 +177,14 @@ class AzureCognitivePlugin extends ModelPlugin {
175
177
  throw Error(`No data can be extracted out of file!`);
176
178
  }
177
179
 
178
- //return await this.execute(data, {...parameters, file:null}, prompt, pathwayResolver);
179
- return await callPathway(this.config, 'cognitive_insert', {...parameters, file:null, text:data });
180
+ return await callPathway('cognitive_insert', {...parameters, file:null, text:data });
180
181
  }
181
182
 
182
183
  if (mode === 'index' && (!text || !text.trim()) ){
183
184
  return; // nothing to index
184
185
  }
185
186
 
186
- const { data, params } = await this.getRequestParameters(text, parameters, prompt, mode, indexName, savedContextId, {headers, requestId, pathway, url});
187
+ const { data, params } = await this.getRequestParameters(text, parameters, prompt, mode, indexName, savedContextId, cortexRequest);
187
188
 
188
189
  // update contextid last used
189
190
  savedContext["lastUsed"] = new Date().toISOString();
@@ -193,11 +194,15 @@ class AzureCognitivePlugin extends ModelPlugin {
193
194
  }
194
195
 
195
196
  // execute the request
196
- const result = await this.executeRequest(url, data || {}, params || {}, headers || {}, prompt, requestId, pathway);
197
+ cortexRequest.url = url;
198
+ cortexRequest.data = data;
199
+ cortexRequest.params = params;
200
+ cortexRequest.headers = headers;
201
+ const result = await this.executeRequest(cortexRequest);
197
202
 
198
203
  // if still has more to delete
199
204
  if (mode === 'delete' && data?.value?.length == TOP) {
200
- return await this.execute(text, parameters, prompt, pathwayResolver);
205
+ return await this.execute(text, parameters, prompt, cortexRequest);
201
206
  }
202
207
 
203
208
  return result;
@@ -3,8 +3,8 @@ import ModelPlugin from './modelPlugin.js';
3
3
  import logger from '../../lib/logger.js';
4
4
 
5
5
  class AzureTranslatePlugin extends ModelPlugin {
6
- constructor(config, pathway, modelName, model) {
7
- super(config, pathway, modelName, model);
6
+ constructor(pathway, model) {
7
+ super(pathway, model);
8
8
  }
9
9
 
10
10
  // Set up parameters specific to the Azure Translate API
@@ -25,17 +25,13 @@ class AzureTranslatePlugin extends ModelPlugin {
25
25
  }
26
26
 
27
27
  // Execute the request to the Azure Translate API
28
- async execute(text, parameters, prompt, pathwayResolver) {
28
+ async execute(text, parameters, prompt, cortexRequest) {
29
29
  const requestParameters = this.getRequestParameters(text, parameters, prompt);
30
- const { requestId, pathway} = pathwayResolver;
31
30
 
32
- const url = this.requestUrl(text);
31
+ cortexRequest.data = requestParameters.data;
32
+ cortexRequest.params = requestParameters.params;
33
33
 
34
- const data = requestParameters.data;
35
- const params = requestParameters.params;
36
- const headers = this.model.headers || {};
37
-
38
- return this.executeRequest(url, data, params, headers, prompt, requestId, pathway);
34
+ return this.executeRequest(cortexRequest);
39
35
  }
40
36
 
41
37
  // Parse the response from the Azure Translate API
@@ -2,8 +2,8 @@
2
2
  import ModelPlugin from './modelPlugin.js';
3
3
 
4
4
  class CohereGeneratePlugin extends ModelPlugin {
5
- constructor(config, pathway, modelName, model) {
6
- super(config, pathway, modelName, model);
5
+ constructor(pathway, model) {
6
+ super(pathway, model);
7
7
  }
8
8
 
9
9
  // Set up parameters specific to the Cohere API
@@ -33,17 +33,10 @@ class CohereGeneratePlugin extends ModelPlugin {
33
33
  }
34
34
 
35
35
  // Execute the request to the Cohere API
36
- async execute(text, parameters, prompt, pathwayResolver) {
37
- const url = this.requestUrl();
36
+ async execute(text, parameters, prompt, cortexRequest) {
38
37
  const requestParameters = this.getRequestParameters(text, parameters, prompt);
39
- const { requestId, pathway} = pathwayResolver;
40
-
41
- const data = { ...(this.model.params || {}), ...requestParameters };
42
- const params = {};
43
- const headers = {
44
- ...this.model.headers || {}
45
- };
46
- return this.executeRequest(url, data, params, headers, prompt, requestId, pathway);
38
+ cortexRequest.data = { ...cortexRequest.data, ...requestParameters };
39
+ return this.executeRequest(cortexRequest);
47
40
  }
48
41
 
49
42
  // Parse the response from the Cohere API