@aj-archipelago/cortex 1.1.3 → 1.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.eslintignore +3 -3
- package/README.md +17 -4
- package/config.js +45 -9
- package/{helper_apps/CortexFileHandler → helper-apps/cortex-file-handler}/Dockerfile +1 -1
- package/{helper_apps/CortexFileHandler → helper-apps/cortex-file-handler}/fileChunker.js +4 -1
- package/{helper_apps/CortexFileHandler → helper-apps/cortex-file-handler}/package-lock.json +25 -216
- package/{helper_apps/CortexFileHandler → helper-apps/cortex-file-handler}/package.json +2 -2
- package/helper-apps/cortex-whisper-wrapper/.dockerignore +27 -0
- package/helper-apps/cortex-whisper-wrapper/Dockerfile +32 -0
- package/helper-apps/cortex-whisper-wrapper/app.py +104 -0
- package/helper-apps/cortex-whisper-wrapper/docker-compose.debug.yml +12 -0
- package/helper-apps/cortex-whisper-wrapper/docker-compose.yml +10 -0
- package/helper-apps/cortex-whisper-wrapper/models/.gitkeep +0 -0
- package/helper-apps/cortex-whisper-wrapper/requirements.txt +5 -0
- package/lib/cortexRequest.js +117 -0
- package/lib/pathwayTools.js +2 -1
- package/lib/redisSubscription.js +2 -2
- package/lib/requestExecutor.js +360 -0
- package/lib/requestMonitor.js +131 -28
- package/package.json +2 -1
- package/pathways/summary.js +3 -3
- package/server/graphql.js +6 -6
- package/server/{pathwayPrompter.js → modelExecutor.js} +24 -21
- package/server/pathwayResolver.js +22 -17
- package/server/plugins/azureCognitivePlugin.js +25 -20
- package/server/plugins/azureTranslatePlugin.js +6 -10
- package/server/plugins/cohereGeneratePlugin.js +5 -12
- package/server/plugins/cohereSummarizePlugin.js +5 -12
- package/server/plugins/localModelPlugin.js +3 -3
- package/server/plugins/modelPlugin.js +18 -12
- package/server/plugins/openAiChatExtensionPlugin.js +5 -5
- package/server/plugins/openAiChatPlugin.js +8 -10
- package/server/plugins/openAiCompletionPlugin.js +9 -12
- package/server/plugins/openAiDallE3Plugin.js +14 -31
- package/server/plugins/openAiEmbeddingsPlugin.js +6 -9
- package/server/plugins/openAiImagePlugin.js +19 -15
- package/server/plugins/openAiWhisperPlugin.js +168 -100
- package/server/plugins/palmChatPlugin.js +9 -10
- package/server/plugins/palmCodeCompletionPlugin.js +2 -2
- package/server/plugins/palmCompletionPlugin.js +11 -12
- package/server/resolver.js +2 -2
- package/server/rest.js +1 -1
- package/tests/config.test.js +1 -1
- package/tests/mocks.js +5 -0
- package/tests/modelPlugin.test.js +3 -10
- package/tests/openAiChatPlugin.test.js +9 -8
- package/tests/openai_api.test.js +3 -3
- package/tests/palmChatPlugin.test.js +1 -1
- package/tests/palmCompletionPlugin.test.js +1 -1
- package/tests/pathwayResolver.test.js +2 -1
- package/tests/requestMonitor.test.js +94 -0
- package/tests/{requestDurationEstimator.test.js → requestMonitorDurationEstimator.test.js} +21 -17
- package/tests/truncateMessages.test.js +1 -1
- package/lib/request.js +0 -259
- package/lib/requestDurationEstimator.js +0 -90
- /package/{helper_apps/CortexFileHandler → helper-apps/cortex-file-handler}/blobHandler.js +0 -0
- /package/{helper_apps/CortexFileHandler → helper-apps/cortex-file-handler}/docHelper.js +0 -0
- /package/{helper_apps/CortexFileHandler → helper-apps/cortex-file-handler}/function.json +0 -0
- /package/{helper_apps/CortexFileHandler → helper-apps/cortex-file-handler}/helper.js +0 -0
- /package/{helper_apps/CortexFileHandler → helper-apps/cortex-file-handler}/index.js +0 -0
- /package/{helper_apps/CortexFileHandler → helper-apps/cortex-file-handler}/localFileHandler.js +0 -0
- /package/{helper_apps/CortexFileHandler → helper-apps/cortex-file-handler}/redis.js +0 -0
- /package/{helper_apps/CortexFileHandler → helper-apps/cortex-file-handler}/start.js +0 -0
package/lib/requestMonitor.js
CHANGED
|
@@ -1,43 +1,146 @@
|
|
|
1
|
+
import { v4 as uuidv4 } from 'uuid';
|
|
2
|
+
// eslint-disable-next-line import/no-extraneous-dependencies
|
|
3
|
+
import { Deque } from '@datastructures-js/deque';
|
|
4
|
+
|
|
1
5
|
class RequestMonitor {
|
|
2
|
-
|
|
3
|
-
|
|
6
|
+
constructor( callsToKeep = 10 ) {
|
|
7
|
+
this.callCount = new Deque();
|
|
8
|
+
this.peakCallRate = 0;
|
|
9
|
+
this.error429Count = new Deque();
|
|
10
|
+
this.errorCount = new Deque();
|
|
11
|
+
this.startTime = new Date();
|
|
12
|
+
this.callStartTimes = new Map();
|
|
13
|
+
this.callDurations = new Deque();
|
|
14
|
+
this.healthy = true;
|
|
15
|
+
this.ageOutTime = 5 * 60 * 1000; // 5 minutes
|
|
16
|
+
this.callsToKeep = callsToKeep;
|
|
17
|
+
}
|
|
18
|
+
|
|
19
|
+
get isHealthy() {
|
|
20
|
+
return this.healthy;
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
removeOldCallStats(dq, timeProperty) {
|
|
24
|
+
const currentTime = new Date();
|
|
25
|
+
while (!dq.isEmpty() && currentTime - (timeProperty ? dq.front()[timeProperty] : dq.front()) > this.ageOutTime) {
|
|
26
|
+
dq.popFront();
|
|
27
|
+
}
|
|
28
|
+
}
|
|
29
|
+
|
|
30
|
+
maintain() {
|
|
31
|
+
this.removeOldCallStats(this.callCount);
|
|
32
|
+
if (this.callCount.size() === 0) {
|
|
4
33
|
this.peakCallRate = 0;
|
|
5
|
-
this.error429Count = 0;
|
|
6
|
-
this.startTime = new Date();
|
|
7
34
|
}
|
|
35
|
+
this.removeOldCallStats(this.callDurations, 'endTime');
|
|
36
|
+
this.removeOldCallStats(this.error429Count);
|
|
37
|
+
this.removeOldCallStats(this.errorCount);
|
|
8
38
|
|
|
9
|
-
|
|
10
|
-
this.
|
|
11
|
-
|
|
12
|
-
|
|
39
|
+
if (this.getErrorRate() > 0.3) {
|
|
40
|
+
this.healthy = false;
|
|
41
|
+
} else {
|
|
42
|
+
this.healthy = true;
|
|
43
|
+
}
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
startCall() {
|
|
47
|
+
const callId = uuidv4();
|
|
48
|
+
const currentTime = new Date();
|
|
49
|
+
this.callStartTimes.set(callId, currentTime);
|
|
50
|
+
this.callCount.pushBack(currentTime);
|
|
51
|
+
this.maintain();
|
|
52
|
+
return callId;
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
endCall(callId) {
|
|
56
|
+
const endTime = new Date();
|
|
57
|
+
const startTime = this.callStartTimes.get(callId);
|
|
58
|
+
|
|
59
|
+
if (startTime) {
|
|
60
|
+
this.callStartTimes.delete(callId);
|
|
61
|
+
const callDuration = endTime - startTime;
|
|
62
|
+
this.callDurations.pushBack({endTime, callDuration});
|
|
63
|
+
|
|
64
|
+
// Keep the callDurations length to 5
|
|
65
|
+
while (this.callDurations.size() > this.callsToKeep) {
|
|
66
|
+
this.callDurations.popFront();
|
|
13
67
|
}
|
|
14
68
|
}
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
69
|
+
|
|
70
|
+
const callRate = this.getCallRate();
|
|
71
|
+
if (callRate > this.peakCallRate) {
|
|
72
|
+
this.peakCallRate = callRate;
|
|
18
73
|
}
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
74
|
+
|
|
75
|
+
this.maintain();
|
|
76
|
+
}
|
|
77
|
+
|
|
78
|
+
getAverageCallDuration() {
|
|
79
|
+
this.maintain();
|
|
80
|
+
if (this.callDurations.size() === 0) {
|
|
81
|
+
return 0;
|
|
24
82
|
}
|
|
83
|
+
const sum = this.callDurations.toArray().reduce((a, b) => a + b.callDuration, 0);
|
|
84
|
+
return sum / this.callDurations.size();
|
|
85
|
+
}
|
|
86
|
+
|
|
87
|
+
incrementError429Count() {
|
|
88
|
+
this.error429Count.pushBack(new Date());
|
|
89
|
+
this.maintain();
|
|
90
|
+
}
|
|
91
|
+
|
|
92
|
+
incrementErrorCount() {
|
|
93
|
+
this.errorCount.pushBack(new Date());
|
|
94
|
+
this.maintain();
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
getCallRate() {
|
|
98
|
+
this.maintain();
|
|
99
|
+
const currentTime = new Date();
|
|
100
|
+
const timeElapsed = (currentTime - this.callCount.front()) / 1000; // time elapsed in seconds]
|
|
101
|
+
return timeElapsed < 1 ? this.callCount.size() : this.callCount.size() / timeElapsed;
|
|
102
|
+
}
|
|
103
|
+
|
|
104
|
+
getPeakCallRate() {
|
|
105
|
+
this.maintain();
|
|
106
|
+
return this.peakCallRate;
|
|
107
|
+
}
|
|
25
108
|
|
|
26
|
-
|
|
27
|
-
|
|
109
|
+
getError429Rate() {
|
|
110
|
+
return this.callCount.size() ? this.error429Count.size() / this.callCount.size() : 0;
|
|
111
|
+
}
|
|
112
|
+
|
|
113
|
+
getErrorRate() {
|
|
114
|
+
return this.callCount.size() ? this.errorCount.size() / this.callCount.size() : 0;
|
|
115
|
+
}
|
|
116
|
+
|
|
117
|
+
calculatePercentComplete(callId) {
|
|
118
|
+
if (!this.callDurations.size()) {
|
|
119
|
+
return 0;
|
|
28
120
|
}
|
|
29
121
|
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
122
|
+
const currentTime = new Date();
|
|
123
|
+
const duration = currentTime - this.callStartTimes.get(callId);
|
|
124
|
+
const average = this.getAverageCallDuration();
|
|
125
|
+
let percentComplete = duration / average;
|
|
33
126
|
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
this.error429Count = 0;
|
|
37
|
-
this.peakCallRate = 0;
|
|
38
|
-
this.startTime = new Date();
|
|
127
|
+
if (percentComplete > 0.8) {
|
|
128
|
+
percentComplete = 0.8;
|
|
39
129
|
}
|
|
130
|
+
|
|
131
|
+
return percentComplete;
|
|
132
|
+
}
|
|
133
|
+
|
|
134
|
+
reset() {
|
|
135
|
+
this.callCount.clear();
|
|
136
|
+
this.peakCallRate = 0;
|
|
137
|
+
this.error429Count.clear();
|
|
138
|
+
this.errorCount.clear();
|
|
139
|
+
this.startTime = new Date();
|
|
140
|
+
this.callStartTimes = new Map();
|
|
141
|
+
this.callDurations.clear();
|
|
142
|
+
this.healthy = true;
|
|
40
143
|
}
|
|
144
|
+
}
|
|
41
145
|
|
|
42
|
-
export default RequestMonitor;
|
|
43
|
-
|
|
146
|
+
export default RequestMonitor;
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@aj-archipelago/cortex",
|
|
3
|
-
"version": "1.1.
|
|
3
|
+
"version": "1.1.4",
|
|
4
4
|
"description": "Cortex is a GraphQL API for AI. It provides a simple, extensible interface for using AI services from OpenAI, Azure and others.",
|
|
5
5
|
"private": false,
|
|
6
6
|
"repository": {
|
|
@@ -32,6 +32,7 @@
|
|
|
32
32
|
"@apollo/server": "^4.7.3",
|
|
33
33
|
"@apollo/server-plugin-response-cache": "^4.1.2",
|
|
34
34
|
"@apollo/utils.keyvadapter": "^3.0.0",
|
|
35
|
+
"@datastructures-js/deque": "^1.0.4",
|
|
35
36
|
"@graphql-tools/schema": "^9.0.12",
|
|
36
37
|
"@keyv/redis": "^2.5.4",
|
|
37
38
|
"axios": "^1.3.4",
|
package/pathways/summary.js
CHANGED
|
@@ -17,12 +17,12 @@ export default {
|
|
|
17
17
|
|
|
18
18
|
// Custom resolver to generate summaries by reprompting if they are too long or too short.
|
|
19
19
|
resolver: async (parent, args, contextValue, _info) => {
|
|
20
|
-
const { config, pathway
|
|
20
|
+
const { config, pathway } = contextValue;
|
|
21
21
|
const originalTargetLength = args.targetLength;
|
|
22
22
|
|
|
23
23
|
// If targetLength is not provided, execute the prompt once and return the result.
|
|
24
24
|
if (originalTargetLength === 0 || originalTargetLength === null) {
|
|
25
|
-
let pathwayResolver = new PathwayResolver({ config, pathway, args
|
|
25
|
+
let pathwayResolver = new PathwayResolver({ config, pathway, args });
|
|
26
26
|
return await pathwayResolver.resolve(args);
|
|
27
27
|
}
|
|
28
28
|
|
|
@@ -37,7 +37,7 @@ export default {
|
|
|
37
37
|
|
|
38
38
|
const MAX_ITERATIONS = 5;
|
|
39
39
|
let summary = '';
|
|
40
|
-
let pathwayResolver = new PathwayResolver({ config, pathway, args
|
|
40
|
+
let pathwayResolver = new PathwayResolver({ config, pathway, args });
|
|
41
41
|
|
|
42
42
|
// Modify the prompt to be words-based instead of characters-based.
|
|
43
43
|
pathwayResolver.pathwayPrompt = `Write a summary of all of the text below. If the text is in a language other than english, make sure the summary is written in the same language. Your summary should be ${targetWords} words in length.\n\nText:\n\n{{{text}}}\n\nSummary:\n\n`
|
package/server/graphql.js
CHANGED
|
@@ -16,7 +16,7 @@ import cors from 'cors';
|
|
|
16
16
|
import { KeyvAdapter } from '@apollo/utils.keyvadapter';
|
|
17
17
|
import responseCachePlugin from '@apollo/server-plugin-response-cache';
|
|
18
18
|
import subscriptions from './subscriptions.js';
|
|
19
|
-
import {
|
|
19
|
+
import { buildModelEndpoints } from '../lib/requestExecutor.js';
|
|
20
20
|
import { cancelRequestResolver } from './resolver.js';
|
|
21
21
|
import { buildPathways, buildModels } from '../config.js';
|
|
22
22
|
import { requestState } from './requestState.js';
|
|
@@ -116,8 +116,8 @@ const build = async (config) => {
|
|
|
116
116
|
await buildPathways(config);
|
|
117
117
|
buildModels(config);
|
|
118
118
|
|
|
119
|
-
// build
|
|
120
|
-
|
|
119
|
+
// build model API endpoints and limiters
|
|
120
|
+
buildModelEndpoints(config);
|
|
121
121
|
|
|
122
122
|
//build api
|
|
123
123
|
const pathways = config.get('pathways');
|
|
@@ -176,8 +176,8 @@ const build = async (config) => {
|
|
|
176
176
|
});
|
|
177
177
|
|
|
178
178
|
// If CORTEX_API_KEY is set, we roll our own auth middleware - usually not used if you're being fronted by a proxy
|
|
179
|
-
const
|
|
180
|
-
if (
|
|
179
|
+
const cortexApiKeys = config.get('cortexApiKeys');
|
|
180
|
+
if (cortexApiKeys && Array.isArray(cortexApiKeys)) {
|
|
181
181
|
app.use((req, res, next) => {
|
|
182
182
|
let providedApiKey = req.headers['cortex-api-key'] || req.query['cortex-api-key'];
|
|
183
183
|
if (!providedApiKey) {
|
|
@@ -185,7 +185,7 @@ const build = async (config) => {
|
|
|
185
185
|
providedApiKey = providedApiKey?.startsWith('Bearer ') ? providedApiKey.slice(7) : providedApiKey;
|
|
186
186
|
}
|
|
187
187
|
|
|
188
|
-
if (
|
|
188
|
+
if (!cortexApiKeys.includes(providedApiKey)) {
|
|
189
189
|
if (req.baseUrl === '/graphql' || req.headers['content-type'] === 'application/graphql') {
|
|
190
190
|
res.status(401)
|
|
191
191
|
.set('WWW-Authenticate', 'Cortex-Api-Key')
|
|
@@ -1,4 +1,6 @@
|
|
|
1
|
-
//
|
|
1
|
+
// ModelExecutor.js
|
|
2
|
+
import CortexRequest from '../lib/cortexRequest.js';
|
|
3
|
+
|
|
2
4
|
import OpenAIChatPlugin from './plugins/openAiChatPlugin.js';
|
|
3
5
|
import OpenAICompletionPlugin from './plugins/openAiCompletionPlugin.js';
|
|
4
6
|
import AzureTranslatePlugin from './plugins/azureTranslatePlugin.js';
|
|
@@ -16,59 +18,59 @@ import OpenAIImagePlugin from './plugins/openAiImagePlugin.js';
|
|
|
16
18
|
import OpenAIDallE3Plugin from './plugins/openAiDallE3Plugin.js';
|
|
17
19
|
import OpenAIVisionPlugin from './plugins/openAiVisionPlugin.js';
|
|
18
20
|
|
|
19
|
-
class
|
|
20
|
-
constructor(
|
|
21
|
+
class ModelExecutor {
|
|
22
|
+
constructor(pathway, model) {
|
|
21
23
|
|
|
22
24
|
let plugin;
|
|
23
25
|
|
|
24
26
|
switch (model.type) {
|
|
25
27
|
case 'OPENAI-CHAT':
|
|
26
|
-
plugin = new OpenAIChatPlugin(
|
|
28
|
+
plugin = new OpenAIChatPlugin(pathway, model);
|
|
27
29
|
break;
|
|
28
30
|
case 'OPENAI-DALLE2':
|
|
29
|
-
plugin = new OpenAIImagePlugin(
|
|
31
|
+
plugin = new OpenAIImagePlugin(pathway, model);
|
|
30
32
|
break;
|
|
31
33
|
case 'OPENAI-DALLE3':
|
|
32
|
-
plugin = new OpenAIDallE3Plugin(
|
|
34
|
+
plugin = new OpenAIDallE3Plugin(pathway, model);
|
|
33
35
|
break;
|
|
34
36
|
case 'OPENAI-CHAT-EXTENSION':
|
|
35
|
-
plugin = new OpenAIChatExtensionPlugin(
|
|
37
|
+
plugin = new OpenAIChatExtensionPlugin(pathway, model);
|
|
36
38
|
break;
|
|
37
39
|
case 'AZURE-TRANSLATE':
|
|
38
|
-
plugin = new AzureTranslatePlugin(
|
|
40
|
+
plugin = new AzureTranslatePlugin(pathway, model);
|
|
39
41
|
break;
|
|
40
42
|
case 'AZURE-COGNITIVE':
|
|
41
|
-
plugin = new AzureCognitivePlugin(
|
|
43
|
+
plugin = new AzureCognitivePlugin(pathway, model);
|
|
42
44
|
break;
|
|
43
45
|
case 'OPENAI-EMBEDDINGS':
|
|
44
|
-
plugin = new OpenAiEmbeddingsPlugin(
|
|
46
|
+
plugin = new OpenAiEmbeddingsPlugin(pathway, model);
|
|
45
47
|
break;
|
|
46
48
|
case 'OPENAI-COMPLETION':
|
|
47
|
-
plugin = new OpenAICompletionPlugin(
|
|
49
|
+
plugin = new OpenAICompletionPlugin(pathway, model);
|
|
48
50
|
break;
|
|
49
51
|
case 'OPENAI-WHISPER':
|
|
50
|
-
plugin = new OpenAIWhisperPlugin(
|
|
52
|
+
plugin = new OpenAIWhisperPlugin(pathway, model);
|
|
51
53
|
break;
|
|
52
54
|
case 'LOCAL-CPP-MODEL':
|
|
53
|
-
plugin = new LocalModelPlugin(
|
|
55
|
+
plugin = new LocalModelPlugin(pathway, model);
|
|
54
56
|
break;
|
|
55
57
|
case 'PALM-CHAT':
|
|
56
|
-
plugin = new PalmChatPlugin(
|
|
58
|
+
plugin = new PalmChatPlugin(pathway, model);
|
|
57
59
|
break;
|
|
58
60
|
case 'PALM-COMPLETION':
|
|
59
|
-
plugin = new PalmCompletionPlugin(
|
|
61
|
+
plugin = new PalmCompletionPlugin(pathway, model);
|
|
60
62
|
break;
|
|
61
63
|
case 'PALM-CODE-COMPLETION':
|
|
62
|
-
plugin = new PalmCodeCompletionPlugin(
|
|
64
|
+
plugin = new PalmCodeCompletionPlugin(pathway, model);
|
|
63
65
|
break;
|
|
64
66
|
case 'COHERE-GENERATE':
|
|
65
|
-
plugin = new CohereGeneratePlugin(
|
|
67
|
+
plugin = new CohereGeneratePlugin(pathway, model);
|
|
66
68
|
break;
|
|
67
69
|
case 'COHERE-SUMMARIZE':
|
|
68
|
-
plugin = new CohereSummarizePlugin(
|
|
70
|
+
plugin = new CohereSummarizePlugin(pathway, model);
|
|
69
71
|
break;
|
|
70
72
|
case 'OPENAI-VISION':
|
|
71
|
-
plugin = new OpenAIVisionPlugin(
|
|
73
|
+
plugin = new OpenAIVisionPlugin(pathway, model);
|
|
72
74
|
break;
|
|
73
75
|
default:
|
|
74
76
|
throw new Error(`Unsupported model type: ${model.type}`);
|
|
@@ -78,10 +80,11 @@ class PathwayPrompter {
|
|
|
78
80
|
}
|
|
79
81
|
|
|
80
82
|
async execute(text, parameters, prompt, pathwayResolver) {
|
|
81
|
-
|
|
83
|
+
const cortexRequest = new CortexRequest({ pathwayResolver });
|
|
84
|
+
return await this.plugin.execute(text, parameters, prompt, cortexRequest);
|
|
82
85
|
}
|
|
83
86
|
}
|
|
84
87
|
|
|
85
88
|
export {
|
|
86
|
-
|
|
89
|
+
ModelExecutor
|
|
87
90
|
};
|
|
@@ -1,4 +1,5 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { ModelExecutor } from './modelExecutor.js';
|
|
2
|
+
import { modelEndpoints } from '../lib/requestExecutor.js';
|
|
2
3
|
// eslint-disable-next-line import/no-extraneous-dependencies
|
|
3
4
|
import { v4 as uuidv4 } from 'uuid';
|
|
4
5
|
import { encode } from 'gpt-3-encoder';
|
|
@@ -14,7 +15,9 @@ import logger from '../lib/logger.js';
|
|
|
14
15
|
const modelTypesExcludedFromProgressUpdates = ['OPENAI-DALLE2', 'OPENAI-DALLE3'];
|
|
15
16
|
|
|
16
17
|
class PathwayResolver {
|
|
17
|
-
|
|
18
|
+
// Optional endpoints override parameter is for testing purposes
|
|
19
|
+
constructor({ config, pathway, args, endpoints }) {
|
|
20
|
+
this.endpoints = endpoints || modelEndpoints;
|
|
18
21
|
this.config = config;
|
|
19
22
|
this.pathway = pathway;
|
|
20
23
|
this.args = args;
|
|
@@ -28,8 +31,8 @@ class PathwayResolver {
|
|
|
28
31
|
args?.model,
|
|
29
32
|
pathway.inputParameters?.model,
|
|
30
33
|
config.get('defaultModelName')
|
|
31
|
-
].find(modelName => modelName && Object.prototype.hasOwnProperty.call(
|
|
32
|
-
this.model =
|
|
34
|
+
].find(modelName => modelName && Object.prototype.hasOwnProperty.call(this.endpoints, modelName));
|
|
35
|
+
this.model = this.endpoints[this.modelName];
|
|
33
36
|
|
|
34
37
|
if (!this.model) {
|
|
35
38
|
throw new Error(`Model ${this.modelName} not found in config`);
|
|
@@ -47,7 +50,7 @@ class PathwayResolver {
|
|
|
47
50
|
|
|
48
51
|
this.previousResult = '';
|
|
49
52
|
this.prompts = [];
|
|
50
|
-
this.
|
|
53
|
+
this.modelExecutor = new ModelExecutor(this.pathway, this.model);
|
|
51
54
|
|
|
52
55
|
Object.defineProperty(this, 'pathwayPrompt', {
|
|
53
56
|
get() {
|
|
@@ -255,7 +258,7 @@ class PathwayResolver {
|
|
|
255
258
|
}
|
|
256
259
|
|
|
257
260
|
truncate(str, n) {
|
|
258
|
-
if (this.
|
|
261
|
+
if (this.modelExecutor.plugin.promptParameters.truncateFromFront) {
|
|
259
262
|
return getFirstNToken(str, n);
|
|
260
263
|
}
|
|
261
264
|
return getLastNToken(str, n);
|
|
@@ -263,7 +266,7 @@ class PathwayResolver {
|
|
|
263
266
|
|
|
264
267
|
async summarizeIfEnabled({ text, ...parameters }) {
|
|
265
268
|
if (this.pathway.useInputSummarization) {
|
|
266
|
-
return await callPathway(
|
|
269
|
+
return await callPathway('summary', { ...this.args, ...parameters, targetLength: 0});
|
|
267
270
|
}
|
|
268
271
|
return text;
|
|
269
272
|
}
|
|
@@ -271,15 +274,15 @@ class PathwayResolver {
|
|
|
271
274
|
// Calculate the maximum token length for a chunk
|
|
272
275
|
getChunkMaxTokenLength() {
|
|
273
276
|
// find the longest prompt
|
|
274
|
-
const maxPromptTokenLength = Math.max(...this.prompts.map((promptData) => this.
|
|
277
|
+
const maxPromptTokenLength = Math.max(...this.prompts.map((promptData) => this.modelExecutor.plugin.getCompiledPrompt('', this.args, promptData).tokenLength));
|
|
275
278
|
|
|
276
279
|
// find out if any prompts use both text input and previous result
|
|
277
280
|
const hasBothProperties = this.prompts.some(prompt => prompt.usesTextInput && prompt.usesPreviousResult);
|
|
278
281
|
|
|
279
282
|
// the token ratio is the ratio of the total prompt to the result text - both have to be included
|
|
280
283
|
// in computing the max token length
|
|
281
|
-
const promptRatio = this.
|
|
282
|
-
let chunkMaxTokenLength = promptRatio * this.
|
|
284
|
+
const promptRatio = this.modelExecutor.plugin.getPromptTokenRatio();
|
|
285
|
+
let chunkMaxTokenLength = promptRatio * this.modelExecutor.plugin.getModelMaxTokenLength() - maxPromptTokenLength - 1;
|
|
283
286
|
|
|
284
287
|
// if we have to deal with prompts that have both text input
|
|
285
288
|
// and previous result, we need to split the maxChunkToken in half
|
|
@@ -386,20 +389,22 @@ class PathwayResolver {
|
|
|
386
389
|
|
|
387
390
|
// If this text is empty, skip applying the prompt as it will likely be a nonsensical result
|
|
388
391
|
if (!/^\s*$/.test(text) || parameters?.file || parameters?.inputVector || this?.modelName.includes('cognitive')) {
|
|
389
|
-
result = await this.
|
|
392
|
+
result = await this.modelExecutor.execute(text, { ...parameters, ...this.savedContext }, prompt, this);
|
|
390
393
|
} else {
|
|
391
394
|
result = text;
|
|
392
395
|
}
|
|
393
396
|
|
|
394
397
|
requestState[this.requestId].completedCount++;
|
|
395
398
|
|
|
396
|
-
|
|
399
|
+
if (parameters.async) {
|
|
400
|
+
const { completedCount, totalCount } = requestState[this.requestId];
|
|
397
401
|
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
402
|
+
if (completedCount < totalCount) {
|
|
403
|
+
await publishRequestProgress({
|
|
404
|
+
requestId: this.requestId,
|
|
405
|
+
progress: completedCount / totalCount,
|
|
406
|
+
});
|
|
407
|
+
}
|
|
403
408
|
}
|
|
404
409
|
|
|
405
410
|
if (prompt.saveResultTo) {
|
|
@@ -4,7 +4,7 @@ import ModelPlugin from './modelPlugin.js';
|
|
|
4
4
|
import { v4 as uuidv4 } from 'uuid';
|
|
5
5
|
import path from 'path';
|
|
6
6
|
import { config } from '../../config.js';
|
|
7
|
-
import { axios } from '../../lib/
|
|
7
|
+
import { axios } from '../../lib/requestExecutor.js';
|
|
8
8
|
import logger from '../../lib/logger.js';
|
|
9
9
|
|
|
10
10
|
const API_URL = config.get('whisperMediaApiUrl');
|
|
@@ -14,8 +14,8 @@ const TOP = 1000;
|
|
|
14
14
|
let DIRECT_FILE_EXTENSIONS = [".txt", ".json", ".csv", ".md", ".xml", ".js", ".html", ".css"];
|
|
15
15
|
|
|
16
16
|
class AzureCognitivePlugin extends ModelPlugin {
|
|
17
|
-
constructor(
|
|
18
|
-
super(
|
|
17
|
+
constructor(pathway, model) {
|
|
18
|
+
super(pathway, model);
|
|
19
19
|
}
|
|
20
20
|
|
|
21
21
|
async getInputVector (text) {
|
|
@@ -23,14 +23,14 @@ class AzureCognitivePlugin extends ModelPlugin {
|
|
|
23
23
|
if(!text || !text.trim()){
|
|
24
24
|
return;
|
|
25
25
|
}
|
|
26
|
-
return JSON.parse(await callPathway(
|
|
26
|
+
return JSON.parse(await callPathway('embeddings', { text }))[0];
|
|
27
27
|
}catch(err){
|
|
28
28
|
logger.error(`Error in calculating input vector for text: ${text}, error: ${err}`);
|
|
29
29
|
}
|
|
30
30
|
}
|
|
31
31
|
|
|
32
32
|
// Set up parameters specific to the Azure Cognitive API
|
|
33
|
-
async getRequestParameters(text, parameters, prompt, mode, indexName, savedContextId,
|
|
33
|
+
async getRequestParameters(text, parameters, prompt, mode, indexName, savedContextId, cortexRequest) {
|
|
34
34
|
const combinedParameters = { ...this.promptParameters, ...parameters };
|
|
35
35
|
const { modelPromptText } = this.getCompiledPrompt(text, combinedParameters, prompt);
|
|
36
36
|
const { inputVector, calculateInputVector, privateData, filter, docId } = combinedParameters;
|
|
@@ -44,13 +44,15 @@ class AzureCognitivePlugin extends ModelPlugin {
|
|
|
44
44
|
searchQuery += ` AND docId:'${docId}'`;
|
|
45
45
|
}
|
|
46
46
|
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
47
|
+
cortexRequest.url = searchUrl;
|
|
48
|
+
cortexRequest.data =
|
|
49
|
+
{ search: searchQuery,
|
|
50
|
+
"searchMode": "all",
|
|
51
|
+
"queryType": "full",
|
|
52
|
+
select: 'id', top: TOP
|
|
53
|
+
};
|
|
54
|
+
|
|
55
|
+
const docsToDelete = JSON.parse(await this.executeRequest(cortexRequest));
|
|
54
56
|
|
|
55
57
|
const value = docsToDelete.value.map(({id}) => ({
|
|
56
58
|
id,
|
|
@@ -144,13 +146,13 @@ class AzureCognitivePlugin extends ModelPlugin {
|
|
|
144
146
|
}
|
|
145
147
|
|
|
146
148
|
// Execute the request to the Azure Cognitive API
|
|
147
|
-
async execute(text, parameters, prompt,
|
|
148
|
-
const { requestId,
|
|
149
|
+
async execute(text, parameters, prompt, cortexRequest) {
|
|
150
|
+
const { requestId, savedContextId, savedContext } = cortexRequest.pathwayResolver;
|
|
149
151
|
const mode = this.promptParameters.mode || 'search';
|
|
150
152
|
let url = this.ensureMode(this.requestUrl(text), mode == 'delete' ? 'index' : mode);
|
|
151
153
|
const indexName = parameters.indexName || 'indexcortex';
|
|
152
154
|
url = this.ensureIndex(url, indexName);
|
|
153
|
-
const headers =
|
|
155
|
+
const headers = cortexRequest.headers;
|
|
154
156
|
|
|
155
157
|
const { file } = parameters;
|
|
156
158
|
if(file){
|
|
@@ -175,15 +177,14 @@ class AzureCognitivePlugin extends ModelPlugin {
|
|
|
175
177
|
throw Error(`No data can be extracted out of file!`);
|
|
176
178
|
}
|
|
177
179
|
|
|
178
|
-
|
|
179
|
-
return await callPathway(this.config, 'cognitive_insert', {...parameters, file:null, text:data });
|
|
180
|
+
return await callPathway('cognitive_insert', {...parameters, file:null, text:data });
|
|
180
181
|
}
|
|
181
182
|
|
|
182
183
|
if (mode === 'index' && (!text || !text.trim()) ){
|
|
183
184
|
return; // nothing to index
|
|
184
185
|
}
|
|
185
186
|
|
|
186
|
-
const { data, params } = await this.getRequestParameters(text, parameters, prompt, mode, indexName, savedContextId,
|
|
187
|
+
const { data, params } = await this.getRequestParameters(text, parameters, prompt, mode, indexName, savedContextId, cortexRequest);
|
|
187
188
|
|
|
188
189
|
// update contextid last used
|
|
189
190
|
savedContext["lastUsed"] = new Date().toISOString();
|
|
@@ -193,11 +194,15 @@ class AzureCognitivePlugin extends ModelPlugin {
|
|
|
193
194
|
}
|
|
194
195
|
|
|
195
196
|
// execute the request
|
|
196
|
-
|
|
197
|
+
cortexRequest.url = url;
|
|
198
|
+
cortexRequest.data = data;
|
|
199
|
+
cortexRequest.params = params;
|
|
200
|
+
cortexRequest.headers = headers;
|
|
201
|
+
const result = await this.executeRequest(cortexRequest);
|
|
197
202
|
|
|
198
203
|
// if still has more to delete
|
|
199
204
|
if (mode === 'delete' && data?.value?.length == TOP) {
|
|
200
|
-
return await this.execute(text, parameters, prompt,
|
|
205
|
+
return await this.execute(text, parameters, prompt, cortexRequest);
|
|
201
206
|
}
|
|
202
207
|
|
|
203
208
|
return result;
|
|
@@ -3,8 +3,8 @@ import ModelPlugin from './modelPlugin.js';
|
|
|
3
3
|
import logger from '../../lib/logger.js';
|
|
4
4
|
|
|
5
5
|
class AzureTranslatePlugin extends ModelPlugin {
|
|
6
|
-
constructor(
|
|
7
|
-
super(
|
|
6
|
+
constructor(pathway, model) {
|
|
7
|
+
super(pathway, model);
|
|
8
8
|
}
|
|
9
9
|
|
|
10
10
|
// Set up parameters specific to the Azure Translate API
|
|
@@ -25,17 +25,13 @@ class AzureTranslatePlugin extends ModelPlugin {
|
|
|
25
25
|
}
|
|
26
26
|
|
|
27
27
|
// Execute the request to the Azure Translate API
|
|
28
|
-
async execute(text, parameters, prompt,
|
|
28
|
+
async execute(text, parameters, prompt, cortexRequest) {
|
|
29
29
|
const requestParameters = this.getRequestParameters(text, parameters, prompt);
|
|
30
|
-
const { requestId, pathway} = pathwayResolver;
|
|
31
30
|
|
|
32
|
-
|
|
31
|
+
cortexRequest.data = requestParameters.data;
|
|
32
|
+
cortexRequest.params = requestParameters.params;
|
|
33
33
|
|
|
34
|
-
|
|
35
|
-
const params = requestParameters.params;
|
|
36
|
-
const headers = this.model.headers || {};
|
|
37
|
-
|
|
38
|
-
return this.executeRequest(url, data, params, headers, prompt, requestId, pathway);
|
|
34
|
+
return this.executeRequest(cortexRequest);
|
|
39
35
|
}
|
|
40
36
|
|
|
41
37
|
// Parse the response from the Azure Translate API
|
|
@@ -2,8 +2,8 @@
|
|
|
2
2
|
import ModelPlugin from './modelPlugin.js';
|
|
3
3
|
|
|
4
4
|
class CohereGeneratePlugin extends ModelPlugin {
|
|
5
|
-
constructor(
|
|
6
|
-
super(
|
|
5
|
+
constructor(pathway, model) {
|
|
6
|
+
super(pathway, model);
|
|
7
7
|
}
|
|
8
8
|
|
|
9
9
|
// Set up parameters specific to the Cohere API
|
|
@@ -33,17 +33,10 @@ class CohereGeneratePlugin extends ModelPlugin {
|
|
|
33
33
|
}
|
|
34
34
|
|
|
35
35
|
// Execute the request to the Cohere API
|
|
36
|
-
async execute(text, parameters, prompt,
|
|
37
|
-
const url = this.requestUrl();
|
|
36
|
+
async execute(text, parameters, prompt, cortexRequest) {
|
|
38
37
|
const requestParameters = this.getRequestParameters(text, parameters, prompt);
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
const data = { ...(this.model.params || {}), ...requestParameters };
|
|
42
|
-
const params = {};
|
|
43
|
-
const headers = {
|
|
44
|
-
...this.model.headers || {}
|
|
45
|
-
};
|
|
46
|
-
return this.executeRequest(url, data, params, headers, prompt, requestId, pathway);
|
|
38
|
+
cortexRequest.data = { ...cortexRequest.data, ...requestParameters };
|
|
39
|
+
return this.executeRequest(cortexRequest);
|
|
47
40
|
}
|
|
48
41
|
|
|
49
42
|
// Parse the response from the Cohere API
|