@aj-archipelago/cortex 0.0.7 → 0.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +25 -1
- package/graphql/plugins/azureTranslatePlugin.js +0 -10
- package/graphql/plugins/modelPlugin.js +16 -2
- package/graphql/plugins/openAiChatPlugin.js +0 -16
- package/graphql/plugins/openAiCompletionPlugin.js +0 -15
- package/graphql/plugins/openAiWhisperPlugin.js +35 -23
- package/lib/fileChunker.js +4 -3
- package/package.json +1 -1
package/README.md
CHANGED
@@ -81,7 +81,31 @@ To add a new pathway to Cortex, you create a new JavaScript file and define the
  ### Prompt
  When you define a new pathway, you need to at least specify a prompt that will be passed to the model for processing. In the simplest case, a prompt is really just a string, but the prompt is polymorphic - it can be a string or an object that contains information for the model API that you wish to call. Prompts can also be an array of strings or an array of objects for sequential operations. In this way Cortex aims to support everything from the simplest to the most advanced prompting scenarios.
  
-
+ ```js
+ // a prompt can be a string
+ prompt: `{{{text}}}\nCopy the names of all people and places exactly from this document in the language above:\n`
+
+ // or an array of strings
+ prompt: [
+     `{{{text}}}\nCopy the names of all people and places exactly from this document in the language above:\n`,
+     `Original Language:\n{{{previousResult}}}\n\n{{to}}:\n`,
+     `Entities in the document:\n\n{{{previousResult}}}\n\nDocument:\n{{{text}}}\nRewrite the document in {{to}}. If the document is already in {{to}}, copy it exactly below:\n`
+ ]
+
+ // or an array of one or more Prompt objects
+ // as you can see below, a Prompt object can also have a messages array, which is how you can
+ // express your prompts for chat-style interfaces
+ prompt: [
+     new Prompt({ messages: [
+         {"role": "system", "content": "Assistant is a highly skilled multilingual translator for a prestigious news agency. When the user posts any text in any language, assistant will create a translation of that text in {{to}}. Assistant will produce only the translation and no additional notes or commentary."},
+         {"role": "user", "content": "{{{text}}}"}
+     ]}),
+ ]
+ ```
+
+ If a prompt is an array, the individual prompts in the array will be executed sequentially by the Cortex prompt execution engine. The execution engine deals with all of the complexities of chunking input content and executing the sequence of prompts against those chunks in a way that optimizes performance and ensures the integrity of the pathway logic.
+
+ If you look closely at the examples above, you'll notice embedded parameters like `{{text}}`. In Cortex, all prompt strings are actually [Handlebars](https://handlebarsjs.com/) templates. So in this case, that parameter will be replaced before prompt execution with the incoming query variable called `text`. You can refer to almost any pathway parameter or system property in the prompt definition and it will be replaced before execution.
  ### Parameters
  Pathways support an arbitrary number of input parameters. These are defined in the pathway like this:
  ```js
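The Handlebars substitution described in the new README text is straightforward to picture on its own. Below is a minimal sketch, not taken from the package, assuming only the `handlebars` dependency that the plugins themselves use and illustrative variable names (`text`, `to`) that mirror the README examples:

```js
const handlebars = require("handlebars");

// A prompt string is an ordinary Handlebars template; Cortex compiles it and
// fills in pathway parameters and query variables before execution.
const template = handlebars.compile(
    "{{{text}}}\nRewrite the document in {{to}}:\n"
);

// Illustrative query variables -- in a real pathway these arrive from the GraphQL query.
const rendered = template({ text: "Bonjour le monde.", to: "English" });
console.log(rendered);
// => "Bonjour le monde.\nRewrite the document in English:\n"
```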
package/graphql/plugins/azureTranslatePlugin.js
CHANGED
@@ -1,20 +1,10 @@
  // AzureTranslatePlugin.js
  const ModelPlugin = require('./modelPlugin');
- const handlebars = require("handlebars");
- const { encode } = require("gpt-3-encoder");
  
  class AzureTranslatePlugin extends ModelPlugin {
      constructor(config, pathway) {
          super(config, pathway);
      }
-
-     getCompiledPrompt(text, parameters, prompt) {
-         const combinedParameters = { ...this.promptParameters, ...parameters };
-         const modelPrompt = this.getModelPrompt(prompt, parameters);
-         const modelPromptText = modelPrompt.prompt ? handlebars.compile(modelPrompt.prompt)({ ...combinedParameters, text }) : '';
-
-         return { modelPromptText, tokenLength: encode(modelPromptText).length };
-     }
  
      // Set up parameters specific to the Azure Translate API
      getRequestParameters(text, parameters, prompt) {
package/graphql/plugins/modelPlugin.js
CHANGED
@@ -73,6 +73,20 @@ class ModelPlugin {
          return output;
      }
  
+     getCompiledPrompt(text, parameters, prompt) {
+         const combinedParameters = { ...this.promptParameters, ...parameters };
+         const modelPrompt = this.getModelPrompt(prompt, parameters);
+         const modelPromptText = modelPrompt.prompt ? handlebars.compile(modelPrompt.prompt)({ ...combinedParameters, text }) : '';
+         const modelPromptMessages = this.getModelPromptMessages(modelPrompt, combinedParameters, text);
+         const modelPromptMessagesML = this.messagesToChatML(modelPromptMessages);
+
+         if (modelPromptMessagesML) {
+             return { modelPromptMessages, tokenLength: encode(modelPromptMessagesML).length };
+         } else {
+             return { modelPromptText, tokenLength: encode(modelPromptText).length };
+         }
+     }
+
      getModelMaxTokenLength() {
          return (this.promptParameters.maxTokenLength ?? this.model.maxTokenLength ?? DEFAULT_MAX_TOKENS);
      }
@@ -161,7 +175,7 @@ class ModelPlugin {
  
          const modelInput = data.prompt || (data.messages && data.messages[0].content) || (data.length > 0 && data[0].Text) || null;
  
-         if (data.messages && data.messages.length > 1) {
+         if (data && data.messages && data.messages.length > 1) {
              data.messages.forEach((message, index) => {
                  const words = message.content.split(" ");
                  const tokenCount = encode(message.content).length;
@@ -175,7 +189,7 @@ class ModelPlugin {
  
          console.log(`\x1b[34m> ${this.parseResponse(responseData)}\x1b[0m`);
  
-         prompt.debugInfo += `${separator}${JSON.stringify(data)}`;
+         prompt && prompt.debugInfo && (prompt.debugInfo += `${separator}${JSON.stringify(data)}`);
      }
  
      async executeRequest(url, data, params, headers, prompt) {
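With `getCompiledPrompt` now living on the base `ModelPlugin` class, the individual model plugins below can drop their duplicated copies and simply call the inherited method. A minimal sketch of how a subclass might consume it; the plugin name and the simplified return shape here are illustrative, not the package's actual OpenAI request payloads:

```js
// Hypothetical subclass for illustration only -- not part of the package.
const ModelPlugin = require('./modelPlugin');

class ExampleChatPlugin extends ModelPlugin {
    getRequestParameters(text, parameters, prompt) {
        // Inherited from ModelPlugin: compiles either a plain prompt string or a
        // messages array, and reports the token length of whichever was produced.
        const { modelPromptText, modelPromptMessages, tokenLength } =
            this.getCompiledPrompt(text, parameters, prompt);

        // Chat-style models take messages; completion-style models take a string.
        return modelPromptMessages && modelPromptMessages.length
            ? { messages: modelPromptMessages, tokenLength }
            : { prompt: modelPromptText, tokenLength };
    }
}

module.exports = ExampleChatPlugin;
```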
package/graphql/plugins/openAiChatPlugin.js
CHANGED
@@ -1,27 +1,11 @@
  // OpenAIChatPlugin.js
  const ModelPlugin = require('./modelPlugin');
- const handlebars = require("handlebars");
- const { encode } = require("gpt-3-encoder");
  
  class OpenAIChatPlugin extends ModelPlugin {
      constructor(config, pathway) {
          super(config, pathway);
      }
  
-     getCompiledPrompt(text, parameters, prompt) {
-         const combinedParameters = { ...this.promptParameters, ...parameters };
-         const modelPrompt = this.getModelPrompt(prompt, parameters);
-         const modelPromptText = modelPrompt.prompt ? handlebars.compile(modelPrompt.prompt)({ ...combinedParameters, text }) : '';
-         const modelPromptMessages = this.getModelPromptMessages(modelPrompt, combinedParameters, text);
-         const modelPromptMessagesML = this.messagesToChatML(modelPromptMessages);
-
-         if (modelPromptMessagesML) {
-             return { modelPromptMessages, tokenLength: encode(modelPromptMessagesML).length };
-         } else {
-             return { modelPromptText, tokenLength: encode(modelPromptText).length };
-         }
-     }
-
      // Set up parameters specific to the OpenAI Chat API
      getRequestParameters(text, parameters, prompt) {
          const { modelPromptText, modelPromptMessages, tokenLength } = this.getCompiledPrompt(text, parameters, prompt);
package/graphql/plugins/openAiCompletionPlugin.js
CHANGED
@@ -1,6 +1,5 @@
  // OpenAICompletionPlugin.js
  const ModelPlugin = require('./modelPlugin');
- const handlebars = require("handlebars");
  const { encode } = require("gpt-3-encoder");
  
  class OpenAICompletionPlugin extends ModelPlugin {
@@ -8,20 +7,6 @@ class OpenAICompletionPlugin extends ModelPlugin {
          super(config, pathway);
      }
  
-     getCompiledPrompt(text, parameters, prompt) {
-         const combinedParameters = { ...this.promptParameters, ...parameters };
-         const modelPrompt = this.getModelPrompt(prompt, parameters);
-         const modelPromptText = modelPrompt.prompt ? handlebars.compile(modelPrompt.prompt)({ ...combinedParameters, text }) : '';
-         const modelPromptMessages = this.getModelPromptMessages(modelPrompt, combinedParameters, text);
-         const modelPromptMessagesML = this.messagesToChatML(modelPromptMessages);
-
-         if (modelPromptMessagesML) {
-             return { modelPromptMessages, tokenLength: encode(modelPromptMessagesML).length };
-         } else {
-             return { modelPromptText, tokenLength: encode(modelPromptText).length };
-         }
-     }
-
      // Set up parameters specific to the OpenAI Completion API
      getRequestParameters(text, parameters, prompt) {
          let { modelPromptMessages, modelPromptText, tokenLength } = this.getCompiledPrompt(text, parameters, prompt);
package/graphql/plugins/openAiWhisperPlugin.js
CHANGED
@@ -1,7 +1,5 @@
  // OpenAICompletionPlugin.js
  const ModelPlugin = require('./modelPlugin');
- const handlebars = require("handlebars");
- const { encode } = require("gpt-3-encoder");
  const FormData = require('form-data');
  const fs = require('fs');
  const { splitMediaFile, isValidYoutubeUrl, processYoutubeUrl, deleteTempPath } = require('../../lib/fileChunker');
@@ -12,14 +10,6 @@ class OpenAIWhisperPlugin extends ModelPlugin {
          super(config, pathway);
      }
  
-     getCompiledPrompt(text, parameters, prompt) {
-         const combinedParameters = { ...this.promptParameters, ...parameters };
-         const modelPrompt = this.getModelPrompt(prompt, parameters);
-         const modelPromptText = modelPrompt.prompt ? handlebars.compile(modelPrompt.prompt)({ ...combinedParameters, text }) : '';
-
-         return { modelPromptText, tokenLength: encode(modelPromptText).length };
-     }
-
      // Execute the request to the OpenAI Whisper API
      async execute(text, parameters, prompt, pathwayResolver) {
          const url = this.requestUrl(text);
@@ -41,29 +31,51 @@ class OpenAIWhisperPlugin extends ModelPlugin {
              }
          }
  
-         let result
+         let result = ``;
          let { file } = parameters;
          let folder;
          const isYoutubeUrl = isValidYoutubeUrl(file);
+         let totalCount = 0;
+         let completedCount = 0;
+         const { requestId } = pathwayResolver;
  
-
-
-             file = await processYoutubeUrl(file);
-         }
-
-         const mediaSplit = await splitMediaFile(file);
-
-         const { requestId } = pathwayResolver;
+         const sendProgress = () => {
+             completedCount++;
              pubsub.publish('REQUEST_PROGRESS', {
                  requestProgress: {
                      requestId,
-                     progress:
+                     progress: completedCount / totalCount,
                      data: null,
                  }
              });
+         }
+
+         try {
+             if (isYoutubeUrl) {
+                 // totalCount += 1; // extra 1 step for youtube download
+                 file = await processYoutubeUrl(file);
+             }
+
+             const { chunkPromises, uniqueOutputPath } = await splitMediaFile(file);
+             folder = uniqueOutputPath;
+             totalCount += chunkPromises.length * 2; // 2 steps for each chunk (download and upload)
+             // isYoutubeUrl && sendProgress(); // send progress for youtube download after total count is calculated
+
+             // sequential download of chunks
+             const chunks = [];
+             for (const chunkPromise of chunkPromises) {
+                 sendProgress();
+                 chunks.push(await chunkPromise);
+             }
+
+             // sequential processing of chunks
+             for (const chunk of chunks) {
+                 result += await processChunk(chunk);
+                 sendProgress();
+             }
  
-
-         result = await Promise.all(mediaSplit.chunks.map(processChunk));
+             // parallel processing, dropped
+             // result = await Promise.all(mediaSplit.chunks.map(processChunk));
  
          } catch (error) {
              console.error("An error occurred:", error);
@@ -71,7 +83,7 @@ class OpenAIWhisperPlugin extends ModelPlugin {
              isYoutubeUrl && (await deleteTempPath(file));
              folder && (await deleteTempPath(folder));
          }
-         return result
+         return result;
      }
  }
  
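The refactored Whisper plugin above replaces the parallel `Promise.all` pass with two sequential loops so it can publish incremental progress over the `REQUEST_PROGRESS` channel. The counting pattern is easy to lift out on its own; here is a minimal sketch under stated assumptions, with a stand-in publish function and per-chunk processor rather than the package's pubsub wiring and Whisper request code:

```js
// Stand-ins for the package's pubsub channel and per-chunk Whisper request.
const publishProgress = (requestId, progress) =>
    console.log(`[${requestId}] progress: ${(progress * 100).toFixed(0)}%`);
const processChunk = async (chunk) => `transcript for ${chunk}\n`;

async function transcribeSequentially(requestId, chunkPromises) {
    // 2 steps per chunk: one for the chunk becoming available, one for processing it.
    const totalCount = chunkPromises.length * 2;
    let completedCount = 0;
    const sendProgress = () => publishProgress(requestId, ++completedCount / totalCount);

    // First drain the chunk promises one at a time...
    const chunks = [];
    for (const chunkPromise of chunkPromises) {
        sendProgress();
        chunks.push(await chunkPromise);
    }

    // ...then transcribe each chunk in order, accumulating the result string.
    let result = '';
    for (const chunk of chunks) {
        result += await processChunk(chunk);
        sendProgress();
    }
    return result;
}

// Example: three already-resolved "chunks".
transcribeSequentially('req-1', ['a.mp3', 'b.mp3', 'c.mp3'].map(c => Promise.resolve(c)))
    .then(console.log);
```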
package/lib/fileChunker.js
CHANGED
@@ -70,9 +70,10 @@ async function splitMediaFile(inputPath, chunkDurationInSeconds = 600) {
              chunkPromises.push(chunkPromise);
          }
  
-         const chunkedFiles = await Promise.all(chunkPromises);
-         console.log('All chunks processed. Chunked file names:', chunkedFiles);
-         return { chunks: chunkedFiles, folder: uniqueOutputPath }
+         // const chunkedFiles = await Promise.all(chunkPromises);
+         // console.log('All chunks processed. Chunked file names:', chunkedFiles);
+         // return { chunks: chunkedFiles, folder: uniqueOutputPath }
+         return { chunkPromises, uniqueOutputPath }
      } catch (err) {
          console.error('Error occurred during the splitting process:', err);
      }
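`splitMediaFile` now hands back the unresolved `chunkPromises` (plus the temp folder path) instead of awaiting them all, leaving the caller in charge of pacing and cleanup, as the Whisper plugin above does. A minimal sketch of a caller under that contract; the wrapper name, the `onChunk` callback, and the require path are illustrative, while `splitMediaFile` and `deleteTempPath` are the module's own exports:

```js
const { splitMediaFile, deleteTempPath } = require('./fileChunker');

// Illustrative consumer: awaits each chunk as it becomes ready instead of
// blocking on Promise.all, then cleans up the temp folder in all cases.
async function withChunks(inputPath, onChunk) {
    const { chunkPromises, uniqueOutputPath } = await splitMediaFile(inputPath);
    try {
        for (const chunkPromise of chunkPromises) {
            const chunkPath = await chunkPromise;
            await onChunk(chunkPath);
        }
    } finally {
        await deleteTempPath(uniqueOutputPath);
    }
}

// Usage: log each chunk file path as it is produced.
// withChunks('./interview.mp3', async (p) => console.log('chunk ready:', p));
```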
package/package.json
CHANGED