@aj-archipelago/cortex 1.0.12 → 1.0.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/config.js +21 -1
- package/helper_apps/MediaFileChunker/docHelper.js +94 -0
- package/helper_apps/MediaFileChunker/fileChunker.js +1 -1
- package/helper_apps/MediaFileChunker/index.js +44 -31
- package/helper_apps/MediaFileChunker/package-lock.json +1647 -17
- package/helper_apps/MediaFileChunker/package.json +5 -1
- package/helper_apps/MediaFileChunker/start.js +2 -1
- package/lib/request.js +3 -3
- package/package.json +1 -1
- package/pathways/cognitive_delete.js +10 -0
- package/pathways/cognitive_insert.js +14 -0
- package/pathways/cognitive_search.js +13 -0
- package/pathways/embeddings.js +14 -0
- package/pathways/index.js +8 -0
- package/pathways/language.js +10 -0
- package/server/pathwayPrompter.js +8 -0
- package/server/pathwayResolver.js +1 -1
- package/server/plugins/azureCognitivePlugin.js +143 -0
- package/server/plugins/openAiEmbeddingsPlugin.js +38 -0
|
@@ -13,11 +13,15 @@
|
|
|
13
13
|
"@ffprobe-installer/ffprobe": "^2.0.0",
|
|
14
14
|
"axios": "^1.3.6",
|
|
15
15
|
"busboy": "^1.6.0",
|
|
16
|
+
"cors": "^2.8.5",
|
|
16
17
|
"express": "^4.18.2",
|
|
17
18
|
"fluent-ffmpeg": "^2.1.2",
|
|
18
19
|
"ioredis": "^5.3.1",
|
|
20
|
+
"mammoth": "^1.6.0",
|
|
21
|
+
"pdfjs-dist": "^3.9.179",
|
|
19
22
|
"public-ip": "^6.0.1",
|
|
20
23
|
"uuid": "^9.0.0",
|
|
21
|
-
"ytdl-core": "
|
|
24
|
+
"ytdl-core": "git+ssh://git@github.com:khlevon/node-ytdl-core.git#v4.11.4-patch.2",
|
|
25
|
+
"xlsx": "^0.18.5"
|
|
22
26
|
}
|
|
23
27
|
}
|
|
@@ -2,6 +2,7 @@ import MediaFileChunker from "./index.js";
|
|
|
2
2
|
import express from "express";
|
|
3
3
|
import { fileURLToPath } from 'url';
|
|
4
4
|
import { dirname, join } from 'path';
|
|
5
|
+
import cors from 'cors';
|
|
5
6
|
|
|
6
7
|
import { publicIpv4 } from 'public-ip';
|
|
7
8
|
const ipAddress = await publicIpv4();
|
|
@@ -10,7 +11,7 @@ const app = express();
|
|
|
10
11
|
const port = process.env.PORT || 7071;
|
|
11
12
|
const publicFolder = join(dirname(fileURLToPath(import.meta.url)), 'files');
|
|
12
13
|
|
|
13
|
-
|
|
14
|
+
app.use(cors());
|
|
14
15
|
// Serve static files from the public folder
|
|
15
16
|
app.use('/files', express.static(publicFolder));
|
|
16
17
|
|
package/lib/request.js
CHANGED
|
@@ -70,16 +70,16 @@ const postRequest = async ({ url, data, params, headers, cache }, model, request
|
|
|
70
70
|
let promises = [];
|
|
71
71
|
for (let i = 0; i < MAX_RETRY; i++) {
|
|
72
72
|
const modelProperties = config.get('models')[model];
|
|
73
|
-
const enableDuplicateRequests = pathway
|
|
73
|
+
const enableDuplicateRequests = pathway?.enableDuplicateRequests !== undefined ? pathway.enableDuplicateRequests : config.get('enableDuplicateRequests');
|
|
74
74
|
let maxDuplicateRequests = enableDuplicateRequests ? MAX_DUPLICATE_REQUESTS : 1;
|
|
75
|
-
let duplicateRequestAfter = (pathway
|
|
75
|
+
let duplicateRequestAfter = (pathway?.duplicateRequestAfter || DUPLICATE_REQUEST_AFTER) * 1000;
|
|
76
76
|
|
|
77
77
|
if (enableDuplicateRequests) {
|
|
78
78
|
//console.log(`>>> [${requestId}] Duplicate requests enabled after ${duplicateRequestAfter / 1000} seconds`);
|
|
79
79
|
}
|
|
80
80
|
|
|
81
81
|
const axiosConfigObj = { params, headers, cache };
|
|
82
|
-
const streamRequested = (params
|
|
82
|
+
const streamRequested = (params?.stream || data?.stream);
|
|
83
83
|
if (streamRequested && modelProperties.supportsStreaming) {
|
|
84
84
|
axiosConfigObj.responseType = 'stream';
|
|
85
85
|
promises.push(limiters[model].schedule(() => postWithMonitor(model, url, data, axiosConfigObj)));
|
package/package.json
CHANGED
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
// Pathway definition that sends text to the Azure Cognitive plugin in
// 'index' mode so that it is stored as a document in the index.
const cognitiveInsertPathway = {
    prompt: `{{text}}`,
    model: 'azure-cognitive',

    // Parameters callers may supply with the request.
    inputParameters: {
        inputVector: ``,
        file: ``,
        privateData: true,
        docId: ``,
    },

    // Operation the plugin performs for this pathway. The Azure Cognitive
    // plugin also understands 'search' (its default) and 'delete'.
    mode: 'index',
    inputChunkSize: 500,
    enableDuplicateRequests: false,
    timeout: 300,
};

export default cognitiveInsertPathway;
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
// embeddings.js
|
|
2
|
+
// Embeddings module that returns the embeddings for the text.
|
|
3
|
+
|
|
4
|
+
// Pathway definition for the 'oai-embeddings' model: the prompt is the raw
// text, and callers may instead pass an explicit `input` array.
const embeddingsPathway = {
    prompt: `{{text}}`,
    model: 'oai-embeddings',
    enableCache: true,

    // Parameters callers may supply with the request.
    inputParameters: {
        input: [],
    },

    enableDuplicateRequests: false,
    timeout: 300,
};

export default embeddingsPathway;
|
|
14
|
+
|
package/pathways/index.js
CHANGED
|
@@ -1,6 +1,9 @@
|
|
|
1
1
|
import edit from './edit.js';
|
|
2
2
|
import chat from './chat.js';
|
|
3
3
|
import bias from './bias.js';
|
|
4
|
+
import cognitive_delete from './cognitive_delete.js';
|
|
5
|
+
import cognitive_insert from './cognitive_insert.js';
|
|
6
|
+
import cognitive_search from './cognitive_search.js';
|
|
4
7
|
import complete from './complete.js';
|
|
5
8
|
import entities from './entities.js';
|
|
6
9
|
import paraphrase from './paraphrase.js';
|
|
@@ -13,12 +16,17 @@ import test_langchain from './test_langchain.mjs';
|
|
|
13
16
|
import test_palm_chat from './test_palm_chat.js';
|
|
14
17
|
import transcribe from './transcribe.js';
|
|
15
18
|
import translate from './translate.js';
|
|
19
|
+
import embeddings from './embeddings.js';
|
|
16
20
|
|
|
17
21
|
export {
|
|
18
22
|
edit,
|
|
19
23
|
chat,
|
|
20
24
|
bias,
|
|
25
|
+
cognitive_delete,
|
|
26
|
+
cognitive_insert,
|
|
27
|
+
cognitive_search,
|
|
21
28
|
complete,
|
|
29
|
+
embeddings,
|
|
22
30
|
entities,
|
|
23
31
|
paraphrase,
|
|
24
32
|
sentiment,
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
// language.js
|
|
2
|
+
// Analyze the language of a given text and return the language code.
|
|
3
|
+
|
|
4
|
+
// Pathway definition that asks the model for the ISO 639-1 code of the
// language the supplied text is written in.
const languagePathway = {
    // Caching is enabled for this prompt; remove the following line to disable it.
    enableCache: true,
    temperature: 0,

    prompt: `{{text}}\n\nPick one language that best represents what the text above is written in. Please return the ISO 639-1 two letter language code:\n`
};

export default languagePathway;
|
|
@@ -10,6 +10,8 @@ import PalmCompletionPlugin from './plugins/palmCompletionPlugin.js';
|
|
|
10
10
|
import PalmCodeCompletionPlugin from './plugins/palmCodeCompletionPlugin.js';
|
|
11
11
|
import CohereGeneratePlugin from './plugins/cohereGeneratePlugin.js';
|
|
12
12
|
import CohereSummarizePlugin from './plugins/cohereSummarizePlugin.js';
|
|
13
|
+
import AzureCognitivePlugin from './plugins/azureCognitivePlugin.js';
|
|
14
|
+
import OpenAiEmbeddingsPlugin from './plugins/openAiEmbeddingsPlugin.js';
|
|
13
15
|
|
|
14
16
|
class PathwayPrompter {
|
|
15
17
|
constructor(config, pathway, modelName, model) {
|
|
@@ -26,6 +28,12 @@ class PathwayPrompter {
|
|
|
26
28
|
case 'AZURE-TRANSLATE':
|
|
27
29
|
plugin = new AzureTranslatePlugin(config, pathway, modelName, model);
|
|
28
30
|
break;
|
|
31
|
+
case 'AZURE-COGNITIVE':
|
|
32
|
+
plugin = new AzureCognitivePlugin(config, pathway, modelName, model);
|
|
33
|
+
break;
|
|
34
|
+
case 'OPENAI-EMBEDDINGS':
|
|
35
|
+
plugin = new OpenAiEmbeddingsPlugin(config, pathway, modelName, model);
|
|
36
|
+
break;
|
|
29
37
|
case 'OPENAI-COMPLETION':
|
|
30
38
|
plugin = new OpenAICompletionPlugin(config, pathway, modelName, model);
|
|
31
39
|
break;
|
|
@@ -382,7 +382,7 @@ class PathwayResolver {
|
|
|
382
382
|
let result = '';
|
|
383
383
|
|
|
384
384
|
// If this text is empty, skip applying the prompt as it will likely be a nonsensical result
|
|
385
|
-
if (!/^\s*$/.test(text) || parameters?.file) {
|
|
385
|
+
if (!/^\s*$/.test(text) || parameters?.file || parameters?.inputVector || this?.modelName.includes('cognitive')) {
|
|
386
386
|
result = await this.pathwayPrompter.execute(text, { ...parameters, ...this.savedContext }, prompt, this);
|
|
387
387
|
} else {
|
|
388
388
|
result = text;
|
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
// Azure Cognitive Services plugin for the server
|
|
2
|
+
import { callPathway } from '../../lib/pathwayTools.js';
|
|
3
|
+
import ModelPlugin from './modelPlugin.js';
|
|
4
|
+
import { v4 as uuidv4 } from 'uuid';
|
|
5
|
+
|
|
6
|
+
const TOP = 1000;
|
|
7
|
+
|
|
8
|
+
/**
 * Model plugin that talks to an Azure Cognitive Search index.
 *
 * The pathway's `mode` prompt parameter selects the operation:
 *   - 'search' (default): text or vector query against the index
 *   - 'index':            upload a single document built from the input text
 *   - 'delete':           look up the owner's documents and delete them
 */
class AzureCognitivePlugin extends ModelPlugin {
    constructor(config, pathway, modelName, model) {
        super(config, pathway, modelName, model);
    }

    /**
     * Builds the request body for the selected mode.
     *
     * @param {string} text - Input text (document content or search text).
     * @param {object} parameters - Request parameters; `inputVector`, `filter`
     *   and `docId` are read after merging with the prompt parameters.
     * @param {object} prompt - Prompt used to compile the search text.
     * @param {string} mode - 'delete', 'index', or anything else for search.
     * @param {string} indexName - Target index name.
     * @param {string} savedContextId - Owner id used to scope documents.
     * @param {object} requestContext - `{headers, requestId, pathway, url}`;
     *   used for the lookup request issued in delete mode (`url` is unused).
     * @returns {Promise<{data: object}>} The body to send to the service.
     */
    async getRequestParameters(text, parameters, prompt, mode, indexName, savedContextId, {headers, requestId, pathway, url}) {
        const combinedParameters = { ...this.promptParameters, ...parameters };
        const { modelPromptText } = this.getCompiledPrompt(text, combinedParameters, prompt);
        const { inputVector, filter, docId } = combinedParameters;
        const data = {};

        if (mode === 'delete') {
            let searchUrl = this.ensureMode(this.requestUrl(text), 'search');
            // Look the documents up in the same index the delete batch is sent
            // to; otherwise ids from a different index would be submitted.
            if (indexName) {
                searchUrl = this.ensureIndex(searchUrl, indexName);
            }

            let searchQuery = `owner:${savedContextId}`;
            if (docId) {
                searchQuery += ` AND docId:'${docId}'`;
            }

            const lookupBody = {
                search: searchQuery,
                searchMode: 'all',
                queryType: 'full',
                select: 'id',
                top: TOP,
            };
            const docsToDelete = JSON.parse(
                await this.executeRequest(searchUrl, lookupBody, {}, headers, prompt, requestId, pathway)
            );

            // Tolerate a response without a `value` array: nothing to delete.
            const value = (docsToDelete?.value ?? []).map(({ id }) => ({
                id,
                '@search.action': 'delete',
            }));

            return { data: { value } };
        }

        if (mode === 'index') {
            // Accept the vector as an array or as a JSON string, matching what
            // search mode accepts; fall back to computing the embedding.
            const suppliedVector = this.parseInputVector(inputVector);
            const contentVector = suppliedVector
                || JSON.parse(await callPathway(this.config, 'embeddings', { text }))[0];

            // NOTE: every document is stored with an owner; the privateData
            // pathway parameter is not consulted here.
            data.value = [{
                id: uuidv4(),
                content: text,
                contentVector,
                owner: savedContextId,
                docId: docId || uuidv4(),
            }];
            return { data };
        }

        // Default mode: 'search'
        if (inputVector) {
            data.vectors = [
                {
                    value: this.parseInputVector(inputVector),
                    fields: 'contentVector',
                    k: 20,
                },
            ];
        } else {
            data.search = modelPromptText;
        }

        if (filter) {
            data.filter = filter;
        }

        if (indexName === 'indexcortex') {
            // The default index is private: always restrict results to the
            // owner, AND-ed with any caller filter. The caller filter is
            // parenthesized so an `or` inside it cannot bypass the owner clause.
            const ownerFilter = `owner eq '${this.escapeODataString(savedContextId)}'`;
            data.filter = filter ? `(${filter}) and ${ownerFilter}` : ownerFilter;
        }

        return { data };
    }

    /**
     * Returns the vector as given, parsing it first when it arrives as a
     * non-empty JSON string.
     */
    parseInputVector(inputVector) {
        if (typeof inputVector === 'string' && inputVector.length > 0) {
            return JSON.parse(inputVector);
        }
        return inputVector;
    }

    /**
     * Escapes a value for use inside a single-quoted OData string literal
     * (embedded single quotes are doubled).
     */
    escapeODataString(value) {
        return String(value).replace(/'/g, "''");
    }

    /**
     * Rewrites the operation segment after `/docs/` in the URL to `mode`,
     * leaving the URL untouched when it already targets that mode.
     */
    ensureMode(url, mode) {
        const pattern = new RegExp(`indexes\/.*\/docs\/${mode}`);
        if (pattern.test(url)) {
            // if the URL is already in the correct form, return it as is
            return url;
        }
        // otherwise, perform the replacement
        return url.replace(/(indexes\/.*\/docs\/)([^?]+)/, `$1${mode}`);
    }

    /**
     * Rewrites the index-name segment after `indexes/` in the URL to
     * `indexName`.
     */
    ensureIndex(url, indexName) {
        // A replacer function keeps indexName literal: it is neither compiled
        // into a regular expression nor scanned for `$` replacement patterns.
        // Replacing a segment that already equals indexName is a no-op.
        return url.replace(/(indexes\/)([^\/]+)/, (match, prefix) => `${prefix}${indexName}`);
    }

    /**
     * Executes the request against the Azure Cognitive API.
     *
     * @returns {Promise<*>} The result of the request, or undefined when a
     *   delete finds no matching documents.
     */
    async execute(text, parameters, prompt, pathwayResolver) {
        const { requestId, pathway, savedContextId } = pathwayResolver;
        const mode = this.promptParameters.mode || 'search';
        const indexName = parameters.indexName || 'indexcortex';

        // Deletions are submitted as a batch to the 'index' endpoint.
        const endpointMode = mode === 'delete' ? 'index' : mode;
        const url = this.ensureIndex(this.ensureMode(this.requestUrl(text), endpointMode), indexName);
        const headers = this.model.headers;

        const { data, params } = await this.getRequestParameters(text, parameters, prompt, mode, indexName, savedContextId, {headers, requestId, pathway, url});

        if (mode === 'delete' && data.value.length === 0) {
            return; // nothing to delete
        }

        // execute the request
        const result = await this.executeRequest(url, data || {}, params || {}, headers || {}, prompt, requestId, pathway);

        // A full page of deletions means more documents may remain; repeat.
        if (mode === 'delete' && data?.value?.length === TOP) {
            return await this.execute(text, parameters, prompt, pathwayResolver);
        }

        return result;
    }

    /** Serializes the service response (or an empty object) to JSON. */
    parseResponse(data) {
        return JSON.stringify(data || {});
    }

}
|
|
142
|
+
|
|
143
|
+
export default AzureCognitivePlugin;
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
// OpenAiEmbeddingsPlugin.js
|
|
2
|
+
import ModelPlugin from './modelPlugin.js';
|
|
3
|
+
|
|
4
|
+
/**
 * Model plugin that requests embeddings and returns them as a JSON-encoded
 * array of embedding vectors.
 */
class OpenAiEmbeddingsPlugin extends ModelPlugin {
    constructor(config, pathway, modelName, model) {
        super(config, pathway, modelName, model);
    }

    /**
     * Builds the request body. An explicit, non-empty `input` parameter wins;
     * otherwise the compiled prompt text is used, falling back to the raw text.
     */
    getRequestParameters(text, parameters, prompt) {
        const merged = { ...this.promptParameters, ...parameters };
        const { modelPromptText } = this.getCompiledPrompt(text, merged, prompt);

        let input;
        if (merged?.input?.length) {
            input = merged.input;
        } else {
            input = modelPromptText || text;
        }

        return { data: { input } };
    }

    /**
     * Sends the embeddings request and returns the result of executeRequest.
     */
    async execute(text, parameters, prompt, pathwayResolver) {
        const { requestId, pathway } = pathwayResolver;
        const built = this.getRequestParameters(text, parameters, prompt);
        const endpoint = this.requestUrl();

        return this.executeRequest(
            endpoint,
            built.data || {},
            built.params || {},
            this.model.headers || {},
            prompt,
            requestId,
            pathway
        );
    }

    /**
     * Extracts the `embedding` of every entry in `data.data` and serializes
     * the list; yields "[]" when the response carries no entries.
     */
    parseResponse(data) {
        const vectors = data?.data?.map((entry) => {
            const { embedding } = entry;
            return embedding;
        });
        return JSON.stringify(vectors || []);
    }

}
|
|
37
|
+
|
|
38
|
+
export default OpenAiEmbeddingsPlugin;
|