@aj-archipelago/cortex 1.0.3 → 1.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/config.js +10 -4
- package/graphql/plugins/openAiWhisperPlugin.js +15 -8
- package/helper_apps/MediaFileChunker/fileChunker.js +50 -6
- package/helper_apps/MediaFileChunker/helper.js +13 -1
- package/helper_apps/MediaFileChunker/index.js +2 -4
- package/helper_apps/MediaFileChunker/package-lock.json +55 -0
- package/helper_apps/MediaFileChunker/package.json +1 -0
- package/package.json +1 -1
package/config.js
CHANGED
|
@@ -1,8 +1,10 @@
|
|
|
1
1
|
import path from 'path';
|
|
2
|
-
const __dirname = path.dirname(new URL(import.meta.url).pathname);
|
|
3
2
|
import convict from 'convict';
|
|
4
3
|
import HandleBars from './lib/handleBars.js';
|
|
5
4
|
import fs from 'fs';
|
|
5
|
+
import { fileURLToPath, pathToFileURL } from 'url';
|
|
6
|
+
|
|
7
|
+
const __dirname = path.dirname(fileURLToPath(import.meta.url));
|
|
6
8
|
|
|
7
9
|
// Schema for config
|
|
8
10
|
var config = convict({
|
|
@@ -137,17 +139,21 @@ if (configFile && fs.existsSync(configFile)) {
|
|
|
137
139
|
const buildPathways = async (config) => {
|
|
138
140
|
const { pathwaysPath, corePathwaysPath, basePathwayPath } = config.getProperties();
|
|
139
141
|
|
|
142
|
+
const pathwaysURL = pathToFileURL(pathwaysPath).toString();
|
|
143
|
+
const corePathwaysURL = pathToFileURL(corePathwaysPath).toString();
|
|
144
|
+
const basePathwayURL = pathToFileURL(basePathwayPath).toString();
|
|
145
|
+
|
|
140
146
|
// Load cortex base pathway
|
|
141
|
-
const basePathway = await import(
|
|
147
|
+
const basePathway = await import(basePathwayURL).then(module => module.default);
|
|
142
148
|
|
|
143
149
|
// Load core pathways, default from the Cortex package
|
|
144
150
|
console.log('Loading core pathways from', corePathwaysPath)
|
|
145
|
-
let loadedPathways = await import(`${
|
|
151
|
+
let loadedPathways = await import(`${corePathwaysURL}/index.js`).then(module => module);
|
|
146
152
|
|
|
147
153
|
// Load custom pathways and override core pathways if same
|
|
148
154
|
if (pathwaysPath && fs.existsSync(pathwaysPath)) {
|
|
149
155
|
console.log('Loading custom pathways from', pathwaysPath)
|
|
150
|
-
const customPathways = await import(`${
|
|
156
|
+
const customPathways = await import(`${pathwaysURL}/index.js`).then(module => module);
|
|
151
157
|
loadedPathways = { ...loadedPathways, ...customPathways };
|
|
152
158
|
}
|
|
153
159
|
|
|
@@ -52,6 +52,7 @@ const downloadFile = async (fileUrl) => {
|
|
|
52
52
|
fs.unlink(localFilePath, () => {
|
|
53
53
|
reject(error);
|
|
54
54
|
});
|
|
55
|
+
throw error;
|
|
55
56
|
}
|
|
56
57
|
});
|
|
57
58
|
};
|
|
@@ -73,15 +74,20 @@ class OpenAIWhisperPlugin extends ModelPlugin {
|
|
|
73
74
|
}
|
|
74
75
|
} catch (err) {
|
|
75
76
|
console.log(`Error getting media chunks list from api:`, err);
|
|
77
|
+
throw err;
|
|
76
78
|
}
|
|
77
79
|
}
|
|
78
80
|
|
|
79
81
|
async markCompletedForCleanUp(requestId) {
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
82
|
+
try {
|
|
83
|
+
if (API_URL) {
|
|
84
|
+
//call helper api to mark processing as completed
|
|
85
|
+
const res = await axios.delete(API_URL, { params: { requestId } });
|
|
86
|
+
console.log(`Marked request ${requestId} as completed:`, res.data);
|
|
87
|
+
return res.data;
|
|
88
|
+
}
|
|
89
|
+
} catch (err) {
|
|
90
|
+
console.log(`Error marking request ${requestId} as completed:`, err);
|
|
85
91
|
}
|
|
86
92
|
}
|
|
87
93
|
|
|
@@ -104,6 +110,7 @@ class OpenAIWhisperPlugin extends ModelPlugin {
|
|
|
104
110
|
return this.executeRequest(url, formData, params, { ...this.model.headers, ...formData.getHeaders() });
|
|
105
111
|
} catch (err) {
|
|
106
112
|
console.log(err);
|
|
113
|
+
throw err;
|
|
107
114
|
}
|
|
108
115
|
}
|
|
109
116
|
|
|
@@ -152,11 +159,11 @@ class OpenAIWhisperPlugin extends ModelPlugin {
|
|
|
152
159
|
// result = await Promise.all(mediaSplit.chunks.map(processChunk));
|
|
153
160
|
|
|
154
161
|
} catch (error) {
|
|
155
|
-
|
|
162
|
+
const errMsg = `Transcribe error: ${error?.message || JSON.stringify(error)}`;
|
|
163
|
+
console.error(errMsg);
|
|
164
|
+
return errMsg;
|
|
156
165
|
}
|
|
157
166
|
finally {
|
|
158
|
-
// isYoutubeUrl && (await deleteTempPath(file));
|
|
159
|
-
// folder && (await deleteTempPath(folder));
|
|
160
167
|
try {
|
|
161
168
|
for (const chunk of chunks) {
|
|
162
169
|
await deleteTempPath(chunk);
|
|
@@ -7,6 +7,8 @@ import { v4 as uuidv4 } from 'uuid';
|
|
|
7
7
|
import os from 'os';
|
|
8
8
|
import ytdl from 'ytdl-core';
|
|
9
9
|
import { promisify } from 'util';
|
|
10
|
+
import axios from 'axios';
|
|
11
|
+
import { ensureEncoded } from './helper.js';
|
|
10
12
|
|
|
11
13
|
ffmpeg.setFfmpegPath(ffmpegPath);
|
|
12
14
|
ffmpeg.setFfprobePath(ffprobePath);
|
|
@@ -15,12 +17,15 @@ console.log(`ffprobePath: ${ffprobePath}`);
|
|
|
15
17
|
|
|
16
18
|
const ffmpegProbe = promisify(ffmpeg.ffprobe);
|
|
17
19
|
|
|
20
|
+
|
|
18
21
|
async function processChunk(inputPath, outputFileName, start, duration) {
|
|
19
22
|
return new Promise((resolve, reject) => {
|
|
20
23
|
ffmpeg(inputPath)
|
|
21
24
|
.seekInput(start)
|
|
22
25
|
.duration(duration)
|
|
23
|
-
.
|
|
26
|
+
.format('mp3')
|
|
27
|
+
.audioCodec('libmp3lame')
|
|
28
|
+
.audioBitrate(128)
|
|
24
29
|
.on('start', (cmd) => {
|
|
25
30
|
console.log(`Started FFmpeg with command: ${cmd}`);
|
|
26
31
|
})
|
|
@@ -43,18 +48,57 @@ const generateUniqueFolderName = () => {
|
|
|
43
48
|
return uniqueOutputPath;
|
|
44
49
|
}
|
|
45
50
|
|
|
51
|
+
/**
 * Download a remote file to disk by streaming the HTTP response body.
 *
 * @param {string} url - Location of the file to fetch.
 * @param {string} outputPath - Local path the response body is written to.
 * @returns {Promise<void>} Resolves once the write stream has finished.
 * @throws Rethrows any request, network or file-system error after logging it.
 */
async function downloadFile(url, outputPath) {
    try {
        // Make an HTTP request for the file
        const response = await axios.get(url, { responseType: 'stream' });

        // Create a writable file stream to save the file
        const fileStream = fs.createWriteStream(outputPath);

        // Wait for the file stream to finish writing
        await new Promise((resolve, reject) => {
            // pipe() does not forward errors from the source stream, so a
            // connection dropped mid-download must be observed here; otherwise
            // neither 'finish' nor 'error' fires on the file stream and the
            // promise never settles.
            response.data.on('error', (err) => {
                fileStream.destroy();
                reject(err);
            });
            fileStream.on('error', (err) => {
                // Stop reading from the socket once the file can no longer be written.
                response.data.destroy();
                reject(err);
            });
            fileStream.on('finish', resolve);

            // Attach listeners first, then pipe the response data into the file stream
            response.data.pipe(fileStream);
        });

        console.log(`Downloaded file saved to: ${outputPath}`);
    } catch (error) {
        console.error(`Error downloading file from ${url}:`, error);
        throw error;
    }
}
|
|
74
|
+
|
|
46
75
|
async function splitMediaFile(inputPath, chunkDurationInSeconds = 600) {
|
|
47
76
|
try {
|
|
77
|
+
// Create unique folder
|
|
78
|
+
const uniqueOutputPath = generateUniqueFolderName();
|
|
79
|
+
fs.mkdirSync(uniqueOutputPath, { recursive: true });
|
|
80
|
+
|
|
81
|
+
// Download the file if it's not a local file
|
|
82
|
+
const isUrl = /^(https?|ftp):\/\/[^\s/$.?#].[^\s]*$/i.test(inputPath);
|
|
83
|
+
if (isUrl) {
|
|
84
|
+
inputPath = ensureEncoded(inputPath);
|
|
85
|
+
// Extract the original file name from the URL
|
|
86
|
+
const urlObj = new URL(inputPath);
|
|
87
|
+
const originalFileName = path.basename(urlObj.pathname);
|
|
88
|
+
|
|
89
|
+
// Use the original file name when saving the downloaded file
|
|
90
|
+
const downloadPath = path.join(uniqueOutputPath, originalFileName);
|
|
91
|
+
await downloadFile(inputPath, downloadPath);
|
|
92
|
+
inputPath = downloadPath;
|
|
93
|
+
}
|
|
94
|
+
|
|
95
|
+
|
|
48
96
|
const metadata = await ffmpegProbe(inputPath);
|
|
49
97
|
const duration = metadata.format.duration;
|
|
50
98
|
const numChunks = Math.ceil((duration - 1) / chunkDurationInSeconds);
|
|
51
99
|
|
|
52
100
|
const chunkPromises = [];
|
|
53
101
|
|
|
54
|
-
const uniqueOutputPath = generateUniqueFolderName();
|
|
55
|
-
|
|
56
|
-
// Create unique folder
|
|
57
|
-
fs.mkdirSync(uniqueOutputPath, { recursive: true });
|
|
58
102
|
|
|
59
103
|
|
|
60
104
|
for (let i = 0; i < numChunks; i++) {
|
|
@@ -73,7 +117,7 @@ async function splitMediaFile(inputPath, chunkDurationInSeconds = 600) {
|
|
|
73
117
|
chunkPromises.push(chunkPromise);
|
|
74
118
|
}
|
|
75
119
|
|
|
76
|
-
return { chunkPromises, uniqueOutputPath }
|
|
120
|
+
return { chunkPromises, uniqueOutputPath };
|
|
77
121
|
} catch (err) {
|
|
78
122
|
console.error('Error occurred during the splitting process:', err);
|
|
79
123
|
}
|
|
@@ -28,6 +28,18 @@ async function deleteTempPath(path) {
|
|
|
28
28
|
}
|
|
29
29
|
}
|
|
30
30
|
|
|
31
|
+
/**
 * Return `url` with every character that is illegal in a URI percent-encoded,
 * while leaving percent-escapes that are already present untouched.
 *
 * Comparing `decodeURI(url)` with `url` is not a reliable "already encoded"
 * check: `decodeURI` never decodes reserved escapes such as `%2F`, `%2B` or
 * `%3D`, so a URL containing only those would be encoded a second time
 * (`%2B` -> `%252B`), and a partially encoded URL would be returned with its
 * raw characters intact. Encoding once and then restoring the escapes that
 * were in the input avoids both problems.
 *
 * @param {string} url - Absolute or relative URL, encoded, unencoded or mixed.
 * @returns {string} The encoded URL. Non-string input, or a string that cannot
 *   be encoded (e.g. one containing a lone surrogate), is returned unchanged.
 */
function ensureEncoded(url) {
    if (typeof url !== 'string') {
        return url;
    }
    try {
        // encodeURI turns every '%' into '%25'; a '%25' followed by two hex
        // digits therefore marks an escape that was already in the input, so
        // collapse it back to its original form.
        return encodeURI(url).replace(/%25([0-9A-Fa-f]{2})/g, '%$1');
    } catch (e) {
        // encodeURI throws URIError on malformed UTF-16; fall back to the input.
        return url;
    }
}
|
|
42
|
+
|
|
31
43
|
export {
|
|
32
|
-
isValidYoutubeUrl, deleteTempPath
|
|
44
|
+
isValidYoutubeUrl, deleteTempPath, ensureEncoded
|
|
33
45
|
}
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import { processYoutubeUrl, splitMediaFile } from './fileChunker.js';
|
|
2
2
|
import { saveFileToBlob, deleteBlob, uploadBlob } from './blobHandler.js';
|
|
3
3
|
import { publishRequestProgress } from './redis.js';
|
|
4
|
-
import { deleteTempPath, isValidYoutubeUrl } from './helper.js';
|
|
4
|
+
import { deleteTempPath, ensureEncoded, isValidYoutubeUrl } from './helper.js';
|
|
5
5
|
import { moveFileToPublicFolder, deleteFolder } from './localFileHandler.js';
|
|
6
6
|
|
|
7
7
|
const useAzure = process.env.AZURE_STORAGE_CONNECTION_STRING ? true : false;
|
|
@@ -10,7 +10,6 @@ console.log(useAzure ? 'Using Azure Storage' : 'Using local file system');
|
|
|
10
10
|
|
|
11
11
|
async function main(context, req) {
|
|
12
12
|
context.log('Starting req processing..');
|
|
13
|
-
// await publishRequestProgress({ requestId:222, progress: 0, data: null });
|
|
14
13
|
|
|
15
14
|
// Clean up blob when request delete which means processing marked completed
|
|
16
15
|
if (req.method.toLowerCase() === `delete`) {
|
|
@@ -48,7 +47,7 @@ async function main(context, req) {
|
|
|
48
47
|
let completedCount = 0;
|
|
49
48
|
let numberOfChunks;
|
|
50
49
|
|
|
51
|
-
let file = uri;
|
|
50
|
+
let file = ensureEncoded(uri); // encode url to handle special characters
|
|
52
51
|
let folder;
|
|
53
52
|
const isYoutubeUrl = isValidYoutubeUrl(uri);
|
|
54
53
|
|
|
@@ -111,6 +110,5 @@ async function main(context, req) {
|
|
|
111
110
|
};
|
|
112
111
|
}
|
|
113
112
|
|
|
114
|
-
// main(console, { query: { uri: "https://www.youtube.com/watch?v=QH2-TGUlwu4" } });
|
|
115
113
|
|
|
116
114
|
export default main;
|
|
@@ -11,6 +11,7 @@
|
|
|
11
11
|
"@azure/storage-blob": "^12.13.0",
|
|
12
12
|
"@ffmpeg-installer/ffmpeg": "^1.1.0",
|
|
13
13
|
"@ffprobe-installer/ffprobe": "^2.0.0",
|
|
14
|
+
"axios": "^1.3.6",
|
|
14
15
|
"busboy": "^1.6.0",
|
|
15
16
|
"express": "^4.18.2",
|
|
16
17
|
"fluent-ffmpeg": "^2.1.2",
|
|
@@ -511,6 +512,16 @@
|
|
|
511
512
|
"resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz",
|
|
512
513
|
"integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q=="
|
|
513
514
|
},
|
|
515
|
+
"node_modules/axios": {
|
|
516
|
+
"version": "1.3.6",
|
|
517
|
+
"resolved": "https://registry.npmjs.org/axios/-/axios-1.3.6.tgz",
|
|
518
|
+
"integrity": "sha512-PEcdkk7JcdPiMDkvM4K6ZBRYq9keuVJsToxm2zQIM70Qqo2WHTdJZMXcG9X+RmRp2VPNUQC8W1RAGbgt6b1yMg==",
|
|
519
|
+
"dependencies": {
|
|
520
|
+
"follow-redirects": "^1.15.0",
|
|
521
|
+
"form-data": "^4.0.0",
|
|
522
|
+
"proxy-from-env": "^1.1.0"
|
|
523
|
+
}
|
|
524
|
+
},
|
|
514
525
|
"node_modules/body-parser": {
|
|
515
526
|
"version": "1.20.1",
|
|
516
527
|
"resolved": "https://registry.npmjs.org/body-parser/-/body-parser-1.20.1.tgz",
|
|
@@ -913,6 +924,25 @@
|
|
|
913
924
|
"node": ">=0.8.0"
|
|
914
925
|
}
|
|
915
926
|
},
|
|
927
|
+
"node_modules/follow-redirects": {
|
|
928
|
+
"version": "1.15.2",
|
|
929
|
+
"resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.2.tgz",
|
|
930
|
+
"integrity": "sha512-VQLG33o04KaQ8uYi2tVNbdrWp1QWxNNea+nmIB4EVM28v0hmP17z7aG1+wAkNzVq4KeXTq3221ye5qTJP91JwA==",
|
|
931
|
+
"funding": [
|
|
932
|
+
{
|
|
933
|
+
"type": "individual",
|
|
934
|
+
"url": "https://github.com/sponsors/RubenVerborgh"
|
|
935
|
+
}
|
|
936
|
+
],
|
|
937
|
+
"engines": {
|
|
938
|
+
"node": ">=4.0"
|
|
939
|
+
},
|
|
940
|
+
"peerDependenciesMeta": {
|
|
941
|
+
"debug": {
|
|
942
|
+
"optional": true
|
|
943
|
+
}
|
|
944
|
+
}
|
|
945
|
+
},
|
|
916
946
|
"node_modules/form-data": {
|
|
917
947
|
"version": "4.0.0",
|
|
918
948
|
"resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.0.tgz",
|
|
@@ -1364,6 +1394,11 @@
|
|
|
1364
1394
|
"node": ">= 0.10"
|
|
1365
1395
|
}
|
|
1366
1396
|
},
|
|
1397
|
+
"node_modules/proxy-from-env": {
|
|
1398
|
+
"version": "1.1.0",
|
|
1399
|
+
"resolved": "https://registry.npmjs.org/proxy-from-env/-/proxy-from-env-1.1.0.tgz",
|
|
1400
|
+
"integrity": "sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg=="
|
|
1401
|
+
},
|
|
1367
1402
|
"node_modules/public-ip": {
|
|
1368
1403
|
"version": "6.0.1",
|
|
1369
1404
|
"resolved": "https://registry.npmjs.org/public-ip/-/public-ip-6.0.1.tgz",
|
|
@@ -2049,6 +2084,16 @@
|
|
|
2049
2084
|
"resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz",
|
|
2050
2085
|
"integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q=="
|
|
2051
2086
|
},
|
|
2087
|
+
"axios": {
|
|
2088
|
+
"version": "1.3.6",
|
|
2089
|
+
"resolved": "https://registry.npmjs.org/axios/-/axios-1.3.6.tgz",
|
|
2090
|
+
"integrity": "sha512-PEcdkk7JcdPiMDkvM4K6ZBRYq9keuVJsToxm2zQIM70Qqo2WHTdJZMXcG9X+RmRp2VPNUQC8W1RAGbgt6b1yMg==",
|
|
2091
|
+
"requires": {
|
|
2092
|
+
"follow-redirects": "^1.15.0",
|
|
2093
|
+
"form-data": "^4.0.0",
|
|
2094
|
+
"proxy-from-env": "^1.1.0"
|
|
2095
|
+
}
|
|
2096
|
+
},
|
|
2052
2097
|
"body-parser": {
|
|
2053
2098
|
"version": "1.20.1",
|
|
2054
2099
|
"resolved": "https://registry.npmjs.org/body-parser/-/body-parser-1.20.1.tgz",
|
|
@@ -2353,6 +2398,11 @@
|
|
|
2353
2398
|
"which": "^1.1.1"
|
|
2354
2399
|
}
|
|
2355
2400
|
},
|
|
2401
|
+
"follow-redirects": {
|
|
2402
|
+
"version": "1.15.2",
|
|
2403
|
+
"resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.2.tgz",
|
|
2404
|
+
"integrity": "sha512-VQLG33o04KaQ8uYi2tVNbdrWp1QWxNNea+nmIB4EVM28v0hmP17z7aG1+wAkNzVq4KeXTq3221ye5qTJP91JwA=="
|
|
2405
|
+
},
|
|
2356
2406
|
"form-data": {
|
|
2357
2407
|
"version": "4.0.0",
|
|
2358
2408
|
"resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.0.tgz",
|
|
@@ -2657,6 +2707,11 @@
|
|
|
2657
2707
|
"ipaddr.js": "1.9.1"
|
|
2658
2708
|
}
|
|
2659
2709
|
},
|
|
2710
|
+
"proxy-from-env": {
|
|
2711
|
+
"version": "1.1.0",
|
|
2712
|
+
"resolved": "https://registry.npmjs.org/proxy-from-env/-/proxy-from-env-1.1.0.tgz",
|
|
2713
|
+
"integrity": "sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg=="
|
|
2714
|
+
},
|
|
2660
2715
|
"public-ip": {
|
|
2661
2716
|
"version": "6.0.1",
|
|
2662
2717
|
"resolved": "https://registry.npmjs.org/public-ip/-/public-ip-6.0.1.tgz",
|
package/package.json
CHANGED