@aj-archipelago/cortex 1.0.1 → 1.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.eslintignore +30 -0
- package/.eslintrc +31 -0
- package/README.md +13 -1
- package/config/default.example.json +70 -0
- package/config.js +5 -6
- package/graphql/chunker.js +1 -1
- package/graphql/graphql.js +1 -1
- package/graphql/parser.js +7 -0
- package/graphql/pathwayPrompter.js +8 -19
- package/graphql/pathwayResolver.js +10 -10
- package/graphql/pathwayResponseParser.js +13 -4
- package/graphql/plugins/localModelPlugin.js +54 -5
- package/graphql/plugins/modelPlugin.js +29 -20
- package/graphql/plugins/openAiCompletionPlugin.js +29 -12
- package/graphql/plugins/openAiWhisperPlugin.js +112 -19
- package/graphql/prompt.js +1 -0
- package/graphql/resolver.js +2 -2
- package/graphql/subscriptions.js +1 -1
- package/helper_apps/MediaFileChunker/blobHandler.js +150 -0
- package/helper_apps/MediaFileChunker/fileChunker.js +123 -0
- package/helper_apps/MediaFileChunker/function.json +20 -0
- package/helper_apps/MediaFileChunker/helper.js +33 -0
- package/helper_apps/MediaFileChunker/index.js +116 -0
- package/helper_apps/MediaFileChunker/localFileHandler.js +36 -0
- package/helper_apps/MediaFileChunker/package-lock.json +2919 -0
- package/helper_apps/MediaFileChunker/package.json +22 -0
- package/helper_apps/MediaFileChunker/redis.js +32 -0
- package/helper_apps/MediaFileChunker/start.js +27 -0
- package/lib/handleBars.js +26 -0
- package/lib/pathwayTools.js +15 -0
- package/lib/redisSubscription.js +51 -0
- package/lib/request.js +4 -5
- package/package.json +9 -6
- package/pathways/lc_test.mjs +9 -5
- package/pathways/summary.js +1 -1
- package/pathways/transcribe.js +2 -1
- package/tests/config.test.js +69 -0
- package/tests/handleBars.test.js +43 -0
- package/tests/mocks.js +39 -0
- package/tests/modelPlugin.test.js +129 -0
- package/tests/pathwayResolver.test.js +77 -0
- package/tests/truncateMessages.test.js +99 -0
- package/lib/fileChunker.js +0 -160
|
@@ -1,16 +1,90 @@
|
|
|
1
1
|
// openAiWhisperPlugin.js
|
|
2
2
|
import ModelPlugin from './modelPlugin.js';
|
|
3
|
-
|
|
4
3
|
import FormData from 'form-data';
|
|
5
4
|
import fs from 'fs';
|
|
6
|
-
import { splitMediaFile, isValidYoutubeUrl, processYoutubeUrl, deleteTempPath } from '../../lib/fileChunker.js';
|
|
7
5
|
import pubsub from '../pubsub.js';
|
|
6
|
+
import { axios } from '../../lib/request.js';
|
|
7
|
+
import stream from 'stream';
|
|
8
|
+
import os from 'os';
|
|
9
|
+
import path from 'path';
|
|
10
|
+
import { v4 as uuidv4 } from 'uuid';
|
|
11
|
+
import { config } from '../../config.js';
|
|
12
|
+
import { deleteTempPath } from '../../helper_apps/MediaFileChunker/helper.js';
|
|
13
|
+
import http from 'http';
|
|
14
|
+
import https from 'https';
|
|
15
|
+
import url from 'url';
|
|
16
|
+
import { promisify } from 'util';
|
|
17
|
+
const pipeline = promisify(stream.pipeline);
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
const API_URL = config.get('whisperMediaApiUrl');
|
|
21
|
+
|
|
22
|
+
/**
 * Build a collision-resistant file name from a random v4 UUID.
 *
 * @param {string} [extension] - File extension, with or without leading dots.
 * @returns {string} `<uuid>.<extension>`, or just `<uuid>` when no extension
 *   is supplied (avoids producing a name that ends in a dangling dot).
 */
function generateUniqueFilename(extension) {
    // Normalise ".mp3" -> "mp3" and treat undefined/null/"" as "no extension".
    const suffix = extension ? String(extension).replace(/^\.+/, '') : '';
    const id = uuidv4();
    return suffix ? `${id}.${suffix}` : id;
}
|
|
25
|
+
|
|
26
|
+
/**
 * Download a remote file over HTTP(S) into the OS temp directory.
 *
 * The local file name is a fresh UUID plus the extension found in the URL's
 * path component, so query strings / SAS tokens never end up in the name.
 *
 * @param {string} fileUrl - Absolute http: or https: URL of the file.
 * @returns {Promise<string>} Path of the downloaded temp file.
 * @throws {TypeError} When fileUrl is not a valid absolute URL.
 * @throws {Error} When the server does not answer with status 200, or when
 *   the request or the write to disk fails. Any partially written file is
 *   removed (best effort) before the error is propagated.
 */
const downloadFile = async (fileUrl) => {
    const parsedUrl = new URL(fileUrl);
    // Use the pathname only: path.extname() on the raw URL would include "?query".
    const fileExtension = path.extname(parsedUrl.pathname).slice(1);
    const localFilePath = path.join(os.tmpdir(), generateUniqueFilename(fileExtension));
    const protocol = parsedUrl.protocol === 'https:' ? https : http;

    try {
        const response = await new Promise((resolve, reject) => {
            protocol.get(parsedUrl, (res) => {
                if (res.statusCode === 200) {
                    resolve(res);
                } else {
                    // Drain the body so the underlying socket is released.
                    res.resume();
                    reject(new Error(`HTTP request failed with status code ${res.statusCode}`));
                }
            }).on('error', reject);
        });

        await pipeline(response, fs.createWriteStream(localFilePath));
        console.log(`Downloaded file to ${localFilePath}`);
        return localFilePath;
    } catch (error) {
        // Best-effort cleanup; the file may not exist, so the unlink error is ignored.
        await new Promise((done) => fs.unlink(localFilePath, () => done()));
        throw error;
    }
};
|
|
8
58
|
|
|
9
59
|
class OpenAIWhisperPlugin extends ModelPlugin {
|
|
10
60
|
constructor(config, pathway) {
|
|
11
61
|
super(config, pathway);
|
|
12
62
|
}
|
|
13
63
|
|
|
64
|
+
async getMediaChunks(file, requestId) {
|
|
65
|
+
try {
|
|
66
|
+
if (API_URL) {
|
|
67
|
+
//call helper api and get list of file uris
|
|
68
|
+
const res = await axios.get(API_URL, { params: { uri: file, requestId } });
|
|
69
|
+
return res.data;
|
|
70
|
+
} else {
|
|
71
|
+
console.log(`No API_URL set, returning file as chunk`);
|
|
72
|
+
return [file];
|
|
73
|
+
}
|
|
74
|
+
} catch (err) {
|
|
75
|
+
console.log(`Error getting media chunks list from api:`, err);
|
|
76
|
+
}
|
|
77
|
+
}
|
|
78
|
+
|
|
79
|
+
async markCompletedForCleanUp(requestId) {
|
|
80
|
+
if (API_URL) {
|
|
81
|
+
//call helper api to mark processing as completed
|
|
82
|
+
const res = await axios.delete(API_URL, { params: { requestId } });
|
|
83
|
+
console.log(`Marked request ${requestId} as completed:`, res.data);
|
|
84
|
+
return res.data;
|
|
85
|
+
}
|
|
86
|
+
}
|
|
87
|
+
|
|
14
88
|
// Execute the request to the OpenAI Whisper API
|
|
15
89
|
async execute(text, parameters, prompt, pathwayResolver) {
|
|
16
90
|
const url = this.requestUrl(text);
|
|
@@ -19,11 +93,12 @@ class OpenAIWhisperPlugin extends ModelPlugin {
|
|
|
19
93
|
|
|
20
94
|
const processChunk = async (chunk) => {
|
|
21
95
|
try {
|
|
96
|
+
const { language } = parameters;
|
|
22
97
|
const formData = new FormData();
|
|
23
98
|
formData.append('file', fs.createReadStream(chunk));
|
|
24
99
|
formData.append('model', this.model.params.model);
|
|
25
100
|
formData.append('response_format', 'text');
|
|
26
|
-
|
|
101
|
+
language && formData.append('language', language);
|
|
27
102
|
modelPromptText && formData.append('prompt', modelPromptText);
|
|
28
103
|
|
|
29
104
|
return this.executeRequest(url, formData, params, { ...this.model.headers, ...formData.getHeaders() });
|
|
@@ -34,14 +109,13 @@ class OpenAIWhisperPlugin extends ModelPlugin {
|
|
|
34
109
|
|
|
35
110
|
let result = ``;
|
|
36
111
|
let { file } = parameters;
|
|
37
|
-
let folder;
|
|
38
|
-
const isYoutubeUrl = isValidYoutubeUrl(file);
|
|
39
112
|
let totalCount = 0;
|
|
40
113
|
let completedCount = 0;
|
|
41
114
|
const { requestId } = pathwayResolver;
|
|
42
115
|
|
|
43
116
|
const sendProgress = () => {
|
|
44
117
|
completedCount++;
|
|
118
|
+
if (completedCount >= totalCount) return;
|
|
45
119
|
pubsub.publish('REQUEST_PROGRESS', {
|
|
46
120
|
requestProgress: {
|
|
47
121
|
requestId,
|
|
@@ -51,24 +125,23 @@ class OpenAIWhisperPlugin extends ModelPlugin {
|
|
|
51
125
|
});
|
|
52
126
|
}
|
|
53
127
|
|
|
128
|
+
let chunks = []; // array of local file paths
|
|
54
129
|
try {
|
|
55
|
-
if (isYoutubeUrl) {
|
|
56
|
-
// totalCount += 1; // extra 1 step for youtube download
|
|
57
|
-
file = await processYoutubeUrl(file);
|
|
58
|
-
}
|
|
59
130
|
|
|
60
|
-
const
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
131
|
+
const uris = await this.getMediaChunks(file, requestId); // array of remote file uris
|
|
132
|
+
if (!uris || !uris.length) {
|
|
133
|
+
throw new Error(`Error in getting chunks from media helper for file ${file}`);
|
|
134
|
+
}
|
|
135
|
+
totalCount = uris.length * 4; // 4 steps for each chunk (download and upload)
|
|
136
|
+
API_URL && (completedCount = uris.length); // api progress is already calculated
|
|
64
137
|
|
|
65
138
|
// sequential download of chunks
|
|
66
|
-
const
|
|
67
|
-
|
|
139
|
+
for (const uri of uris) {
|
|
140
|
+
chunks.push(await downloadFile(uri));
|
|
68
141
|
sendProgress();
|
|
69
|
-
chunks.push(await chunkPromise);
|
|
70
142
|
}
|
|
71
143
|
|
|
144
|
+
|
|
72
145
|
// sequential processing of chunks
|
|
73
146
|
for (const chunk of chunks) {
|
|
74
147
|
result += await processChunk(chunk);
|
|
@@ -80,9 +153,29 @@ class OpenAIWhisperPlugin extends ModelPlugin {
|
|
|
80
153
|
|
|
81
154
|
} catch (error) {
|
|
82
155
|
console.error("An error occurred:", error);
|
|
83
|
-
}
|
|
84
|
-
|
|
85
|
-
|
|
156
|
+
}
|
|
157
|
+
finally {
|
|
158
|
+
// isYoutubeUrl && (await deleteTempPath(file));
|
|
159
|
+
// folder && (await deleteTempPath(folder));
|
|
160
|
+
try {
|
|
161
|
+
for (const chunk of chunks) {
|
|
162
|
+
await deleteTempPath(chunk);
|
|
163
|
+
}
|
|
164
|
+
|
|
165
|
+
await this.markCompletedForCleanUp(requestId);
|
|
166
|
+
|
|
167
|
+
//check cleanup for whisper temp uploaded files url
|
|
168
|
+
const regex = /whispertempfiles\/([a-z0-9-]+)/;
|
|
169
|
+
const match = file.match(regex);
|
|
170
|
+
if (match && match[1]) {
|
|
171
|
+
const extractedValue = match[1];
|
|
172
|
+
await this.markCompletedForCleanUp(extractedValue);
|
|
173
|
+
console.log(`Cleaned temp whisper file ${file} with request id ${extractedValue}`);
|
|
174
|
+
}
|
|
175
|
+
|
|
176
|
+
} catch (error) {
|
|
177
|
+
console.error("An error occurred while deleting:", error);
|
|
178
|
+
}
|
|
86
179
|
}
|
|
87
180
|
return result;
|
|
88
181
|
}
|
package/graphql/prompt.js
CHANGED
|
@@ -26,6 +26,7 @@ function promptContains(variable, prompt) {
|
|
|
26
26
|
// if it's an array, it's the messages format
|
|
27
27
|
if (Array.isArray(prompt)) {
|
|
28
28
|
prompt.forEach(p => {
|
|
29
|
+
// eslint-disable-next-line no-cond-assign
|
|
29
30
|
while (match = p.content && regexp.exec(p.content)) {
|
|
30
31
|
matches.push(match[1]);
|
|
31
32
|
}
|
package/graphql/resolver.js
CHANGED
|
@@ -26,12 +26,12 @@ const rootResolver = async (parent, args, contextValue, info) => {
|
|
|
26
26
|
}
|
|
27
27
|
|
|
28
28
|
// This resolver is used by the root resolver to process the request
|
|
29
|
-
const resolver = async (parent, args, contextValue,
|
|
29
|
+
const resolver = async (parent, args, contextValue, _info) => {
|
|
30
30
|
const { pathwayResolver } = contextValue;
|
|
31
31
|
return await pathwayResolver.resolve(args);
|
|
32
32
|
}
|
|
33
33
|
|
|
34
|
-
const cancelRequestResolver = (parent, args, contextValue,
|
|
34
|
+
const cancelRequestResolver = (parent, args, contextValue, _info) => {
|
|
35
35
|
const { requestId } = args;
|
|
36
36
|
const { requestState } = contextValue;
|
|
37
37
|
requestState[requestId] = { canceled: true };
|
package/graphql/subscriptions.js
CHANGED
|
@@ -10,7 +10,7 @@ import { requestState } from './requestState.js';
|
|
|
10
10
|
const subscriptions = {
|
|
11
11
|
requestProgress: {
|
|
12
12
|
subscribe: withFilter(
|
|
13
|
-
(_, args, __,
|
|
13
|
+
(_, args, __, _info) => {
|
|
14
14
|
const { requestIds } = args;
|
|
15
15
|
for (const requestId of requestIds) {
|
|
16
16
|
if (!requestState[requestId]) {
|
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
import fs from 'fs';
|
|
2
|
+
import path from 'path';
|
|
3
|
+
import { BlobServiceClient } from '@azure/storage-blob';
|
|
4
|
+
import { v4 as uuidv4 } from 'uuid';
|
|
5
|
+
import Busboy from 'busboy';
|
|
6
|
+
import { PassThrough } from 'stream';
|
|
7
|
+
import { pipeline as _pipeline } from 'stream';
|
|
8
|
+
import { promisify } from 'util';
|
|
9
|
+
const pipeline = promisify(_pipeline);
|
|
10
|
+
import { join } from 'path';
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
import { publicFolder, port, ipAddress } from "./start.js";
|
|
14
|
+
|
|
15
|
+
/**
 * Create the Azure Blob Storage clients from environment configuration.
 *
 * Reads AZURE_STORAGE_CONNECTION_STRING and AZURE_STORAGE_CONTAINER_NAME
 * from process.env.
 *
 * @returns {{blobServiceClient: object, containerClient: object}} The service
 *   client and the client for the configured container.
 * @throws {Error} When either environment variable is missing or empty.
 */
const getBlobClient = () => {
    const {
        AZURE_STORAGE_CONNECTION_STRING: connectionString,
        AZURE_STORAGE_CONTAINER_NAME: containerName,
    } = process.env;

    if (!(connectionString && containerName)) {
        throw new Error('Missing Azure Storage connection string or container name environment variable');
    }

    const blobServiceClient = BlobServiceClient.fromConnectionString(connectionString);
    return {
        blobServiceClient,
        containerClient: blobServiceClient.getContainerClient(containerName),
    };
}
|
|
27
|
+
|
|
28
|
+
/**
 * Upload a local chunk file to blob storage under the request's prefix.
 *
 * The blob is named `<requestId>/<uuid>_<basename of chunkPath>`.
 *
 * @param {string} chunkPath - Path of the local file to upload.
 * @param {string} requestId - Request id used as the blob name prefix.
 * @returns {Promise<string>} URL of the uploaded blob.
 */
async function saveFileToBlob(chunkPath, requestId) {
    const { containerClient } = getBlobClient();

    // Random UUID in the name keeps repeated uploads of one file distinct
    const targetName = `${requestId}/${uuidv4()}_${path.basename(chunkPath)}`;
    const blockBlobClient = containerClient.getBlockBlobClient(targetName);

    // Stream the chunk from disk straight into the blob
    await blockBlobClient.uploadStream(fs.createReadStream(chunkPath));

    return blockBlobClient.url;
}
|
|
44
|
+
|
|
45
|
+
//deletes blob that has the requestId
|
|
46
|
+
/**
 * Delete every blob whose name starts with `<requestId>/`.
 *
 * @param {string} requestId - Request id used as the blob name prefix.
 * @returns {Promise<string[]>} Names of the blobs that were deleted.
 * @throws {Error} When requestId is missing.
 */
async function deleteBlob(requestId) {
    if (!requestId) {
        throw new Error('Missing requestId parameter');
    }

    const { containerClient } = getBlobClient();
    const deletedNames = [];

    // Walk the blobs under the request prefix and delete them one by one
    for await (const { name } of containerClient.listBlobsFlat({ prefix: `${requestId}/` })) {
        await containerClient.getBlockBlobClient(name).delete();
        console.log(`Cleaned blob: ${name}`);
        deletedNames.push(name);
    }

    return deletedNames;
}
|
|
64
|
+
|
|
65
|
+
/**
 * Handle a multipart upload and store the uploaded file either in the local
 * public folder or in blob storage.
 *
 * A `requestId` form field, when it arrives before the file part, selects the
 * target folder / blob prefix; otherwise a random UUID is used. On success
 * `context.res` is set to `{ status: 200, body }` and the promise resolves
 * with `body`. On failure `context.res` gets an error status and the promise
 * rejects, so the caller never waits on a promise that cannot settle.
 *
 * @param {object} context - Invocation context exposing `log()`,
 *   `log.error()` and a writable `res` property.
 * @param {object} req - Incoming request: readable stream with `headers`.
 * @param {boolean} [saveToLocal=false] - Store on the local file system
 *   instead of blob storage.
 * @returns {Promise<{message: string, url: string}>} Stored-file details.
 */
async function uploadBlob(context, req, saveToLocal = false) {
    return new Promise((resolve, reject) => {
        // Single place that records the failure on the response and rejects.
        const fail = (error, status = 500, body = 'Error processing file upload.') => {
            context.log.error('Error processing file upload:', error);
            context.res = { status, body };
            reject(error);
        };

        try {
            const busboy = Busboy({ headers: req.headers });
            let requestId = uuidv4();
            let fileSeen = false;

            busboy.on('field', (fieldname, value) => {
                if (fieldname === 'requestId') {
                    requestId = value;
                }
            });

            busboy.on('file', async (fieldname, file, info) => {
                fileSeen = true;
                // The handler is async: without this try/catch a failed write or
                // upload would be an unhandled rejection and the outer promise
                // would stay pending forever.
                try {
                    // Defense in depth: never let a client-supplied name carry directories.
                    const safeName = path.basename(String(info.filename));
                    let message;
                    let url;

                    if (saveToLocal) {
                        // NOTE(review): requestId is client-supplied and used as a folder
                        // name as-is — consider validating it against path separators.
                        const localPath = join(publicFolder, requestId);
                        fs.mkdirSync(localPath, { recursive: true });

                        const filename = `${uuidv4()}_${safeName}`;
                        await pipeline(file, fs.createWriteStream(join(localPath, filename)));

                        message = `File '${filename}' saved to folder successfully.`;
                        url = `http://${ipAddress}:${port}/files/${requestId}/${filename}`;
                    } else {
                        const { containerClient } = getBlobClient();
                        const filename = `${requestId}/${uuidv4()}_${safeName}`;
                        const blockBlobClient = containerClient.getBlockBlobClient(filename);

                        const passThroughStream = new PassThrough();
                        file.pipe(passThroughStream);
                        await blockBlobClient.uploadStream(passThroughStream);

                        message = `File '${filename}' uploaded successfully.`;
                        url = blockBlobClient.url;
                    }

                    context.log(message);
                    const body = { message, url };
                    context.res = { status: 200, body };
                    resolve(body);
                } catch (error) {
                    // Drain whatever is left of the part so the parser can finish.
                    file.resume();
                    fail(error);
                }
            });

            busboy.on('error', (error) => fail(error));

            // A form without any file part would otherwise never settle the promise.
            busboy.on('finish', () => {
                if (!fileSeen) {
                    const text = 'No file found in upload request.';
                    fail(new Error(text), 400, text);
                }
            });

            req.pipe(busboy);
        } catch (error) {
            fail(error);
        }
    });
}
|
|
147
|
+
|
|
148
|
+
export {
|
|
149
|
+
saveFileToBlob, deleteBlob, uploadBlob
|
|
150
|
+
}
|
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
import fs from 'fs';
|
|
2
|
+
import { path as ffmpegPath } from '@ffmpeg-installer/ffmpeg';
|
|
3
|
+
import { path as ffprobePath } from '@ffprobe-installer/ffprobe';
|
|
4
|
+
import path from 'path';
|
|
5
|
+
import ffmpeg from 'fluent-ffmpeg';
|
|
6
|
+
import { v4 as uuidv4 } from 'uuid';
|
|
7
|
+
import os from 'os';
|
|
8
|
+
import ytdl from 'ytdl-core';
|
|
9
|
+
import { promisify } from 'util';
|
|
10
|
+
|
|
11
|
+
ffmpeg.setFfmpegPath(ffmpegPath);
|
|
12
|
+
ffmpeg.setFfprobePath(ffprobePath);
|
|
13
|
+
console.log(`ffmpegPath: ${ffmpegPath}`);
|
|
14
|
+
console.log(`ffprobePath: ${ffprobePath}`);
|
|
15
|
+
|
|
16
|
+
const ffmpegProbe = promisify(ffmpeg.ffprobe);
|
|
17
|
+
|
|
18
|
+
/**
 * Cut one segment out of a media file and encode it as MP3 with ffmpeg.
 *
 * @param {string} inputPath - Source media passed to ffmpeg.
 * @param {string} outputFileName - Path the encoded segment is saved to.
 * @param {number} start - Seek position of the segment.
 * @param {number} duration - Length of the segment.
 * @returns {Promise<string>} Resolves with outputFileName when ffmpeg ends;
 *   rejects with ffmpeg's error otherwise.
 */
async function processChunk(inputPath, outputFileName, start, duration) {
    return new Promise((resolve, reject) => {
        const onStart = (cmd) => {
            console.log(`Started FFmpeg with command: ${cmd}`);
        };
        const onError = (err) => {
            console.error(`Error occurred while processing chunk:`, err);
            reject(err);
        };
        const onEnd = () => {
            console.log(`Finished processing chunk`);
            resolve(outputFileName);
        };

        ffmpeg(inputPath)
            .seekInput(start)
            .duration(duration)
            .audioCodec('libmp3lame') // output is always MP3
            .on('start', onStart)
            .on('error', onError)
            .on('end', onEnd)
            .save(outputFileName);
    });
}
|
|
38
|
+
|
|
39
|
+
/**
 * Build a unique folder path inside the system temp directory.
 * The folder itself is not created here.
 *
 * @returns {string} `<os tmpdir>/<uuid>`.
 */
const generateUniqueFolderName = () => path.join(os.tmpdir(), uuidv4());
|
|
45
|
+
|
|
46
|
+
/**
 * Split a media file into fixed-length MP3 chunks.
 *
 * Probes the input for its duration, creates a unique temp folder and starts
 * one ffmpeg job per chunk. All jobs are started immediately; the returned
 * promises settle as the individual chunks finish.
 *
 * @param {string} inputPath - Media source passed to ffprobe/ffmpeg.
 * @param {number} [chunkDurationInSeconds=600] - Length of each chunk.
 * @returns {Promise<{chunkPromises: Promise<string>[], uniqueOutputPath: string}>}
 *   One promise per chunk (resolving to the chunk path) and the folder that
 *   holds the chunks.
 * @throws {Error} When probing fails or no usable duration is reported. The
 *   error is logged and rethrown so callers see the real cause instead of
 *   failing later while destructuring `undefined`.
 */
async function splitMediaFile(inputPath, chunkDurationInSeconds = 600) {
    try {
        const metadata = await ffmpegProbe(inputPath);
        const duration = Number(metadata.format.duration);
        if (!Number.isFinite(duration) || duration <= 0) {
            throw new Error(`Unable to determine media duration for ${inputPath}`);
        }

        // The "- 1" tolerates up to one second of overhang rather than creating
        // a tiny trailing chunk; always produce at least one chunk.
        const numChunks = Math.max(1, Math.ceil((duration - 1) / chunkDurationInSeconds));

        const uniqueOutputPath = generateUniqueFolderName();
        fs.mkdirSync(uniqueOutputPath, { recursive: true });

        const baseName = path.parse(inputPath).name;
        const chunkPromises = [];
        for (let i = 0; i < numChunks; i++) {
            const outputFileName = path.join(uniqueOutputPath, `chunk-${i + 1}-${baseName}.mp3`);
            chunkPromises.push(
                processChunk(inputPath, outputFileName, i * chunkDurationInSeconds, chunkDurationInSeconds)
            );
        }

        return { chunkPromises, uniqueOutputPath };
    } catch (err) {
        console.error('Error occurred during the splitting process:', err);
        throw err;
    }
}
|
|
81
|
+
|
|
82
|
+
/**
 * Download the highest-quality audio stream of a video to a local file.
 *
 * Progress is logged at most every two seconds. If either side of the
 * transfer fails, the other side is torn down so no file descriptor or
 * download is left dangling.
 *
 * @param {string} url - Video URL handed to ytdl.
 * @param {string} filename - Destination path of the downloaded audio.
 * @returns {Promise<void>} Resolves once the file has been fully written.
 */
const ytdlDownload = async (url, filename) => {
    return new Promise((resolve, reject) => {
        const video = ytdl(url, { quality: 'highestaudio' });
        const output = fs.createWriteStream(filename);
        let lastLoggedTime = Date.now();

        video.on('error', (error) => {
            // pipe() does not close the destination when the source fails.
            output.destroy();
            reject(error);
        });

        video.on('progress', (chunkLength, downloaded, total) => {
            const currentTime = Date.now();
            if (currentTime - lastLoggedTime >= 2000) { // Log every 2 seconds
                if (total > 0) {
                    const percent = downloaded / total;
                    console.log(`${(percent * 100).toFixed(2)}% downloaded ${url}`);
                }
                lastLoggedTime = currentTime;
            }
        });

        output.on('finish', () => {
            resolve();
        });
        output.on('error', (error) => {
            // Stop downloading data that can no longer be written.
            video.destroy();
            reject(error);
        });

        video.pipe(output);
    });
};
|
|
109
|
+
|
|
110
|
+
/**
 * Download the audio of a YouTube video into a uniquely named temp file.
 *
 * @param {string} url - YouTube video URL.
 * @returns {Promise<string>} Path of the downloaded `.mp3` temp file; the
 *   caller owns the file and is responsible for deleting it.
 * @throws Rethrows the download error after logging it. The partially
 *   written temp file is removed first, because the caller never learns its
 *   path when the download fails.
 */
const processYoutubeUrl = async (url) => {
    const outputFileName = path.join(os.tmpdir(), `${uuidv4()}.mp3`);
    try {
        await ytdlDownload(url, outputFileName);
        return outputFileName;
    } catch (e) {
        console.log(e);
        // Best-effort cleanup; the file may not exist, so the unlink error is ignored.
        await new Promise((done) => fs.unlink(outputFileName, () => done()));
        throw e;
    }
}
|
|
120
|
+
|
|
121
|
+
export {
|
|
122
|
+
splitMediaFile, processYoutubeUrl
|
|
123
|
+
};
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
{
|
|
2
|
+
"bindings": [
|
|
3
|
+
{
|
|
4
|
+
"authLevel": "function",
|
|
5
|
+
"type": "httpTrigger",
|
|
6
|
+
"direction": "in",
|
|
7
|
+
"name": "req",
|
|
8
|
+
"methods": [
|
|
9
|
+
"get",
|
|
10
|
+
"post",
|
|
11
|
+
"delete"
|
|
12
|
+
]
|
|
13
|
+
},
|
|
14
|
+
{
|
|
15
|
+
"type": "http",
|
|
16
|
+
"direction": "out",
|
|
17
|
+
"name": "res"
|
|
18
|
+
}
|
|
19
|
+
]
|
|
20
|
+
}
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
import fs from 'fs';
|
|
2
|
+
|
|
3
|
+
/**
 * Check whether a string looks like a YouTube video URL.
 *
 * Accepts an optional http(s) scheme, an optional `www.`, `m.` or `music.`
 * subdomain, and the hosts `youtube.com` or `youtu.be`, followed by a
 * non-empty path. Matching is case-insensitive.
 *
 * @param {string} url - Candidate URL.
 * @returns {boolean} True when the URL points at a supported YouTube host.
 */
function isValidYoutubeUrl(url) {
    // `youtu\.be` (not `youtu\.?be`): the lax form also matched the bogus host "youtube/".
    const regex = /^(https?:\/\/)?((www|m|music)\.)?(youtube\.com|youtu\.be)\/.+$/i;
    return regex.test(url);
}
|
|
7
|
+
|
|
8
|
+
/**
 * Delete a temporary file or folder (recursively), best effort.
 *
 * Uses the promise-based fs API so the event loop is not blocked while
 * deleting. A missing path and any deletion error are logged, never thrown.
 *
 * @param {string} path - File or folder to delete; falsy values are ignored.
 * @returns {Promise<void>}
 */
async function deleteTempPath(path) {
    if (!path) {
        console.log('Temporary path is not defined.');
        return;
    }

    try {
        // stat() doubles as the existence check, avoiding an exists-then-stat race.
        const stats = await fs.promises.stat(path);
        if (stats.isFile()) {
            await fs.promises.unlink(path);
            console.log(`Temporary file ${path} deleted successfully.`);
        } else if (stats.isDirectory()) {
            await fs.promises.rm(path, { recursive: true });
            console.log(`Temporary folder ${path} and its contents deleted successfully.`);
        }
    } catch (err) {
        if (err && err.code === 'ENOENT') {
            console.log(`Temporary path ${path} does not exist.`);
            return;
        }
        console.error('Error occurred while deleting the temporary path:', err);
    }
}
|
|
30
|
+
|
|
31
|
+
export {
|
|
32
|
+
isValidYoutubeUrl, deleteTempPath
|
|
33
|
+
}
|
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
import { processYoutubeUrl, splitMediaFile } from './fileChunker.js';
|
|
2
|
+
import { saveFileToBlob, deleteBlob, uploadBlob } from './blobHandler.js';
|
|
3
|
+
import { publishRequestProgress } from './redis.js';
|
|
4
|
+
import { deleteTempPath, isValidYoutubeUrl } from './helper.js';
|
|
5
|
+
import { moveFileToPublicFolder, deleteFolder } from './localFileHandler.js';
|
|
6
|
+
|
|
7
|
+
const useAzure = process.env.AZURE_STORAGE_CONNECTION_STRING ? true : false;
|
|
8
|
+
console.log(useAzure ? 'Using Azure Storage' : 'Using local file system');
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
/**
 * HTTP entry point of the media file chunker.
 *
 * - DELETE `?requestId=` removes everything stored for that request.
 * - POST stores an uploaded file (multipart) and answers with its URL.
 * - Any other method expects `uri` and `requestId` (in `body.params` or the
 *   query string): the media is downloaded if it is a YouTube URL, split into
 *   chunks, each chunk is stored, and the list of stored locations becomes
 *   the response body.
 *
 * If splitting or storing fails, the response is a 500 rather than a 200
 * with an empty or partial list, which a client would treat as complete.
 *
 * @param {object} context - Invocation context exposing `log()` and `res`.
 * @param {object} req - Request with `method`, `query`, optional `body`.
 * @returns {Promise<void>} The outcome is written to `context.res`.
 */
async function main(context, req) {
    context.log('Starting req processing..');

    const method = req.method.toLowerCase();

    // Clean up stored chunks: a DELETE means processing is marked completed
    if (method === `delete`) {
        const { requestId } = req.query;
        if (!requestId) {
            context.res = {
                status: 400,
                body: "Please pass a requestId on the query string"
            };
            return;
        }
        const result = useAzure ? await deleteBlob(requestId) : await deleteFolder(requestId);
        context.res = {
            body: result
        };
        return;
    }

    if (method === `post`) {
        // uploadBlob writes context.res itself
        const { url } = await uploadBlob(context, req, !useAzure);
        context.log(`File url: ${url}`);
        return;
    }

    const { uri, requestId } = req.body?.params || req.query;
    if (!uri || !requestId) {
        context.res = {
            status: 400,
            body: "Please pass a uri and requestId on the query string or in the request body"
        };
        return;
    }

    let totalCount = 0;
    let completedCount = 0;
    let numberOfChunks;

    let file = uri;
    let folder;
    const isYoutubeUrl = isValidYoutubeUrl(uri);

    const result = [];
    let failure = null;

    // Publishes one progress step. Never rejects, so callers may fire and
    // forget without risking an unhandled rejection.
    const sendProgress = async (data = null) => {
        completedCount++;
        const progress = completedCount / totalCount;
        try {
            await publishRequestProgress({ requestId, progress, completedCount, totalCount, numberOfChunks, data });
        } catch (error) {
            console.error("An error occurred while publishing progress:", error);
        }
    };

    try {
        if (isYoutubeUrl) {
            file = await processYoutubeUrl(file);
        }

        const { chunkPromises, uniqueOutputPath } = await splitMediaFile(file);
        folder = uniqueOutputPath;

        numberOfChunks = chunkPromises.length; // for progress reporting
        totalCount += chunkPromises.length * 4; // 4 steps for each chunk (download and upload)

        // collect the chunks in order
        const chunks = [];
        for (const chunkPromise of chunkPromises) {
            chunks.push(await chunkPromise);
            sendProgress(); // intentionally not awaited (cannot reject)
        }

        // sequential storing of chunks
        for (const chunk of chunks) {
            const blobName = useAzure ? await saveFileToBlob(chunk, requestId) : await moveFileToPublicFolder(chunk, requestId);
            result.push(blobName);
            context.log(`Saved chunk as: ${blobName}`);
            sendProgress(); // intentionally not awaited (cannot reject)
        }
    } catch (error) {
        console.error("An error occurred:", error);
        failure = error;
    } finally {
        try {
            isYoutubeUrl && (await deleteTempPath(file));
            folder && (await deleteTempPath(folder));
        } catch (error) {
            console.error("An error occurred while deleting:", error);
        }
    }

    if (failure) {
        // Do not report success with an incomplete chunk list.
        context.res = {
            status: 500,
            body: "Error processing media file."
        };
        return;
    }

    console.log(`result: ${result}`);

    context.res = {
        // status: 200, /* Defaults to 200 */
        body: result
    };
}
|
|
113
|
+
|
|
114
|
+
// main(console, { query: { uri: "https://www.youtube.com/watch?v=QH2-TGUlwu4" } });
|
|
115
|
+
|
|
116
|
+
export default main;
|