@aj-archipelago/cortex 1.0.2 → 1.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,150 @@
1
+ import fs from 'fs';
2
+ import path from 'path';
3
+ import { BlobServiceClient } from '@azure/storage-blob';
4
+ import { v4 as uuidv4 } from 'uuid';
5
+ import Busboy from 'busboy';
6
+ import { PassThrough } from 'stream';
7
+ import { pipeline as _pipeline } from 'stream';
8
+ import { promisify } from 'util';
9
+ const pipeline = promisify(_pipeline);
10
+ import { join } from 'path';
11
+
12
+
13
+ import { publicFolder, port, ipAddress } from "./start.js";
14
+
15
/**
 * Builds the Azure Blob Storage clients from environment configuration.
 *
 * Reads AZURE_STORAGE_CONNECTION_STRING and AZURE_STORAGE_CONTAINER_NAME
 * from the process environment on every call.
 *
 * @returns {{ blobServiceClient: object, containerClient: object }} The
 *   service client and the client for the configured container.
 * @throws {Error} When either environment variable is missing or empty.
 */
const getBlobClient = () => {
    const {
        AZURE_STORAGE_CONNECTION_STRING: connectionString,
        AZURE_STORAGE_CONTAINER_NAME: containerName,
    } = process.env;

    const isConfigured = Boolean(connectionString && containerName);
    if (!isConfigured) {
        throw new Error('Missing Azure Storage connection string or container name environment variable');
    }

    const blobServiceClient = BlobServiceClient.fromConnectionString(connectionString);
    return {
        blobServiceClient,
        containerClient: blobServiceClient.getContainerClient(containerName),
    };
};
27
+
28
+ async function saveFileToBlob(chunkPath, requestId) {
29
+ const { containerClient } = getBlobClient();
30
+ // Use the filename with a UUID as the blob name
31
+ const blobName = `${requestId}/${uuidv4()}_${path.basename(chunkPath)}`;
32
+
33
+ // Create a read stream for the chunk file
34
+ const fileStream = fs.createReadStream(chunkPath);
35
+
36
+ // Upload the chunk to Azure Blob Storage using the stream
37
+ const blockBlobClient = containerClient.getBlockBlobClient(blobName);
38
+ await blockBlobClient.uploadStream(fileStream);
39
+
40
+ // Return the full URI of the uploaded blob
41
+ const blobUrl = blockBlobClient.url;
42
+ return blobUrl;
43
+ }
44
+
45
/**
 * Deletes every blob whose name starts with `<requestId>/`.
 *
 * Blobs are deleted one at a time, in listing order; each deletion is logged.
 *
 * @param {string} requestId - Request identifier used as the blob name prefix.
 * @returns {Promise<string[]>} Names of the blobs that were deleted.
 * @throws {Error} When requestId is missing or empty.
 */
async function deleteBlob(requestId) {
    if (!requestId) {
        throw new Error('Missing requestId parameter');
    }

    const { containerClient } = getBlobClient();
    const deletedNames = [];

    const matches = containerClient.listBlobsFlat({ prefix: `${requestId}/` });
    for await (const { name } of matches) {
        await containerClient.getBlockBlobClient(name).delete();
        console.log(`Cleaned blob: ${name}`);
        deletedNames.push(name);
    }

    return deletedNames;
}
64
+
65
/**
 * Parses a multipart/form-data request and stores the uploaded file either in
 * the local public folder or in Azure Blob Storage.
 *
 * A `requestId` form field, when it arrives before the file part, replaces the
 * generated request id. On success `context.res` is set to a 200 response and
 * the promise resolves with the same body. On failure `context.res` is set to
 * an error response and the promise rejects.
 *
 * @param {object} context - Function context; `context.log`, `context.log.error`
 *   and `context.res` are used.
 * @param {object} req - Incoming request stream with a `headers` property.
 * @param {boolean} [saveToLocal=false] - Store under `publicFolder` instead of
 *   Azure Blob Storage.
 * @returns {Promise<{ message: string, url: string }>} Result of the first
 *   stored file.
 */
async function uploadBlob(context, req, saveToLocal = false) {
    return new Promise((resolve, reject) => {
        // Reports a failure on the HTTP response and rejects the promise.
        const fail = (error, status = 500, body = 'Error processing file upload.') => {
            context.log.error('Error processing file upload:', error);
            context.res = { status, body };
            reject(error);
        };

        // Reports success on the HTTP response and resolves the promise.
        const succeed = (message, url) => {
            context.log(message);
            const body = { message, url };
            context.res = { status: 200, body };
            resolve(body);
        };

        try {
            const busboy = Busboy({ headers: req.headers });
            let requestId = uuidv4();
            let fileSeen = false;

            busboy.on('field', (fieldname, value) => {
                if (fieldname === 'requestId') {
                    requestId = value;
                }
            });

            busboy.on('file', async (fieldname, file, info) => {
                fileSeen = true;
                // This handler is async: without the try/catch a failed upload
                // would be an unhandled rejection and the outer promise would
                // never settle.
                try {
                    // The filename is client-supplied; keep only its last path
                    // segment so it cannot point outside the target folder.
                    const safeName = path.basename(info?.filename || 'upload');

                    if (saveToLocal) {
                        // requestId is client-supplied too and becomes a folder
                        // name, so it must be a single path segment.
                        const isSingleSegment = Boolean(requestId)
                            && requestId !== '.'
                            && requestId !== '..'
                            && path.basename(requestId) === requestId;
                        if (!isSingleSegment) {
                            throw new Error('Invalid requestId field.');
                        }

                        // Create the target folder if it doesn't exist
                        const localPath = join(publicFolder, requestId);
                        fs.mkdirSync(localPath, { recursive: true });

                        const filename = `${uuidv4()}_${safeName}`;
                        await pipeline(file, fs.createWriteStream(join(localPath, filename)));

                        succeed(
                            `File '${filename}' saved to folder successfully.`,
                            `http://${ipAddress}:${port}/files/${requestId}/${filename}`,
                        );
                    } else {
                        const { containerClient } = getBlobClient();
                        const filename = `${requestId}/${uuidv4()}_${safeName}`;
                        const blockBlobClient = containerClient.getBlockBlobClient(filename);

                        const passThroughStream = new PassThrough();
                        file.pipe(passThroughStream);
                        await blockBlobClient.uploadStream(passThroughStream);

                        succeed(`File '${filename}' uploaded successfully.`, blockBlobClient.url);
                    }
                } catch (error) {
                    // Drain the rest of the part so the parser can finish.
                    file.resume();
                    fail(error);
                }
            });

            busboy.on('error', (error) => fail(error));

            // A request without any file part would otherwise leave the
            // promise pending forever.
            busboy.on('close', () => {
                if (!fileSeen) {
                    const message = 'No file found in upload request.';
                    fail(new Error(message), 400, message);
                }
            });

            req.pipe(busboy);
        } catch (error) {
            fail(error);
        }
    });
}
147
+
148
+ export {
149
+ saveFileToBlob, deleteBlob, uploadBlob
150
+ }
@@ -0,0 +1,123 @@
1
+ import fs from 'fs';
2
+ import { path as ffmpegPath } from '@ffmpeg-installer/ffmpeg';
3
+ import { path as ffprobePath } from '@ffprobe-installer/ffprobe';
4
+ import path from 'path';
5
+ import ffmpeg from 'fluent-ffmpeg';
6
+ import { v4 as uuidv4 } from 'uuid';
7
+ import os from 'os';
8
+ import ytdl from 'ytdl-core';
9
+ import { promisify } from 'util';
10
+
11
// Point fluent-ffmpeg at the ffmpeg binary path exported by the installer
// package, and log it.
ffmpeg.setFfmpegPath(ffmpegPath);
console.log(`ffmpegPath: ${ffmpegPath}`);

// Same for the ffprobe binary.
ffmpeg.setFfprobePath(ffprobePath);
console.log(`ffprobePath: ${ffprobePath}`);

// Promise-returning wrapper around the callback-style ffmpeg.ffprobe.
const ffmpegProbe = promisify(ffmpeg.ffprobe);
17
+
18
/**
 * Extracts one time slice of a media file into an MP3 file using ffmpeg.
 *
 * @param {string} inputPath - Source media passed to ffmpeg.
 * @param {string} outputFileName - Path of the MP3 file to write.
 * @param {number} start - Seek position passed to `seekInput`.
 * @param {number} duration - Slice length passed to `duration`.
 * @returns {Promise<string>} Resolves with `outputFileName` on ffmpeg's `end`
 *   event; rejects with the error from its `error` event.
 */
async function processChunk(inputPath, outputFileName, start, duration) {
    return new Promise((resolve, reject) => {
        const command = ffmpeg(inputPath)
            .seekInput(start)
            .duration(duration)
            .audioCodec('libmp3lame'); // Ensure output is always in MP3 format

        command.on('start', (cmd) => {
            console.log(`Started FFmpeg with command: ${cmd}`);
        });

        command.on('error', (err) => {
            console.error(`Error occurred while processing chunk:`, err);
            reject(err);
        });

        command.on('end', () => {
            console.log(`Finished processing chunk`);
            resolve(outputFileName);
        });

        command.save(outputFileName);
    });
}
38
+
39
/**
 * Builds a unique path inside the system temporary directory.
 * The folder itself is not created here.
 *
 * @returns {string} `<os.tmpdir()>/<uuid>`.
 */
const generateUniqueFolderName = () => path.join(os.tmpdir(), uuidv4());
45
+
46
/**
 * Splits a media file into consecutive MP3 chunks inside a fresh temp folder.
 *
 * All chunk conversions are started immediately; the returned promises let the
 * caller await them in order.
 *
 * @param {string} inputPath - Media file (or URL) understood by ffprobe/ffmpeg.
 * @param {number} [chunkDurationInSeconds=600] - Length of each chunk.
 * @returns {Promise<{ chunkPromises: Promise<string>[], uniqueOutputPath: string }>}
 *   One promise per chunk (resolving to the chunk file path) and the folder
 *   that holds the chunk files.
 * @throws Rethrows any probing or setup error after logging it.
 */
async function splitMediaFile(inputPath, chunkDurationInSeconds = 600) {
    try {
        const metadata = await ffmpegProbe(inputPath);
        const duration = metadata.format.duration;

        // The "- 1" keeps a trailing remainder of up to one second from
        // getting its own chunk. Clamp to one chunk so media of one second or
        // less still produces output instead of an empty result.
        const numChunks = Math.max(1, Math.ceil((duration - 1) / chunkDurationInSeconds));

        const uniqueOutputPath = generateUniqueFolderName();

        // Create unique folder
        fs.mkdirSync(uniqueOutputPath, { recursive: true });

        const baseName = path.parse(inputPath).name;
        const chunkPromises = [];

        for (let i = 0; i < numChunks; i++) {
            const outputFileName = path.join(uniqueOutputPath, `chunk-${i + 1}-${baseName}.mp3`);

            const chunkPromise = processChunk(
                inputPath,
                outputFileName,
                i * chunkDurationInSeconds,
                chunkDurationInSeconds
            );

            // Chunks run concurrently but are awaited one by one. Mark each
            // promise as handled so a chunk that fails before the caller
            // reaches it is not reported as an unhandled rejection. The
            // returned promise still rejects for whoever awaits it.
            chunkPromise.catch(() => {});

            chunkPromises.push(chunkPromise);
        }

        return { chunkPromises, uniqueOutputPath };
    } catch (err) {
        console.error('Error occurred during the splitting process:', err);
        // Propagate the real cause; returning undefined would make callers
        // fail later with an unrelated destructuring TypeError.
        throw err;
    }
}
81
+
82
/**
 * Downloads the highest-quality audio stream of a video URL to a local file.
 *
 * Download progress is logged at most once every two seconds.
 *
 * @param {string} url - Video URL passed to ytdl.
 * @param {string} filename - Path of the file to write.
 * @returns {Promise<void>} Resolves when the file stream finishes; rejects on
 *   a download-stream or file-stream error.
 */
const ytdlDownload = async (url, filename) => new Promise((resolve, reject) => {
    const download = ytdl(url, { quality: 'highestaudio' });
    let lastLoggedTime = Date.now();

    download.on('error', (error) => reject(error));

    download.on('progress', (chunkLength, downloaded, total) => {
        const now = Date.now();
        if (now - lastLoggedTime < 2000) { // Log every 2 seconds
            return;
        }
        const percent = downloaded / total;
        console.log(`${(percent * 100).toFixed(2)}% downloaded ${url}`);
        lastLoggedTime = now;
    });

    const output = download.pipe(fs.createWriteStream(filename));
    output.on('finish', () => resolve());
    output.on('error', (error) => reject(error));
});
109
+
110
/**
 * Downloads the audio of a video URL into a uniquely named `.mp3` file in the
 * system temporary directory.
 *
 * @param {string} url - Video URL to download.
 * @returns {Promise<string>} Path of the downloaded file.
 * @throws Rethrows any download error after logging it.
 */
const processYoutubeUrl = async (url) => {
    const downloadTarget = path.join(os.tmpdir(), `${uuidv4()}.mp3`);
    try {
        await ytdlDownload(url, downloadTarget);
    } catch (e) {
        console.log(e);
        throw e;
    }
    return downloadTarget;
};
120
+
121
+ export {
122
+ splitMediaFile, processYoutubeUrl
123
+ };
@@ -0,0 +1,20 @@
1
+ {
2
+ "bindings": [
3
+ {
4
+ "authLevel": "function",
5
+ "type": "httpTrigger",
6
+ "direction": "in",
7
+ "name": "req",
8
+ "methods": [
9
+ "get",
10
+ "post",
11
+ "delete"
12
+ ]
13
+ },
14
+ {
15
+ "type": "http",
16
+ "direction": "out",
17
+ "name": "res"
18
+ }
19
+ ]
20
+ }
@@ -0,0 +1,33 @@
1
+ import fs from 'fs';
2
+
3
/**
 * Checks whether a string looks like a YouTube URL.
 *
 * Accepts an optional http(s) scheme and optional `www.` prefix, followed by
 * `youtube.com`, `youtu.be` or `youtube`, a slash and at least one character.
 *
 * @param {string} url - Candidate URL.
 * @returns {boolean} True when the pattern matches.
 */
function isValidYoutubeUrl(url) {
    return /^(https?:\/\/)?(www\.)?(youtube\.com|youtu\.?be)\/.+$/.test(url);
}
7
+
8
/**
 * Best-effort removal of a temporary file or folder.
 *
 * Missing or empty paths are logged and ignored. Any filesystem error is
 * logged and swallowed, so this never rejects.
 *
 * @param {string} path - File or directory to delete.
 * @returns {Promise<void>}
 */
async function deleteTempPath(path) {
    try {
        if (!path) {
            console.log('Temporary path is not defined.');
            return;
        }

        const isPresent = fs.existsSync(path);
        if (!isPresent) {
            console.log(`Temporary path ${path} does not exist.`);
            return;
        }

        const entry = fs.statSync(path);
        if (entry.isFile()) {
            fs.unlinkSync(path);
            console.log(`Temporary file ${path} deleted successfully.`);
            return;
        }

        if (entry.isDirectory()) {
            fs.rmSync(path, { recursive: true });
            console.log(`Temporary folder ${path} and its contents deleted successfully.`);
        }
    } catch (err) {
        console.error('Error occurred while deleting the temporary path:', err);
    }
}
30
+
31
+ export {
32
+ isValidYoutubeUrl, deleteTempPath
33
+ }
@@ -0,0 +1,116 @@
1
+ import { processYoutubeUrl, splitMediaFile } from './fileChunker.js';
2
+ import { saveFileToBlob, deleteBlob, uploadBlob } from './blobHandler.js';
3
+ import { publishRequestProgress } from './redis.js';
4
+ import { deleteTempPath, isValidYoutubeUrl } from './helper.js';
5
+ import { moveFileToPublicFolder, deleteFolder } from './localFileHandler.js';
6
+
7
// Azure Blob Storage is used whenever a connection string is configured;
// otherwise files are served from the local file system.
const useAzure = Boolean(process.env.AZURE_STORAGE_CONNECTION_STRING);
const storageBanner = useAzure ? 'Using Azure Storage' : 'Using local file system';
console.log(storageBanner);
9
+
10
+
11
/**
 * HTTP entry point of the media helper function.
 *
 * - DELETE `?requestId=…`: removes everything stored for that request.
 * - POST (multipart): stores the uploaded file via `uploadBlob`.
 * - otherwise: takes `uri` and `requestId` from `req.body.params` or the query
 *   string, downloads the media when `uri` is a YouTube URL, splits it into
 *   chunks, stores every chunk and responds with the list of chunk URLs.
 *
 * Progress is published after each chunk is split and after each chunk is
 * stored. `totalCount` is four steps per chunk while this function reports two
 * of them — presumably the remaining steps are reported by the consumer of the
 * chunks; confirm against the caller.
 *
 * @param {object} context - Function context (`log`, `res`).
 * @param {object} req - Request with `method`, `query` and optional `body`.
 * @returns {Promise<void>} The response is written to `context.res`.
 */
async function main(context, req) {
    context.log('Starting req processing..');

    const method = req.method.toLowerCase();

    // Clean up blob when request delete which means processing marked completed
    if (method === 'delete') {
        const { requestId } = req.query;
        if (!requestId) {
            context.res = {
                status: 400,
                body: "Please pass a requestId on the query string"
            };
            return;
        }
        const result = useAzure ? await deleteBlob(requestId) : await deleteFolder(requestId);
        context.res = {
            body: result
        };
        return;
    }

    if (method === 'post') {
        const { url } = await uploadBlob(context, req, !useAzure);
        context.log(`File url: ${url}`);
        return;
    }

    // Fall back to an empty object so a request without body and query gets
    // the 400 below instead of a destructuring TypeError.
    const { uri, requestId } = req.body?.params || req.query || {};
    if (!uri || !requestId) {
        context.res = {
            status: 400,
            body: "Please pass a uri and requestId on the query string or in the request body"
        };
        return;
    }

    let totalCount = 0;
    let completedCount = 0;
    let numberOfChunks;

    let file = uri;
    let folder;
    const isYoutubeUrl = isValidYoutubeUrl(uri);

    const result = [];
    let failure = null;

    // Progress reporting is best-effort: a publish failure is logged and must
    // not abort the media processing.
    const sendProgress = async (data = null) => {
        completedCount++;
        const progress = completedCount / totalCount;
        try {
            await publishRequestProgress({ requestId, progress, completedCount, totalCount, numberOfChunks, data });
        } catch (error) {
            console.error("An error occurred while publishing progress:", error);
        }
    };

    try {
        if (isYoutubeUrl) {
            file = await processYoutubeUrl(file);
        }

        const { chunkPromises, uniqueOutputPath } = await splitMediaFile(file);
        folder = uniqueOutputPath;

        numberOfChunks = chunkPromises.length; // for progress reporting
        totalCount += chunkPromises.length * 4; // 4 steps for each chunk (download and upload)

        // sequential download of chunks
        const chunks = [];
        for (const chunkPromise of chunkPromises) {
            chunks.push(await chunkPromise);
            await sendProgress();
        }

        // sequential processing of chunks
        for (const chunk of chunks) {
            const blobName = useAzure ? await saveFileToBlob(chunk, requestId) : await moveFileToPublicFolder(chunk, requestId);
            result.push(blobName);
            context.log(`Saved chunk as: ${blobName}`);
            await sendProgress();
        }
    } catch (error) {
        console.error("An error occurred:", error);
        failure = error;
    } finally {
        try {
            isYoutubeUrl && (await deleteTempPath(file));
            folder && (await deleteTempPath(folder));
        } catch (error) {
            console.error("An error occurred while deleting:", error);
        }
    }

    console.log(`result: ${result}`);

    if (failure) {
        // Report the failure instead of answering 200 with partial results.
        context.res = {
            status: 500,
            body: failure.message || String(failure)
        };
        return;
    }

    context.res = {
        // status: 200, /* Defaults to 200 */
        body: result
    };
}
113
+
114
+ // main(console, { query: { uri: "https://www.youtube.com/watch?v=QH2-TGUlwu4" } });
115
+
116
+ export default main;
@@ -0,0 +1,36 @@
1
+ import { promises as fs } from 'fs';
2
+ import { join, basename } from 'path';
3
+ import { v4 as uuidv4 } from 'uuid';
4
+
5
+ import { publicFolder, port, ipAddress } from "./start.js";
6
+
7
+
8
/**
 * Moves a chunk file into `<publicFolder>/<requestId>/` under a unique name
 * and returns the URL it is served from.
 *
 * @param {string} chunkPath - Path of the file to move.
 * @param {string} requestId - Request identifier; used as the sub-folder name.
 * @returns {Promise<string>} `http://<ipAddress>:<port>/files/<requestId>/<uuid>_<basename>`.
 */
async function moveFileToPublicFolder(chunkPath, requestId) {
    // Prefix with a UUID so chunks with the same basename never collide.
    const filename = `${uuidv4()}_${basename(chunkPath)}`;

    // Create the target folder if it doesn't exist
    const targetFolder = join(publicFolder, requestId);
    await fs.mkdir(targetFolder, { recursive: true });

    // Move the file to the target folder
    const targetPath = join(targetFolder, filename);
    try {
        await fs.rename(chunkPath, targetPath);
    } catch (error) {
        // rename cannot cross filesystems (e.g. temp dir on another mount);
        // fall back to copy + delete in that case only.
        if (error.code !== 'EXDEV') {
            throw error;
        }
        await fs.copyFile(chunkPath, targetPath);
        await fs.unlink(chunkPath);
    }

    // Return the complete URL of the file
    return `http://${ipAddress}:${port}/files/${requestId}/${filename}`;
}
25
+
26
/**
 * Recursively deletes `<publicFolder>/<requestId>`.
 *
 * The request id must be a single path segment, so a value such as `..` or
 * `a/../..` cannot make the recursive delete escape the public folder.
 * Deleting a folder that does not exist is not an error.
 *
 * @param {string} requestId - Request identifier; the sub-folder name.
 * @returns {Promise<void>}
 * @throws {Error} When requestId is missing or is not a plain folder name.
 */
async function deleteFolder(requestId) {
    if (!requestId) throw new Error('Missing requestId parameter');

    const folderName = String(requestId);
    const isSingleSegment = folderName !== '.'
        && folderName !== '..'
        && basename(folderName) === folderName;
    if (!isSingleSegment) {
        throw new Error('Invalid requestId parameter');
    }

    const targetFolder = join(publicFolder, folderName);
    // force: a repeated cleanup of an already removed folder should succeed.
    await fs.rm(targetFolder, { recursive: true, force: true });
    console.log(`Cleaned folder: ${targetFolder}`);
}
32
+
33
+
34
+ export {
35
+ moveFileToPublicFolder, deleteFolder
36
+ };