@aj-archipelago/cortex 1.0.2 → 1.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,150 @@
1
+ import fs from 'fs';
2
+ import path from 'path';
3
+ import { BlobServiceClient } from '@azure/storage-blob';
4
+ import { v4 as uuidv4 } from 'uuid';
5
+ import Busboy from 'busboy';
6
+ import { PassThrough } from 'stream';
7
+ import { pipeline as _pipeline } from 'stream';
8
+ import { promisify } from 'util';
9
+ const pipeline = promisify(_pipeline);
10
+ import { join } from 'path';
11
+
12
+
13
+ import { publicFolder, port, ipAddress } from "./start.js";
14
+
15
/**
 * Builds the Azure Blob Storage clients from environment configuration.
 *
 * Reads AZURE_STORAGE_CONNECTION_STRING and AZURE_STORAGE_CONTAINER_NAME from
 * the process environment.
 *
 * @returns {{ blobServiceClient: object, containerClient: object }} The
 *   service client and the client for the configured container.
 * @throws {Error} When either environment variable is unset or empty.
 */
const getBlobClient = () => {
    const {
        AZURE_STORAGE_CONNECTION_STRING: connectionString,
        AZURE_STORAGE_CONTAINER_NAME: containerName,
    } = process.env;

    const isConfigured = Boolean(connectionString) && Boolean(containerName);
    if (!isConfigured) {
        throw new Error('Missing Azure Storage connection string or container name environment variable');
    }

    const blobServiceClient = BlobServiceClient.fromConnectionString(connectionString);
    return {
        blobServiceClient,
        containerClient: blobServiceClient.getContainerClient(containerName),
    };
};
27
+
28
+ async function saveFileToBlob(chunkPath, requestId) {
29
+ const { containerClient } = getBlobClient();
30
+ // Use the filename with a UUID as the blob name
31
+ const blobName = `${requestId}/${uuidv4()}_${path.basename(chunkPath)}`;
32
+
33
+ // Create a read stream for the chunk file
34
+ const fileStream = fs.createReadStream(chunkPath);
35
+
36
+ // Upload the chunk to Azure Blob Storage using the stream
37
+ const blockBlobClient = containerClient.getBlockBlobClient(blobName);
38
+ await blockBlobClient.uploadStream(fileStream);
39
+
40
+ // Return the full URI of the uploaded blob
41
+ const blobUrl = blockBlobClient.url;
42
+ return blobUrl;
43
+ }
44
+
45
/**
 * Deletes every blob stored under the `<requestId>/` prefix.
 *
 * @param {string} requestId - Request identifier whose blobs are removed.
 * @returns {Promise<string[]>} Names of the blobs that were deleted.
 * @throws {Error} When requestId is missing.
 */
async function deleteBlob(requestId) {
    if (!requestId) {
        throw new Error('Missing requestId parameter');
    }

    const { containerClient } = getBlobClient();
    const deletedNames = [];

    // Blobs are removed one at a time, in listing order.
    for await (const { name } of containerClient.listBlobsFlat({ prefix: `${requestId}/` })) {
        await containerClient.getBlockBlobClient(name).delete();
        console.log(`Cleaned blob: ${name}`);
        deletedNames.push(name);
    }

    return deletedNames;
}
64
+
65
/**
 * Writes an uploaded file stream into `<publicFolder>/<requestId>/`.
 *
 * @param {object} context - Function context; `context.log` is used for logging.
 * @param {object} file - Readable stream of the uploaded file.
 * @param {string} requestId - Folder name; must be a single path segment.
 * @param {string} safeName - File name already stripped of directory parts.
 * @returns {Promise<{message: string, url: string}>} Response body.
 * @throws {Error} When requestId would escape the public folder or the write fails.
 */
async function storeUploadLocally(context, file, requestId, safeName) {
    // requestId comes from a form field; refuse anything that is not a plain
    // single path segment so the upload cannot be written outside publicFolder.
    if (path.basename(requestId) !== requestId || requestId === '.' || requestId === '..') {
        throw new Error('Invalid requestId');
    }

    // Create the target folder if it doesn't exist
    const localPath = join(publicFolder, requestId);
    fs.mkdirSync(localPath, { recursive: true });

    const filename = `${uuidv4()}_${safeName}`;
    const destinationPath = `${localPath}/${filename}`;

    await pipeline(file, fs.createWriteStream(destinationPath));

    const message = `File '${filename}' saved to folder successfully.`;
    context.log(message);

    const url = `http://${ipAddress}:${port}/files/${requestId}/${filename}`;
    return { message, url };
}

/**
 * Streams an uploaded file into Azure Blob Storage as `<requestId>/<uuid>_<name>`.
 *
 * @param {object} context - Function context; `context.log` is used for logging.
 * @param {object} file - Readable stream of the uploaded file.
 * @param {string} requestId - Blob name prefix.
 * @param {string} safeName - File name already stripped of directory parts.
 * @returns {Promise<{message: string, url: string}>} Response body.
 */
async function storeUploadInBlob(context, file, requestId, safeName) {
    const { containerClient } = getBlobClient();
    const filename = `${requestId}/${uuidv4()}_${safeName}`;
    const blockBlobClient = containerClient.getBlockBlobClient(filename);

    const passThroughStream = new PassThrough();
    file.pipe(passThroughStream);

    await blockBlobClient.uploadStream(passThroughStream);

    const message = `File '${filename}' uploaded successfully.`;
    const url = blockBlobClient.url;
    context.log(message);
    return { message, url };
}

/**
 * Parses a multipart upload request and stores the uploaded file either in
 * the local public folder or in Azure Blob Storage.
 *
 * An optional `requestId` form field overrides the generated request id. On
 * success `context.res` is set to a 200 response and the promise resolves
 * with the response body; on any failure `context.res` is set to a 500
 * response and the promise rejects with the underlying error.
 *
 * @param {object} context - Function context (`log`, `log.error`, `res`).
 * @param {object} req - Incoming request stream with multipart `headers`.
 * @param {boolean} [saveToLocal=false] - Store on the local disk instead of Azure.
 * @returns {Promise<{message: string, url: string}>} Message and URL of the stored file.
 */
async function uploadBlob(context, req, saveToLocal = false) {
    return new Promise((resolve, reject) => {
        // Single failure path: log, publish a 500 response, reject the promise.
        const fail = (error) => {
            context.log.error('Error processing file upload:', error);
            context.res = {
                status: 500,
                body: 'Error processing file upload.',
            };
            reject(error);
        };

        try {
            const busboy = Busboy({ headers: req.headers });
            let requestId = uuidv4();

            busboy.on('field', (fieldname, value) => {
                if (fieldname === 'requestId') {
                    requestId = value;
                }
            });

            busboy.on('file', async (fieldname, file, info) => {
                // The handler is async, so its failures must be caught here;
                // otherwise the outer promise would never settle.
                try {
                    // Drop any directory part of the client-supplied name.
                    const safeName = path.basename(String(info.filename));
                    const body = saveToLocal
                        ? await storeUploadLocally(context, file, requestId, safeName)
                        : await storeUploadInBlob(context, file, requestId, safeName);

                    context.res = {
                        status: 200,
                        body,
                    };
                    resolve(body);
                } catch (error) {
                    // Drain whatever is left of the upload so the parser is not stalled.
                    file.resume();
                    fail(error);
                }
            });

            busboy.on('error', fail);

            req.pipe(busboy);
        } catch (error) {
            fail(error);
        }
    });
}
147
+
148
+ export {
149
+ saveFileToBlob, deleteBlob, uploadBlob
150
+ }
@@ -0,0 +1,167 @@
1
+ import fs from 'fs';
2
+ import { path as ffmpegPath } from '@ffmpeg-installer/ffmpeg';
3
+ import { path as ffprobePath } from '@ffprobe-installer/ffprobe';
4
+ import path from 'path';
5
+ import ffmpeg from 'fluent-ffmpeg';
6
+ import { v4 as uuidv4 } from 'uuid';
7
+ import os from 'os';
8
+ import ytdl from 'ytdl-core';
9
+ import { promisify } from 'util';
10
+ import axios from 'axios';
11
+ import { ensureEncoded } from './helper.js';
12
+
13
// Configure fluent-ffmpeg with the binary locations exported by the
// @ffmpeg-installer / @ffprobe-installer packages, then log both paths.
ffmpeg.setFfmpegPath(ffmpegPath);
ffmpeg.setFfprobePath(ffprobePath);
console.log(`ffmpegPath: ${ffmpegPath}`);
console.log(`ffprobePath: ${ffprobePath}`);

// Promise-returning wrapper around the callback-style ffmpeg.ffprobe.
const ffmpegProbe = promisify(ffmpeg.ffprobe);
19
+
20
+
21
/**
 * Extracts one time slice of a media file and saves it as a 128 kbps MP3.
 *
 * @param {string} inputPath - Source media file.
 * @param {string} outputFileName - Destination path of the MP3 chunk.
 * @param {number} start - Seek offset passed to `seekInput`.
 * @param {number} duration - Length passed to `duration`.
 * @returns {Promise<string>} Resolves with outputFileName when ffmpeg ends;
 *   rejects with the ffmpeg error otherwise.
 */
async function processChunk(inputPath, outputFileName, start, duration) {
    const command = ffmpeg(inputPath)
        .seekInput(start)
        .duration(duration)
        .format('mp3')
        .audioCodec('libmp3lame')
        .audioBitrate(128);

    command.on('start', (cmd) => {
        console.log(`Started FFmpeg with command: ${cmd}`);
    });

    return new Promise((resolve, reject) => {
        const onError = (err) => {
            console.error(`Error occurred while processing chunk:`, err);
            reject(err);
        };
        const onEnd = () => {
            console.log(`Finished processing chunk`);
            resolve(outputFileName);
        };

        command.on('error', onError);
        command.on('end', onEnd);
        command.save(outputFileName);
    });
}
43
+
44
/**
 * Produces a path for a new, uniquely named folder under the system temp
 * directory. The folder itself is not created.
 *
 * @returns {string} `<os.tmpdir()>/<uuid>`.
 */
const generateUniqueFolderName = () => {
    const folderId = uuidv4();
    return path.join(os.tmpdir(), folderId);
};
50
+
51
/**
 * Downloads a URL to a local file by streaming the HTTP response to disk.
 *
 * @param {string} url - Address to fetch.
 * @param {string} outputPath - Local path the response body is written to.
 * @returns {Promise<void>} Resolves once the file has been fully written.
 * @throws {Error} Re-throws request, response-stream and file-write errors
 *   after logging them.
 */
async function downloadFile(url, outputPath) {
    try {
        // Make an HTTP request for the file
        const response = await axios.get(url, { responseType: 'stream' });
        const source = response.data;

        // Create a writable file stream to save the file
        const fileStream = fs.createWriteStream(outputPath);

        await new Promise((resolve, reject) => {
            // pipe() does not forward errors from the source, so both ends
            // are watched; on failure both streams are torn down so no
            // descriptor or socket is left open.
            const abort = (error) => {
                source.destroy();
                fileStream.destroy();
                reject(error);
            };

            source.on('error', abort);
            fileStream.on('error', abort);
            fileStream.on('finish', resolve);

            // Pipe the response data into the file stream
            source.pipe(fileStream);
        });

        console.log(`Downloaded file saved to: ${outputPath}`);
    } catch (error) {
        console.error(`Error downloading file from ${url}:`, error);
        throw error;
    }
}
74
+
75
/**
 * Splits a media file into fixed-length MP3 chunks inside a fresh temp folder.
 *
 * When inputPath is an http(s)/ftp URL it is first downloaded into that
 * folder. One `processChunk` call is started per chunk; the returned promises
 * are already running when this function resolves.
 *
 * @param {string} inputPath - Local path or URL of the media file.
 * @param {number} [chunkDurationInSeconds=600] - Length of each chunk.
 * @returns {Promise<{chunkPromises: Promise<string>[], uniqueOutputPath: string}>}
 *   The pending chunk conversions and the folder that holds their output.
 * @throws {Error} Re-throws any download/probe failure after logging it and
 *   removing the temp folder; also throws when the media duration is unknown.
 */
async function splitMediaFile(inputPath, chunkDurationInSeconds = 600) {
    // Create unique folder
    const uniqueOutputPath = generateUniqueFolderName();

    try {
        fs.mkdirSync(uniqueOutputPath, { recursive: true });

        let sourcePath = inputPath;

        // Download the file if it's not a local file
        const isUrl = /^(https?|ftp):\/\/[^\s/$.?#].[^\s]*$/i.test(sourcePath);
        if (isUrl) {
            const encodedUrl = ensureEncoded(sourcePath);
            // Keep the original file name from the URL; a URL whose path ends
            // in "/" has no name, so fall back to a fixed one rather than
            // trying to write onto the folder itself.
            const originalFileName = path.basename(new URL(encodedUrl).pathname) || 'download';
            const downloadPath = path.join(uniqueOutputPath, originalFileName);
            await downloadFile(encodedUrl, downloadPath);
            sourcePath = downloadPath;
        }

        const metadata = await ffmpegProbe(sourcePath);
        const duration = Number(metadata.format.duration);
        if (!Number.isFinite(duration) || duration <= 0) {
            throw new Error(`Unable to determine media duration for ${sourcePath}`);
        }

        // The one-second allowance avoids a trailing chunk shorter than a
        // second; very short media still gets exactly one chunk.
        const numChunks = Math.max(1, Math.ceil((duration - 1) / chunkDurationInSeconds));
        const baseName = path.parse(sourcePath).name;

        const chunkPromises = [];
        for (let i = 0; i < numChunks; i++) {
            const outputFileName = path.join(uniqueOutputPath, `chunk-${i + 1}-${baseName}.mp3`);
            chunkPromises.push(
                processChunk(sourcePath, outputFileName, i * chunkDurationInSeconds, chunkDurationInSeconds)
            );
        }

        return { chunkPromises, uniqueOutputPath };
    } catch (err) {
        console.error('Error occurred during the splitting process:', err);
        // The caller never learns the folder path on failure, so clean up here.
        fs.rmSync(uniqueOutputPath, { recursive: true, force: true });
        throw err;
    }
}
125
+
126
/**
 * Downloads the highest-quality audio stream of a video to a local file,
 * logging progress at most every two seconds.
 *
 * @param {string} url - Video URL handed to ytdl.
 * @param {string} filename - Local path the stream is written to.
 * @returns {Promise<void>} Resolves when the file has been fully written;
 *   rejects with the download or file-write error.
 */
const ytdlDownload = async (url, filename) => {
    return new Promise((resolve, reject) => {
        const video = ytdl(url, { quality: 'highestaudio' });
        const output = fs.createWriteStream(filename);
        let lastLoggedTime = Date.now();

        // Whichever side fails, stop the other one too so neither the
        // download nor the file descriptor is left dangling.
        const abort = (error) => {
            video.destroy();
            output.destroy();
            reject(error);
        };

        video.on('error', abort);

        video.on('progress', (chunkLength, downloaded, total) => {
            const currentTime = Date.now();
            if (currentTime - lastLoggedTime >= 2000) { // Log every 2 seconds
                const percent = downloaded / total;
                console.log(`${(percent * 100).toFixed(2)}% downloaded ${url}`);
                lastLoggedTime = currentTime;
            }
        });

        output.on('finish', () => {
            resolve();
        });
        output.on('error', abort);

        video.pipe(output);
    });
};
153
+
154
/**
 * Downloads the audio of a YouTube URL into a uniquely named file in the
 * system temp directory.
 *
 * @param {string} url - YouTube video URL.
 * @returns {Promise<string>} Path of the downloaded file.
 * @throws {Error} Re-throws the download error after logging it and removing
 *   any partially written file.
 */
const processYoutubeUrl = async (url) => {
    const outputFileName = path.join(os.tmpdir(), `${uuidv4()}.mp3`);
    try {
        await ytdlDownload(url, outputFileName);
        return outputFileName;
    } catch (e) {
        console.log(e);
        // The caller never receives the path on failure, so the partial file
        // has to be removed here. Cleanup is best effort and must not mask
        // the original error.
        try {
            fs.rmSync(outputFileName, { force: true });
        } catch (cleanupError) {
            console.log(cleanupError);
        }
        throw e;
    }
};
164
+
165
+ export {
166
+ splitMediaFile, processYoutubeUrl
167
+ };
@@ -0,0 +1,20 @@
1
+ {
2
+ "bindings": [
3
+ {
4
+ "authLevel": "function",
5
+ "type": "httpTrigger",
6
+ "direction": "in",
7
+ "name": "req",
8
+ "methods": [
9
+ "get",
10
+ "post",
11
+ "delete"
12
+ ]
13
+ },
14
+ {
15
+ "type": "http",
16
+ "direction": "out",
17
+ "name": "res"
18
+ }
19
+ ]
20
+ }
@@ -0,0 +1,45 @@
1
+ import fs from 'fs';
2
+
3
/**
 * Tells whether a string looks like a YouTube link: an optional http(s)
 * scheme, an optional `www.` prefix, a youtube.com / youtu.be host and a
 * non-empty path.
 *
 * @param {string} url - Candidate URL.
 * @returns {boolean} True when the pattern matches.
 */
function isValidYoutubeUrl(url) {
    return /^(https?:\/\/)?(www\.)?(youtube\.com|youtu\.?be)\/.+$/.test(url);
}
7
+
8
/**
 * Removes a temporary file or folder (recursively). Missing or empty paths
 * are only logged, and any error raised while deleting is logged rather
 * than thrown.
 *
 * @param {string} path - File or directory to remove.
 * @returns {Promise<void>}
 */
async function deleteTempPath(path) {
    try {
        if (!path) {
            console.log('Temporary path is not defined.');
            return;
        }

        const exists = fs.existsSync(path);
        if (!exists) {
            console.log(`Temporary path ${path} does not exist.`);
            return;
        }

        const entry = fs.statSync(path);
        if (entry.isFile()) {
            fs.unlinkSync(path);
            console.log(`Temporary file ${path} deleted successfully.`);
            return;
        }
        if (entry.isDirectory()) {
            fs.rmSync(path, { recursive: true });
            console.log(`Temporary folder ${path} and its contents deleted successfully.`);
        }
    } catch (err) {
        console.error('Error occurred while deleting the temporary path:', err);
    }
}
30
+
31
/**
 * Percent-encodes a URL unless it already contains percent-escapes.
 *
 * `decodeURI` deliberately leaves escapes of reserved characters (for example
 * `%2B`, `%3A`, `%3D`, `%2F`) untouched, so comparing the decoded string with
 * the input cannot tell whether a URL is already encoded: a URL whose only
 * escapes are reserved ones would be encoded a second time (`%2B` becoming
 * `%252B`). The presence of a valid escape is checked directly instead.
 *
 * @param {string} url - URL to normalise.
 * @returns {string} The encoded URL, or the input unchanged when it already
 *   contains escapes, is malformed, or is not a string.
 */
function ensureEncoded(url) {
    if (typeof url !== 'string') {
        return url;
    }
    try {
        // Throws on malformed escape sequences; such input is returned as-is.
        decodeURI(url);
        // After a successful decode every "%" starts a valid escape, so any
        // "%" means the URL is already encoded.
        return url.includes('%') ? url : encodeURI(url);
    } catch (e) {
        return url;
    }
}
42
+
43
+ export {
44
+ isValidYoutubeUrl, deleteTempPath, ensureEncoded
45
+ }
@@ -0,0 +1,114 @@
1
+ import { processYoutubeUrl, splitMediaFile } from './fileChunker.js';
2
+ import { saveFileToBlob, deleteBlob, uploadBlob } from './blobHandler.js';
3
+ import { publishRequestProgress } from './redis.js';
4
+ import { deleteTempPath, ensureEncoded, isValidYoutubeUrl } from './helper.js';
5
+ import { moveFileToPublicFolder, deleteFolder } from './localFileHandler.js';
6
+
7
// Azure storage is selected whenever a connection string is configured;
// the chosen backend is logged once at module load.
const useAzure = Boolean(process.env.AZURE_STORAGE_CONNECTION_STRING);
console.log(useAzure ? 'Using Azure Storage' : 'Using local file system');
9
+
10
+
11
/**
 * HTTP entry point of the media helper.
 *
 * - DELETE: removes everything stored for `requestId` (query string).
 * - POST: stores an uploaded file via `uploadBlob`.
 * - otherwise: takes `uri` and `requestId` from `req.body.params` or the query
 *   string, splits the media into chunks, stores each chunk and responds with
 *   the list of chunk URLs, publishing progress along the way.
 *
 * The response is written to `context.res`: 400 for missing parameters, 500
 * when upload or processing fails, and the default status with the result
 * otherwise.
 *
 * @param {object} context - Function context (`log`, `res`).
 * @param {object} req - Request (`method`, `query`, optional `body`).
 * @returns {Promise<void>}
 */
async function main(context, req) {
    context.log('Starting req processing..');
    const method = req.method.toLowerCase();

    // Clean up blob when request delete which means processing marked completed
    if (method === `delete`) {
        const { requestId } = req.query;
        if (!requestId) {
            context.res = {
                status: 400,
                body: "Please pass a requestId on the query string"
            };
            return;
        }
        const result = useAzure ? await deleteBlob(requestId) : await deleteFolder(requestId);
        context.res = {
            body: result
        };
        return;
    }

    if (method === `post`) {
        try {
            const { url } = await uploadBlob(context, req, !useAzure);
            context.log(`File url: ${url}`);
        } catch (error) {
            console.error("An error occurred:", error);
            // Same response uploadBlob sets on its own error paths; set here
            // as well so a rejection never leaves the request unanswered.
            context.res = {
                status: 500,
                body: 'Error processing file upload.',
            };
        }
        return;
    }

    const { uri, requestId } = req.body?.params || req.query;
    if (!uri || !requestId) {
        context.res = {
            status: 400,
            body: "Please pass a uri and requestId on the query string or in the request body"
        };
        return;
    }

    let totalCount = 0;
    let completedCount = 0;
    let numberOfChunks;

    let file = ensureEncoded(uri); // encode url to handle special characters
    let folder;
    const isYoutubeUrl = isValidYoutubeUrl(uri);

    const result = [];
    let failure = null;

    // Progress reporting is informational: a publish failure is logged but
    // never aborts the media processing.
    const sendProgress = async (data = null) => {
        completedCount++;
        const progress = completedCount / totalCount;
        try {
            await publishRequestProgress({ requestId, progress, completedCount, totalCount, numberOfChunks, data });
        } catch (error) {
            console.error("An error occurred while publishing progress:", error);
        }
    };

    try {
        if (isYoutubeUrl) {
            file = await processYoutubeUrl(file);
        }

        const { chunkPromises, uniqueOutputPath } = await splitMediaFile(file);
        folder = uniqueOutputPath;

        // All chunk conversions are already running. Mark each promise as
        // handled so that one failing while an earlier one is still being
        // awaited does not surface as an unhandled rejection; the awaits
        // below still observe the failure.
        chunkPromises.forEach((chunkPromise) => chunkPromise.catch(() => {}));

        numberOfChunks = chunkPromises.length; // for progress reporting
        totalCount += chunkPromises.length * 4; // 4 steps for each chunk (download and upload)

        // sequential download of chunks
        const chunks = [];
        for (const chunkPromise of chunkPromises) {
            chunks.push(await chunkPromise);
            await sendProgress();
        }

        // sequential processing of chunks
        for (const chunk of chunks) {
            const blobName = useAzure ? await saveFileToBlob(chunk, requestId) : await moveFileToPublicFolder(chunk, requestId);
            result.push(blobName);
            context.log(`Saved chunk as: ${blobName}`);
            await sendProgress();
        }
    } catch (error) {
        console.error("An error occurred:", error);
        failure = error;
    } finally {
        try {
            isYoutubeUrl && (await deleteTempPath(file));
            folder && (await deleteTempPath(folder));
        } catch (error) {
            console.error("An error occurred while deleting:", error);
        }
    }

    console.log(`result: ${result}`);

    if (failure) {
        // Report the failure instead of answering 200 with a partial list.
        context.res = {
            status: 500,
            body: `Error processing media: ${failure.message}`
        };
        return;
    }

    context.res = {
        // status: 200, /* Defaults to 200 */
        body: result
    };
}
112
+
113
+
114
+ export default main;
@@ -0,0 +1,36 @@
1
+ import { promises as fs } from 'fs';
2
+ import { join, basename } from 'path';
3
+ import { v4 as uuidv4 } from 'uuid';
4
+
5
+ import { publicFolder, port, ipAddress } from "./start.js";
6
+
7
+
8
/**
 * Moves a chunk file into `<publicFolder>/<requestId>/` under a unique name
 * and returns the URL it is served from.
 *
 * @param {string} chunkPath - Path of the file to move.
 * @param {string} requestId - Request identifier used as the sub-folder name.
 * @returns {Promise<string>} `http://<ipAddress>:<port>/files/<requestId>/<uuid>_<name>`.
 * @throws {Error} When the folder cannot be created or the file cannot be moved.
 */
async function moveFileToPublicFolder(chunkPath, requestId) {
    // Use the filename with a UUID as the stored name
    const storedName = `${uuidv4()}_${basename(chunkPath)}`;

    // Create the target folder if it doesn't exist
    const targetFolder = join(publicFolder, requestId);
    await fs.mkdir(targetFolder, { recursive: true });

    // Move the file to the target folder
    const targetPath = join(targetFolder, storedName);
    try {
        await fs.rename(chunkPath, targetPath);
    } catch (error) {
        // rename cannot cross filesystems (e.g. temp dir on another mount);
        // fall back to copy + delete in that case only.
        if (error.code !== 'EXDEV') {
            throw error;
        }
        await fs.copyFile(chunkPath, targetPath);
        await fs.unlink(chunkPath);
    }

    // Return the complete URL of the file
    return `http://${ipAddress}:${port}/files/${requestId}/${storedName}`;
}
25
+
26
/**
 * Deletes the public sub-folder that belongs to a request, with all its files.
 * A folder that does not exist is treated as already cleaned.
 *
 * @param {string} requestId - Request identifier; must be a single path
 *   segment so the deletion cannot reach outside the public folder.
 * @returns {Promise<void>}
 * @throws {Error} When requestId is missing or is not a plain folder name.
 */
async function deleteFolder(requestId) {
    if (!requestId) throw new Error('Missing requestId parameter');

    // requestId arrives from the query string: reject anything containing a
    // path separator or a dot segment before doing a recursive delete.
    if (basename(requestId) !== requestId || requestId === '.' || requestId === '..') {
        throw new Error('Invalid requestId parameter');
    }

    const targetFolder = join(publicFolder, requestId);
    await fs.rm(targetFolder, { recursive: true, force: true });
    console.log(`Cleaned folder: ${targetFolder}`);
}
32
+
33
+
34
+ export {
35
+ moveFileToPublicFolder, deleteFolder
36
+ };