@aj-archipelago/cortex 1.3.50 → 1.3.52
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/config.js +1 -1
- package/helper-apps/cortex-browser/Dockerfile +19 -31
- package/helper-apps/cortex-browser/function_app.py +708 -181
- package/helper-apps/cortex-browser/requirements.txt +4 -4
- package/helper-apps/cortex-file-handler/{.env.test.azure → .env.test.azure.sample} +2 -1
- package/helper-apps/cortex-file-handler/{.env.test.gcs → .env.test.gcs.sample} +2 -1
- package/helper-apps/cortex-file-handler/{.env.test → .env.test.sample} +2 -1
- package/helper-apps/cortex-file-handler/Dockerfile +1 -1
- package/helper-apps/cortex-file-handler/INTERFACE.md +178 -0
- package/helper-apps/cortex-file-handler/function.json +2 -6
- package/helper-apps/cortex-file-handler/package-lock.json +6065 -5964
- package/helper-apps/cortex-file-handler/package.json +11 -6
- package/helper-apps/cortex-file-handler/scripts/setup-azure-container.js +12 -9
- package/helper-apps/cortex-file-handler/scripts/setup-test-containers.js +21 -18
- package/helper-apps/cortex-file-handler/scripts/test-azure.sh +4 -1
- package/helper-apps/cortex-file-handler/scripts/test-gcs.sh +1 -1
- package/helper-apps/cortex-file-handler/src/blobHandler.js +1056 -0
- package/helper-apps/cortex-file-handler/{constants.js → src/constants.js} +64 -48
- package/helper-apps/cortex-file-handler/src/docHelper.js +37 -0
- package/helper-apps/cortex-file-handler/{fileChunker.js → src/fileChunker.js} +97 -65
- package/helper-apps/cortex-file-handler/{helper.js → src/helper.js} +34 -25
- package/helper-apps/cortex-file-handler/src/index.js +608 -0
- package/helper-apps/cortex-file-handler/src/localFileHandler.js +107 -0
- package/helper-apps/cortex-file-handler/{redis.js → src/redis.js} +23 -17
- package/helper-apps/cortex-file-handler/src/services/ConversionService.js +309 -0
- package/helper-apps/cortex-file-handler/src/services/FileConversionService.js +57 -0
- package/helper-apps/cortex-file-handler/src/services/storage/AzureStorageProvider.js +177 -0
- package/helper-apps/cortex-file-handler/src/services/storage/GCSStorageProvider.js +258 -0
- package/helper-apps/cortex-file-handler/src/services/storage/LocalStorageProvider.js +182 -0
- package/helper-apps/cortex-file-handler/src/services/storage/StorageFactory.js +86 -0
- package/helper-apps/cortex-file-handler/src/services/storage/StorageProvider.js +53 -0
- package/helper-apps/cortex-file-handler/src/services/storage/StorageService.js +259 -0
- package/helper-apps/cortex-file-handler/src/start.js +88 -0
- package/helper-apps/cortex-file-handler/src/utils/filenameUtils.js +28 -0
- package/helper-apps/cortex-file-handler/tests/FileConversionService.test.js +144 -0
- package/helper-apps/cortex-file-handler/tests/blobHandler.test.js +90 -66
- package/helper-apps/cortex-file-handler/tests/conversionResilience.test.js +152 -0
- package/helper-apps/cortex-file-handler/tests/fileChunker.test.js +105 -108
- package/helper-apps/cortex-file-handler/tests/fileUpload.test.js +462 -0
- package/helper-apps/cortex-file-handler/tests/files/DOCX_TestPage.docx +0 -0
- package/helper-apps/cortex-file-handler/tests/files/tests-example.xls +0 -0
- package/helper-apps/cortex-file-handler/tests/getOperations.test.js +307 -0
- package/helper-apps/cortex-file-handler/tests/postOperations.test.js +291 -0
- package/helper-apps/cortex-file-handler/tests/start.test.js +984 -647
- package/helper-apps/cortex-file-handler/tests/storage/AzureStorageProvider.test.js +120 -0
- package/helper-apps/cortex-file-handler/tests/storage/GCSStorageProvider.test.js +193 -0
- package/helper-apps/cortex-file-handler/tests/storage/LocalStorageProvider.test.js +148 -0
- package/helper-apps/cortex-file-handler/tests/storage/StorageFactory.test.js +100 -0
- package/helper-apps/cortex-file-handler/tests/storage/StorageService.test.js +113 -0
- package/helper-apps/cortex-file-handler/tests/testUtils.helper.js +85 -0
- package/helper-apps/cortex-markitdown/.funcignore +1 -0
- package/helper-apps/cortex-markitdown/MarkitdownConverterFunction/__init__.py +64 -0
- package/helper-apps/cortex-markitdown/MarkitdownConverterFunction/function.json +21 -0
- package/helper-apps/cortex-markitdown/README.md +94 -0
- package/helper-apps/cortex-markitdown/host.json +15 -0
- package/helper-apps/cortex-markitdown/requirements.txt +2 -0
- package/lib/entityConstants.js +1 -1
- package/lib/requestExecutor.js +44 -36
- package/package.json +1 -1
- package/pathways/system/entity/tools/sys_tool_readfile.js +24 -2
- package/server/plugins/openAiWhisperPlugin.js +59 -87
- package/helper-apps/cortex-file-handler/blobHandler.js +0 -567
- package/helper-apps/cortex-file-handler/docHelper.js +0 -144
- package/helper-apps/cortex-file-handler/index.js +0 -440
- package/helper-apps/cortex-file-handler/localFileHandler.js +0 -108
- package/helper-apps/cortex-file-handler/start.js +0 -63
- package/helper-apps/cortex-file-handler/tests/docHelper.test.js +0 -148
|
@@ -1,48 +1,42 @@
|
|
|
1
1
|
/**
 * Document-style file extensions (plain text, markup, source and
 * office formats). One of the four lists consulted by
 * isAcceptedExtension().
 */
export const DOC_EXTENSIONS = [
  // Text, data and source formats
  '.txt', '.json', '.csv', '.md', '.xml', '.js', '.html', '.css',
  // Office formats
  '.doc', '.docx', '.xls', '.xlsx',
];
|
|
15
15
|
|
|
16
16
|
/**
 * Image file extensions. One of the four lists consulted by
 * isAcceptedExtension(). Note that '.pdf' is listed here rather than
 * in DOC_EXTENSIONS.
 */
export const IMAGE_EXTENSIONS = [
  '.jpg', '.jpeg', '.png', '.webp', '.heic', '.heif',
  '.pdf',
];
|
|
25
|
-
|
|
25
|
+
|
|
26
26
|
/**
 * Video file extensions. One of the four lists consulted by
 * isAcceptedExtension().
 */
export const VIDEO_EXTENSIONS = [
  '.mp4', '.mpeg', '.mov',
  '.avi', '.flv', '.mpg',
  '.webm', '.wmv', '.3gp',
];
|
|
45
37
|
|
|
38
|
+
/**
 * Audio file extensions. One of the four lists consulted by
 * isAcceptedExtension().
 */
export const AUDIO_EXTENSIONS = [
  '.wav',
  '.mp3',
  '.aac',
  '.ogg',
  '.flac',
];
|
|
39
|
+
|
|
46
40
|
export const ACCEPTED_MIME_TYPES = {
|
|
47
41
|
// Document types
|
|
48
42
|
'text/plain': ['.txt'],
|
|
@@ -53,24 +47,35 @@ export const ACCEPTED_MIME_TYPES = {
|
|
|
53
47
|
'text/javascript': ['.js'],
|
|
54
48
|
'text/html': ['.html'],
|
|
55
49
|
'text/css': ['.css'],
|
|
56
|
-
'application/vnd.openxmlformats-officedocument.wordprocessingml.document': [
|
|
57
|
-
|
|
50
|
+
'application/vnd.openxmlformats-officedocument.wordprocessingml.document': [
|
|
51
|
+
'.docx',
|
|
52
|
+
],
|
|
53
|
+
'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': [
|
|
54
|
+
'.xlsx',
|
|
55
|
+
],
|
|
58
56
|
'application/msword': ['.doc'],
|
|
59
57
|
'application/vnd.ms-excel': ['.xls'],
|
|
60
58
|
'application/vnd.ms-word.document.macroEnabled.12': ['.docm'],
|
|
61
59
|
'application/vnd.ms-excel.sheet.macroEnabled.12': ['.xlsm'],
|
|
62
60
|
'application/vnd.ms-word.template.macroEnabled.12': ['.dotm'],
|
|
63
61
|
'application/vnd.ms-excel.template.macroEnabled.12': ['.xltm'],
|
|
64
|
-
|
|
62
|
+
|
|
65
63
|
// Image types
|
|
66
64
|
'image/jpeg': ['.jpg', '.jpeg'],
|
|
67
65
|
'image/png': ['.png'],
|
|
68
66
|
'image/webp': ['.webp'],
|
|
69
67
|
'image/heic': ['.heic'],
|
|
70
68
|
'image/heif': ['.heif'],
|
|
71
|
-
'application/octet-stream': [
|
|
69
|
+
'application/octet-stream': [
|
|
70
|
+
'.jpg',
|
|
71
|
+
'.jpeg',
|
|
72
|
+
'.png',
|
|
73
|
+
'.webp',
|
|
74
|
+
'.heic',
|
|
75
|
+
'.heif',
|
|
76
|
+
],
|
|
72
77
|
'application/pdf': ['.pdf'],
|
|
73
|
-
|
|
78
|
+
|
|
74
79
|
// Audio types
|
|
75
80
|
'audio/wav': ['.wav'],
|
|
76
81
|
'audio/mpeg': ['.mp3'],
|
|
@@ -81,7 +86,7 @@ export const ACCEPTED_MIME_TYPES = {
|
|
|
81
86
|
'audio/x-m4a': ['.m4a'],
|
|
82
87
|
'audio/mp3': ['.mp3'],
|
|
83
88
|
'audio/mp4': ['.mp4'],
|
|
84
|
-
|
|
89
|
+
|
|
85
90
|
// Video types
|
|
86
91
|
'video/mp4': ['.mp4'],
|
|
87
92
|
'video/mpeg': ['.mpeg', '.mpg'],
|
|
@@ -108,8 +113,19 @@ export function getExtensionsForMimeType(mimeType) {
|
|
|
108
113
|
|
|
109
114
|
// Helper function to check if an extension is accepted
|
|
110
115
|
/**
 * Report whether an extension appears in any of the accepted lists.
 *
 * The comparison is an exact, case-sensitive match against the entries
 * of DOC_EXTENSIONS, IMAGE_EXTENSIONS, VIDEO_EXTENSIONS and
 * AUDIO_EXTENSIONS, all of which are written with a leading dot.
 *
 * @param {string} extension - Extension to look up, e.g. '.png'.
 * @returns {boolean} true when at least one list contains the extension.
 */
export function isAcceptedExtension(extension) {
  const extensionLists = [
    DOC_EXTENSIONS,
    IMAGE_EXTENSIONS,
    VIDEO_EXTENSIONS,
    AUDIO_EXTENSIONS,
  ];
  return extensionLists.some((list) => list.includes(extension));
}
|
|
123
|
+
|
|
124
|
+
/**
 * Office document extensions (Word, Excel, PowerPoint).
 * NOTE(review): presumably the formats that go through a conversion step
 * before further processing; the consumer is not visible here, so confirm
 * against the caller.
 */
export const CONVERTED_EXTENSIONS = [
  '.doc', '.docx',
  '.xls', '.xlsx',
  '.ppt', '.pptx',
];
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
// Utility function for chunking text into smaller pieces
|
|
2
|
+
/**
 * Split text into consecutive chunks of at most 10,000 characters.
 *
 * Each cut is placed at the last '.' or ' ' found when scanning backwards
 * from the size limit; the delimiter itself becomes the first character of
 * the following chunk. If the window contains no delimiter the text is cut
 * at exactly the size limit. Concatenating the returned chunks always
 * reproduces the input.
 *
 * @param {string} text - Text to split.
 * @returns {string[]} The chunks, in order. Input no longer than the limit
 *   (including the empty string) is returned as a single-element array.
 */
export function easyChunker(text) {
  const result = [];
  const n = 10000;

  // If the text is no longer than n characters, just return it as is
  if (text.length <= n) {
    return [text];
  }

  let startIndex = 0;
  while (startIndex < text.length) {
    // Once the remainder fits in a single chunk, emit it whole. Scanning
    // backwards from text.length (where text[endIndex] is undefined) would
    // otherwise split the tail at its last space or period for no reason,
    // e.g. leaving a trailing "." as a chunk of its own.
    if (text.length - startIndex <= n) {
      result.push(text.substring(startIndex));
      break;
    }

    // Look backwards from the size limit for a sentence or word boundary
    let endIndex = startIndex + n;
    while (
      endIndex > startIndex &&
      text[endIndex] !== '.' &&
      text[endIndex] !== ' '
    ) {
      endIndex--;
    }

    // No boundary in this window: fall back to a hard cut at n characters
    if (endIndex === startIndex) {
      endIndex = startIndex + n;
    }

    result.push(text.substring(startIndex, endIndex));

    // The next chunk starts at the boundary character
    startIndex = endIndex;
  }

  return result;
}
|
|
@@ -1,14 +1,17 @@
|
|
|
1
1
|
import fs from 'fs';
|
|
2
|
-
import
|
|
3
|
-
import
|
|
4
|
-
import { v4 as uuidv4 } from 'uuid';
|
|
2
|
+
import http from 'http';
|
|
3
|
+
import https from 'https';
|
|
5
4
|
import os from 'os';
|
|
5
|
+
import path from 'path';
|
|
6
|
+
import { Transform } from 'stream';
|
|
7
|
+
import { pipeline } from 'stream/promises';
|
|
6
8
|
import { promisify } from 'util';
|
|
9
|
+
|
|
7
10
|
import axios from 'axios';
|
|
11
|
+
import ffmpeg from 'fluent-ffmpeg';
|
|
12
|
+
import { v4 as uuidv4 } from 'uuid';
|
|
13
|
+
|
|
8
14
|
import { ensureEncoded } from './helper.js';
|
|
9
|
-
import http from 'http';
|
|
10
|
-
import https from 'https';
|
|
11
|
-
import { pipeline } from 'stream/promises';
|
|
12
15
|
|
|
13
16
|
const ffmpegProbe = promisify(ffmpeg.ffprobe);
|
|
14
17
|
|
|
@@ -18,7 +21,6 @@ const tempDirectories = new Map(); // dir -> { createdAt, requestId }
|
|
|
18
21
|
|
|
19
22
|
// Temp directory cleanup
|
|
20
23
|
async function cleanupTempDirectories() {
|
|
21
|
-
|
|
22
24
|
for (const [dir, info] of tempDirectories) {
|
|
23
25
|
try {
|
|
24
26
|
// Cleanup directories older than 1 hour
|
|
@@ -43,7 +45,7 @@ setInterval(async () => {
|
|
|
43
45
|
}
|
|
44
46
|
}, CLEANUP_INTERVAL_MS);
|
|
45
47
|
|
|
46
|
-
// Process a single chunk with streaming
|
|
48
|
+
// Process a single chunk with streaming and progress tracking
|
|
47
49
|
async function processChunk(inputPath, outputFileName, start, duration) {
|
|
48
50
|
return new Promise((resolve, reject) => {
|
|
49
51
|
const command = ffmpeg(inputPath)
|
|
@@ -71,8 +73,11 @@ async function processChunk(inputPath, outputFileName, start, duration) {
|
|
|
71
73
|
resolve(outputFileName);
|
|
72
74
|
});
|
|
73
75
|
|
|
74
|
-
// Use
|
|
75
|
-
|
|
76
|
+
// Use pipeline for better error handling and backpressure
|
|
77
|
+
pipeline(
|
|
78
|
+
command,
|
|
79
|
+
fs.createWriteStream(outputFileName, { highWaterMark: 4 * 1024 * 1024 }), // 4MB chunks
|
|
80
|
+
).catch(reject);
|
|
76
81
|
});
|
|
77
82
|
}
|
|
78
83
|
|
|
@@ -80,38 +85,42 @@ const generateUniqueFolderName = () => {
|
|
|
80
85
|
const uniqueFolderName = uuidv4();
|
|
81
86
|
const tempFolderPath = os.tmpdir();
|
|
82
87
|
return path.join(tempFolderPath, uniqueFolderName);
|
|
83
|
-
}
|
|
88
|
+
};
|
|
84
89
|
|
|
85
90
|
async function downloadFile(url, outputPath) {
|
|
86
91
|
try {
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
timeout:
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
})
|
|
100
|
-
}
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
92
|
+
const agent = {
|
|
93
|
+
http: new http.Agent({
|
|
94
|
+
keepAlive: true,
|
|
95
|
+
maxSockets: 10,
|
|
96
|
+
maxFreeSockets: 10,
|
|
97
|
+
timeout: 60000,
|
|
98
|
+
}),
|
|
99
|
+
https: new https.Agent({
|
|
100
|
+
keepAlive: true,
|
|
101
|
+
maxSockets: 10,
|
|
102
|
+
maxFreeSockets: 10,
|
|
103
|
+
timeout: 60000,
|
|
104
|
+
}),
|
|
105
|
+
};
|
|
106
|
+
|
|
107
|
+
// Use the original URL without any decoding
|
|
108
|
+
const response = await axios.get(url, {
|
|
109
|
+
responseType: 'stream',
|
|
110
|
+
timeout: 30000,
|
|
111
|
+
maxContentLength: Infinity,
|
|
112
|
+
decompress: true,
|
|
113
|
+
httpAgent: agent.http,
|
|
114
|
+
httpsAgent: agent.https,
|
|
115
|
+
maxRedirects: 5,
|
|
116
|
+
validateStatus: (status) => status >= 200 && status < 300,
|
|
117
|
+
});
|
|
110
118
|
|
|
111
|
-
const writer = fs.createWriteStream(outputPath);
|
|
112
|
-
|
|
113
119
|
// Use pipeline for better error handling and memory management
|
|
114
|
-
await pipeline(
|
|
120
|
+
await pipeline(
|
|
121
|
+
response.data,
|
|
122
|
+
fs.createWriteStream(outputPath, { highWaterMark: 4 * 1024 * 1024 }), // 4MB chunks
|
|
123
|
+
);
|
|
115
124
|
|
|
116
125
|
if (!fs.existsSync(outputPath) || fs.statSync(outputPath).size === 0) {
|
|
117
126
|
throw new Error('Download failed or file is empty');
|
|
@@ -124,25 +133,30 @@ async function downloadFile(url, outputPath) {
|
|
|
124
133
|
}
|
|
125
134
|
}
|
|
126
135
|
|
|
127
|
-
async function splitMediaFile(
|
|
136
|
+
async function splitMediaFile(
|
|
137
|
+
inputPath,
|
|
138
|
+
chunkDurationInSeconds = 500,
|
|
139
|
+
requestId = uuidv4(),
|
|
140
|
+
) {
|
|
128
141
|
let tempPath = null;
|
|
129
142
|
let uniqueOutputPath = null;
|
|
130
143
|
let inputStream = null;
|
|
131
|
-
|
|
144
|
+
|
|
132
145
|
try {
|
|
133
146
|
uniqueOutputPath = generateUniqueFolderName();
|
|
134
147
|
fs.mkdirSync(uniqueOutputPath, { recursive: true });
|
|
135
|
-
|
|
148
|
+
|
|
136
149
|
tempDirectories.set(uniqueOutputPath, {
|
|
137
150
|
createdAt: Date.now(),
|
|
138
|
-
requestId
|
|
151
|
+
requestId,
|
|
139
152
|
});
|
|
140
153
|
|
|
141
154
|
// Handle URL downloads with streaming
|
|
142
155
|
const isUrl = /^(https?|ftp):\/\/[^\s/$.?#].[^\s]*$/i.test(inputPath);
|
|
143
156
|
if (isUrl) {
|
|
144
157
|
const urlObj = new URL(ensureEncoded(inputPath));
|
|
145
|
-
const originalFileName =
|
|
158
|
+
const originalFileName =
|
|
159
|
+
path.basename(urlObj.pathname) || 'downloaded_file';
|
|
146
160
|
tempPath = path.join(uniqueOutputPath, originalFileName);
|
|
147
161
|
console.log('Downloading file to:', tempPath);
|
|
148
162
|
await downloadFile(inputPath, tempPath);
|
|
@@ -155,9 +169,9 @@ async function splitMediaFile(inputPath, chunkDurationInSeconds = 500, requestId
|
|
|
155
169
|
}
|
|
156
170
|
|
|
157
171
|
// Use a larger chunk size for better throughput while still managing memory
|
|
158
|
-
inputStream = fs.createReadStream(inputPath, {
|
|
172
|
+
inputStream = fs.createReadStream(inputPath, {
|
|
159
173
|
highWaterMark: 4 * 1024 * 1024, // 4MB chunks
|
|
160
|
-
autoClose: true
|
|
174
|
+
autoClose: true,
|
|
161
175
|
});
|
|
162
176
|
|
|
163
177
|
console.log('Probing file:', inputPath);
|
|
@@ -168,33 +182,50 @@ async function splitMediaFile(inputPath, chunkDurationInSeconds = 500, requestId
|
|
|
168
182
|
|
|
169
183
|
const duration = metadata.format.duration;
|
|
170
184
|
const numChunks = Math.ceil((duration - 1) / chunkDurationInSeconds);
|
|
171
|
-
console.log(
|
|
185
|
+
console.log(
|
|
186
|
+
`Processing ${numChunks} chunks of ${chunkDurationInSeconds} seconds each`,
|
|
187
|
+
);
|
|
172
188
|
|
|
173
189
|
const chunkResults = new Array(numChunks); // Pre-allocate array to maintain order
|
|
174
190
|
const chunkOffsets = new Array(numChunks); // Pre-allocate offsets array
|
|
175
191
|
|
|
176
192
|
// Process chunks in parallel with a concurrency limit
|
|
177
|
-
const CONCURRENT_CHUNKS = 3; //
|
|
193
|
+
const CONCURRENT_CHUNKS = Math.min(3, os.cpus().length); // Use CPU count to determine concurrency
|
|
194
|
+
const chunkPromises = [];
|
|
195
|
+
|
|
178
196
|
for (let i = 0; i < numChunks; i += CONCURRENT_CHUNKS) {
|
|
179
197
|
const chunkBatch = [];
|
|
180
198
|
for (let j = 0; j < CONCURRENT_CHUNKS && i + j < numChunks; j++) {
|
|
181
199
|
const chunkIndex = i + j;
|
|
182
|
-
const outputFileName = path.join(
|
|
200
|
+
const outputFileName = path.join(
|
|
201
|
+
uniqueOutputPath,
|
|
202
|
+
`chunk-${chunkIndex + 1}-${path.parse(inputPath).name}.mp3`,
|
|
203
|
+
);
|
|
183
204
|
const offset = chunkIndex * chunkDurationInSeconds;
|
|
184
|
-
|
|
185
|
-
chunkBatch.push(
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
205
|
+
|
|
206
|
+
chunkBatch.push(
|
|
207
|
+
processChunk(
|
|
208
|
+
inputPath,
|
|
209
|
+
outputFileName,
|
|
210
|
+
offset,
|
|
211
|
+
chunkDurationInSeconds,
|
|
212
|
+
)
|
|
213
|
+
.then((result) => {
|
|
214
|
+
chunkResults[chunkIndex] = result; // Store in correct position
|
|
215
|
+
chunkOffsets[chunkIndex] = offset; // Store offset in correct position
|
|
216
|
+
console.log(`Completed chunk ${chunkIndex + 1}/${numChunks}`);
|
|
217
|
+
return result;
|
|
218
|
+
})
|
|
219
|
+
.catch((error) => {
|
|
220
|
+
console.error(
|
|
221
|
+
`Failed to process chunk ${chunkIndex + 1}:`,
|
|
222
|
+
error,
|
|
223
|
+
);
|
|
224
|
+
return null;
|
|
225
|
+
}),
|
|
226
|
+
);
|
|
196
227
|
}
|
|
197
|
-
|
|
228
|
+
|
|
198
229
|
// Wait for the current batch to complete before starting the next
|
|
199
230
|
await Promise.all(chunkBatch);
|
|
200
231
|
}
|
|
@@ -207,7 +238,11 @@ async function splitMediaFile(inputPath, chunkDurationInSeconds = 500, requestId
|
|
|
207
238
|
throw new Error('No chunks were successfully processed');
|
|
208
239
|
}
|
|
209
240
|
|
|
210
|
-
return {
|
|
241
|
+
return {
|
|
242
|
+
chunkPromises: validChunks,
|
|
243
|
+
chunkOffsets: validOffsets,
|
|
244
|
+
uniqueOutputPath,
|
|
245
|
+
};
|
|
211
246
|
} catch (err) {
|
|
212
247
|
if (uniqueOutputPath && fs.existsSync(uniqueOutputPath)) {
|
|
213
248
|
try {
|
|
@@ -230,7 +265,4 @@ async function splitMediaFile(inputPath, chunkDurationInSeconds = 500, requestId
|
|
|
230
265
|
}
|
|
231
266
|
}
|
|
232
267
|
|
|
233
|
-
export {
|
|
234
|
-
splitMediaFile,
|
|
235
|
-
downloadFile
|
|
236
|
-
};
|
|
268
|
+
export { splitMediaFile, downloadFile };
|
|
@@ -1,8 +1,9 @@
|
|
|
1
1
|
import fs from 'fs';
|
|
2
|
-
import { ACCEPTED_MIME_TYPES, isAcceptedMimeType } from './constants.js';
|
|
3
|
-
import path from 'path';
|
|
4
2
|
import http from 'http';
|
|
5
3
|
import https from 'https';
|
|
4
|
+
import path from 'path';
|
|
5
|
+
|
|
6
|
+
import { ACCEPTED_MIME_TYPES, isAcceptedMimeType } from './constants.js';
|
|
6
7
|
|
|
7
8
|
export async function deleteTempPath(path) {
|
|
8
9
|
try {
|
|
@@ -20,7 +21,9 @@ export async function deleteTempPath(path) {
|
|
|
20
21
|
console.log(`Temporary file ${path} deleted successfully.`);
|
|
21
22
|
} else if (stats.isDirectory()) {
|
|
22
23
|
fs.rmSync(path, { recursive: true });
|
|
23
|
-
console.log(
|
|
24
|
+
console.log(
|
|
25
|
+
`Temporary folder ${path} and its contents deleted successfully.`,
|
|
26
|
+
);
|
|
24
27
|
}
|
|
25
28
|
} catch (err) {
|
|
26
29
|
console.error('Error occurred while deleting the temporary path:', err);
|
|
@@ -38,7 +41,7 @@ export function getExtensionForMimeType(mimeType) {
|
|
|
38
41
|
// Ensure a filename has the correct extension based on its mime type
|
|
39
42
|
export function ensureFileExtension(filename, mimeType) {
|
|
40
43
|
if (!mimeType) return filename;
|
|
41
|
-
|
|
44
|
+
|
|
42
45
|
const extension = getExtensionForMimeType(mimeType);
|
|
43
46
|
if (!extension) return filename;
|
|
44
47
|
|
|
@@ -49,12 +52,12 @@ export function ensureFileExtension(filename, mimeType) {
|
|
|
49
52
|
|
|
50
53
|
// Get the current extension if any
|
|
51
54
|
const currentExt = path.extname(filename);
|
|
52
|
-
|
|
55
|
+
|
|
53
56
|
// If there's no current extension, just append the new one
|
|
54
57
|
if (!currentExt) {
|
|
55
58
|
return `${filename}${extension}`;
|
|
56
59
|
}
|
|
57
|
-
|
|
60
|
+
|
|
58
61
|
// Replace the current extension with the new one
|
|
59
62
|
return filename.slice(0, -currentExt.length) + extension;
|
|
60
63
|
}
|
|
@@ -69,39 +72,45 @@ export function ensureEncoded(url) {
|
|
|
69
72
|
}
|
|
70
73
|
|
|
71
74
|
export async function urlExists(url) {
|
|
72
|
-
if(!url) return false;
|
|
73
|
-
|
|
75
|
+
if (!url) return false;
|
|
76
|
+
|
|
74
77
|
try {
|
|
75
|
-
|
|
78
|
+
// Basic URL validation
|
|
76
79
|
const urlObj = new URL(url);
|
|
77
80
|
if (!['http:', 'https:'].includes(urlObj.protocol)) {
|
|
78
81
|
throw new Error('Invalid protocol - only HTTP and HTTPS are supported');
|
|
79
82
|
}
|
|
80
83
|
|
|
81
84
|
const httpModule = urlObj.protocol === 'https:' ? https : http;
|
|
82
|
-
|
|
85
|
+
|
|
83
86
|
return new Promise((resolve) => {
|
|
84
|
-
const request = httpModule.request(
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
87
|
+
const request = httpModule.request(
|
|
88
|
+
url,
|
|
89
|
+
{ method: 'HEAD' },
|
|
90
|
+
function (response) {
|
|
91
|
+
if (response.statusCode >= 200 && response.statusCode < 400) {
|
|
92
|
+
const contentType = response.headers['content-type'];
|
|
93
|
+
const cleanContentType = contentType
|
|
94
|
+
? contentType.split(';')[0].trim()
|
|
95
|
+
: '';
|
|
96
|
+
// Check if the content type is one we accept
|
|
97
|
+
if (cleanContentType && isAcceptedMimeType(cleanContentType)) {
|
|
98
|
+
resolve({ valid: true, contentType: cleanContentType });
|
|
99
|
+
} else {
|
|
100
|
+
console.log(`Unsupported content type: ${contentType}`);
|
|
101
|
+
resolve({ valid: false });
|
|
102
|
+
}
|
|
91
103
|
} else {
|
|
92
|
-
console.log(`Unsupported content type: ${contentType}`);
|
|
93
104
|
resolve({ valid: false });
|
|
94
105
|
}
|
|
95
|
-
}
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
request.on('error', function(err) {
|
|
106
|
+
},
|
|
107
|
+
);
|
|
108
|
+
|
|
109
|
+
request.on('error', function (err) {
|
|
101
110
|
console.error('URL validation error:', err.message);
|
|
102
111
|
resolve({ valid: false });
|
|
103
112
|
});
|
|
104
|
-
|
|
113
|
+
|
|
105
114
|
request.end();
|
|
106
115
|
});
|
|
107
116
|
} catch (error) {
|