@aj-archipelago/cortex 1.3.57 → 1.3.59

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. package/README.md +6 -0
  2. package/config.js +22 -0
  3. package/helper-apps/cortex-file-handler/INTERFACE.md +20 -9
  4. package/helper-apps/cortex-file-handler/package-lock.json +2 -2
  5. package/helper-apps/cortex-file-handler/package.json +1 -1
  6. package/helper-apps/cortex-file-handler/scripts/setup-azure-container.js +17 -17
  7. package/helper-apps/cortex-file-handler/scripts/setup-test-containers.js +35 -35
  8. package/helper-apps/cortex-file-handler/src/blobHandler.js +1010 -909
  9. package/helper-apps/cortex-file-handler/src/constants.js +98 -98
  10. package/helper-apps/cortex-file-handler/src/docHelper.js +27 -27
  11. package/helper-apps/cortex-file-handler/src/fileChunker.js +224 -214
  12. package/helper-apps/cortex-file-handler/src/helper.js +93 -93
  13. package/helper-apps/cortex-file-handler/src/index.js +584 -550
  14. package/helper-apps/cortex-file-handler/src/localFileHandler.js +86 -86
  15. package/helper-apps/cortex-file-handler/src/redis.js +186 -90
  16. package/helper-apps/cortex-file-handler/src/services/ConversionService.js +301 -273
  17. package/helper-apps/cortex-file-handler/src/services/FileConversionService.js +55 -55
  18. package/helper-apps/cortex-file-handler/src/services/storage/AzureStorageProvider.js +174 -154
  19. package/helper-apps/cortex-file-handler/src/services/storage/GCSStorageProvider.js +239 -223
  20. package/helper-apps/cortex-file-handler/src/services/storage/LocalStorageProvider.js +161 -159
  21. package/helper-apps/cortex-file-handler/src/services/storage/StorageFactory.js +73 -71
  22. package/helper-apps/cortex-file-handler/src/services/storage/StorageProvider.js +46 -45
  23. package/helper-apps/cortex-file-handler/src/services/storage/StorageService.js +256 -213
  24. package/helper-apps/cortex-file-handler/src/start.js +4 -1
  25. package/helper-apps/cortex-file-handler/src/utils/filenameUtils.js +59 -25
  26. package/helper-apps/cortex-file-handler/tests/FileConversionService.test.js +119 -116
  27. package/helper-apps/cortex-file-handler/tests/blobHandler.test.js +257 -257
  28. package/helper-apps/cortex-file-handler/tests/cleanup.test.js +676 -0
  29. package/helper-apps/cortex-file-handler/tests/conversionResilience.test.js +124 -124
  30. package/helper-apps/cortex-file-handler/tests/fileChunker.test.js +249 -208
  31. package/helper-apps/cortex-file-handler/tests/fileUpload.test.js +439 -380
  32. package/helper-apps/cortex-file-handler/tests/getOperations.test.js +299 -263
  33. package/helper-apps/cortex-file-handler/tests/postOperations.test.js +265 -239
  34. package/helper-apps/cortex-file-handler/tests/start.test.js +1230 -1201
  35. package/helper-apps/cortex-file-handler/tests/storage/AzureStorageProvider.test.js +110 -105
  36. package/helper-apps/cortex-file-handler/tests/storage/GCSStorageProvider.test.js +201 -175
  37. package/helper-apps/cortex-file-handler/tests/storage/LocalStorageProvider.test.js +128 -125
  38. package/helper-apps/cortex-file-handler/tests/storage/StorageFactory.test.js +78 -73
  39. package/helper-apps/cortex-file-handler/tests/storage/StorageService.test.js +99 -99
  40. package/helper-apps/cortex-file-handler/tests/testUtils.helper.js +74 -70
  41. package/package.json +1 -1
  42. package/pathways/translate_apptek.js +33 -0
  43. package/pathways/translate_subtitle.js +15 -8
  44. package/server/plugins/apptekTranslatePlugin.js +46 -91
  45. package/tests/apptekTranslatePlugin.test.js +0 -2
  46. package/tests/integration/apptekTranslatePlugin.integration.test.js +159 -93
  47. package/tests/translate_apptek.test.js +16 -0
@@ -1,17 +1,18 @@
1
- import fs from 'fs';
2
- import http from 'http';
3
- import https from 'https';
4
- import os from 'os';
5
- import path from 'path';
6
- import { Transform } from 'stream';
7
- import { pipeline } from 'stream/promises';
8
- import { promisify } from 'util';
1
+ import fs from "fs";
2
+ import http from "http";
3
+ import https from "https";
4
+ import os from "os";
5
+ import path from "path";
6
+ import { Transform } from "stream";
7
+ import { pipeline } from "stream/promises";
8
+ import { promisify } from "util";
9
9
 
10
- import axios from 'axios';
11
- import ffmpeg from 'fluent-ffmpeg';
12
- import { v4 as uuidv4 } from 'uuid';
10
+ import axios from "axios";
11
+ import ffmpeg from "fluent-ffmpeg";
12
+ import { v4 as uuidv4 } from "uuid";
13
13
 
14
- import { ensureEncoded } from './helper.js';
14
+ import { ensureEncoded } from "./helper.js";
15
+ import { generateShortId } from "./utils/filenameUtils.js";
15
16
 
16
17
  const ffmpegProbe = promisify(ffmpeg.ffprobe);
17
18
 
@@ -21,248 +22,257 @@ const tempDirectories = new Map(); // dir -> { createdAt, requestId }
21
22
 
22
23
  // Temp directory cleanup
23
24
  async function cleanupTempDirectories() {
24
- for (const [dir, info] of tempDirectories) {
25
- try {
26
- // Cleanup directories older than 1 hour
27
- if (Date.now() - info.createdAt > 60 * 60 * 1000) {
28
- await fs.promises.rm(dir, { recursive: true, force: true });
29
- tempDirectories.delete(dir);
30
- console.log(`Cleaned up old temp directory: ${dir}`);
31
- }
32
- } catch (err) {
33
- // Directory might be gone
34
- tempDirectories.delete(dir);
35
- }
25
+ for (const [dir, info] of tempDirectories) {
26
+ try {
27
+ // Cleanup directories older than 1 hour
28
+ if (Date.now() - info.createdAt > 60 * 60 * 1000) {
29
+ await fs.promises.rm(dir, { recursive: true, force: true });
30
+ tempDirectories.delete(dir);
31
+ console.log(`Cleaned up old temp directory: ${dir}`);
32
+ }
33
+ } catch (err) {
34
+ // Directory might be gone
35
+ tempDirectories.delete(dir);
36
36
  }
37
+ }
37
38
  }
38
39
 
39
40
  // Setup periodic cleanup
40
41
  setInterval(async () => {
41
- try {
42
- await cleanupTempDirectories();
43
- } catch (err) {
44
- console.error('Error during periodic cleanup:', err);
45
- }
42
+ try {
43
+ await cleanupTempDirectories();
44
+ } catch (err) {
45
+ console.error("Error during periodic cleanup:", err);
46
+ }
46
47
  }, CLEANUP_INTERVAL_MS);
47
48
 
48
49
  // Process a single chunk with streaming and progress tracking
49
50
  async function processChunk(inputPath, outputFileName, start, duration) {
50
- return new Promise((resolve, reject) => {
51
- const command = ffmpeg(inputPath)
52
- .seekInput(start)
53
- .duration(duration)
54
- .format('mp3')
55
- .audioCodec('libmp3lame')
56
- .audioBitrate(128)
57
- .on('start', () => {
58
- console.log(`Processing chunk: ${start}s -> ${start + duration}s`);
59
- })
60
- .on('progress', (progress) => {
61
- if (progress.percent) {
62
- console.log(`Chunk progress: ${progress.percent}%`);
63
- }
64
- })
65
- .on('error', (err, stdout, stderr) => {
66
- console.error('FFmpeg error:', err.message);
67
- if (stdout) console.log('FFmpeg stdout:', stdout);
68
- if (stderr) console.error('FFmpeg stderr:', stderr);
69
- reject(err);
70
- })
71
- .on('end', () => {
72
- console.log(`Chunk complete: ${outputFileName}`);
73
- resolve(outputFileName);
74
- });
51
+ return new Promise((resolve, reject) => {
52
+ const command = ffmpeg(inputPath)
53
+ .seekInput(start)
54
+ .duration(duration)
55
+ .format("mp3")
56
+ .audioCodec("libmp3lame")
57
+ .audioBitrate(128)
58
+ .on("start", () => {
59
+ console.log(`Processing chunk: ${start}s -> ${start + duration}s`);
60
+ })
61
+ .on("progress", (progress) => {
62
+ if (progress.percent) {
63
+ console.log(`Chunk progress: ${progress.percent}%`);
64
+ }
65
+ })
66
+ .on("error", (err, stdout, stderr) => {
67
+ console.error("FFmpeg error:", err.message);
68
+ if (stdout) console.log("FFmpeg stdout:", stdout);
69
+ if (stderr) console.error("FFmpeg stderr:", stderr);
70
+ reject(err);
71
+ })
72
+ .on("end", () => {
73
+ console.log(`Chunk complete: ${outputFileName}`);
74
+ resolve(outputFileName);
75
+ });
75
76
 
76
- // Use pipeline for better error handling and backpressure
77
- pipeline(
78
- command,
79
- fs.createWriteStream(outputFileName, { highWaterMark: 4 * 1024 * 1024 }), // 4MB chunks
80
- ).catch(reject);
81
- });
77
+ // Use pipeline for better error handling and backpressure
78
+ pipeline(
79
+ command,
80
+ fs.createWriteStream(outputFileName, { highWaterMark: 4 * 1024 * 1024 }), // 4MB chunks
81
+ ).catch(reject);
82
+ });
82
83
  }
83
84
 
84
85
  const generateUniqueFolderName = () => {
85
- const uniqueFolderName = uuidv4();
86
- const tempFolderPath = os.tmpdir();
87
- return path.join(tempFolderPath, uniqueFolderName);
86
+ const uniqueFolderName = uuidv4();
87
+ const tempFolderPath = os.tmpdir();
88
+ return path.join(tempFolderPath, uniqueFolderName);
88
89
  };
89
90
 
90
91
  async function downloadFile(url, outputPath) {
91
- try {
92
- const agent = {
93
- http: new http.Agent({
94
- keepAlive: true,
95
- maxSockets: 10,
96
- maxFreeSockets: 10,
97
- timeout: 60000,
98
- }),
99
- https: new https.Agent({
100
- keepAlive: true,
101
- maxSockets: 10,
102
- maxFreeSockets: 10,
103
- timeout: 60000,
104
- }),
105
- };
92
+ try {
93
+ const agent = {
94
+ http: new http.Agent({
95
+ keepAlive: true,
96
+ maxSockets: 10,
97
+ maxFreeSockets: 10,
98
+ timeout: 60000,
99
+ }),
100
+ https: new https.Agent({
101
+ keepAlive: true,
102
+ maxSockets: 10,
103
+ maxFreeSockets: 10,
104
+ timeout: 60000,
105
+ }),
106
+ };
106
107
 
107
- // Use the original URL without any decoding
108
- const response = await axios.get(url, {
109
- responseType: 'stream',
110
- timeout: 30000,
111
- maxContentLength: Infinity,
112
- decompress: true,
113
- httpAgent: agent.http,
114
- httpsAgent: agent.https,
115
- maxRedirects: 5,
116
- validateStatus: (status) => status >= 200 && status < 300,
117
- });
108
+ // Use the original URL without any decoding
109
+ const response = await axios.get(url, {
110
+ responseType: "stream",
111
+ timeout: 30000,
112
+ maxContentLength: Infinity,
113
+ decompress: true,
114
+ httpAgent: agent.http,
115
+ httpsAgent: agent.https,
116
+ maxRedirects: 5,
117
+ validateStatus: (status) => status >= 200 && status < 300,
118
+ });
118
119
 
119
- // Use pipeline for better error handling and memory management
120
- await pipeline(
121
- response.data,
122
- fs.createWriteStream(outputPath, { highWaterMark: 4 * 1024 * 1024 }), // 4MB chunks
123
- );
120
+ // Use pipeline for better error handling and memory management
121
+ await pipeline(
122
+ response.data,
123
+ fs.createWriteStream(outputPath, { highWaterMark: 4 * 1024 * 1024 }), // 4MB chunks
124
+ );
124
125
 
125
- if (!fs.existsSync(outputPath) || fs.statSync(outputPath).size === 0) {
126
- throw new Error('Download failed or file is empty');
127
- }
128
- } catch (error) {
129
- if (fs.existsSync(outputPath)) {
130
- fs.unlinkSync(outputPath);
131
- }
132
- throw error;
126
+ if (!fs.existsSync(outputPath) || fs.statSync(outputPath).size === 0) {
127
+ throw new Error("Download failed or file is empty");
128
+ }
129
+ } catch (error) {
130
+ if (fs.existsSync(outputPath)) {
131
+ fs.unlinkSync(outputPath);
133
132
  }
133
+ throw error;
134
+ }
134
135
  }
135
136
 
136
137
  async function splitMediaFile(
137
- inputPath,
138
- chunkDurationInSeconds = 500,
139
- requestId = uuidv4(),
138
+ inputPath,
139
+ chunkDurationInSeconds = 500,
140
+ requestId = uuidv4(),
140
141
  ) {
141
- let tempPath = null;
142
- let uniqueOutputPath = null;
143
- let inputStream = null;
142
+ let tempPath = null;
143
+ let uniqueOutputPath = null;
144
+ let inputStream = null;
144
145
 
145
- try {
146
- uniqueOutputPath = generateUniqueFolderName();
147
- fs.mkdirSync(uniqueOutputPath, { recursive: true });
146
+ try {
147
+ uniqueOutputPath = generateUniqueFolderName();
148
+ fs.mkdirSync(uniqueOutputPath, { recursive: true });
149
+
150
+ tempDirectories.set(uniqueOutputPath, {
151
+ createdAt: Date.now(),
152
+ requestId,
153
+ });
148
154
 
149
- tempDirectories.set(uniqueOutputPath, {
150
- createdAt: Date.now(),
151
- requestId,
152
- });
155
+ // Handle URL downloads with streaming
156
+ const isUrl = /^(https?|ftp):\/\/[^\s/$.?#].[^\s]*$/i.test(inputPath);
157
+ if (isUrl) {
158
+ const urlObj = new URL(ensureEncoded(inputPath));
159
+ // Use LLM-friendly naming for temp files instead of original filename
160
+ const fileExtension = path.extname(urlObj.pathname) || ".mp3";
161
+ const shortId = generateShortId();
162
+ const tempFileName = `${shortId}${fileExtension}`;
163
+ tempPath = path.join(uniqueOutputPath, tempFileName);
164
+ console.log("Downloading file to:", tempPath);
165
+ await downloadFile(inputPath, tempPath);
166
+ inputPath = tempPath;
167
+ }
153
168
 
154
- // Handle URL downloads with streaming
155
- const isUrl = /^(https?|ftp):\/\/[^\s/$.?#].[^\s]*$/i.test(inputPath);
156
- if (isUrl) {
157
- const urlObj = new URL(ensureEncoded(inputPath));
158
- const originalFileName =
159
- path.basename(urlObj.pathname) || 'downloaded_file';
160
- tempPath = path.join(uniqueOutputPath, originalFileName);
161
- console.log('Downloading file to:', tempPath);
162
- await downloadFile(inputPath, tempPath);
163
- inputPath = tempPath;
164
- }
169
+ inputPath = path.resolve(inputPath);
170
+ if (!fs.existsSync(inputPath)) {
171
+ throw new Error(`Input file not found: ${inputPath}`);
172
+ }
165
173
 
166
- inputPath = path.resolve(inputPath);
167
- if (!fs.existsSync(inputPath)) {
168
- throw new Error(`Input file not found: ${inputPath}`);
169
- }
174
+ // Use a larger chunk size for better throughput while still managing memory
175
+ inputStream = fs.createReadStream(inputPath, {
176
+ highWaterMark: 4 * 1024 * 1024, // 4MB chunks
177
+ autoClose: true,
178
+ });
170
179
 
171
- // Use a larger chunk size for better throughput while still managing memory
172
- inputStream = fs.createReadStream(inputPath, {
173
- highWaterMark: 4 * 1024 * 1024, // 4MB chunks
174
- autoClose: true,
175
- });
180
+ console.log("Probing file:", inputPath);
181
+ const metadata = await ffmpegProbe(inputPath);
182
+ if (!metadata?.format?.duration) {
183
+ throw new Error("Invalid media file or unable to determine duration");
184
+ }
176
185
 
177
- console.log('Probing file:', inputPath);
178
- const metadata = await ffmpegProbe(inputPath);
179
- if (!metadata?.format?.duration) {
180
- throw new Error('Invalid media file or unable to determine duration');
181
- }
186
+ const duration = metadata.format.duration;
187
+ const numChunks = Math.ceil((duration - 1) / chunkDurationInSeconds);
188
+ console.log(
189
+ `Processing ${numChunks} chunks of ${chunkDurationInSeconds} seconds each`,
190
+ );
182
191
 
183
- const duration = metadata.format.duration;
184
- const numChunks = Math.ceil((duration - 1) / chunkDurationInSeconds);
185
- console.log(
186
- `Processing ${numChunks} chunks of ${chunkDurationInSeconds} seconds each`,
187
- );
192
+ // Generate filename once for all chunks
193
+ // Since we're converting to MP3 format for transcription, use .mp3 extension
194
+ const chunkBaseId = generateShortId();
195
+ const chunkBaseName = `${chunkBaseId}.mp3`;
188
196
 
189
- const chunkResults = new Array(numChunks); // Pre-allocate array to maintain order
190
- const chunkOffsets = new Array(numChunks); // Pre-allocate offsets array
197
+ const chunkResults = new Array(numChunks); // Pre-allocate array to maintain order
198
+ const chunkOffsets = new Array(numChunks); // Pre-allocate offsets array
191
199
 
192
- // Process chunks in parallel with a concurrency limit
193
- const CONCURRENT_CHUNKS = Math.min(3, os.cpus().length); // Use CPU count to determine concurrency
194
- const chunkPromises = [];
200
+ // Process chunks in parallel with a concurrency limit
201
+ const CONCURRENT_CHUNKS = Math.min(3, os.cpus().length); // Use CPU count to determine concurrency
202
+ const chunkPromises = [];
195
203
 
196
- for (let i = 0; i < numChunks; i += CONCURRENT_CHUNKS) {
197
- const chunkBatch = [];
198
- for (let j = 0; j < CONCURRENT_CHUNKS && i + j < numChunks; j++) {
199
- const chunkIndex = i + j;
200
- const outputFileName = path.join(
201
- uniqueOutputPath,
202
- `chunk-${chunkIndex + 1}-${path.parse(inputPath).name}.mp3`,
203
- );
204
- const offset = chunkIndex * chunkDurationInSeconds;
204
+ for (let i = 0; i < numChunks; i += CONCURRENT_CHUNKS) {
205
+ const chunkBatch = [];
206
+ for (let j = 0; j < CONCURRENT_CHUNKS && i + j < numChunks; j++) {
207
+ const chunkIndex = i + j;
208
+ // Use the same base filename for all chunks
209
+ const outputFileName = path.join(
210
+ uniqueOutputPath,
211
+ `chunk-${chunkIndex + 1}-${chunkBaseName}`,
212
+ );
213
+ const offset = chunkIndex * chunkDurationInSeconds;
205
214
 
206
- chunkBatch.push(
207
- processChunk(
208
- inputPath,
209
- outputFileName,
210
- offset,
211
- chunkDurationInSeconds,
212
- )
213
- .then((result) => {
214
- chunkResults[chunkIndex] = result; // Store in correct position
215
- chunkOffsets[chunkIndex] = offset; // Store offset in correct position
216
- console.log(`Completed chunk ${chunkIndex + 1}/${numChunks}`);
217
- return result;
218
- })
219
- .catch((error) => {
220
- console.error(
221
- `Failed to process chunk ${chunkIndex + 1}:`,
222
- error,
223
- );
224
- return null;
225
- }),
226
- );
227
- }
215
+ chunkBatch.push(
216
+ processChunk(
217
+ inputPath,
218
+ outputFileName,
219
+ offset,
220
+ chunkDurationInSeconds,
221
+ )
222
+ .then((result) => {
223
+ chunkResults[chunkIndex] = result; // Store in correct position
224
+ chunkOffsets[chunkIndex] = offset; // Store offset in correct position
225
+ console.log(`Completed chunk ${chunkIndex + 1}/${numChunks}`);
226
+ return result;
227
+ })
228
+ .catch((error) => {
229
+ console.error(
230
+ `Failed to process chunk ${chunkIndex + 1}:`,
231
+ error,
232
+ );
233
+ return null;
234
+ }),
235
+ );
236
+ }
228
237
 
229
- // Wait for the current batch to complete before starting the next
230
- await Promise.all(chunkBatch);
231
- }
238
+ // Wait for the current batch to complete before starting the next
239
+ await Promise.all(chunkBatch);
240
+ }
232
241
 
233
- // Filter out any failed chunks
234
- const validChunks = chunkResults.filter(Boolean);
235
- const validOffsets = chunkOffsets.filter((_, index) => chunkResults[index]);
242
+ // Filter out any failed chunks
243
+ const validChunks = chunkResults.filter(Boolean);
244
+ const validOffsets = chunkOffsets.filter((_, index) => chunkResults[index]);
236
245
 
237
- if (validChunks.length === 0) {
238
- throw new Error('No chunks were successfully processed');
239
- }
246
+ if (validChunks.length === 0) {
247
+ throw new Error("No chunks were successfully processed");
248
+ }
240
249
 
241
- return {
242
- chunkPromises: validChunks,
243
- chunkOffsets: validOffsets,
244
- uniqueOutputPath,
245
- };
246
- } catch (err) {
247
- if (uniqueOutputPath && fs.existsSync(uniqueOutputPath)) {
248
- try {
249
- fs.rmSync(uniqueOutputPath, { recursive: true, force: true });
250
- tempDirectories.delete(uniqueOutputPath);
251
- } catch (cleanupErr) {
252
- console.error('Error during cleanup:', cleanupErr);
253
- }
254
- }
255
- console.error('Error in splitMediaFile:', err);
256
- throw new Error(`Error processing media file: ${err.message}`);
257
- } finally {
258
- if (inputStream) {
259
- try {
260
- inputStream.destroy();
261
- } catch (err) {
262
- console.error('Error closing input stream:', err);
263
- }
264
- }
250
+ return {
251
+ chunkPromises: validChunks,
252
+ chunkOffsets: validOffsets,
253
+ uniqueOutputPath,
254
+ chunkBaseName, // Return the base filename for consistent naming
255
+ };
256
+ } catch (err) {
257
+ if (uniqueOutputPath && fs.existsSync(uniqueOutputPath)) {
258
+ try {
259
+ fs.rmSync(uniqueOutputPath, { recursive: true, force: true });
260
+ tempDirectories.delete(uniqueOutputPath);
261
+ } catch (cleanupErr) {
262
+ console.error("Error during cleanup:", cleanupErr);
263
+ }
264
+ }
265
+ console.error("Error in splitMediaFile:", err);
266
+ throw new Error(`Error processing media file: ${err.message}`);
267
+ } finally {
268
+ if (inputStream) {
269
+ try {
270
+ inputStream.destroy();
271
+ } catch (err) {
272
+ console.error("Error closing input stream:", err);
273
+ }
265
274
  }
275
+ }
266
276
  }
267
277
 
268
278
  export { splitMediaFile, downloadFile };