@aj-archipelago/cortex 1.3.58 → 1.3.59
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/helper-apps/cortex-file-handler/INTERFACE.md +20 -9
- package/helper-apps/cortex-file-handler/package-lock.json +2 -2
- package/helper-apps/cortex-file-handler/package.json +1 -1
- package/helper-apps/cortex-file-handler/scripts/setup-azure-container.js +17 -17
- package/helper-apps/cortex-file-handler/scripts/setup-test-containers.js +35 -35
- package/helper-apps/cortex-file-handler/src/blobHandler.js +1010 -909
- package/helper-apps/cortex-file-handler/src/constants.js +98 -98
- package/helper-apps/cortex-file-handler/src/docHelper.js +27 -27
- package/helper-apps/cortex-file-handler/src/fileChunker.js +224 -214
- package/helper-apps/cortex-file-handler/src/helper.js +93 -93
- package/helper-apps/cortex-file-handler/src/index.js +584 -550
- package/helper-apps/cortex-file-handler/src/localFileHandler.js +86 -86
- package/helper-apps/cortex-file-handler/src/redis.js +186 -90
- package/helper-apps/cortex-file-handler/src/services/ConversionService.js +301 -273
- package/helper-apps/cortex-file-handler/src/services/FileConversionService.js +55 -55
- package/helper-apps/cortex-file-handler/src/services/storage/AzureStorageProvider.js +174 -154
- package/helper-apps/cortex-file-handler/src/services/storage/GCSStorageProvider.js +239 -223
- package/helper-apps/cortex-file-handler/src/services/storage/LocalStorageProvider.js +161 -159
- package/helper-apps/cortex-file-handler/src/services/storage/StorageFactory.js +73 -71
- package/helper-apps/cortex-file-handler/src/services/storage/StorageProvider.js +46 -45
- package/helper-apps/cortex-file-handler/src/services/storage/StorageService.js +256 -213
- package/helper-apps/cortex-file-handler/src/start.js +4 -1
- package/helper-apps/cortex-file-handler/src/utils/filenameUtils.js +59 -25
- package/helper-apps/cortex-file-handler/tests/FileConversionService.test.js +119 -116
- package/helper-apps/cortex-file-handler/tests/blobHandler.test.js +257 -257
- package/helper-apps/cortex-file-handler/tests/cleanup.test.js +676 -0
- package/helper-apps/cortex-file-handler/tests/conversionResilience.test.js +124 -124
- package/helper-apps/cortex-file-handler/tests/fileChunker.test.js +249 -208
- package/helper-apps/cortex-file-handler/tests/fileUpload.test.js +439 -380
- package/helper-apps/cortex-file-handler/tests/getOperations.test.js +299 -263
- package/helper-apps/cortex-file-handler/tests/postOperations.test.js +265 -239
- package/helper-apps/cortex-file-handler/tests/start.test.js +1230 -1201
- package/helper-apps/cortex-file-handler/tests/storage/AzureStorageProvider.test.js +110 -105
- package/helper-apps/cortex-file-handler/tests/storage/GCSStorageProvider.test.js +201 -175
- package/helper-apps/cortex-file-handler/tests/storage/LocalStorageProvider.test.js +128 -125
- package/helper-apps/cortex-file-handler/tests/storage/StorageFactory.test.js +78 -73
- package/helper-apps/cortex-file-handler/tests/storage/StorageService.test.js +99 -99
- package/helper-apps/cortex-file-handler/tests/testUtils.helper.js +74 -70
- package/package.json +1 -1
- package/pathways/translate_subtitle.js +15 -8
|
@@ -1,113 +1,113 @@
|
|
|
1
|
-
import test from 'ava';
|
|
2
|
-
import { StorageService } from '../../src/services/storage/StorageService.js';
|
|
3
|
-
import { StorageFactory } from '../../src/services/storage/StorageFactory.js';
|
|
4
|
-
import path from 'path';
|
|
5
|
-
import os from 'os';
|
|
6
|
-
import fs from 'fs';
|
|
7
|
-
|
|
8
|
-
test(
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
1
|
+
import test from "ava";
|
|
2
|
+
import { StorageService } from "../../src/services/storage/StorageService.js";
|
|
3
|
+
import { StorageFactory } from "../../src/services/storage/StorageFactory.js";
|
|
4
|
+
import path from "path";
|
|
5
|
+
import os from "os";
|
|
6
|
+
import fs from "fs";
|
|
7
|
+
|
|
8
|
+
test("should create storage service with factory", (t) => {
|
|
9
|
+
const factory = new StorageFactory();
|
|
10
|
+
const service = new StorageService(factory);
|
|
11
|
+
t.truthy(service);
|
|
12
|
+
});
|
|
13
|
+
|
|
14
|
+
test("should get primary provider", (t) => {
|
|
15
|
+
const factory = new StorageFactory();
|
|
16
|
+
const service = new StorageService(factory);
|
|
17
|
+
const provider = service.getPrimaryProvider();
|
|
18
|
+
t.truthy(provider);
|
|
12
19
|
});
|
|
13
20
|
|
|
14
|
-
test(
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
21
|
+
test("should get backup provider", (t) => {
|
|
22
|
+
const factory = new StorageFactory();
|
|
23
|
+
const service = new StorageService(factory);
|
|
24
|
+
const provider = service.getBackupProvider();
|
|
25
|
+
if (!provider) {
|
|
26
|
+
t.log("GCS not configured, skipping test");
|
|
27
|
+
t.pass();
|
|
28
|
+
} else {
|
|
18
29
|
t.truthy(provider);
|
|
30
|
+
}
|
|
19
31
|
});
|
|
20
32
|
|
|
21
|
-
test(
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
33
|
+
test("should upload file to primary storage", async (t) => {
|
|
34
|
+
const factory = new StorageFactory();
|
|
35
|
+
const service = new StorageService(factory);
|
|
36
|
+
const testContent = "test content";
|
|
37
|
+
const buffer = Buffer.from(testContent);
|
|
38
|
+
|
|
39
|
+
const result = await service.uploadFile(buffer, "test.txt");
|
|
40
|
+
t.truthy(result.url);
|
|
41
|
+
|
|
42
|
+
// Cleanup
|
|
43
|
+
await service.deleteFile(result.url);
|
|
31
44
|
});
|
|
32
45
|
|
|
33
|
-
test(
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
46
|
+
test("should upload file to backup storage", async (t) => {
|
|
47
|
+
const factory = new StorageFactory();
|
|
48
|
+
const service = new StorageService(factory);
|
|
49
|
+
const provider = service.getBackupProvider();
|
|
50
|
+
if (!provider) {
|
|
51
|
+
t.log("GCS not configured, skipping test");
|
|
52
|
+
t.pass();
|
|
53
|
+
return;
|
|
54
|
+
}
|
|
55
|
+
const testContent = "test content";
|
|
56
|
+
const buffer = Buffer.from(testContent);
|
|
57
|
+
|
|
58
|
+
const result = await service.uploadFileToBackup(buffer, "test.txt");
|
|
59
|
+
t.truthy(result.url);
|
|
60
|
+
|
|
61
|
+
// Cleanup
|
|
62
|
+
await service.deleteFileFromBackup(result.url);
|
|
44
63
|
});
|
|
45
64
|
|
|
46
|
-
test(
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
// Cleanup
|
|
62
|
-
await service.deleteFileFromBackup(result.url);
|
|
65
|
+
test("should download file from primary storage", async (t) => {
|
|
66
|
+
const factory = new StorageFactory();
|
|
67
|
+
const service = new StorageService(factory);
|
|
68
|
+
const testContent = "test content";
|
|
69
|
+
const buffer = Buffer.from(testContent);
|
|
70
|
+
|
|
71
|
+
// Upload first
|
|
72
|
+
const uploadResult = await service.uploadFile(buffer, "test.txt");
|
|
73
|
+
|
|
74
|
+
// Download
|
|
75
|
+
const downloadResult = await service.downloadFile(uploadResult.url);
|
|
76
|
+
t.deepEqual(downloadResult, buffer);
|
|
77
|
+
|
|
78
|
+
// Cleanup
|
|
79
|
+
await service.deleteFile(uploadResult.url);
|
|
63
80
|
});
|
|
64
81
|
|
|
65
|
-
test(
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
82
|
+
test("should download file from backup storage", async (t) => {
|
|
83
|
+
const factory = new StorageFactory();
|
|
84
|
+
const service = new StorageService(factory);
|
|
85
|
+
const provider = service.getBackupProvider();
|
|
86
|
+
if (!provider) {
|
|
87
|
+
t.log("GCS not configured, skipping test");
|
|
88
|
+
t.pass();
|
|
89
|
+
return;
|
|
90
|
+
}
|
|
91
|
+
const testContent = "test content";
|
|
92
|
+
const buffer = Buffer.from(testContent);
|
|
93
|
+
|
|
94
|
+
// Upload first
|
|
95
|
+
const uploadResult = await service.uploadFileToBackup(buffer, "test.txt");
|
|
96
|
+
|
|
97
|
+
// Create temp file for download
|
|
98
|
+
const tempFile = path.join(os.tmpdir(), "test-download.txt");
|
|
99
|
+
try {
|
|
74
100
|
// Download
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
// Cleanup
|
|
79
|
-
await service.deleteFile(uploadResult.url);
|
|
80
|
-
});
|
|
101
|
+
await service.downloadFileFromBackup(uploadResult.url, tempFile);
|
|
102
|
+
const downloadedContent = await fs.promises.readFile(tempFile);
|
|
103
|
+
t.deepEqual(downloadedContent, buffer);
|
|
81
104
|
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
if (
|
|
87
|
-
|
|
88
|
-
t.pass();
|
|
89
|
-
return;
|
|
90
|
-
}
|
|
91
|
-
const testContent = 'test content';
|
|
92
|
-
const buffer = Buffer.from(testContent);
|
|
93
|
-
|
|
94
|
-
// Upload first
|
|
95
|
-
const uploadResult = await service.uploadFileToBackup(buffer, 'test.txt');
|
|
96
|
-
|
|
97
|
-
// Create temp file for download
|
|
98
|
-
const tempFile = path.join(os.tmpdir(), 'test-download.txt');
|
|
99
|
-
try {
|
|
100
|
-
// Download
|
|
101
|
-
await service.downloadFileFromBackup(uploadResult.url, tempFile);
|
|
102
|
-
const downloadedContent = await fs.promises.readFile(tempFile);
|
|
103
|
-
t.deepEqual(downloadedContent, buffer);
|
|
104
|
-
|
|
105
|
-
// Cleanup
|
|
106
|
-
await service.deleteFileFromBackup(uploadResult.url);
|
|
107
|
-
} finally {
|
|
108
|
-
// Cleanup temp file
|
|
109
|
-
if (fs.existsSync(tempFile)) {
|
|
110
|
-
fs.unlinkSync(tempFile);
|
|
111
|
-
}
|
|
105
|
+
// Cleanup
|
|
106
|
+
await service.deleteFileFromBackup(uploadResult.url);
|
|
107
|
+
} finally {
|
|
108
|
+
// Cleanup temp file
|
|
109
|
+
if (fs.existsSync(tempFile)) {
|
|
110
|
+
fs.unlinkSync(tempFile);
|
|
112
111
|
}
|
|
113
|
-
}
|
|
112
|
+
}
|
|
113
|
+
});
|
|
@@ -1,85 +1,89 @@
|
|
|
1
|
-
import axios from 'axios';
|
|
2
|
-
import { execSync } from 'child_process';
|
|
3
|
-
import fs from 'fs/promises';
|
|
1
|
+
import axios from "axios";
|
|
2
|
+
import { execSync } from "child_process";
|
|
3
|
+
import fs from "fs/promises";
|
|
4
4
|
|
|
5
|
-
export async function cleanupHashAndFile(hash, uploadedUrl, baseUrl) {
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
try {
|
|
9
|
-
const clearResponse = await axios.get(
|
|
10
|
-
`${baseUrl}?hash=${hash}&clearHash=true`,
|
|
11
|
-
{
|
|
12
|
-
validateStatus: (status) => true,
|
|
13
|
-
timeout: 10000,
|
|
14
|
-
},
|
|
15
|
-
);
|
|
16
|
-
} catch (error) {
|
|
17
|
-
console.error(`[cleanupHashAndFile] Error clearing hash: ${error.message}`);
|
|
18
|
-
}
|
|
19
|
-
}
|
|
20
|
-
|
|
21
|
-
// Then delete the file
|
|
5
|
+
export async function cleanupHashAndFile(hash, uploadedUrl, baseUrl) {
|
|
6
|
+
// Only perform hash operations if hash is provided
|
|
7
|
+
if (hash) {
|
|
22
8
|
try {
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
);
|
|
9
|
+
const clearResponse = await axios.get(
|
|
10
|
+
`${baseUrl}?hash=${hash}&clearHash=true`,
|
|
11
|
+
{
|
|
12
|
+
validateStatus: (status) => true,
|
|
13
|
+
timeout: 10000,
|
|
14
|
+
},
|
|
15
|
+
);
|
|
31
16
|
} catch (error) {
|
|
32
|
-
|
|
17
|
+
console.error(
|
|
18
|
+
`[cleanupHashAndFile] Error clearing hash: ${error.message}`,
|
|
19
|
+
);
|
|
33
20
|
}
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
// Then delete the file
|
|
24
|
+
try {
|
|
25
|
+
const folderName = getFolderNameFromUrl(uploadedUrl);
|
|
26
|
+
const deleteResponse = await axios.delete(
|
|
27
|
+
`${baseUrl}?operation=delete&requestId=${folderName}`,
|
|
28
|
+
{
|
|
29
|
+
validateStatus: (status) => true,
|
|
30
|
+
timeout: 10000,
|
|
31
|
+
},
|
|
32
|
+
);
|
|
33
|
+
} catch (error) {
|
|
34
|
+
console.error(`[cleanupHashAndFile] Error deleting file: ${error.message}`);
|
|
35
|
+
}
|
|
34
36
|
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
}
|
|
37
|
+
// Only verify hash if hash was provided
|
|
38
|
+
if (hash) {
|
|
39
|
+
try {
|
|
40
|
+
const verifyResponse = await axios.get(
|
|
41
|
+
`${baseUrl}?hash=${hash}&checkHash=true`,
|
|
42
|
+
{
|
|
43
|
+
validateStatus: (status) => true,
|
|
44
|
+
timeout: 10000,
|
|
45
|
+
},
|
|
46
|
+
);
|
|
47
|
+
} catch (error) {
|
|
48
|
+
console.error(
|
|
49
|
+
`[cleanupHashAndFile] Error verifying hash: ${error.message}`,
|
|
50
|
+
);
|
|
48
51
|
}
|
|
52
|
+
}
|
|
49
53
|
}
|
|
50
54
|
|
|
51
55
|
export function getFolderNameFromUrl(url) {
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
56
|
+
const urlObj = new URL(url);
|
|
57
|
+
const parts = urlObj.pathname.split("/").filter(Boolean);
|
|
58
|
+
if (url.includes("127.0.0.1:10000")) {
|
|
59
|
+
return parts[2].split("_")[0];
|
|
60
|
+
}
|
|
61
|
+
return parts[1].split("_")[0];
|
|
58
62
|
}
|
|
59
63
|
|
|
60
64
|
// Helper function to create a test media (audio) file of specified duration using ffmpeg
|
|
61
65
|
export async function createTestMediaFile(filepath, durationSeconds = 10) {
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
66
|
+
try {
|
|
67
|
+
console.log(`Creating test file: ${filepath} (${durationSeconds}s)`);
|
|
68
|
+
// Generate silence using ffmpeg (mono, 44.1kHz)
|
|
69
|
+
execSync(
|
|
70
|
+
`ffmpeg -f lavfi -i anullsrc=r=44100:cl=mono -t ${durationSeconds} -q:a 9 -acodec libmp3lame "${filepath}"`,
|
|
71
|
+
{
|
|
72
|
+
stdio: ["ignore", "pipe", "pipe"], // Capture stdout and stderr
|
|
73
|
+
},
|
|
74
|
+
);
|
|
71
75
|
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
}
|
|
77
|
-
console.log(
|
|
78
|
-
`Successfully created ${filepath} (${(stats.size / 1024 / 1024).toFixed(2)}MB)`,
|
|
79
|
-
);
|
|
80
|
-
} catch (error) {
|
|
81
|
-
console.error(`Error creating test file ${filepath}:`, error.message);
|
|
82
|
-
if (error.stderr) console.error('ffmpeg error:', error.stderr.toString());
|
|
83
|
-
throw error;
|
|
76
|
+
// Verify the file was created and has content
|
|
77
|
+
const stats = await fs.stat(filepath);
|
|
78
|
+
if (stats.size === 0) {
|
|
79
|
+
throw new Error("Generated file is empty");
|
|
84
80
|
}
|
|
85
|
-
|
|
81
|
+
console.log(
|
|
82
|
+
`Successfully created ${filepath} (${(stats.size / 1024 / 1024).toFixed(2)}MB)`,
|
|
83
|
+
);
|
|
84
|
+
} catch (error) {
|
|
85
|
+
console.error(`Error creating test file ${filepath}:`, error.message);
|
|
86
|
+
if (error.stderr) console.error("ffmpeg error:", error.stderr.toString());
|
|
87
|
+
throw error;
|
|
88
|
+
}
|
|
89
|
+
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@aj-archipelago/cortex",
|
|
3
|
-
"version": "1.3.58",
|
|
3
|
+
"version": "1.3.59",
|
|
4
4
|
"description": "Cortex is a GraphQL API for AI. It provides a simple, extensible interface for using AI services from OpenAI, Azure and others.",
|
|
5
5
|
"private": false,
|
|
6
6
|
"repository": {
|
|
@@ -37,7 +37,9 @@ export function selectBestTranslation(translations, startIndex, endIndex) {
|
|
|
37
37
|
|
|
38
38
|
// For multiple translations, prefer the one whose identifier is closest to the middle
|
|
39
39
|
// of the requested range
|
|
40
|
-
const
|
|
40
|
+
const startNum = Number(startIndex);
|
|
41
|
+
const endNum = Number(endIndex);
|
|
42
|
+
const targetValue = (isNaN(startNum) || isNaN(endNum)) ? 0 : (startNum + endNum) / 2;
|
|
41
43
|
|
|
42
44
|
return translations.reduce((best, current) => {
|
|
43
45
|
try {
|
|
@@ -82,7 +84,7 @@ export async function translateChunk(chunk, args, maxRetries = 3) {
|
|
|
82
84
|
const content = match[1].trim();
|
|
83
85
|
|
|
84
86
|
const parsed = parse(content, { preserveIndexes: true });
|
|
85
|
-
return parsed.cues;
|
|
87
|
+
return parsed.cues || [];
|
|
86
88
|
|
|
87
89
|
} catch (e) {
|
|
88
90
|
logger.error(`Error translating chunk ${chunk.startIndex}-${chunk.endIndex} (attempt ${attempt + 1}): ${e}`);
|
|
@@ -105,9 +107,11 @@ export default {
|
|
|
105
107
|
model: "oai-gpt4o",
|
|
106
108
|
enableDuplicateRequests: false,
|
|
107
109
|
timeout: 3600,
|
|
108
|
-
executePathway: async ({args}) => {
|
|
110
|
+
executePathway: async ({args, resolver}) => {
|
|
109
111
|
try {
|
|
110
|
-
const {
|
|
112
|
+
const combinedArgs = { ...resolver?.pathway?.inputParameters, ...args };
|
|
113
|
+
const { text, format = 'vtt' } = combinedArgs;
|
|
114
|
+
|
|
111
115
|
const parsed = parse(text, { format, preserveIndexes: true });
|
|
112
116
|
const captions = parsed.cues;
|
|
113
117
|
|
|
@@ -120,13 +124,16 @@ export default {
|
|
|
120
124
|
logger.info(`Split subtitles into ${chunks.length} overlapping chunks`);
|
|
121
125
|
|
|
122
126
|
// Translate all chunks in parallel
|
|
123
|
-
const chunkPromises = chunks.map(chunk => translateChunk(chunk,
|
|
127
|
+
const chunkPromises = chunks.map(chunk => translateChunk(chunk, combinedArgs));
|
|
124
128
|
const translatedChunks = await Promise.all(chunkPromises);
|
|
125
129
|
|
|
126
130
|
// Create a map of caption index to all its translations
|
|
127
131
|
const translationMap = new Map();
|
|
128
132
|
translatedChunks.flat().forEach(caption => {
|
|
129
|
-
|
|
133
|
+
// Skip null/undefined captions
|
|
134
|
+
if (!caption) return;
|
|
135
|
+
|
|
136
|
+
const identifier = String(caption.identifier || caption.index || 'unknown');
|
|
130
137
|
if (!translationMap.has(identifier)) {
|
|
131
138
|
translationMap.set(identifier, []);
|
|
132
139
|
}
|
|
@@ -135,10 +142,10 @@ export default {
|
|
|
135
142
|
|
|
136
143
|
// Select best translation for each caption
|
|
137
144
|
const finalCaptions = captions.map(caption => {
|
|
138
|
-
const identifier = caption.identifier || caption.index;
|
|
145
|
+
const identifier = String(caption.identifier || caption.index || 'unknown');
|
|
139
146
|
const translations = translationMap.get(identifier) || [caption];
|
|
140
147
|
const bestTranslation = selectBestTranslation(translations, identifier, identifier);
|
|
141
|
-
const text = bestTranslation?.text || caption?.text;
|
|
148
|
+
const text = bestTranslation?.text || caption?.text || '';
|
|
142
149
|
return { ...caption, text };
|
|
143
150
|
});
|
|
144
151
|
|