@aj-archipelago/cortex 1.3.58 → 1.3.59
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/helper-apps/cortex-file-handler/INTERFACE.md +20 -9
- package/helper-apps/cortex-file-handler/package-lock.json +2 -2
- package/helper-apps/cortex-file-handler/package.json +1 -1
- package/helper-apps/cortex-file-handler/scripts/setup-azure-container.js +17 -17
- package/helper-apps/cortex-file-handler/scripts/setup-test-containers.js +35 -35
- package/helper-apps/cortex-file-handler/src/blobHandler.js +1010 -909
- package/helper-apps/cortex-file-handler/src/constants.js +98 -98
- package/helper-apps/cortex-file-handler/src/docHelper.js +27 -27
- package/helper-apps/cortex-file-handler/src/fileChunker.js +224 -214
- package/helper-apps/cortex-file-handler/src/helper.js +93 -93
- package/helper-apps/cortex-file-handler/src/index.js +584 -550
- package/helper-apps/cortex-file-handler/src/localFileHandler.js +86 -86
- package/helper-apps/cortex-file-handler/src/redis.js +186 -90
- package/helper-apps/cortex-file-handler/src/services/ConversionService.js +301 -273
- package/helper-apps/cortex-file-handler/src/services/FileConversionService.js +55 -55
- package/helper-apps/cortex-file-handler/src/services/storage/AzureStorageProvider.js +174 -154
- package/helper-apps/cortex-file-handler/src/services/storage/GCSStorageProvider.js +239 -223
- package/helper-apps/cortex-file-handler/src/services/storage/LocalStorageProvider.js +161 -159
- package/helper-apps/cortex-file-handler/src/services/storage/StorageFactory.js +73 -71
- package/helper-apps/cortex-file-handler/src/services/storage/StorageProvider.js +46 -45
- package/helper-apps/cortex-file-handler/src/services/storage/StorageService.js +256 -213
- package/helper-apps/cortex-file-handler/src/start.js +4 -1
- package/helper-apps/cortex-file-handler/src/utils/filenameUtils.js +59 -25
- package/helper-apps/cortex-file-handler/tests/FileConversionService.test.js +119 -116
- package/helper-apps/cortex-file-handler/tests/blobHandler.test.js +257 -257
- package/helper-apps/cortex-file-handler/tests/cleanup.test.js +676 -0
- package/helper-apps/cortex-file-handler/tests/conversionResilience.test.js +124 -124
- package/helper-apps/cortex-file-handler/tests/fileChunker.test.js +249 -208
- package/helper-apps/cortex-file-handler/tests/fileUpload.test.js +439 -380
- package/helper-apps/cortex-file-handler/tests/getOperations.test.js +299 -263
- package/helper-apps/cortex-file-handler/tests/postOperations.test.js +265 -239
- package/helper-apps/cortex-file-handler/tests/start.test.js +1230 -1201
- package/helper-apps/cortex-file-handler/tests/storage/AzureStorageProvider.test.js +110 -105
- package/helper-apps/cortex-file-handler/tests/storage/GCSStorageProvider.test.js +201 -175
- package/helper-apps/cortex-file-handler/tests/storage/LocalStorageProvider.test.js +128 -125
- package/helper-apps/cortex-file-handler/tests/storage/StorageFactory.test.js +78 -73
- package/helper-apps/cortex-file-handler/tests/storage/StorageService.test.js +99 -99
- package/helper-apps/cortex-file-handler/tests/testUtils.helper.js +74 -70
- package/package.json +1 -1
- package/pathways/translate_subtitle.js +15 -8
|
@@ -1,144 +1,147 @@
|
|
|
1
|
-
import fs from
|
|
2
|
-
import { dirname, join } from
|
|
3
|
-
import { fileURLToPath } from
|
|
4
|
-
import test from
|
|
5
|
-
import axios from
|
|
6
|
-
import XLSX from
|
|
7
|
-
import { FileConversionService } from
|
|
1
|
+
import fs from "fs/promises";
|
|
2
|
+
import { dirname, join } from "path";
|
|
3
|
+
import { fileURLToPath } from "url";
|
|
4
|
+
import test from "ava";
|
|
5
|
+
import axios from "axios";
|
|
6
|
+
import XLSX from "xlsx";
|
|
7
|
+
import { FileConversionService } from "../src/services/FileConversionService.js";
|
|
8
8
|
|
|
9
9
|
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
10
10
|
|
|
11
11
|
// Mock context
|
|
12
12
|
const mockContext = {
|
|
13
|
-
|
|
13
|
+
log: console.log,
|
|
14
14
|
};
|
|
15
15
|
|
|
16
16
|
// Setup: Create test documents
|
|
17
17
|
test.before(async (t) => {
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
18
|
+
const testDir = join(__dirname, "test-docs");
|
|
19
|
+
await fs.mkdir(testDir, { recursive: true });
|
|
20
|
+
|
|
21
|
+
// Create various test files
|
|
22
|
+
const textFile = join(testDir, "test.txt");
|
|
23
|
+
const largeTextFile = join(testDir, "large.txt");
|
|
24
|
+
const unicodeFile = join(testDir, "unicode.txt");
|
|
25
|
+
const jsonFile = join(testDir, "test.json");
|
|
26
|
+
const emptyFile = join(testDir, "empty.txt");
|
|
27
|
+
const excelFile = join(testDir, "test.xlsx");
|
|
28
|
+
|
|
29
|
+
// Regular text content
|
|
30
|
+
await fs.writeFile(
|
|
31
|
+
textFile,
|
|
32
|
+
"This is a test document content.\nIt has multiple lines.\nThird line here.",
|
|
33
|
+
);
|
|
34
|
+
|
|
35
|
+
// Large text content (>100KB)
|
|
36
|
+
const largeContent = "Lorem ipsum ".repeat(10000);
|
|
37
|
+
await fs.writeFile(largeTextFile, largeContent);
|
|
38
|
+
|
|
39
|
+
// Unicode content
|
|
40
|
+
const unicodeContent =
|
|
41
|
+
"这是中文内容\nこれは日本語です\nЭто русский текст\n🌟 emoji test";
|
|
42
|
+
await fs.writeFile(unicodeFile, unicodeContent);
|
|
43
|
+
|
|
44
|
+
// JSON content
|
|
45
|
+
await fs.writeFile(jsonFile, JSON.stringify({ test: "content" }));
|
|
46
|
+
|
|
47
|
+
// Empty file
|
|
48
|
+
await fs.writeFile(emptyFile, "");
|
|
49
|
+
|
|
50
|
+
// Create a test Excel file
|
|
51
|
+
const workbook = XLSX.utils.book_new();
|
|
52
|
+
const ws1 = XLSX.utils.aoa_to_sheet([
|
|
53
|
+
["Header 1", "Header 2"],
|
|
54
|
+
["Data 1", "Data 2"],
|
|
55
|
+
["Data 3", "Data 4"],
|
|
56
|
+
]);
|
|
57
|
+
XLSX.utils.book_append_sheet(workbook, ws1, "Sheet1");
|
|
58
|
+
XLSX.writeFile(workbook, excelFile);
|
|
59
|
+
|
|
60
|
+
t.context = {
|
|
61
|
+
testDir,
|
|
62
|
+
textFile,
|
|
63
|
+
largeTextFile,
|
|
64
|
+
unicodeFile,
|
|
65
|
+
jsonFile,
|
|
66
|
+
emptyFile,
|
|
67
|
+
excelFile,
|
|
68
|
+
};
|
|
69
69
|
});
|
|
70
70
|
|
|
71
71
|
// Cleanup
|
|
72
72
|
test.after.always(async (t) => {
|
|
73
|
-
|
|
73
|
+
await fs.rm(t.context.testDir, { recursive: true, force: true });
|
|
74
74
|
});
|
|
75
75
|
|
|
76
76
|
// Test Excel to CSV conversion
|
|
77
|
-
test(
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
77
|
+
test("converts Excel to CSV successfully", async (t) => {
|
|
78
|
+
const service = new FileConversionService(mockContext);
|
|
79
|
+
const result = await service.convertFile(t.context.excelFile);
|
|
80
|
+
|
|
81
|
+
t.true(result.converted);
|
|
82
|
+
t.true(result.convertedPath.endsWith(".csv"));
|
|
83
|
+
|
|
84
|
+
// Read the converted file and verify content
|
|
85
|
+
const content = await fs.readFile(result.convertedPath, "utf-8");
|
|
86
|
+
t.true(content.includes("Header 1,Header 2"));
|
|
87
|
+
t.true(content.includes("Data 1,Data 2"));
|
|
88
|
+
t.true(content.includes("Data 3,Data 4"));
|
|
89
89
|
});
|
|
90
90
|
|
|
91
91
|
// Test document conversion with MarkItDown API
|
|
92
|
-
test(
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
92
|
+
test("converts document to markdown via MarkItDown API", async (t) => {
|
|
93
|
+
// Mock axios.get for MarkItDown API
|
|
94
|
+
const originalAxiosGet = axios.get;
|
|
95
|
+
axios.get = async (url) => {
|
|
96
|
+
if (url.includes("test.docx")) {
|
|
97
|
+
return {
|
|
98
|
+
data: {
|
|
99
|
+
markdown:
|
|
100
|
+
"# Test Document\n\nThis is a test document converted to markdown.",
|
|
101
|
+
},
|
|
102
|
+
};
|
|
103
|
+
}
|
|
104
|
+
throw new Error("Invalid URL");
|
|
105
|
+
};
|
|
106
|
+
|
|
107
|
+
const service = new FileConversionService(mockContext);
|
|
108
|
+
const result = await service.convertFile(
|
|
109
|
+
"test.docx",
|
|
110
|
+
"https://example.com/test.docx",
|
|
111
|
+
);
|
|
112
|
+
|
|
113
|
+
t.true(result.converted);
|
|
114
|
+
t.true(result.convertedPath.endsWith(".md"));
|
|
115
|
+
|
|
116
|
+
// Read the converted file and verify content
|
|
117
|
+
const content = await fs.readFile(result.convertedPath, "utf-8");
|
|
118
|
+
t.true(content.includes("# Test Document"));
|
|
119
|
+
t.true(content.includes("This is a test document converted to markdown"));
|
|
120
|
+
|
|
121
|
+
// Restore original axios.get
|
|
122
|
+
axios.get = originalAxiosGet;
|
|
119
123
|
});
|
|
120
124
|
|
|
121
125
|
// Test error handling for missing original URL
|
|
122
|
-
test(
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
);
|
|
126
|
+
test("handles missing original URL for document conversion", async (t) => {
|
|
127
|
+
const service = new FileConversionService(mockContext);
|
|
128
|
+
await t.throwsAsync(async () => service.convertFile("test.docx"), {
|
|
129
|
+
message: "Original URL is required for document conversion",
|
|
130
|
+
});
|
|
128
131
|
});
|
|
129
132
|
|
|
130
133
|
// Test error handling for unsupported file types
|
|
131
|
-
test(
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
134
|
+
test("handles unsupported file types", async (t) => {
|
|
135
|
+
const service = new FileConversionService(mockContext);
|
|
136
|
+
const result = await service.convertFile(t.context.jsonFile);
|
|
137
|
+
t.false(result.converted);
|
|
135
138
|
});
|
|
136
139
|
|
|
137
140
|
// Test file extension detection
|
|
138
|
-
test(
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
});
|
|
141
|
+
test("correctly detects file extensions", (t) => {
|
|
142
|
+
const service = new FileConversionService(mockContext);
|
|
143
|
+
t.true(service.needsConversion("test.docx"));
|
|
144
|
+
t.true(service.needsConversion("test.xlsx"));
|
|
145
|
+
t.false(service.needsConversion("test.txt"));
|
|
146
|
+
t.false(service.needsConversion("test.json"));
|
|
147
|
+
});
|