@aj-archipelago/cortex 1.3.50 → 1.3.52
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/config.js +1 -1
- package/helper-apps/cortex-browser/Dockerfile +19 -31
- package/helper-apps/cortex-browser/function_app.py +708 -181
- package/helper-apps/cortex-browser/requirements.txt +4 -4
- package/helper-apps/cortex-file-handler/{.env.test.azure → .env.test.azure.sample} +2 -1
- package/helper-apps/cortex-file-handler/{.env.test.gcs → .env.test.gcs.sample} +2 -1
- package/helper-apps/cortex-file-handler/{.env.test → .env.test.sample} +2 -1
- package/helper-apps/cortex-file-handler/Dockerfile +1 -1
- package/helper-apps/cortex-file-handler/INTERFACE.md +178 -0
- package/helper-apps/cortex-file-handler/function.json +2 -6
- package/helper-apps/cortex-file-handler/package-lock.json +6065 -5964
- package/helper-apps/cortex-file-handler/package.json +11 -6
- package/helper-apps/cortex-file-handler/scripts/setup-azure-container.js +12 -9
- package/helper-apps/cortex-file-handler/scripts/setup-test-containers.js +21 -18
- package/helper-apps/cortex-file-handler/scripts/test-azure.sh +4 -1
- package/helper-apps/cortex-file-handler/scripts/test-gcs.sh +1 -1
- package/helper-apps/cortex-file-handler/src/blobHandler.js +1056 -0
- package/helper-apps/cortex-file-handler/{constants.js → src/constants.js} +64 -48
- package/helper-apps/cortex-file-handler/src/docHelper.js +37 -0
- package/helper-apps/cortex-file-handler/{fileChunker.js → src/fileChunker.js} +97 -65
- package/helper-apps/cortex-file-handler/{helper.js → src/helper.js} +34 -25
- package/helper-apps/cortex-file-handler/src/index.js +608 -0
- package/helper-apps/cortex-file-handler/src/localFileHandler.js +107 -0
- package/helper-apps/cortex-file-handler/{redis.js → src/redis.js} +23 -17
- package/helper-apps/cortex-file-handler/src/services/ConversionService.js +309 -0
- package/helper-apps/cortex-file-handler/src/services/FileConversionService.js +57 -0
- package/helper-apps/cortex-file-handler/src/services/storage/AzureStorageProvider.js +177 -0
- package/helper-apps/cortex-file-handler/src/services/storage/GCSStorageProvider.js +258 -0
- package/helper-apps/cortex-file-handler/src/services/storage/LocalStorageProvider.js +182 -0
- package/helper-apps/cortex-file-handler/src/services/storage/StorageFactory.js +86 -0
- package/helper-apps/cortex-file-handler/src/services/storage/StorageProvider.js +53 -0
- package/helper-apps/cortex-file-handler/src/services/storage/StorageService.js +259 -0
- package/helper-apps/cortex-file-handler/src/start.js +88 -0
- package/helper-apps/cortex-file-handler/src/utils/filenameUtils.js +28 -0
- package/helper-apps/cortex-file-handler/tests/FileConversionService.test.js +144 -0
- package/helper-apps/cortex-file-handler/tests/blobHandler.test.js +90 -66
- package/helper-apps/cortex-file-handler/tests/conversionResilience.test.js +152 -0
- package/helper-apps/cortex-file-handler/tests/fileChunker.test.js +105 -108
- package/helper-apps/cortex-file-handler/tests/fileUpload.test.js +462 -0
- package/helper-apps/cortex-file-handler/tests/files/DOCX_TestPage.docx +0 -0
- package/helper-apps/cortex-file-handler/tests/files/tests-example.xls +0 -0
- package/helper-apps/cortex-file-handler/tests/getOperations.test.js +307 -0
- package/helper-apps/cortex-file-handler/tests/postOperations.test.js +291 -0
- package/helper-apps/cortex-file-handler/tests/start.test.js +984 -647
- package/helper-apps/cortex-file-handler/tests/storage/AzureStorageProvider.test.js +120 -0
- package/helper-apps/cortex-file-handler/tests/storage/GCSStorageProvider.test.js +193 -0
- package/helper-apps/cortex-file-handler/tests/storage/LocalStorageProvider.test.js +148 -0
- package/helper-apps/cortex-file-handler/tests/storage/StorageFactory.test.js +100 -0
- package/helper-apps/cortex-file-handler/tests/storage/StorageService.test.js +113 -0
- package/helper-apps/cortex-file-handler/tests/testUtils.helper.js +85 -0
- package/helper-apps/cortex-markitdown/.funcignore +1 -0
- package/helper-apps/cortex-markitdown/MarkitdownConverterFunction/__init__.py +64 -0
- package/helper-apps/cortex-markitdown/MarkitdownConverterFunction/function.json +21 -0
- package/helper-apps/cortex-markitdown/README.md +94 -0
- package/helper-apps/cortex-markitdown/host.json +15 -0
- package/helper-apps/cortex-markitdown/requirements.txt +2 -0
- package/lib/entityConstants.js +1 -1
- package/lib/requestExecutor.js +44 -36
- package/package.json +1 -1
- package/pathways/system/entity/tools/sys_tool_readfile.js +24 -2
- package/server/plugins/openAiWhisperPlugin.js +59 -87
- package/helper-apps/cortex-file-handler/blobHandler.js +0 -567
- package/helper-apps/cortex-file-handler/docHelper.js +0 -144
- package/helper-apps/cortex-file-handler/index.js +0 -440
- package/helper-apps/cortex-file-handler/localFileHandler.js +0 -108
- package/helper-apps/cortex-file-handler/start.js +0 -63
- package/helper-apps/cortex-file-handler/tests/docHelper.test.js +0 -148
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
/**
 * Abstract contract shared by every storage back-end (Azure, GCS,
 * local filesystem, ...). Concrete providers must override every
 * method below; the base implementations always throw.
 */
export class StorageProvider {
    /**
     * Upload a file to storage.
     * @param {Object} context - The context object
     * @param {string} filePath - Path to the file to upload
     * @param {string} requestId - Unique identifier for the request
     * @param {string} [hash] - Optional hash of the file
     * @returns {Promise<{url: string, blobName: string}>} The URL and blob name of the uploaded file
     * @throws {Error} Always, until overridden by a concrete provider
     */
    async uploadFile(context, filePath, requestId, hash = null) {
        throw new Error('Method not implemented');
    }

    /**
     * Delete every stored file associated with a request ID.
     * @param {string} requestId - The request ID to delete files for
     * @returns {Promise<string[]>} Array of deleted file URLs
     * @throws {Error} Always, until overridden by a concrete provider
     */
    async deleteFiles(requestId) {
        throw new Error('Method not implemented');
    }

    /**
     * Check whether a file exists at the given URL.
     * @param {string} url - The URL to check
     * @returns {Promise<boolean>} Whether the file exists
     * @throws {Error} Always, until overridden by a concrete provider
     */
    async fileExists(url) {
        throw new Error('Method not implemented');
    }

    /**
     * Download a file from storage to a local path.
     * @param {string} url - The URL of the file to download
     * @param {string} destinationPath - Where to save the downloaded file
     * @returns {Promise<void>}
     * @throws {Error} Always, until overridden by a concrete provider
     */
    async downloadFile(url, destinationPath) {
        throw new Error('Method not implemented');
    }

    /**
     * Remove a batch of files identified by their URLs.
     * @param {string[]} urls - Array of URLs to clean up
     * @returns {Promise<void>}
     * @throws {Error} Always, until overridden by a concrete provider
     */
    async cleanup(urls) {
        throw new Error('Method not implemented');
    }
}
|
|
@@ -0,0 +1,259 @@
|
|
|
1
|
+
import { StorageFactory } from './StorageFactory.js';
import path from 'path';
import os from 'os';
import fs from 'fs';

/**
 * Best-effort removal of a temporary file. Missing files and unlink
 * errors are deliberately ignored so cleanup can never mask the
 * primary result or error of the surrounding operation.
 * @param {string} filePath - Absolute path of the file to remove
 * @returns {Promise<void>}
 */
async function removeIfExists(filePath) {
    if (fs.existsSync(filePath)) {
        await fs.promises.unlink(filePath).catch(() => {});
    }
}

/**
 * High-level storage facade. Writes go through a primary provider and,
 * when a GCS backup provider is configured, are mirrored there too.
 * `gs://` URLs are always routed to the backup provider.
 */
export class StorageService {
    /**
     * @param {StorageFactory} [factory] - Provider factory; a default
     *   StorageFactory is created when omitted.
     */
    constructor(factory) {
        this.factory = factory || new StorageFactory();
        this.primaryProvider = this.factory.getPrimaryProvider();
        this.backupProvider = this.factory.getGCSProvider();
    }

    /** @returns {Object} The primary storage provider */
    getPrimaryProvider() {
        return this.primaryProvider;
    }

    /** @returns {Object|null} The backup (GCS) provider, or null/undefined when not configured */
    getBackupProvider() {
        return this.backupProvider;
    }

    /**
     * Upload a file to storage.
     *
     * Supported call shapes:
     *   1) uploadFile(buffer, filename)
     *   2) uploadFile(context, filePath, requestId, hash?) – legacy internal use
     *
     * @returns {Promise<{url: string}>} For shape 1; shape 2 returns the
     *   provider result plus an optional `gcs` backup URL.
     */
    async uploadFile(...args) {
        // Shape (buffer, filename): stage the buffer in a temp file first.
        if (args.length === 2 && Buffer.isBuffer(args[0]) && typeof args[1] === 'string') {
            const [buffer, filename] = args;
            // Embed the (base)name of the upload in the temp path; basename()
            // keeps any path separators in `filename` from escaping tmpdir.
            const tempFile = path.join(os.tmpdir(), `${Date.now()}_${path.basename(filename)}`);
            await fs.promises.writeFile(tempFile, buffer);
            try {
                const { url } = await this.primaryProvider.uploadFile({}, tempFile, filename);
                return { url };
            } finally {
                await removeIfExists(tempFile);
            }
        }

        // Fallback to legacy (context, filePath, requestId, hash?)
        const [context, filePath, requestId, hash] = args;
        return this.uploadFileWithProviders(context, filePath, requestId, hash);
    }

    /**
     * Upload a buffer or an on-disk file directly to the backup provider.
     * @param {Buffer|string} fileOrBuffer - File contents or a local file path
     * @param {string} filename - Name to store the file under
     * @returns {Promise<{url: string}>} URL of the uploaded backup copy
     * @throws {Error} When no backup provider is configured
     */
    async uploadFileToBackup(fileOrBuffer, filename) {
        if (!this.backupProvider) {
            throw new Error('Backup provider not configured');
        }

        if (Buffer.isBuffer(fileOrBuffer)) {
            const tempFile = path.join(os.tmpdir(), `${Date.now()}_${path.basename(filename)}`);
            await fs.promises.writeFile(tempFile, fileOrBuffer);
            try {
                const result = await this.backupProvider.uploadFile({}, tempFile, filename);
                return { url: result.url };
            } finally {
                await removeIfExists(tempFile);
            }
        }

        const result = await this.backupProvider.uploadFile({}, fileOrBuffer, filename);
        return { url: result.url };
    }

    /**
     * Download a file; `gs://` URLs go through the backup provider.
     * @param {string} url - Source URL
     * @param {string} [destinationPath] - When given, stream to disk and
     *   resolve void; otherwise resolve with a Buffer of the contents.
     * @returns {Promise<Buffer|void>}
     * @throws {Error} For a gs:// URL when no backup provider is configured
     */
    async downloadFile(url, destinationPath = null) {
        const useBackup = url.startsWith('gs://');

        if (useBackup && !this.backupProvider) {
            throw new Error('Backup provider not configured');
        }

        const provider = useBackup ? this.backupProvider : this.primaryProvider;

        // If caller supplied a destination path, stream to disk and return void
        if (destinationPath) {
            await provider.downloadFile(url, destinationPath);
            return;
        }

        // Otherwise download to a temp file and return a Buffer
        const tempFile = path.join(os.tmpdir(), path.basename(url));
        try {
            await provider.downloadFile(url, tempFile);
            return await fs.promises.readFile(tempFile);
        } finally {
            await removeIfExists(tempFile);
        }
    }

    /**
     * Delete a single file from primary storage.
     * Uses the provider's `deleteFile` when available, otherwise falls
     * back to its batch `deleteFiles` API.
     * @param {string} url - URL of the file to delete
     */
    async deleteFile(url) {
        if (typeof this.primaryProvider.deleteFile === 'function') {
            return await this.primaryProvider.deleteFile(url);
        }
        // Fallback for providers that only have deleteFiles
        return await this.primaryProvider.deleteFiles([url]);
    }

    /**
     * Delete a single file from the backup provider.
     * @param {string} url - URL of the file to delete
     * @throws {Error} When no backup provider is configured
     */
    async deleteFileFromBackup(url) {
        if (!this.backupProvider) {
            throw new Error('Backup provider not configured');
        }
        if (typeof this.backupProvider.deleteFile === 'function') {
            return await this.backupProvider.deleteFile(url);
        }
        // Fallback for providers that only have deleteFiles
        return await this.backupProvider.deleteFiles([url]);
    }

    /**
     * Legacy upload path: write to primary storage and mirror to the
     * backup provider when one is configured.
     * @returns {Promise<Object>} Primary result, plus `gcs` with the backup URL when mirrored
     */
    async uploadFileWithProviders(context, filePath, requestId, hash = null) {
        const primaryResult = await this.primaryProvider.uploadFile(context, filePath, requestId, hash);

        let gcsResult = null;
        if (this.backupProvider) {
            gcsResult = await this.backupProvider.uploadFile(context, filePath, requestId, hash);
        }

        return { ...primaryResult, gcs: gcsResult?.url };
    }

    /**
     * Delete all files for a request ID from primary storage and (when
     * configured) the backup provider. Failures in either back-end are
     * logged but do not abort the other deletion.
     * @param {string} requestId - The request ID whose files should be removed
     * @returns {Promise<string[]>} URLs of every file that was deleted
     * @throws {Error} When requestId is missing
     */
    async deleteFiles(requestId) {
        if (!requestId) {
            throw new Error('Missing requestId parameter');
        }

        const results = [];

        // Delete from primary storage
        try {
            const primaryResult = await this.primaryProvider.deleteFiles(requestId);
            if (primaryResult && primaryResult.length > 0) {
                results.push(...primaryResult);
            }
        } catch (error) {
            console.error(`Error deleting files from primary storage for ${requestId}:`, error);
        }

        // If GCS is configured, delete from there too
        if (this.backupProvider) {
            try {
                const gcsResult = await this.backupProvider.deleteFiles(requestId);
                if (gcsResult && gcsResult.length > 0) {
                    results.push(...gcsResult);
                }
            } catch (error) {
                console.error(`Error deleting files from GCS for ${requestId}:`, error);
            }
        }

        return results;
    }

    /**
     * Check whether a file exists; `gs://` URLs are checked against the
     * backup provider. Any provider error is treated as "does not exist".
     * @param {string} url - URL to check
     * @returns {Promise<boolean>}
     */
    async fileExists(url) {
        if (!url) {
            return false;
        }

        try {
            if (url.startsWith('gs://')) {
                return this.backupProvider ? await this.backupProvider.fileExists(url) : false;
            }
            return await this.primaryProvider.fileExists(url);
        } catch (error) {
            console.error(`Error checking file existence for ${url}:`, error);
            return false;
        }
    }

    /**
     * Remove a mixed batch of URLs, routing `gs://` URLs to the backup
     * provider and everything else to primary storage.
     * @param {string[]} urls - URLs to clean up
     * @returns {Promise<string[]|void>} Combined cleanup results, or void for an empty input
     */
    async cleanup(urls) {
        if (!urls || !urls.length) return;

        const results = [];

        // Split URLs by type
        const primaryUrls = [];
        const gcsUrls = [];

        for (const url of urls) {
            if (url.startsWith('gs://')) {
                gcsUrls.push(url);
            } else {
                primaryUrls.push(url);
            }
        }

        // Clean up primary storage
        if (primaryUrls.length > 0) {
            const primaryResult = await this.primaryProvider.cleanup(primaryUrls);
            results.push(...primaryResult);
        }

        // Clean up GCS if configured
        if (gcsUrls.length > 0 && this.backupProvider) {
            const gcsResult = await this.backupProvider.cleanup(gcsUrls);
            results.push(...gcsResult);
        }

        return results;
    }

    /**
     * Ensure an existing primary-storage file also has a live GCS backup
     * copy, re-uploading it from primary storage when necessary.
     * @param {Object} context - The context object passed through to providers
     * @param {Object} existingFile - File record; reads `url`, `gcs`, `blobName`
     * @returns {Promise<Object>} The file record, with `gcs` set when a backup was made
     */
    async ensureGCSUpload(context, existingFile) {
        if (!this.backupProvider || !existingFile.url || !this.backupProvider.isConfigured()) {
            return existingFile;
        }

        // If we already have a GCS URL, check if it exists
        if (existingFile.gcs) {
            const exists = await this.backupProvider.fileExists(existingFile.gcs);
            if (exists) {
                return existingFile;
            }
        }

        // Download from primary storage
        const tempFile = path.join(os.tmpdir(), path.basename(existingFile.url));
        try {
            await this.primaryProvider.downloadFile(existingFile.url, tempFile);

            // Upload to GCS under the original request folder when known
            const requestId = path.dirname(existingFile.blobName) || 'restore';
            const gcsResult = await this.backupProvider.uploadFile(
                context,
                tempFile,
                requestId
            );

            return {
                ...existingFile,
                gcs: gcsResult.url
            };
        } finally {
            // Cleanup temp file
            await removeIfExists(tempFile);
        }
    }

    /**
     * Download a file from the backup provider.
     * @param {string} url - Source URL (normally gs://)
     * @param {string} [destinationPath] - Optional on-disk destination
     * @returns {Promise<Buffer|void>}
     * @throws {Error} When no backup provider is configured
     */
    async downloadFileFromBackup(url, destinationPath = null) {
        if (!this.backupProvider) {
            throw new Error('Backup provider not configured');
        }
        // Delegate to the unified downloadFile handler
        return await this.downloadFile(url, destinationPath);
    }
}
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
import CortexFileHandler from "./index.js";
import express from "express";
import { fileURLToPath } from "url";
import { dirname, join } from "path";
import cors from "cors";
import { readFileSync } from "fs";

import { publicIpv4 } from "public-ip";

// When running under tests we want all generated URLs to resolve to the
// locally-running server, otherwise checks like HEAD requests inside the
// handler will fail (because the external IP is not reachable from inside
// the test runner). Use the machine's public IP in normal operation, but
// fall back to "localhost" when the environment variable NODE_ENV indicates
// a test run.

let ipAddress = "localhost";
if (process.env.NODE_ENV !== "test") {
  try {
    ipAddress = await publicIpv4();
  } catch (err) {
    // In rare cases querying the public IP can fail (e.g. no network when
    // running offline). Keep the default of "localhost" in that case so we
    // still generate valid URLs.
    console.warn("Unable to determine public IPv4 address – defaulting to 'localhost'.", err);
  }
}

const app = express();
const port = process.env.PORT || 7071;
const publicFolder = join(dirname(fileURLToPath(import.meta.url)), "files");

// Get version from package.json
const packageJson = JSON.parse(
  readFileSync(
    join(dirname(fileURLToPath(import.meta.url)), "../package.json"),
    "utf8",
  ),
);
const version = packageJson.version;

app.use(cors());
// Serve static files from the public folder
app.use("/files", express.static(publicFolder));

// Health check endpoint
app.get("/health", (req, res) => {
  res.status(200).json({
    status: "healthy",
    version: version,
  });
});

/**
 * Shared Express adapter around the Azure-Functions-style handler:
 * builds a function-like context, invokes CortexFileHandler, then
 * translates context.res back into an Express response. Extracted so
 * the primary and legacy endpoints stay identical by construction.
 */
const handleFileRequest = async (req, res) => {
  const context = { req, res, log: console.log };
  try {
    await CortexFileHandler(context, req);
    context.log(context.res);
    res.status(context.res.status || 200).send(context.res.body);
  } catch (error) {
    const status = error.status || 500;
    const message = error.message || "Internal server error";
    res.status(status).send(message);
  }
};

// New primary endpoint
app.all("/api/CortexFileHandler", handleFileRequest);

// Legacy endpoint for compatibility
app.all("/api/MediaFileChunker", handleFileRequest);

app.listen(port, () => {
  console.log(
    `Cortex File Handler v${version} running on port ${port} (includes legacy MediaFileChunker endpoint)`,
  );
});

export { port, publicFolder, ipAddress };
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
import path from 'path';

/**
 * Sanitize a filename so that it is safe and consistent across all back-ends
 * – Decode any existing URI encoding
 * – Strip directory components
 * – Replace characters that are not alphanum, dash, dot, or underscore with `_`
 * – Convert spaces to underscores to avoid unintended encoding by some SDKs
 *
 * @param {string} raw The raw filename/path/URL component
 * @returns {string} A sanitized filename suitable for Azure, GCS, local FS, etc.
 */
export function sanitizeFilename(raw = '') {
  let decoded;
  try {
    decoded = decodeURIComponent(raw);
  } catch (_) {
    // Already decoded / not URI encoded – keep the input as-is
    decoded = raw;
  }

  // Drop any directory components, then normalize the remaining name:
  // spaces first (so they never become %20 downstream), then anything
  // outside [A-Za-z0-9_.-].
  return path
    .basename(decoded)
    .replace(/\s+/g, '_')
    .replace(/[^\w.-]/g, '_');
}
|
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
import fs from 'fs/promises';
import { dirname, join } from 'path';
import { fileURLToPath } from 'url';
import test from 'ava';
import axios from 'axios';
import XLSX from 'xlsx';
import { FileConversionService } from '../src/services/FileConversionService.js';

const __dirname = dirname(fileURLToPath(import.meta.url));

// Minimal stand-in for the Azure-Functions-style context the service expects.
const mockContext = {
  log: console.log
};

// Setup: create the fixture documents every test reads from.
test.before(async (t) => {
  const testDir = join(__dirname, 'test-docs');
  await fs.mkdir(testDir, { recursive: true });

  // Create various test files
  const textFile = join(testDir, 'test.txt');
  const largeTextFile = join(testDir, 'large.txt');
  const unicodeFile = join(testDir, 'unicode.txt');
  const jsonFile = join(testDir, 'test.json');
  const emptyFile = join(testDir, 'empty.txt');
  const excelFile = join(testDir, 'test.xlsx');

  // Regular text content
  await fs.writeFile(
    textFile,
    'This is a test document content.\nIt has multiple lines.\nThird line here.',
  );

  // Large text content (>100KB)
  const largeContent = 'Lorem ipsum '.repeat(10000);
  await fs.writeFile(largeTextFile, largeContent);

  // Unicode content
  const unicodeContent =
    '这是中文内容\nこれは日本語です\nЭто русский текст\n🌟 emoji test';
  await fs.writeFile(unicodeFile, unicodeContent);

  // JSON content
  await fs.writeFile(jsonFile, JSON.stringify({ test: 'content' }));

  // Empty file
  await fs.writeFile(emptyFile, '');

  // Create a test Excel file
  const workbook = XLSX.utils.book_new();
  const ws1 = XLSX.utils.aoa_to_sheet([
    ['Header 1', 'Header 2'],
    ['Data 1', 'Data 2'],
    ['Data 3', 'Data 4']
  ]);
  XLSX.utils.book_append_sheet(workbook, ws1, 'Sheet1');
  XLSX.writeFile(workbook, excelFile);

  t.context = {
    testDir,
    textFile,
    largeTextFile,
    unicodeFile,
    jsonFile,
    emptyFile,
    excelFile
  };
});

// Cleanup
test.after.always(async (t) => {
  await fs.rm(t.context.testDir, { recursive: true, force: true });
});

// Test Excel to CSV conversion
test('converts Excel to CSV successfully', async (t) => {
  const service = new FileConversionService(mockContext);
  const result = await service.convertFile(t.context.excelFile);

  t.true(result.converted);
  t.true(result.convertedPath.endsWith('.csv'));

  // Read the converted file and verify content
  const content = await fs.readFile(result.convertedPath, 'utf-8');
  t.true(content.includes('Header 1,Header 2'));
  t.true(content.includes('Data 1,Data 2'));
  t.true(content.includes('Data 3,Data 4'));
});

// Test document conversion with MarkItDown API
test('converts document to markdown via MarkItDown API', async (t) => {
  // Stub axios.get for the MarkItDown API. Restore it in `finally` so a
  // failing assertion cannot leak the stub into later tests (the original
  // restored it only on the success path).
  const originalAxiosGet = axios.get;
  axios.get = async (url) => {
    if (url.includes('test.docx')) {
      return {
        data: {
          markdown: '# Test Document\n\nThis is a test document converted to markdown.'
        }
      };
    }
    throw new Error('Invalid URL');
  };

  try {
    const service = new FileConversionService(mockContext);
    const result = await service.convertFile('test.docx', 'https://example.com/test.docx');

    t.true(result.converted);
    t.true(result.convertedPath.endsWith('.md'));

    // Read the converted file and verify content
    const content = await fs.readFile(result.convertedPath, 'utf-8');
    t.true(content.includes('# Test Document'));
    t.true(content.includes('This is a test document converted to markdown'));
  } finally {
    // Restore original axios.get
    axios.get = originalAxiosGet;
  }
});

// Test error handling for missing original URL
test('handles missing original URL for document conversion', async (t) => {
  const service = new FileConversionService(mockContext);
  await t.throwsAsync(
    async () => service.convertFile('test.docx'),
    { message: 'Original URL is required for document conversion' }
  );
});

// Test error handling for unsupported file types
test('handles unsupported file types', async (t) => {
  const service = new FileConversionService(mockContext);
  const result = await service.convertFile(t.context.jsonFile);
  t.false(result.converted);
});

// Test file extension detection
test('correctly detects file extensions', (t) => {
  const service = new FileConversionService(mockContext);
  t.true(service.needsConversion('test.docx'));
  t.true(service.needsConversion('test.xlsx'));
  t.false(service.needsConversion('test.txt'));
  t.false(service.needsConversion('test.json'));
});