@aj-archipelago/cortex 1.3.50 → 1.3.52

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. package/config.js +1 -1
  2. package/helper-apps/cortex-browser/Dockerfile +19 -31
  3. package/helper-apps/cortex-browser/function_app.py +708 -181
  4. package/helper-apps/cortex-browser/requirements.txt +4 -4
  5. package/helper-apps/cortex-file-handler/{.env.test.azure → .env.test.azure.sample} +2 -1
  6. package/helper-apps/cortex-file-handler/{.env.test.gcs → .env.test.gcs.sample} +2 -1
  7. package/helper-apps/cortex-file-handler/{.env.test → .env.test.sample} +2 -1
  8. package/helper-apps/cortex-file-handler/Dockerfile +1 -1
  9. package/helper-apps/cortex-file-handler/INTERFACE.md +178 -0
  10. package/helper-apps/cortex-file-handler/function.json +2 -6
  11. package/helper-apps/cortex-file-handler/package-lock.json +6065 -5964
  12. package/helper-apps/cortex-file-handler/package.json +11 -6
  13. package/helper-apps/cortex-file-handler/scripts/setup-azure-container.js +12 -9
  14. package/helper-apps/cortex-file-handler/scripts/setup-test-containers.js +21 -18
  15. package/helper-apps/cortex-file-handler/scripts/test-azure.sh +4 -1
  16. package/helper-apps/cortex-file-handler/scripts/test-gcs.sh +1 -1
  17. package/helper-apps/cortex-file-handler/src/blobHandler.js +1056 -0
  18. package/helper-apps/cortex-file-handler/{constants.js → src/constants.js} +64 -48
  19. package/helper-apps/cortex-file-handler/src/docHelper.js +37 -0
  20. package/helper-apps/cortex-file-handler/{fileChunker.js → src/fileChunker.js} +97 -65
  21. package/helper-apps/cortex-file-handler/{helper.js → src/helper.js} +34 -25
  22. package/helper-apps/cortex-file-handler/src/index.js +608 -0
  23. package/helper-apps/cortex-file-handler/src/localFileHandler.js +107 -0
  24. package/helper-apps/cortex-file-handler/{redis.js → src/redis.js} +23 -17
  25. package/helper-apps/cortex-file-handler/src/services/ConversionService.js +309 -0
  26. package/helper-apps/cortex-file-handler/src/services/FileConversionService.js +57 -0
  27. package/helper-apps/cortex-file-handler/src/services/storage/AzureStorageProvider.js +177 -0
  28. package/helper-apps/cortex-file-handler/src/services/storage/GCSStorageProvider.js +258 -0
  29. package/helper-apps/cortex-file-handler/src/services/storage/LocalStorageProvider.js +182 -0
  30. package/helper-apps/cortex-file-handler/src/services/storage/StorageFactory.js +86 -0
  31. package/helper-apps/cortex-file-handler/src/services/storage/StorageProvider.js +53 -0
  32. package/helper-apps/cortex-file-handler/src/services/storage/StorageService.js +259 -0
  33. package/helper-apps/cortex-file-handler/src/start.js +88 -0
  34. package/helper-apps/cortex-file-handler/src/utils/filenameUtils.js +28 -0
  35. package/helper-apps/cortex-file-handler/tests/FileConversionService.test.js +144 -0
  36. package/helper-apps/cortex-file-handler/tests/blobHandler.test.js +90 -66
  37. package/helper-apps/cortex-file-handler/tests/conversionResilience.test.js +152 -0
  38. package/helper-apps/cortex-file-handler/tests/fileChunker.test.js +105 -108
  39. package/helper-apps/cortex-file-handler/tests/fileUpload.test.js +462 -0
  40. package/helper-apps/cortex-file-handler/tests/files/DOCX_TestPage.docx +0 -0
  41. package/helper-apps/cortex-file-handler/tests/files/tests-example.xls +0 -0
  42. package/helper-apps/cortex-file-handler/tests/getOperations.test.js +307 -0
  43. package/helper-apps/cortex-file-handler/tests/postOperations.test.js +291 -0
  44. package/helper-apps/cortex-file-handler/tests/start.test.js +984 -647
  45. package/helper-apps/cortex-file-handler/tests/storage/AzureStorageProvider.test.js +120 -0
  46. package/helper-apps/cortex-file-handler/tests/storage/GCSStorageProvider.test.js +193 -0
  47. package/helper-apps/cortex-file-handler/tests/storage/LocalStorageProvider.test.js +148 -0
  48. package/helper-apps/cortex-file-handler/tests/storage/StorageFactory.test.js +100 -0
  49. package/helper-apps/cortex-file-handler/tests/storage/StorageService.test.js +113 -0
  50. package/helper-apps/cortex-file-handler/tests/testUtils.helper.js +85 -0
  51. package/helper-apps/cortex-markitdown/.funcignore +1 -0
  52. package/helper-apps/cortex-markitdown/MarkitdownConverterFunction/__init__.py +64 -0
  53. package/helper-apps/cortex-markitdown/MarkitdownConverterFunction/function.json +21 -0
  54. package/helper-apps/cortex-markitdown/README.md +94 -0
  55. package/helper-apps/cortex-markitdown/host.json +15 -0
  56. package/helper-apps/cortex-markitdown/requirements.txt +2 -0
  57. package/lib/entityConstants.js +1 -1
  58. package/lib/requestExecutor.js +44 -36
  59. package/package.json +1 -1
  60. package/pathways/system/entity/tools/sys_tool_readfile.js +24 -2
  61. package/server/plugins/openAiWhisperPlugin.js +59 -87
  62. package/helper-apps/cortex-file-handler/blobHandler.js +0 -567
  63. package/helper-apps/cortex-file-handler/docHelper.js +0 -144
  64. package/helper-apps/cortex-file-handler/index.js +0 -440
  65. package/helper-apps/cortex-file-handler/localFileHandler.js +0 -108
  66. package/helper-apps/cortex-file-handler/start.js +0 -63
  67. package/helper-apps/cortex-file-handler/tests/docHelper.test.js +0 -148
@@ -0,0 +1,53 @@
1
/**
 * Build the error every unimplemented provider method rejects with.
 * @returns {Error}
 */
function createNotImplementedError() {
  return new Error('Method not implemented');
}

/**
 * Abstract contract shared by all storage back-ends.
 *
 * Every method here rejects with "Method not implemented"; concrete
 * providers are expected to override the ones they support.
 */
export class StorageProvider {
  /**
   * Store a local file in the back-end.
   * @param {Object} context - The context object
   * @param {string} filePath - Local path of the file to upload
   * @param {string} requestId - Identifier of the request the file belongs to
   * @param {string} [hash] - Optional hash of the file
   * @returns {Promise<{url: string, blobName: string}>} URL and blob name of the stored file
   */
  async uploadFile(context, filePath, requestId, hash = null) {
    throw createNotImplementedError();
  }

  /**
   * Remove every file stored for a request.
   * @param {string} requestId - The request whose files should be removed
   * @returns {Promise<string[]>} URLs of the files that were deleted
   */
  async deleteFiles(requestId) {
    throw createNotImplementedError();
  }

  /**
   * Report whether a stored file is present.
   * @param {string} url - URL of the file to look up
   * @returns {Promise<boolean>} `true` when the file exists
   */
  async fileExists(url) {
    throw createNotImplementedError();
  }

  /**
   * Fetch a stored file to the local file system.
   * @param {string} url - URL of the file to fetch
   * @param {string} destinationPath - Local path to write the file to
   * @returns {Promise<void>}
   */
  async downloadFile(url, destinationPath) {
    throw createNotImplementedError();
  }

  /**
   * Remove the files behind the given URLs.
   * @param {string[]} urls - URLs of the files to remove
   * @returns {Promise<void>}
   */
  async cleanup(urls) {
    throw createNotImplementedError();
  }
}
@@ -0,0 +1,259 @@
1
+ import { StorageFactory } from './StorageFactory.js';
2
+ import path from 'path';
3
+ import os from 'os';
4
+ import fs from 'fs';
5
+
6
/**
 * Run `task` with a freshly created, uniquely named scratch directory under
 * the OS temp dir and remove that directory (and its contents) once `task`
 * settles, whether it resolved or rejected.
 * @param {(scratchDir: string) => Promise<*>} task
 * @returns {Promise<*>} Whatever `task` resolves to
 */
async function withScratchDir(task) {
  const scratchDir = await fs.promises.mkdtemp(path.join(os.tmpdir(), 'cortex-storage-'));
  try {
    return await task(scratchDir);
  } finally {
    // Best-effort cleanup: a failure to remove scratch space must not mask
    // the result (or the error) of the actual storage operation.
    await fs.promises.rm(scratchDir, { recursive: true, force: true }).catch(() => {});
  }
}

/**
 * Reduce `name` to a single path segment so it cannot escape the scratch dir.
 * @param {*} name - Candidate file name, path or URL
 * @param {string} fallback - Used when no usable segment remains
 * @returns {string}
 */
function toScratchName(name, fallback) {
  const base = path.basename(String(name));
  return base === '' || base === '.' || base === '..' ? fallback : base;
}

/**
 * Write `buffer` to a scratch file and upload that file through `provider`.
 * The scratch file keeps the `<timestamp>_<filename>` basename.
 * @param {Object} provider - Provider exposing `uploadFile(context, filePath, requestId)`
 * @param {Buffer} buffer - Content to upload
 * @param {string} filename - Name passed to the provider as its third argument
 * @returns {Promise<{url: string}>}
 */
async function uploadBufferVia(provider, buffer, filename) {
  return withScratchDir(async (scratchDir) => {
    const tempFile = path.join(scratchDir, `${Date.now()}_${toScratchName(filename, 'file')}`);
    await fs.promises.writeFile(tempFile, buffer);
    const { url } = await provider.uploadFile({}, tempFile, filename);
    return { url };
  });
}

/**
 * Facade over a primary storage provider and an optional backup provider.
 * URLs starting with `gs://` are routed to the backup provider; everything
 * else goes to the primary provider.
 */
export class StorageService {
  /**
   * @param {Object} [factory] - Object exposing `getPrimaryProvider()` and
   *   `getGCSProvider()`; a new `StorageFactory` is created when omitted.
   */
  constructor(factory) {
    this.factory = factory || new StorageFactory();
    this.primaryProvider = this.factory.getPrimaryProvider();
    this.backupProvider = this.factory.getGCSProvider();
  }

  /** @returns {Object} The primary provider obtained from the factory */
  getPrimaryProvider() {
    return this.primaryProvider;
  }

  /** @returns {Object} The backup provider obtained from the factory (may be falsy) */
  getBackupProvider() {
    return this.backupProvider;
  }

  /**
   * Upload a file. Supported call shapes:
   *   1) uploadFile(buffer, filename) – uploads to the primary provider only
   *      and resolves to `{ url }`
   *   2) uploadFile(context, filePath, requestId, hash?) – legacy internal
   *      use; delegates to {@link StorageService#uploadFileWithProviders}
   * @param {...*} args
   * @returns {Promise<Object>}
   */
  async uploadFile(...args) {
    const isBufferShape =
      args.length === 2 && Buffer.isBuffer(args[0]) && typeof args[1] === 'string';

    if (isBufferShape) {
      const [buffer, filename] = args;
      return uploadBufferVia(this.primaryProvider, buffer, filename);
    }

    // Fallback to legacy (context, filePath, requestId, hash?)
    const [context, filePath, requestId, hash] = args;
    return this.uploadFileWithProviders(context, filePath, requestId, hash);
  }

  /**
   * Upload a buffer or an existing local file to the backup provider only.
   * @param {Buffer|string} fileOrBuffer - Content, or a path handed to the provider as-is
   * @param {string} filename - Passed to the provider as its third argument
   * @returns {Promise<{url: string}>}
   * @throws {Error} When no backup provider is configured
   */
  async uploadFileToBackup(fileOrBuffer, filename) {
    if (!this.backupProvider) {
      throw new Error('Backup provider not configured');
    }

    if (Buffer.isBuffer(fileOrBuffer)) {
      return uploadBufferVia(this.backupProvider, fileOrBuffer, filename);
    }

    const result = await this.backupProvider.uploadFile({}, fileOrBuffer, filename);
    return { url: result.url };
  }

  /**
   * Download a file, choosing the provider from the URL scheme.
   * @param {string} url - `gs://…` selects the backup provider, anything else the primary
   * @param {string|null} [destinationPath] - When given, the file is written
   *   there and nothing is returned
   * @returns {Promise<Buffer|undefined>} The file content when no destination was given
   * @throws {Error} When a `gs://` URL is requested without a backup provider
   */
  async downloadFile(url, destinationPath = null) {
    const useBackup = url.startsWith('gs://');

    if (useBackup && !this.backupProvider) {
      throw new Error('Backup provider not configured');
    }

    const provider = useBackup ? this.backupProvider : this.primaryProvider;

    // If caller supplied a destination path, write to disk and return void
    if (destinationPath) {
      await provider.downloadFile(url, destinationPath);
      return;
    }

    // Otherwise download into private scratch space and return a Buffer.
    // The scratch name is never exposed, so query string / fragment are
    // dropped to keep it a plain file name.
    return withScratchDir(async (scratchDir) => {
      const [urlPath] = url.split(/[?#]/);
      const tempFile = path.join(scratchDir, toScratchName(urlPath, 'download'));
      await provider.downloadFile(url, tempFile);
      return fs.promises.readFile(tempFile);
    });
  }

  /**
   * Delete a single file from the primary provider.
   * @param {string} url
   * @returns {Promise<*>} Whatever the provider returns
   */
  async deleteFile(url) {
    if (typeof this.primaryProvider.deleteFile === 'function') {
      return await this.primaryProvider.deleteFile(url);
    }
    // Fallback for providers that only have deleteFiles.
    // NOTE(review): deleteFiles is called with a request ID everywhere else
    // in this class; confirm providers accept an array of URLs here.
    return await this.primaryProvider.deleteFiles([url]);
  }

  /**
   * Delete a single file from the backup provider.
   * @param {string} url
   * @returns {Promise<*>} Whatever the provider returns
   * @throws {Error} When no backup provider is configured
   */
  async deleteFileFromBackup(url) {
    if (!this.backupProvider) {
      throw new Error('Backup provider not configured');
    }
    if (typeof this.backupProvider.deleteFile === 'function') {
      return await this.backupProvider.deleteFile(url);
    }
    // Fallback for providers that only have deleteFiles (see note in deleteFile)
    return await this.backupProvider.deleteFiles([url]);
  }

  /**
   * Upload a local file to the primary provider and, when one is configured,
   * to the backup provider as well (primary first, then backup).
   * @param {Object} context
   * @param {string} filePath
   * @param {string} requestId
   * @param {string|null} [hash]
   * @returns {Promise<Object>} The primary result plus `gcs`, the backup URL
   *   (undefined when there is no backup provider)
   */
  async uploadFileWithProviders(context, filePath, requestId, hash = null) {
    const primaryResult = await this.primaryProvider.uploadFile(context, filePath, requestId, hash);

    let gcsResult = null;
    if (this.backupProvider) {
      gcsResult = await this.backupProvider.uploadFile(context, filePath, requestId, hash);
    }

    return { ...primaryResult, gcs: gcsResult?.url };
  }

  /**
   * Delete all files of a request from both providers. Provider errors are
   * logged and do not abort the other provider's deletion.
   * @param {string} requestId
   * @returns {Promise<Array>} Combined results reported by the providers
   * @throws {Error} When `requestId` is missing
   */
  async deleteFiles(requestId) {
    if (!requestId) {
      throw new Error('Missing requestId parameter');
    }

    const results = [];

    // Delete from primary storage
    try {
      const primaryResult = await this.primaryProvider.deleteFiles(requestId);
      if (primaryResult && primaryResult.length > 0) {
        results.push(...primaryResult);
      }
    } catch (error) {
      console.error(`Error deleting files from primary storage for ${requestId}:`, error);
    }

    // If GCS is configured, delete from there too
    if (this.backupProvider) {
      try {
        const gcsResult = await this.backupProvider.deleteFiles(requestId);
        if (gcsResult && gcsResult.length > 0) {
          results.push(...gcsResult);
        }
      } catch (error) {
        console.error(`Error deleting files from GCS for ${requestId}:`, error);
      }
    }

    return results;
  }

  /**
   * Check whether a file exists, routing `gs://` URLs to the backup provider.
   * Provider errors are logged and reported as "does not exist".
   * @param {string} url
   * @returns {Promise<boolean>}
   */
  async fileExists(url) {
    if (!url) {
      return false;
    }

    try {
      if (url.startsWith('gs://')) {
        return this.backupProvider ? await this.backupProvider.fileExists(url) : false;
      }
      return await this.primaryProvider.fileExists(url);
    } catch (error) {
      console.error(`Error checking file existence for ${url}:`, error);
      return false;
    }
  }

  /**
   * Clean up files by URL, sending `gs://` URLs to the backup provider (when
   * configured) and all others to the primary provider.
   * @param {string[]} urls
   * @returns {Promise<Array|undefined>} Combined provider results; undefined
   *   when `urls` is empty or missing
   */
  async cleanup(urls) {
    if (!urls || !urls.length) return;

    const results = [];

    // Split URLs by type
    const primaryUrls = [];
    const gcsUrls = [];

    for (const url of urls) {
      if (url.startsWith('gs://')) {
        gcsUrls.push(url);
      } else {
        primaryUrls.push(url);
      }
    }

    // Providers may resolve to nothing from cleanup(); only collect real
    // arrays so that a void result does not crash the spread.
    if (primaryUrls.length > 0) {
      const primaryResult = await this.primaryProvider.cleanup(primaryUrls);
      if (Array.isArray(primaryResult)) {
        results.push(...primaryResult);
      }
    }

    // Clean up GCS if configured
    if (gcsUrls.length > 0 && this.backupProvider) {
      const gcsResult = await this.backupProvider.cleanup(gcsUrls);
      if (Array.isArray(gcsResult)) {
        results.push(...gcsResult);
      }
    }

    return results;
  }

  /**
   * Make sure a file known to primary storage also exists in the backup
   * provider, copying it over when it does not.
   * @param {Object} context - Passed through to the backup provider's upload
   * @param {{url?: string, gcs?: string, blobName?: string}} existingFile
   * @returns {Promise<Object>} `existingFile` unchanged when nothing had to be
   *   done, otherwise a copy with `gcs` set to the new backup URL
   */
  async ensureGCSUpload(context, existingFile) {
    if (!this.backupProvider || !existingFile.url || !this.backupProvider.isConfigured()) {
      return existingFile;
    }

    // If we already have a GCS URL, check if it exists
    if (existingFile.gcs) {
      const exists = await this.backupProvider.fileExists(existingFile.gcs);
      if (exists) {
        return existingFile;
      }
    }

    // Use the blob's directory as request ID. path.dirname() yields '.' for
    // a bare file name, so treat that (and a missing blob name) as "no
    // directory" and fall back to 'restore'.
    const blobDir =
      typeof existingFile.blobName === 'string' ? path.dirname(existingFile.blobName) : '';
    const requestId = blobDir && blobDir !== '.' ? blobDir : 'restore';

    return withScratchDir(async (scratchDir) => {
      // Keep the URL's basename for the scratch file: the backup provider
      // receives this path and may derive the stored name from it.
      const tempFile = path.join(scratchDir, toScratchName(existingFile.url, 'restore'));

      // Download from primary storage
      await this.primaryProvider.downloadFile(existingFile.url, tempFile);

      // Upload to GCS
      const gcsResult = await this.backupProvider.uploadFile(context, tempFile, requestId);

      return {
        ...existingFile,
        gcs: gcsResult.url,
      };
    });
  }

  /**
   * Download via {@link StorageService#downloadFile}, requiring a backup
   * provider to be configured. Routing is still decided by the URL scheme,
   * so a non-`gs://` URL is fetched from the primary provider.
   * @param {string} url
   * @param {string|null} [destinationPath]
   * @returns {Promise<Buffer|undefined>}
   * @throws {Error} When no backup provider is configured
   */
  async downloadFileFromBackup(url, destinationPath = null) {
    if (!this.backupProvider) {
      throw new Error('Backup provider not configured');
    }
    // Delegate to the unified downloadFile handler
    return await this.downloadFile(url, destinationPath);
  }
}
@@ -0,0 +1,88 @@
1
+ import CortexFileHandler from "./index.js";
2
+ import express from "express";
3
+ import { fileURLToPath } from "url";
4
+ import { dirname, join } from "path";
5
+ import cors from "cors";
6
+ import { readFileSync } from "fs";
7
+
8
+ import { publicIpv4 } from "public-ip";
9
+
10
// Host name exported for use in generated URLs. In normal operation this is
// the machine's public IPv4 address. Under tests (NODE_ENV === "test") it
// stays "localhost" so that generated URLs resolve to the locally-running
// server; an external IP is not reachable from inside the test runner and
// checks such as HEAD requests inside the handler would fail.
let ipAddress = "localhost";
const runningUnderTest = process.env.NODE_ENV === "test";

if (!runningUnderTest) {
  try {
    ipAddress = await publicIpv4();
  } catch (lookupError) {
    // The lookup can fail (e.g. when running offline). Keep "localhost" so
    // the generated URLs are still valid.
    console.warn("Unable to determine public IPv4 address – defaulting to 'localhost'.", lookupError);
  }
}
28
+
29
// Directory that contains this module; both paths below are relative to it.
const moduleDir = dirname(fileURLToPath(import.meta.url));

const app = express();
const port = process.env.PORT || 7071;
const publicFolder = join(moduleDir, "files");

// Read the version from the package.json one level above this module
const packageJsonPath = join(moduleDir, "../package.json");
const packageJson = JSON.parse(readFileSync(packageJsonPath, "utf8"));
const { version } = packageJson;
41
+
42
// Enable CORS for every route
app.use(cors());

// Expose the contents of the public folder under /files
app.use("/files", express.static(publicFolder));

// Health check: always answers 200 with the running package version
const reportHealth = (req, res) => {
  const payload = { status: "healthy", version };
  res.status(200).json(payload);
};
app.get("/health", reportHealth);
53
+
54
/**
 * Express adapter around CortexFileHandler, shared by the primary and the
 * legacy route so both behave identically.
 *
 * Builds a context of `{ req, res, log }`, runs the handler, logs
 * `context.res` and answers with `context.res.status` (default 200) and
 * `context.res.body`. Presumably the handler replaces `context.res` with a
 * plain `{ status, body }` result; confirm against index.js.
 *
 * Errors thrown by the handler are answered with `error.status` (default 500)
 * and `error.message` (default "Internal server error").
 *
 * @param {Object} req - Express request
 * @param {Object} res - Express response
 * @returns {Promise<void>}
 */
async function handleFileRequest(req, res) {
  const context = { req, res, log: console.log };
  try {
    await CortexFileHandler(context, req);
    context.log(context.res);
    res.status(context.res.status || 200).send(context.res.body);
  } catch (error) {
    const status = error.status || 500;
    const message = error.message || "Internal server error";
    res.status(status).send(message);
  }
}

// New primary endpoint
app.all("/api/CortexFileHandler", handleFileRequest);

// Legacy endpoint for compatibility
app.all("/api/MediaFileChunker", handleFileRequest);
81
+
82
// Start accepting connections and announce version and port once listening
const announceStartup = () => {
  const banner = `Cortex File Handler v${version} running on port ${port} (includes legacy MediaFileChunker endpoint)`;
  console.log(banner);
};
app.listen(port, announceStartup);
87
+
88
+ export { port, publicFolder, ipAddress };
@@ -0,0 +1,28 @@
1
+ import path from 'path';
2
+
3
/**
 * Sanitize a filename so that it is safe and consistent across all back-ends
 *   – Decode any existing URI encoding (malformed encoding is left as-is)
 *   – Strip directory components, treating both `/` and `\` as separators
 *     regardless of the platform this runs on
 *   – Collapse each run of whitespace into a single `_`
 *   – Replace characters that are not alphanum, dash, dot, or underscore with `_`
 *   – Never return the special names `.` or `..`; their dots become `_`
 *
 * @param {string} raw The raw filename/path/URL component
 * @returns {string} A sanitized filename suitable for Azure, GCS, local FS, etc.
 *   (empty when the input has no file-name segment)
 */
export function sanitizeFilename(raw = '') {
  let name = raw;
  try {
    name = decodeURIComponent(name);
  } catch {
    // Already decoded / not valid URI encoding – keep the input untouched
  }

  // Normalise Windows separators first so that the POSIX basename strips
  // directories identically on every platform.
  name = path.posix.basename(String(name).replace(/\\/g, '/'));

  // Replace whitespace runs first so they don't become %20 anywhere, then
  // any remaining character outside [A-Za-z0-9_.-]
  name = name.replace(/\s+/g, '_').replace(/[^\w.-]/g, '_');

  // '.' and '..' pass the character filter but are directory references,
  // not file names.
  if (name === '.' || name === '..') {
    return name.replace(/\./g, '_');
  }

  return name;
}
@@ -0,0 +1,144 @@
1
+ import fs from 'fs/promises';
2
+ import { dirname, join } from 'path';
3
+ import { fileURLToPath } from 'url';
4
+ import test from 'ava';
5
+ import axios from 'axios';
6
+ import XLSX from 'xlsx';
7
+ import { FileConversionService } from '../src/services/FileConversionService.js';
8
+
9
+ const __dirname = dirname(fileURLToPath(import.meta.url));
10
+
11
// Stand-in for the handler context; it only provides `log`, wired to console.log
const mockContext = { log: console.log };
15
+
16
// Setup: write the fixture documents into tests/test-docs and publish their
// paths on t.context
test.before(async (t) => {
  const testDir = join(__dirname, 'test-docs');
  await fs.mkdir(testDir, { recursive: true });

  const fixture = (name) => join(testDir, name);
  const paths = {
    textFile: fixture('test.txt'),
    largeTextFile: fixture('large.txt'),
    unicodeFile: fixture('unicode.txt'),
    jsonFile: fixture('test.json'),
    emptyFile: fixture('empty.txt'),
    excelFile: fixture('test.xlsx'),
  };

  // Plain multi-line text
  await fs.writeFile(
    paths.textFile,
    'This is a test document content.\nIt has multiple lines.\nThird line here.',
  );

  // Large text (12 characters repeated 10,000 times, i.e. >100KB)
  await fs.writeFile(paths.largeTextFile, 'Lorem ipsum '.repeat(10000));

  // Text mixing several scripts and an emoji
  await fs.writeFile(
    paths.unicodeFile,
    '这是中文内容\nこれは日本語です\nЭто русский текст\n🌟 emoji test',
  );

  // JSON document
  await fs.writeFile(paths.jsonFile, JSON.stringify({ test: 'content' }));

  // Zero-byte file
  await fs.writeFile(paths.emptyFile, '');

  // Single-sheet workbook with a header row and two data rows
  const rows = [
    ['Header 1', 'Header 2'],
    ['Data 1', 'Data 2'],
    ['Data 3', 'Data 4'],
  ];
  const workbook = XLSX.utils.book_new();
  XLSX.utils.book_append_sheet(workbook, XLSX.utils.aoa_to_sheet(rows), 'Sheet1');
  XLSX.writeFile(workbook, paths.excelFile);

  t.context = { testDir, ...paths };
});
70
+
71
// Cleanup: remove the fixture directory even when tests (or setup) failed.
// If the `before` hook threw before publishing t.context, `testDir` is
// undefined; fall back to the well-known location so the directory is still
// removed and the hook does not fail with a second, unrelated error.
test.after.always(async (t) => {
  const testDir = t.context?.testDir ?? join(__dirname, 'test-docs');
  await fs.rm(testDir, { recursive: true, force: true });
});
75
+
76
// The .xlsx fixture must be converted to a .csv file that contains every row
test('converts Excel to CSV successfully', async (t) => {
  const { excelFile } = t.context;
  const converter = new FileConversionService(mockContext);

  const outcome = await converter.convertFile(excelFile);
  t.true(outcome.converted);
  t.true(outcome.convertedPath.endsWith('.csv'));

  // Each spreadsheet row is expected as one comma-separated line
  const csv = await fs.readFile(outcome.convertedPath, 'utf-8');
  const expectedRows = ['Header 1,Header 2', 'Data 1,Data 2', 'Data 3,Data 4'];
  for (const row of expectedRows) {
    t.true(csv.includes(row));
  }
});
90
+
91
// Test document conversion with MarkItDown API.
// axios.get is replaced by a stub for the duration of the test; the original
// is restored in `finally` so that a failing assertion cannot leak the stub
// into other tests sharing the axios module.
test('converts document to markdown via MarkItDown API', async (t) => {
  const originalAxiosGet = axios.get;
  axios.get = async (url) => {
    if (url.includes('test.docx')) {
      return {
        data: {
          markdown: '# Test Document\n\nThis is a test document converted to markdown.'
        }
      };
    }
    throw new Error('Invalid URL');
  };

  try {
    const service = new FileConversionService(mockContext);
    const result = await service.convertFile('test.docx', 'https://example.com/test.docx');

    t.true(result.converted);
    t.true(result.convertedPath.endsWith('.md'));

    // Read the converted file and verify content
    const content = await fs.readFile(result.convertedPath, 'utf-8');
    t.true(content.includes('# Test Document'));
    t.true(content.includes('This is a test document converted to markdown'));
  } finally {
    // Restore original axios.get
    axios.get = originalAxiosGet;
  }
});
120
+
121
// Converting a document without passing its original URL must reject with a
// descriptive error
test('handles missing original URL for document conversion', async (t) => {
  const converter = new FileConversionService(mockContext);
  const expectedError = { message: 'Original URL is required for document conversion' };
  const convertWithoutUrl = async () => converter.convertFile('test.docx');

  await t.throwsAsync(convertWithoutUrl, expectedError);
});
129
+
130
// A .json file is not convertible: the call resolves and reports converted === false
test('handles unsupported file types', async (t) => {
  const { jsonFile } = t.context;
  const converter = new FileConversionService(mockContext);
  const outcome = await converter.convertFile(jsonFile);
  t.false(outcome.converted);
});
136
+
137
// needsConversion() must be true for .docx/.xlsx and false for .txt/.json
test('correctly detects file extensions', (t) => {
  const converter = new FileConversionService(mockContext);

  for (const convertible of ['test.docx', 'test.xlsx']) {
    t.true(converter.needsConversion(convertible));
  }
  for (const passthrough of ['test.txt', 'test.json']) {
    t.false(converter.needsConversion(passthrough));
  }
});