@aj-archipelago/cortex 1.3.49 → 1.3.51
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/config.js +1 -1
- package/helper-apps/cortex-browser/Dockerfile +19 -31
- package/helper-apps/cortex-browser/function_app.py +708 -181
- package/helper-apps/cortex-browser/requirements.txt +4 -4
- package/helper-apps/cortex-file-handler/blobHandler.js +850 -429
- package/helper-apps/cortex-file-handler/constants.js +64 -48
- package/helper-apps/cortex-file-handler/docHelper.js +7 -114
- package/helper-apps/cortex-file-handler/fileChunker.js +96 -51
- package/helper-apps/cortex-file-handler/function.json +2 -6
- package/helper-apps/cortex-file-handler/helper.js +34 -25
- package/helper-apps/cortex-file-handler/index.js +324 -136
- package/helper-apps/cortex-file-handler/localFileHandler.js +56 -57
- package/helper-apps/cortex-file-handler/package-lock.json +6065 -5964
- package/helper-apps/cortex-file-handler/package.json +8 -4
- package/helper-apps/cortex-file-handler/redis.js +23 -17
- package/helper-apps/cortex-file-handler/scripts/setup-azure-container.js +12 -9
- package/helper-apps/cortex-file-handler/scripts/setup-test-containers.js +21 -18
- package/helper-apps/cortex-file-handler/scripts/test-azure.sh +1 -1
- package/helper-apps/cortex-file-handler/scripts/test-gcs.sh +1 -1
- package/helper-apps/cortex-file-handler/services/ConversionService.js +288 -0
- package/helper-apps/cortex-file-handler/services/FileConversionService.js +53 -0
- package/helper-apps/cortex-file-handler/start.js +63 -38
- package/helper-apps/cortex-file-handler/tests/FileConversionService.test.js +144 -0
- package/helper-apps/cortex-file-handler/tests/blobHandler.test.js +88 -64
- package/helper-apps/cortex-file-handler/tests/fileChunker.test.js +114 -91
- package/helper-apps/cortex-file-handler/tests/fileUpload.test.js +351 -0
- package/helper-apps/cortex-file-handler/tests/files/DOCX_TestPage.docx +0 -0
- package/helper-apps/cortex-file-handler/tests/files/tests-example.xls +0 -0
- package/helper-apps/cortex-file-handler/tests/start.test.js +943 -642
- package/helper-apps/cortex-file-handler/tests/testUtils.helper.js +31 -0
- package/helper-apps/cortex-markitdown/.funcignore +1 -0
- package/helper-apps/cortex-markitdown/MarkitdownConverterFunction/__init__.py +64 -0
- package/helper-apps/cortex-markitdown/MarkitdownConverterFunction/function.json +21 -0
- package/helper-apps/cortex-markitdown/README.md +94 -0
- package/helper-apps/cortex-markitdown/host.json +15 -0
- package/helper-apps/cortex-markitdown/requirements.txt +2 -0
- package/lib/requestExecutor.js +44 -36
- package/package.json +1 -1
- package/pathways/system/entity/tools/sys_tool_cognitive_search.js +1 -1
- package/pathways/system/entity/tools/sys_tool_readfile.js +24 -2
- package/server/plugins/openAiWhisperPlugin.js +59 -87
- package/helper-apps/cortex-file-handler/tests/docHelper.test.js +0 -148
|
@@ -1,63 +1,88 @@
|
|
|
1
1
|
import CortexFileHandler from "./index.js";
|
|
2
2
|
import express from "express";
|
|
3
|
-
import { fileURLToPath } from
|
|
4
|
-
import { dirname, join } from
|
|
5
|
-
import cors from
|
|
6
|
-
import { readFileSync } from
|
|
3
|
+
import { fileURLToPath } from "url";
|
|
4
|
+
import { dirname, join } from "path";
|
|
5
|
+
import cors from "cors";
|
|
6
|
+
import { readFileSync } from "fs";
|
|
7
7
|
|
|
8
|
-
import { publicIpv4 } from
|
|
9
|
-
|
|
8
|
+
import { publicIpv4 } from "public-ip";
|
|
9
|
+
|
|
10
|
+
// When running under tests we want all generated URLs to resolve to the
|
|
11
|
+
// locally-running server, otherwise checks like HEAD requests inside the
|
|
12
|
+
// handler will fail (because the external IP is not reachable from inside
|
|
13
|
+
// the test runner). Use the machine's public IP in normal operation, but
|
|
14
|
+
// fall back to "localhost" when the environment variable NODE_ENV indicates
|
|
15
|
+
// a test run.
|
|
16
|
+
|
|
17
|
+
let ipAddress = "localhost";
|
|
18
|
+
if (process.env.NODE_ENV !== "test") {
|
|
19
|
+
try {
|
|
20
|
+
ipAddress = await publicIpv4();
|
|
21
|
+
} catch (err) {
|
|
22
|
+
// In rare cases querying the public IP can fail (e.g. no network when
|
|
23
|
+
// running offline). Keep the default of "localhost" in that case so we
|
|
24
|
+
// still generate valid URLs.
|
|
25
|
+
console.warn("Unable to determine public IPv4 address – defaulting to 'localhost'.", err);
|
|
26
|
+
}
|
|
27
|
+
}
|
|
10
28
|
|
|
11
29
|
const app = express();
|
|
12
30
|
const port = process.env.PORT || 7071;
|
|
13
|
-
const publicFolder = join(dirname(fileURLToPath(import.meta.url)),
|
|
31
|
+
const publicFolder = join(dirname(fileURLToPath(import.meta.url)), "files");
|
|
14
32
|
|
|
15
33
|
// Get version from package.json
|
|
16
|
-
const packageJson = JSON.parse(
|
|
34
|
+
const packageJson = JSON.parse(
|
|
35
|
+
readFileSync(
|
|
36
|
+
join(dirname(fileURLToPath(import.meta.url)), "package.json"),
|
|
37
|
+
"utf8",
|
|
38
|
+
),
|
|
39
|
+
);
|
|
17
40
|
const version = packageJson.version;
|
|
18
41
|
|
|
19
42
|
app.use(cors());
|
|
20
43
|
// Serve static files from the public folder
|
|
21
|
-
app.use(
|
|
44
|
+
app.use("/files", express.static(publicFolder));
|
|
22
45
|
|
|
23
46
|
// Health check endpoint
|
|
24
|
-
app.get(
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
47
|
+
app.get("/health", (req, res) => {
|
|
48
|
+
res.status(200).json({
|
|
49
|
+
status: "healthy",
|
|
50
|
+
version: version,
|
|
51
|
+
});
|
|
29
52
|
});
|
|
30
53
|
|
|
31
54
|
// New primary endpoint
|
|
32
|
-
app.all(
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
55
|
+
app.all("/api/CortexFileHandler", async (req, res) => {
|
|
56
|
+
const context = { req, res, log: console.log };
|
|
57
|
+
try {
|
|
58
|
+
await CortexFileHandler(context, req);
|
|
59
|
+
context.log(context.res);
|
|
60
|
+
res.status(context.res.status || 200).send(context.res.body);
|
|
61
|
+
} catch (error) {
|
|
62
|
+
const status = error.status || 500;
|
|
63
|
+
const message = error.message || "Internal server error";
|
|
64
|
+
res.status(status).send(message);
|
|
65
|
+
}
|
|
43
66
|
});
|
|
44
67
|
|
|
45
68
|
// Legacy endpoint for compatibility
|
|
46
|
-
app.all(
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
69
|
+
app.all("/api/MediaFileChunker", async (req, res) => {
|
|
70
|
+
const context = { req, res, log: console.log };
|
|
71
|
+
try {
|
|
72
|
+
await CortexFileHandler(context, req);
|
|
73
|
+
context.log(context.res);
|
|
74
|
+
res.status(context.res.status || 200).send(context.res.body);
|
|
75
|
+
} catch (error) {
|
|
76
|
+
const status = error.status || 500;
|
|
77
|
+
const message = error.message || "Internal server error";
|
|
78
|
+
res.status(status).send(message);
|
|
79
|
+
}
|
|
57
80
|
});
|
|
58
81
|
|
|
59
82
|
app.listen(port, () => {
|
|
60
|
-
|
|
83
|
+
console.log(
|
|
84
|
+
`Cortex File Handler v${version} running on port ${port} (includes legacy MediaFileChunker endpoint)`,
|
|
85
|
+
);
|
|
61
86
|
});
|
|
62
87
|
|
|
63
|
-
export { port, publicFolder, ipAddress };
|
|
88
|
+
export { port, publicFolder, ipAddress };
|
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
import fs from 'fs/promises';
|
|
2
|
+
import { dirname, join } from 'path';
|
|
3
|
+
import { fileURLToPath } from 'url';
|
|
4
|
+
import test from 'ava';
|
|
5
|
+
import axios from 'axios';
|
|
6
|
+
import XLSX from 'xlsx';
|
|
7
|
+
import { FileConversionService } from '../services/FileConversionService.js';
|
|
8
|
+
|
|
9
|
+
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
10
|
+
|
|
11
|
+
// Mock context
|
|
12
|
+
const mockContext = {
|
|
13
|
+
log: console.log
|
|
14
|
+
};
|
|
15
|
+
|
|
16
|
+
// Setup: Create test documents
|
|
17
|
+
test.before(async (t) => {
|
|
18
|
+
const testDir = join(__dirname, 'test-docs');
|
|
19
|
+
await fs.mkdir(testDir, { recursive: true });
|
|
20
|
+
|
|
21
|
+
// Create various test files
|
|
22
|
+
const textFile = join(testDir, 'test.txt');
|
|
23
|
+
const largeTextFile = join(testDir, 'large.txt');
|
|
24
|
+
const unicodeFile = join(testDir, 'unicode.txt');
|
|
25
|
+
const jsonFile = join(testDir, 'test.json');
|
|
26
|
+
const emptyFile = join(testDir, 'empty.txt');
|
|
27
|
+
const excelFile = join(testDir, 'test.xlsx');
|
|
28
|
+
|
|
29
|
+
// Regular text content
|
|
30
|
+
await fs.writeFile(
|
|
31
|
+
textFile,
|
|
32
|
+
'This is a test document content.\nIt has multiple lines.\nThird line here.',
|
|
33
|
+
);
|
|
34
|
+
|
|
35
|
+
// Large text content (>100KB)
|
|
36
|
+
const largeContent = 'Lorem ipsum '.repeat(10000);
|
|
37
|
+
await fs.writeFile(largeTextFile, largeContent);
|
|
38
|
+
|
|
39
|
+
// Unicode content
|
|
40
|
+
const unicodeContent =
|
|
41
|
+
'这是中文内容\nこれは日本語です\nЭто русский текст\n🌟 emoji test';
|
|
42
|
+
await fs.writeFile(unicodeFile, unicodeContent);
|
|
43
|
+
|
|
44
|
+
// JSON content
|
|
45
|
+
await fs.writeFile(jsonFile, JSON.stringify({ test: 'content' }));
|
|
46
|
+
|
|
47
|
+
// Empty file
|
|
48
|
+
await fs.writeFile(emptyFile, '');
|
|
49
|
+
|
|
50
|
+
// Create a test Excel file
|
|
51
|
+
const workbook = XLSX.utils.book_new();
|
|
52
|
+
const ws1 = XLSX.utils.aoa_to_sheet([
|
|
53
|
+
['Header 1', 'Header 2'],
|
|
54
|
+
['Data 1', 'Data 2'],
|
|
55
|
+
['Data 3', 'Data 4']
|
|
56
|
+
]);
|
|
57
|
+
XLSX.utils.book_append_sheet(workbook, ws1, 'Sheet1');
|
|
58
|
+
XLSX.writeFile(workbook, excelFile);
|
|
59
|
+
|
|
60
|
+
t.context = {
|
|
61
|
+
testDir,
|
|
62
|
+
textFile,
|
|
63
|
+
largeTextFile,
|
|
64
|
+
unicodeFile,
|
|
65
|
+
jsonFile,
|
|
66
|
+
emptyFile,
|
|
67
|
+
excelFile
|
|
68
|
+
};
|
|
69
|
+
});
|
|
70
|
+
|
|
71
|
+
// Cleanup
|
|
72
|
+
test.after.always(async (t) => {
|
|
73
|
+
await fs.rm(t.context.testDir, { recursive: true, force: true });
|
|
74
|
+
});
|
|
75
|
+
|
|
76
|
+
// Test Excel to CSV conversion
|
|
77
|
+
test('converts Excel to CSV successfully', async (t) => {
|
|
78
|
+
const service = new FileConversionService(mockContext);
|
|
79
|
+
const result = await service.convertFile(t.context.excelFile);
|
|
80
|
+
|
|
81
|
+
t.true(result.converted);
|
|
82
|
+
t.true(result.convertedPath.endsWith('.csv'));
|
|
83
|
+
|
|
84
|
+
// Read the converted file and verify content
|
|
85
|
+
const content = await fs.readFile(result.convertedPath, 'utf-8');
|
|
86
|
+
t.true(content.includes('Header 1,Header 2'));
|
|
87
|
+
t.true(content.includes('Data 1,Data 2'));
|
|
88
|
+
t.true(content.includes('Data 3,Data 4'));
|
|
89
|
+
});
|
|
90
|
+
|
|
91
|
+
// Test document conversion with MarkItDown API
|
|
92
|
+
test('converts document to markdown via MarkItDown API', async (t) => {
|
|
93
|
+
// Mock axios.get for MarkItDown API
|
|
94
|
+
const originalAxiosGet = axios.get;
|
|
95
|
+
axios.get = async (url) => {
|
|
96
|
+
if (url.includes('test.docx')) {
|
|
97
|
+
return {
|
|
98
|
+
data: {
|
|
99
|
+
markdown: '# Test Document\n\nThis is a test document converted to markdown.'
|
|
100
|
+
}
|
|
101
|
+
};
|
|
102
|
+
}
|
|
103
|
+
throw new Error('Invalid URL');
|
|
104
|
+
};
|
|
105
|
+
|
|
106
|
+
const service = new FileConversionService(mockContext);
|
|
107
|
+
const result = await service.convertFile('test.docx', 'https://example.com/test.docx');
|
|
108
|
+
|
|
109
|
+
t.true(result.converted);
|
|
110
|
+
t.true(result.convertedPath.endsWith('.md'));
|
|
111
|
+
|
|
112
|
+
// Read the converted file and verify content
|
|
113
|
+
const content = await fs.readFile(result.convertedPath, 'utf-8');
|
|
114
|
+
t.true(content.includes('# Test Document'));
|
|
115
|
+
t.true(content.includes('This is a test document converted to markdown'));
|
|
116
|
+
|
|
117
|
+
// Restore original axios.get
|
|
118
|
+
axios.get = originalAxiosGet;
|
|
119
|
+
});
|
|
120
|
+
|
|
121
|
+
// Test error handling for missing original URL
|
|
122
|
+
test('handles missing original URL for document conversion', async (t) => {
|
|
123
|
+
const service = new FileConversionService(mockContext);
|
|
124
|
+
await t.throwsAsync(
|
|
125
|
+
async () => service.convertFile('test.docx'),
|
|
126
|
+
{ message: 'Original URL is required for document conversion' }
|
|
127
|
+
);
|
|
128
|
+
});
|
|
129
|
+
|
|
130
|
+
// Test error handling for unsupported file types
|
|
131
|
+
test('handles unsupported file types', async (t) => {
|
|
132
|
+
const service = new FileConversionService(mockContext);
|
|
133
|
+
const result = await service.convertFile(t.context.jsonFile);
|
|
134
|
+
t.false(result.converted);
|
|
135
|
+
});
|
|
136
|
+
|
|
137
|
+
// Test file extension detection
|
|
138
|
+
test('correctly detects file extensions', (t) => {
|
|
139
|
+
const service = new FileConversionService(mockContext);
|
|
140
|
+
t.true(service.needsConversion('test.docx'));
|
|
141
|
+
t.true(service.needsConversion('test.xlsx'));
|
|
142
|
+
t.false(service.needsConversion('test.txt'));
|
|
143
|
+
t.false(service.needsConversion('test.json'));
|
|
144
|
+
});
|
|
@@ -1,19 +1,30 @@
|
|
|
1
|
-
import test from 'ava';
|
|
2
1
|
import fs from 'fs';
|
|
3
2
|
import path from 'path';
|
|
4
3
|
import { fileURLToPath } from 'url';
|
|
5
|
-
|
|
4
|
+
|
|
5
|
+
import test from 'ava';
|
|
6
6
|
import axios from 'axios';
|
|
7
|
+
|
|
8
|
+
import {
|
|
9
|
+
uploadBlob,
|
|
10
|
+
ensureGCSUpload,
|
|
11
|
+
gcsUrlExists,
|
|
12
|
+
deleteGCS,
|
|
13
|
+
getBlobClient,
|
|
14
|
+
} from '../blobHandler.js';
|
|
15
|
+
import { urlExists } from '../helper.js';
|
|
7
16
|
import CortexFileHandler from '../index.js';
|
|
8
17
|
import { setFileStoreMap } from '../redis.js';
|
|
9
|
-
import { urlExists } from '../helper.js';
|
|
10
18
|
|
|
11
19
|
const __filename = fileURLToPath(import.meta.url);
|
|
12
20
|
const __dirname = path.dirname(__filename);
|
|
13
21
|
|
|
14
22
|
// Helper function to determine if GCS is configured
|
|
15
23
|
function isGCSConfigured() {
|
|
16
|
-
return
|
|
24
|
+
return (
|
|
25
|
+
process.env.GCP_SERVICE_ACCOUNT_KEY_BASE64 ||
|
|
26
|
+
process.env.GCP_SERVICE_ACCOUNT_KEY
|
|
27
|
+
);
|
|
17
28
|
}
|
|
18
29
|
|
|
19
30
|
// Helper function to check file size in GCS
|
|
@@ -26,7 +37,7 @@ async function getGCSFileSize(gcsUrl) {
|
|
|
26
37
|
if (process.env.STORAGE_EMULATOR_HOST) {
|
|
27
38
|
const response = await axios.get(
|
|
28
39
|
`${process.env.STORAGE_EMULATOR_HOST}/storage/v1/b/${bucket}/o/${encodeURIComponent(filename)}`,
|
|
29
|
-
{ validateStatus: status => status === 200 || status === 404 }
|
|
40
|
+
{ validateStatus: (status) => status === 200 || status === 404 },
|
|
30
41
|
);
|
|
31
42
|
if (response.status === 200) {
|
|
32
43
|
return parseInt(response.data.size);
|
|
@@ -50,7 +61,7 @@ async function getHttpFileSize(url) {
|
|
|
50
61
|
}
|
|
51
62
|
}
|
|
52
63
|
|
|
53
|
-
test('test GCS backup during initial upload', async t => {
|
|
64
|
+
test('test GCS backup during initial upload', async (t) => {
|
|
54
65
|
if (!isGCSConfigured()) {
|
|
55
66
|
t.pass('Skipping test - GCS not configured');
|
|
56
67
|
return;
|
|
@@ -60,32 +71,32 @@ test('test GCS backup during initial upload', async t => {
|
|
|
60
71
|
const testContent = 'Hello World!'.repeat(1000); // Create a decent sized file
|
|
61
72
|
const testFile = path.join(__dirname, 'test.txt');
|
|
62
73
|
fs.writeFileSync(testFile, testContent);
|
|
63
|
-
|
|
74
|
+
|
|
64
75
|
try {
|
|
65
|
-
|
|
76
|
+
// Upload the file - should go to both Azure/local and GCS
|
|
66
77
|
const context = { log: console.log };
|
|
67
78
|
const result = await uploadBlob(context, null, false, testFile);
|
|
68
|
-
|
|
79
|
+
|
|
69
80
|
// Verify we got both URLs
|
|
70
81
|
t.truthy(result.url, 'Should have primary storage URL');
|
|
71
82
|
t.truthy(result.gcs, 'Should have GCS backup URL');
|
|
72
|
-
|
|
83
|
+
|
|
73
84
|
// Verify GCS file exists
|
|
74
85
|
const gcsExists = await gcsUrlExists(result.gcs);
|
|
75
86
|
t.true(gcsExists, 'File should exist in GCS');
|
|
76
|
-
|
|
87
|
+
|
|
77
88
|
// Verify file content size in GCS
|
|
78
89
|
const gcsSize = await getGCSFileSize(result.gcs);
|
|
79
90
|
t.is(gcsSize, testContent.length, 'GCS file size should match original');
|
|
80
91
|
} finally {
|
|
81
|
-
|
|
92
|
+
// Cleanup
|
|
82
93
|
if (fs.existsSync(testFile)) {
|
|
83
94
|
fs.unlinkSync(testFile);
|
|
84
95
|
}
|
|
85
96
|
}
|
|
86
97
|
});
|
|
87
98
|
|
|
88
|
-
test('test GCS backup restoration when missing', async t => {
|
|
99
|
+
test('test GCS backup restoration when missing', async (t) => {
|
|
89
100
|
if (!isGCSConfigured()) {
|
|
90
101
|
t.pass('Skipping test - GCS not configured');
|
|
91
102
|
return;
|
|
@@ -95,48 +106,52 @@ test('test GCS backup restoration when missing', async t => {
|
|
|
95
106
|
const testContent = 'Hello World!'.repeat(1000); // Create a decent sized file
|
|
96
107
|
const testFile = path.join(__dirname, 'test.txt');
|
|
97
108
|
fs.writeFileSync(testFile, testContent);
|
|
98
|
-
|
|
109
|
+
|
|
99
110
|
try {
|
|
100
|
-
|
|
111
|
+
// First upload normally
|
|
101
112
|
const context = { log: console.log };
|
|
102
113
|
const result = await uploadBlob(context, null, false, testFile);
|
|
103
|
-
|
|
114
|
+
|
|
104
115
|
// Verify initial upload worked
|
|
105
116
|
t.truthy(result.gcs, 'Should have GCS backup URL after initial upload');
|
|
106
|
-
|
|
117
|
+
|
|
107
118
|
// Delete the GCS file
|
|
108
119
|
const gcsFileName = result.gcs.replace('gs://cortextempfiles/', '');
|
|
109
120
|
await deleteGCS(gcsFileName);
|
|
110
|
-
|
|
121
|
+
|
|
111
122
|
// Verify file is gone
|
|
112
123
|
const existsAfterDelete = await gcsUrlExists(result.gcs);
|
|
113
124
|
t.false(existsAfterDelete, 'File should not exist in GCS after deletion');
|
|
114
|
-
|
|
125
|
+
|
|
115
126
|
// Remove GCS URL to simulate missing backup
|
|
116
|
-
const { gcs: _, ...fileInfo } = result;
|
|
117
|
-
|
|
127
|
+
const { gcs: _, ...fileInfo } = result;
|
|
128
|
+
|
|
118
129
|
// Try to ensure GCS backup
|
|
119
130
|
const updatedResult = await ensureGCSUpload(context, fileInfo);
|
|
120
|
-
|
|
131
|
+
|
|
121
132
|
// Verify GCS URL was added
|
|
122
133
|
t.truthy(updatedResult.gcs, 'Should have GCS backup URL after ensure');
|
|
123
|
-
|
|
134
|
+
|
|
124
135
|
// Verify GCS file exists
|
|
125
136
|
const gcsExists = await gcsUrlExists(updatedResult.gcs);
|
|
126
137
|
t.true(gcsExists, 'File should exist in GCS after ensure');
|
|
127
|
-
|
|
138
|
+
|
|
128
139
|
// Verify file content size in GCS
|
|
129
140
|
const gcsSize = await getGCSFileSize(updatedResult.gcs);
|
|
130
|
-
t.is(
|
|
141
|
+
t.is(
|
|
142
|
+
gcsSize,
|
|
143
|
+
testContent.length,
|
|
144
|
+
'GCS file size should match original after ensure',
|
|
145
|
+
);
|
|
131
146
|
} finally {
|
|
132
|
-
|
|
147
|
+
// Cleanup
|
|
133
148
|
if (fs.existsSync(testFile)) {
|
|
134
149
|
fs.unlinkSync(testFile);
|
|
135
150
|
}
|
|
136
151
|
}
|
|
137
152
|
});
|
|
138
153
|
|
|
139
|
-
test('test primary storage restoration from GCS backup', async t => {
|
|
154
|
+
test('test primary storage restoration from GCS backup', async (t) => {
|
|
140
155
|
if (!isGCSConfigured()) {
|
|
141
156
|
t.pass('Skipping test - GCS not configured');
|
|
142
157
|
return;
|
|
@@ -146,46 +161,46 @@ test('test primary storage restoration from GCS backup', async t => {
|
|
|
146
161
|
const testContent = 'Hello World!'.repeat(1000);
|
|
147
162
|
const testFile = path.join(__dirname, 'test.txt');
|
|
148
163
|
fs.writeFileSync(testFile, testContent);
|
|
149
|
-
|
|
164
|
+
|
|
150
165
|
try {
|
|
151
|
-
|
|
166
|
+
// First upload normally
|
|
152
167
|
const context = { log: console.log };
|
|
153
168
|
const initialResult = await uploadBlob(context, null, false, testFile);
|
|
154
|
-
|
|
169
|
+
|
|
155
170
|
// Verify initial upload worked
|
|
156
171
|
t.truthy(initialResult.url, 'Should have primary storage URL');
|
|
157
172
|
t.truthy(initialResult.gcs, 'Should have GCS backup URL');
|
|
158
|
-
|
|
173
|
+
|
|
159
174
|
// Store the hash and simulate a missing primary file by requesting with a bad URL
|
|
160
175
|
const hash = 'test_primary_restore';
|
|
161
176
|
const modifiedResult = {
|
|
162
177
|
...initialResult,
|
|
163
|
-
url: initialResult.url.replace('.
|
|
178
|
+
url: initialResult.url.replace('test.txt', 'invalid.txt'),
|
|
164
179
|
};
|
|
165
|
-
|
|
180
|
+
|
|
166
181
|
// Set up Redis state with the bad URL
|
|
167
182
|
await setFileStoreMap(hash, modifiedResult);
|
|
168
|
-
|
|
183
|
+
|
|
169
184
|
// Set up request for the handler
|
|
170
185
|
const mockReq = {
|
|
171
186
|
method: 'GET',
|
|
172
|
-
body: { params: { hash, checkHash: true } }
|
|
187
|
+
body: { params: { hash, checkHash: true } },
|
|
173
188
|
};
|
|
174
|
-
|
|
189
|
+
|
|
175
190
|
// Set up context for the handler
|
|
176
|
-
const handlerContext = {
|
|
191
|
+
const handlerContext = {
|
|
177
192
|
log: console.log,
|
|
178
|
-
res: null
|
|
193
|
+
res: null,
|
|
179
194
|
};
|
|
180
|
-
|
|
195
|
+
|
|
181
196
|
// Call the handler which should restore from GCS
|
|
182
197
|
await CortexFileHandler(handlerContext, mockReq);
|
|
183
|
-
|
|
198
|
+
|
|
184
199
|
// Verify we got a valid response
|
|
185
200
|
t.is(handlerContext.res.status, 200, 'Should get successful response');
|
|
186
201
|
t.truthy(handlerContext.res.body.url, 'Should have restored primary URL');
|
|
187
202
|
t.truthy(handlerContext.res.body.gcs, 'Should still have GCS URL');
|
|
188
|
-
|
|
203
|
+
|
|
189
204
|
// Verify the restored URL is accessible
|
|
190
205
|
const { valid } = await urlExists(handlerContext.res.body.url);
|
|
191
206
|
t.true(valid, 'Restored URL should be accessible');
|
|
@@ -193,18 +208,21 @@ test('test primary storage restoration from GCS backup', async t => {
|
|
|
193
208
|
// Verify file sizes match in both storages
|
|
194
209
|
const gcsSize = await getGCSFileSize(handlerContext.res.body.gcs);
|
|
195
210
|
const azureSize = await getHttpFileSize(handlerContext.res.body.url);
|
|
196
|
-
t.is(
|
|
211
|
+
t.is(
|
|
212
|
+
azureSize,
|
|
213
|
+
testContent.length,
|
|
214
|
+
'Azure file size should match original',
|
|
215
|
+
);
|
|
197
216
|
t.is(gcsSize, azureSize, 'Azure and GCS file sizes should match');
|
|
198
|
-
|
|
199
217
|
} finally {
|
|
200
|
-
|
|
218
|
+
// Cleanup
|
|
201
219
|
if (fs.existsSync(testFile)) {
|
|
202
220
|
fs.unlinkSync(testFile);
|
|
203
221
|
}
|
|
204
222
|
}
|
|
205
223
|
});
|
|
206
224
|
|
|
207
|
-
test('test hash check returns 404 when both storages are empty', async t => {
|
|
225
|
+
test('test hash check returns 404 when both storages are empty', async (t) => {
|
|
208
226
|
if (!isGCSConfigured()) {
|
|
209
227
|
t.pass('Skipping test - GCS not configured');
|
|
210
228
|
return;
|
|
@@ -214,20 +232,20 @@ test('test hash check returns 404 when both storages are empty', async t => {
|
|
|
214
232
|
const testContent = 'Hello World!'.repeat(1000);
|
|
215
233
|
const testFile = path.join(__dirname, 'test.txt');
|
|
216
234
|
fs.writeFileSync(testFile, testContent);
|
|
217
|
-
|
|
235
|
+
|
|
218
236
|
try {
|
|
219
|
-
|
|
237
|
+
// First upload normally
|
|
220
238
|
const context = { log: console.log };
|
|
221
239
|
const initialResult = await uploadBlob(context, null, false, testFile);
|
|
222
|
-
|
|
240
|
+
|
|
223
241
|
// Verify initial upload worked
|
|
224
242
|
t.truthy(initialResult.url, 'Should have primary storage URL');
|
|
225
243
|
t.truthy(initialResult.gcs, 'Should have GCS backup URL');
|
|
226
|
-
|
|
244
|
+
|
|
227
245
|
// Store the hash
|
|
228
246
|
const hash = 'test_both_missing';
|
|
229
247
|
await setFileStoreMap(hash, initialResult);
|
|
230
|
-
|
|
248
|
+
|
|
231
249
|
// Verify both files exist initially
|
|
232
250
|
const initialPrimaryCheck = await urlExists(initialResult.url);
|
|
233
251
|
const initialGcsCheck = await gcsUrlExists(initialResult.gcs);
|
|
@@ -244,7 +262,7 @@ test('test hash check returns 404 when both storages are empty', async t => {
|
|
|
244
262
|
const pathParts = fullPath.split('/');
|
|
245
263
|
const blobName = pathParts[pathParts.length - 1];
|
|
246
264
|
console.log('Attempting to delete Azure blob:', blobName);
|
|
247
|
-
|
|
265
|
+
|
|
248
266
|
// Delete the blob using the correct container name
|
|
249
267
|
const { containerClient } = await getBlobClient();
|
|
250
268
|
const blockBlobClient = containerClient.getBlockBlobClient(blobName);
|
|
@@ -252,14 +270,14 @@ test('test hash check returns 404 when both storages are empty', async t => {
|
|
|
252
270
|
console.log('Azure deletion completed');
|
|
253
271
|
|
|
254
272
|
// Add a small delay to ensure deletion is complete
|
|
255
|
-
await new Promise(resolve => setTimeout(resolve, 1000));
|
|
273
|
+
await new Promise((resolve) => setTimeout(resolve, 1000));
|
|
256
274
|
|
|
257
275
|
// Delete from GCS
|
|
258
276
|
const gcsFileName = initialResult.gcs.replace('gs://cortextempfiles/', '');
|
|
259
277
|
console.log('Attempting to delete GCS file:', gcsFileName);
|
|
260
278
|
await deleteGCS(gcsFileName);
|
|
261
279
|
console.log('GCS deletion completed');
|
|
262
|
-
|
|
280
|
+
|
|
263
281
|
// Verify both files are gone
|
|
264
282
|
const primaryExists = await urlExists(initialResult.url);
|
|
265
283
|
console.log('Primary exists after deletion:', primaryExists.valid);
|
|
@@ -267,26 +285,32 @@ test('test hash check returns 404 when both storages are empty', async t => {
|
|
|
267
285
|
console.log('GCS exists after deletion:', gcsExists);
|
|
268
286
|
t.false(primaryExists.valid, 'Primary file should be deleted');
|
|
269
287
|
t.false(gcsExists, 'GCS file should be deleted');
|
|
270
|
-
|
|
288
|
+
|
|
271
289
|
// Try to get the file via hash - should fail
|
|
272
|
-
const handlerContext = {
|
|
290
|
+
const handlerContext = {
|
|
273
291
|
log: console.log,
|
|
274
|
-
res: null
|
|
292
|
+
res: null,
|
|
275
293
|
};
|
|
276
|
-
|
|
294
|
+
|
|
277
295
|
await CortexFileHandler(handlerContext, {
|
|
278
296
|
method: 'GET',
|
|
279
|
-
body: { params: { hash, checkHash: true } }
|
|
297
|
+
body: { params: { hash, checkHash: true } },
|
|
280
298
|
});
|
|
281
|
-
|
|
299
|
+
|
|
282
300
|
// Verify we got a 404 response
|
|
283
|
-
t.is(
|
|
284
|
-
|
|
285
|
-
|
|
301
|
+
t.is(
|
|
302
|
+
handlerContext.res.status,
|
|
303
|
+
404,
|
|
304
|
+
'Should get 404 when both files are missing',
|
|
305
|
+
);
|
|
306
|
+
t.true(
|
|
307
|
+
handlerContext.res.body.includes('not found in storage'),
|
|
308
|
+
'Should indicate files are missing in storage',
|
|
309
|
+
);
|
|
286
310
|
} finally {
|
|
287
|
-
|
|
311
|
+
// Cleanup
|
|
288
312
|
if (fs.existsSync(testFile)) {
|
|
289
313
|
fs.unlinkSync(testFile);
|
|
290
314
|
}
|
|
291
315
|
}
|
|
292
|
-
});
|
|
316
|
+
});
|