@aj-archipelago/cortex 1.3.58 → 1.3.59

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. package/helper-apps/cortex-file-handler/INTERFACE.md +20 -9
  2. package/helper-apps/cortex-file-handler/package-lock.json +2 -2
  3. package/helper-apps/cortex-file-handler/package.json +1 -1
  4. package/helper-apps/cortex-file-handler/scripts/setup-azure-container.js +17 -17
  5. package/helper-apps/cortex-file-handler/scripts/setup-test-containers.js +35 -35
  6. package/helper-apps/cortex-file-handler/src/blobHandler.js +1010 -909
  7. package/helper-apps/cortex-file-handler/src/constants.js +98 -98
  8. package/helper-apps/cortex-file-handler/src/docHelper.js +27 -27
  9. package/helper-apps/cortex-file-handler/src/fileChunker.js +224 -214
  10. package/helper-apps/cortex-file-handler/src/helper.js +93 -93
  11. package/helper-apps/cortex-file-handler/src/index.js +584 -550
  12. package/helper-apps/cortex-file-handler/src/localFileHandler.js +86 -86
  13. package/helper-apps/cortex-file-handler/src/redis.js +186 -90
  14. package/helper-apps/cortex-file-handler/src/services/ConversionService.js +301 -273
  15. package/helper-apps/cortex-file-handler/src/services/FileConversionService.js +55 -55
  16. package/helper-apps/cortex-file-handler/src/services/storage/AzureStorageProvider.js +174 -154
  17. package/helper-apps/cortex-file-handler/src/services/storage/GCSStorageProvider.js +239 -223
  18. package/helper-apps/cortex-file-handler/src/services/storage/LocalStorageProvider.js +161 -159
  19. package/helper-apps/cortex-file-handler/src/services/storage/StorageFactory.js +73 -71
  20. package/helper-apps/cortex-file-handler/src/services/storage/StorageProvider.js +46 -45
  21. package/helper-apps/cortex-file-handler/src/services/storage/StorageService.js +256 -213
  22. package/helper-apps/cortex-file-handler/src/start.js +4 -1
  23. package/helper-apps/cortex-file-handler/src/utils/filenameUtils.js +59 -25
  24. package/helper-apps/cortex-file-handler/tests/FileConversionService.test.js +119 -116
  25. package/helper-apps/cortex-file-handler/tests/blobHandler.test.js +257 -257
  26. package/helper-apps/cortex-file-handler/tests/cleanup.test.js +676 -0
  27. package/helper-apps/cortex-file-handler/tests/conversionResilience.test.js +124 -124
  28. package/helper-apps/cortex-file-handler/tests/fileChunker.test.js +249 -208
  29. package/helper-apps/cortex-file-handler/tests/fileUpload.test.js +439 -380
  30. package/helper-apps/cortex-file-handler/tests/getOperations.test.js +299 -263
  31. package/helper-apps/cortex-file-handler/tests/postOperations.test.js +265 -239
  32. package/helper-apps/cortex-file-handler/tests/start.test.js +1230 -1201
  33. package/helper-apps/cortex-file-handler/tests/storage/AzureStorageProvider.test.js +110 -105
  34. package/helper-apps/cortex-file-handler/tests/storage/GCSStorageProvider.test.js +201 -175
  35. package/helper-apps/cortex-file-handler/tests/storage/LocalStorageProvider.test.js +128 -125
  36. package/helper-apps/cortex-file-handler/tests/storage/StorageFactory.test.js +78 -73
  37. package/helper-apps/cortex-file-handler/tests/storage/StorageService.test.js +99 -99
  38. package/helper-apps/cortex-file-handler/tests/testUtils.helper.js +74 -70
  39. package/package.json +1 -1
  40. package/pathways/translate_subtitle.js +15 -8
@@ -1,18 +1,20 @@
1
1
  # Cortex File Handler Interface Documentation
2
2
 
3
3
  ## Overview
4
+
4
5
  The Cortex File Handler is a service that processes files through various operations including uploading, downloading, chunking, and document processing. It supports multiple storage backends (Azure Blob Storage, Google Cloud Storage, and Local File System).
5
6
 
6
7
  ## Request Methods
7
8
 
8
9
  ### POST
10
+
9
11
  - **Purpose**: Upload a file
10
12
  - **Content-Type**: `multipart/form-data`
11
13
  - **Parameters**:
12
14
  - `hash` (optional): Unique identifier for the file
13
15
  - `requestId` (required): Unique identifier for the request
14
16
  - File content must be included in the form data
15
- - **Behavior**:
17
+ - **Behavior**:
16
18
  - Uploads file to primary storage (Azure or Local)
17
19
  - If GCS is configured, also uploads to GCS
18
20
  - If hash is provided, stores file metadata in Redis
@@ -26,6 +28,7 @@ The Cortex File Handler is a service that processes files through various operat
26
28
  - **Note**: The `save` parameter is not supported in POST requests. To convert and save a document as text, use GET with the `save` parameter.
27
29
 
28
30
  ### GET
31
+
29
32
  - **Purpose**: Process or retrieve files
30
33
  - **Parameters** (can be in query string or request body):
31
34
  - `uri` (required if not using fetch/load/restore): URL of the file to process
@@ -48,13 +51,13 @@ The Cortex File Handler is a service that processes files through various operat
48
51
  - Truncates long filenames
49
52
  - **Behavior**:
50
53
  - For documents (PDF, DOC, etc.):
51
- - If `save=true`:
54
+ - If `save=true`:
52
55
  - Converts document to text
53
56
  - Saves text file to primary storage (Azure or Local)
54
57
  - Deletes original document from storage
55
58
  - Does not save to GCS
56
59
  - Returns object with primary storage URL
57
- - If `save=false`:
60
+ - If `save=false`:
58
61
  - Converts document to text
59
62
  - Returns array of text chunks
60
63
  - Does not persist any files
@@ -72,6 +75,7 @@ The Cortex File Handler is a service that processes files through various operat
72
75
  - Ensures correct file extension based on content type
73
76
 
74
77
  ### DELETE
78
+
75
79
  - **Purpose**: Remove files from storage
76
80
  - **Parameters** (can be in query string or request body):
77
81
  - `requestId` (required): Unique identifier for the request
@@ -82,12 +86,14 @@ The Cortex File Handler is a service that processes files through various operat
82
86
  - **Response**: Array of deleted file URLs
83
87
 
84
88
  ## Storage Configuration
89
+
85
90
  - **Azure**: Enabled if `AZURE_STORAGE_CONNECTION_STRING` is set
86
91
  - **GCS**: Enabled if `GCP_SERVICE_ACCOUNT_KEY_BASE64` or `GCP_SERVICE_ACCOUNT_KEY` is set
87
92
  - **Local**: Used as fallback if Azure is not configured
88
93
 
89
94
  ## Response Format
90
- - **Success**:
95
+
96
+ - **Success**:
91
97
  - Status: 200
92
98
  - Body: Varies by operation (see specific methods above)
93
99
  - **Error**:
@@ -95,6 +101,7 @@ The Cortex File Handler is a service that processes files through various operat
95
101
  - Body: Error message string
96
102
 
97
103
  ## Progress Tracking
104
+
98
105
  - Progress updates are published to Redis for each operation
99
106
  - Progress includes:
100
107
  - `progress`: Completion percentage (0-1)
@@ -105,8 +112,9 @@ The Cortex File Handler is a service that processes files through various operat
105
112
  - Progress updates are published to Redis channel associated with `requestId`
106
113
 
107
114
  ## File Types
115
+
108
116
  - **Documents**: Processed based on `DOC_EXTENSIONS` list
109
- - Supported extensions:
117
+ - Supported extensions:
110
118
  - Text: .txt, .json, .csv, .md, .xml, .js, .html, .css
111
119
  - Office: .doc, .docx, .xls, .xlsx
112
120
  - Document processing limitations:
@@ -135,6 +143,7 @@ The Cortex File Handler is a service that processes files through various operat
135
143
  - Truncates filenames longer than 200 characters
136
144
 
137
145
  ## Storage Behavior
146
+
138
147
  - **Primary Storage** (Azure or Local):
139
148
  - Files are stored with UUID-based names
140
149
  - Organized by requestId folders
@@ -151,6 +160,7 @@ The Cortex File Handler is a service that processes files through various operat
151
160
  - Used for progress tracking
152
161
 
153
162
  ## Cleanup
163
+
154
164
  - Automatic cleanup of inactive files
155
165
  - Removes files from:
156
166
  - Primary storage (Azure/Local)
@@ -163,16 +173,17 @@ The Cortex File Handler is a service that processes files through various operat
163
173
  - On error conditions
164
174
 
165
175
  ## Error Handling
166
- - **400 Bad Request**:
176
+
177
+ - **400 Bad Request**:
167
178
  - Missing required parameters
168
179
  - Invalid or inaccessible URL
169
180
  - Unsupported file type
170
- - **404 Not Found**:
181
+ - **404 Not Found**:
171
182
  - File or hash not found
172
183
  - File not found in storage
173
- - **500 Internal Server Error**:
184
+ - **500 Internal Server Error**:
174
185
  - Processing errors
175
186
  - Storage errors
176
187
  - Document conversion errors
177
188
  - PDF processing errors (scanned, encrypted, password-protected)
178
- - All errors include descriptive message in response body
189
+ - All errors include descriptive message in response body
@@ -1,12 +1,12 @@
1
1
  {
2
2
  "name": "@aj-archipelago/cortex-file-handler",
3
- "version": "1.0.18",
3
+ "version": "2.6.1",
4
4
  "lockfileVersion": 3,
5
5
  "requires": true,
6
6
  "packages": {
7
7
  "": {
8
8
  "name": "@aj-archipelago/cortex-file-handler",
9
- "version": "1.0.18",
9
+ "version": "2.6.1",
10
10
  "dependencies": {
11
11
  "@azure/storage-blob": "^12.13.0",
12
12
  "@distube/ytdl-core": "^4.14.3",
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@aj-archipelago/cortex-file-handler",
3
- "version": "2.0.02",
3
+ "version": "2.6.1",
4
4
  "description": "File handling service for Cortex - handles file uploads, media chunking, and document processing",
5
5
  "type": "module",
6
6
  "main": "src/index.js",
@@ -1,25 +1,25 @@
1
- import { BlobServiceClient } from '@azure/storage-blob';
1
+ import { BlobServiceClient } from "@azure/storage-blob";
2
2
 
3
3
  async function createContainer() {
4
- try {
5
- const blobServiceClient = BlobServiceClient.fromConnectionString(
6
- 'UseDevelopmentStorage=true',
7
- );
8
- const containerClient =
9
- blobServiceClient.getContainerClient('test-container');
4
+ try {
5
+ const blobServiceClient = BlobServiceClient.fromConnectionString(
6
+ "UseDevelopmentStorage=true",
7
+ );
8
+ const containerClient =
9
+ blobServiceClient.getContainerClient("test-container");
10
10
 
11
- console.log('Creating container...');
12
- await containerClient.create();
13
- console.log('Container created successfully');
14
- } catch (error) {
11
+ console.log("Creating container...");
12
+ await containerClient.create();
13
+ console.log("Container created successfully");
14
+ } catch (error) {
15
15
  // Ignore if container already exists
16
- if (error.statusCode === 409) {
17
- console.log('Container already exists');
18
- } else {
19
- console.error('Error creating container:', error);
20
- process.exit(1);
21
- }
16
+ if (error.statusCode === 409) {
17
+ console.log("Container already exists");
18
+ } else {
19
+ console.error("Error creating container:", error);
20
+ process.exit(1);
22
21
  }
22
+ }
23
23
  }
24
24
 
25
25
  createContainer();
@@ -1,52 +1,52 @@
1
- import { BlobServiceClient } from '@azure/storage-blob';
2
- import { Storage } from '@google-cloud/storage';
1
+ import { BlobServiceClient } from "@azure/storage-blob";
2
+ import { Storage } from "@google-cloud/storage";
3
3
 
4
4
  async function createAzureContainer() {
5
- try {
6
- const blobServiceClient = BlobServiceClient.fromConnectionString(
7
- 'UseDevelopmentStorage=true',
8
- );
9
- const containerClient =
10
- blobServiceClient.getContainerClient('test-container');
5
+ try {
6
+ const blobServiceClient = BlobServiceClient.fromConnectionString(
7
+ "UseDevelopmentStorage=true",
8
+ );
9
+ const containerClient =
10
+ blobServiceClient.getContainerClient("test-container");
11
11
 
12
- console.log('Creating Azure container...');
13
- await containerClient.create();
14
- console.log('Azure container created successfully');
15
- } catch (error) {
12
+ console.log("Creating Azure container...");
13
+ await containerClient.create();
14
+ console.log("Azure container created successfully");
15
+ } catch (error) {
16
16
  // Ignore if container already exists
17
- if (error.statusCode === 409) {
18
- console.log('Azure container already exists');
19
- } else {
20
- console.error('Error creating Azure container:', error);
21
- process.exit(1);
22
- }
17
+ if (error.statusCode === 409) {
18
+ console.log("Azure container already exists");
19
+ } else {
20
+ console.error("Error creating Azure container:", error);
21
+ process.exit(1);
23
22
  }
23
+ }
24
24
  }
25
25
 
26
26
  async function createGCSBucket() {
27
- try {
28
- const storage = new Storage({
29
- projectId: 'test-project',
30
- apiEndpoint: 'http://localhost:4443',
31
- });
27
+ try {
28
+ const storage = new Storage({
29
+ projectId: "test-project",
30
+ apiEndpoint: "http://localhost:4443",
31
+ });
32
32
 
33
- console.log('Creating GCS bucket...');
34
- await storage.createBucket('cortextempfiles');
35
- console.log('GCS bucket created successfully');
36
- } catch (error) {
33
+ console.log("Creating GCS bucket...");
34
+ await storage.createBucket("cortextempfiles");
35
+ console.log("GCS bucket created successfully");
36
+ } catch (error) {
37
37
  // Ignore if bucket already exists
38
- if (error.code === 409) {
39
- console.log('GCS bucket already exists');
40
- } else {
41
- console.error('Error creating GCS bucket:', error);
42
- process.exit(1);
43
- }
38
+ if (error.code === 409) {
39
+ console.log("GCS bucket already exists");
40
+ } else {
41
+ console.error("Error creating GCS bucket:", error);
42
+ process.exit(1);
44
43
  }
44
+ }
45
45
  }
46
46
 
47
47
  async function setup() {
48
- await createAzureContainer();
49
- await createGCSBucket();
48
+ await createAzureContainer();
49
+ await createGCSBucket();
50
50
  }
51
51
 
52
52
  setup();