@aj-archipelago/cortex 1.3.57 → 1.3.59

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. package/README.md +6 -0
  2. package/config.js +22 -0
  3. package/helper-apps/cortex-file-handler/INTERFACE.md +20 -9
  4. package/helper-apps/cortex-file-handler/package-lock.json +2 -2
  5. package/helper-apps/cortex-file-handler/package.json +1 -1
  6. package/helper-apps/cortex-file-handler/scripts/setup-azure-container.js +17 -17
  7. package/helper-apps/cortex-file-handler/scripts/setup-test-containers.js +35 -35
  8. package/helper-apps/cortex-file-handler/src/blobHandler.js +1010 -909
  9. package/helper-apps/cortex-file-handler/src/constants.js +98 -98
  10. package/helper-apps/cortex-file-handler/src/docHelper.js +27 -27
  11. package/helper-apps/cortex-file-handler/src/fileChunker.js +224 -214
  12. package/helper-apps/cortex-file-handler/src/helper.js +93 -93
  13. package/helper-apps/cortex-file-handler/src/index.js +584 -550
  14. package/helper-apps/cortex-file-handler/src/localFileHandler.js +86 -86
  15. package/helper-apps/cortex-file-handler/src/redis.js +186 -90
  16. package/helper-apps/cortex-file-handler/src/services/ConversionService.js +301 -273
  17. package/helper-apps/cortex-file-handler/src/services/FileConversionService.js +55 -55
  18. package/helper-apps/cortex-file-handler/src/services/storage/AzureStorageProvider.js +174 -154
  19. package/helper-apps/cortex-file-handler/src/services/storage/GCSStorageProvider.js +239 -223
  20. package/helper-apps/cortex-file-handler/src/services/storage/LocalStorageProvider.js +161 -159
  21. package/helper-apps/cortex-file-handler/src/services/storage/StorageFactory.js +73 -71
  22. package/helper-apps/cortex-file-handler/src/services/storage/StorageProvider.js +46 -45
  23. package/helper-apps/cortex-file-handler/src/services/storage/StorageService.js +256 -213
  24. package/helper-apps/cortex-file-handler/src/start.js +4 -1
  25. package/helper-apps/cortex-file-handler/src/utils/filenameUtils.js +59 -25
  26. package/helper-apps/cortex-file-handler/tests/FileConversionService.test.js +119 -116
  27. package/helper-apps/cortex-file-handler/tests/blobHandler.test.js +257 -257
  28. package/helper-apps/cortex-file-handler/tests/cleanup.test.js +676 -0
  29. package/helper-apps/cortex-file-handler/tests/conversionResilience.test.js +124 -124
  30. package/helper-apps/cortex-file-handler/tests/fileChunker.test.js +249 -208
  31. package/helper-apps/cortex-file-handler/tests/fileUpload.test.js +439 -380
  32. package/helper-apps/cortex-file-handler/tests/getOperations.test.js +299 -263
  33. package/helper-apps/cortex-file-handler/tests/postOperations.test.js +265 -239
  34. package/helper-apps/cortex-file-handler/tests/start.test.js +1230 -1201
  35. package/helper-apps/cortex-file-handler/tests/storage/AzureStorageProvider.test.js +110 -105
  36. package/helper-apps/cortex-file-handler/tests/storage/GCSStorageProvider.test.js +201 -175
  37. package/helper-apps/cortex-file-handler/tests/storage/LocalStorageProvider.test.js +128 -125
  38. package/helper-apps/cortex-file-handler/tests/storage/StorageFactory.test.js +78 -73
  39. package/helper-apps/cortex-file-handler/tests/storage/StorageService.test.js +99 -99
  40. package/helper-apps/cortex-file-handler/tests/testUtils.helper.js +74 -70
  41. package/package.json +1 -1
  42. package/pathways/translate_apptek.js +33 -0
  43. package/pathways/translate_subtitle.js +15 -8
  44. package/server/plugins/apptekTranslatePlugin.js +46 -91
  45. package/tests/apptekTranslatePlugin.test.js +0 -2
  46. package/tests/integration/apptekTranslatePlugin.integration.test.js +159 -93
  47. package/tests/translate_apptek.test.js +16 -0
package/README.md CHANGED
@@ -561,6 +561,12 @@ Each model configuration can include:
561
561
  }
562
562
  ```
563
563
 
564
+ **Rate Limiting**: The `requestsPerSecond` parameter controls the rate limiting for each model endpoint. If not specified, Cortex defaults to **100 requests per second** per endpoint. This rate limiting is implemented using the Bottleneck library with a token bucket algorithm that includes:
565
+ - Minimum time between requests (`minTime`)
566
+ - Maximum concurrent requests (`maxConcurrent`)
567
+ - Token reservoir that refreshes every second
568
+ - Optional Redis clustering support when `storageConnectionString` is configured
569
+
564
570
  ### API Compatibility
565
571
 
566
572
  Cortex provides OpenAI-compatible REST endpoints that allow you to use various models through a standardized interface. When `enableRestEndpoints` is set to `true`, Cortex exposes the following endpoints:
package/config.js CHANGED
@@ -435,6 +435,17 @@ var config = convict({
435
435
  "maxReturnTokens": 4096,
436
436
  "supportsStreaming": true
437
437
  },
438
+ "apptek-translate": {
439
+ "type": "APPTEK-TRANSLATE",
440
+ "url": "{{APPTEK_API_ENDPOINT}}",
441
+ "headers": {
442
+ "x-token": "{{APPTEK_API_KEY}}",
443
+ "Accept": "application/json",
444
+ "Content-Type": "text/plain"
445
+ },
446
+ "requestsPerSecond": 10,
447
+ "maxTokenLength": 128000
448
+ },
438
449
  },
439
450
  env: 'CORTEX_MODELS'
440
451
  },
@@ -533,6 +544,17 @@ var config = convict({
533
544
  format: String,
534
545
  default: null,
535
546
  env: 'JINA_API_KEY'
547
+ },
548
+ apptekApiKey: {
549
+ format: String,
550
+ default: null,
551
+ env: 'APPTEK_API_KEY',
552
+ sensitive: true
553
+ },
554
+ apptekApiEndpoint: {
555
+ format: String,
556
+ default: null,
557
+ env: 'APPTEK_API_ENDPOINT'
536
558
  }
537
559
  });
538
560
 
package/helper-apps/cortex-file-handler/INTERFACE.md CHANGED
@@ -1,18 +1,20 @@
1
1
  # Cortex File Handler Interface Documentation
2
2
 
3
3
  ## Overview
4
+
4
5
  The Cortex File Handler is a service that processes files through various operations including uploading, downloading, chunking, and document processing. It supports multiple storage backends (Azure Blob Storage, Google Cloud Storage, and Local File System).
5
6
 
6
7
  ## Request Methods
7
8
 
8
9
  ### POST
10
+
9
11
  - **Purpose**: Upload a file
10
12
  - **Content-Type**: `multipart/form-data`
11
13
  - **Parameters**:
12
14
  - `hash` (optional): Unique identifier for the file
13
15
  - `requestId` (required): Unique identifier for the request
14
16
  - File content must be included in the form data
15
- - **Behavior**:
17
+ - **Behavior**:
16
18
  - Uploads file to primary storage (Azure or Local)
17
19
  - If GCS is configured, also uploads to GCS
18
20
  - If hash is provided, stores file metadata in Redis
@@ -26,6 +28,7 @@ The Cortex File Handler is a service that processes files through various operat
26
28
  - **Note**: The `save` parameter is not supported in POST requests. To convert and save a document as text, use GET with the `save` parameter.
27
29
 
28
30
  ### GET
31
+
29
32
  - **Purpose**: Process or retrieve files
30
33
  - **Parameters** (can be in query string or request body):
31
34
  - `uri` (required if not using fetch/load/restore): URL of the file to process
@@ -48,13 +51,13 @@ The Cortex File Handler is a service that processes files through various operat
48
51
  - Truncates long filenames
49
52
  - **Behavior**:
50
53
  - For documents (PDF, DOC, etc.):
51
- - If `save=true`:
54
+ - If `save=true`:
52
55
  - Converts document to text
53
56
  - Saves text file to primary storage (Azure or Local)
54
57
  - Deletes original document from storage
55
58
  - Does not save to GCS
56
59
  - Returns object with primary storage URL
57
- - If `save=false`:
60
+ - If `save=false`:
58
61
  - Converts document to text
59
62
  - Returns array of text chunks
60
63
  - Does not persist any files
@@ -72,6 +75,7 @@ The Cortex File Handler is a service that processes files through various operat
72
75
  - Ensures correct file extension based on content type
73
76
 
74
77
  ### DELETE
78
+
75
79
  - **Purpose**: Remove files from storage
76
80
  - **Parameters** (can be in query string or request body):
77
81
  - `requestId` (required): Unique identifier for the request
@@ -82,12 +86,14 @@ The Cortex File Handler is a service that processes files through various operat
82
86
  - **Response**: Array of deleted file URLs
83
87
 
84
88
  ## Storage Configuration
89
+
85
90
  - **Azure**: Enabled if `AZURE_STORAGE_CONNECTION_STRING` is set
86
91
  - **GCS**: Enabled if `GCP_SERVICE_ACCOUNT_KEY_BASE64` or `GCP_SERVICE_ACCOUNT_KEY` is set
87
92
  - **Local**: Used as fallback if Azure is not configured
88
93
 
89
94
  ## Response Format
90
- - **Success**:
95
+
96
+ - **Success**:
91
97
  - Status: 200
92
98
  - Body: Varies by operation (see specific methods above)
93
99
  - **Error**:
@@ -95,6 +101,7 @@ The Cortex File Handler is a service that processes files through various operat
95
101
  - Body: Error message string
96
102
 
97
103
  ## Progress Tracking
104
+
98
105
  - Progress updates are published to Redis for each operation
99
106
  - Progress includes:
100
107
  - `progress`: Completion percentage (0-1)
@@ -105,8 +112,9 @@ The Cortex File Handler is a service that processes files through various operat
105
112
  - Progress updates are published to Redis channel associated with `requestId`
106
113
 
107
114
  ## File Types
115
+
108
116
  - **Documents**: Processed based on `DOC_EXTENSIONS` list
109
- - Supported extensions:
117
+ - Supported extensions:
110
118
  - Text: .txt, .json, .csv, .md, .xml, .js, .html, .css
111
119
  - Office: .doc, .docx, .xls, .xlsx
112
120
  - Document processing limitations:
@@ -135,6 +143,7 @@ The Cortex File Handler is a service that processes files through various operat
135
143
  - Truncates filenames longer than 200 characters
136
144
 
137
145
  ## Storage Behavior
146
+
138
147
  - **Primary Storage** (Azure or Local):
139
148
  - Files are stored with UUID-based names
140
149
  - Organized by requestId folders
@@ -151,6 +160,7 @@ The Cortex File Handler is a service that processes files through various operat
151
160
  - Used for progress tracking
152
161
 
153
162
  ## Cleanup
163
+
154
164
  - Automatic cleanup of inactive files
155
165
  - Removes files from:
156
166
  - Primary storage (Azure/Local)
@@ -163,16 +173,17 @@ The Cortex File Handler is a service that processes files through various operat
163
173
  - On error conditions
164
174
 
165
175
  ## Error Handling
166
- - **400 Bad Request**:
176
+
177
+ - **400 Bad Request**:
167
178
  - Missing required parameters
168
179
  - Invalid or inaccessible URL
169
180
  - Unsupported file type
170
- - **404 Not Found**:
181
+ - **404 Not Found**:
171
182
  - File or hash not found
172
183
  - File not found in storage
173
- - **500 Internal Server Error**:
184
+ - **500 Internal Server Error**:
174
185
  - Processing errors
175
186
  - Storage errors
176
187
  - Document conversion errors
177
188
  - PDF processing errors (scanned, encrypted, password-protected)
178
- - All errors include descriptive message in response body
189
+ - All errors include descriptive message in response body
package/helper-apps/cortex-file-handler/package-lock.json CHANGED
@@ -1,12 +1,12 @@
1
1
  {
2
2
  "name": "@aj-archipelago/cortex-file-handler",
3
- "version": "1.0.18",
3
+ "version": "2.6.1",
4
4
  "lockfileVersion": 3,
5
5
  "requires": true,
6
6
  "packages": {
7
7
  "": {
8
8
  "name": "@aj-archipelago/cortex-file-handler",
9
- "version": "1.0.18",
9
+ "version": "2.6.1",
10
10
  "dependencies": {
11
11
  "@azure/storage-blob": "^12.13.0",
12
12
  "@distube/ytdl-core": "^4.14.3",
package/helper-apps/cortex-file-handler/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@aj-archipelago/cortex-file-handler",
3
- "version": "2.0.02",
3
+ "version": "2.6.1",
4
4
  "description": "File handling service for Cortex - handles file uploads, media chunking, and document processing",
5
5
  "type": "module",
6
6
  "main": "src/index.js",
package/helper-apps/cortex-file-handler/scripts/setup-azure-container.js CHANGED
@@ -1,25 +1,25 @@
1
- import { BlobServiceClient } from '@azure/storage-blob';
1
+ import { BlobServiceClient } from "@azure/storage-blob";
2
2
 
3
3
  async function createContainer() {
4
- try {
5
- const blobServiceClient = BlobServiceClient.fromConnectionString(
6
- 'UseDevelopmentStorage=true',
7
- );
8
- const containerClient =
9
- blobServiceClient.getContainerClient('test-container');
4
+ try {
5
+ const blobServiceClient = BlobServiceClient.fromConnectionString(
6
+ "UseDevelopmentStorage=true",
7
+ );
8
+ const containerClient =
9
+ blobServiceClient.getContainerClient("test-container");
10
10
 
11
- console.log('Creating container...');
12
- await containerClient.create();
13
- console.log('Container created successfully');
14
- } catch (error) {
11
+ console.log("Creating container...");
12
+ await containerClient.create();
13
+ console.log("Container created successfully");
14
+ } catch (error) {
15
15
  // Ignore if container already exists
16
- if (error.statusCode === 409) {
17
- console.log('Container already exists');
18
- } else {
19
- console.error('Error creating container:', error);
20
- process.exit(1);
21
- }
16
+ if (error.statusCode === 409) {
17
+ console.log("Container already exists");
18
+ } else {
19
+ console.error("Error creating container:", error);
20
+ process.exit(1);
22
21
  }
22
+ }
23
23
  }
24
24
 
25
25
  createContainer();
package/helper-apps/cortex-file-handler/scripts/setup-test-containers.js CHANGED
@@ -1,52 +1,52 @@
1
- import { BlobServiceClient } from '@azure/storage-blob';
2
- import { Storage } from '@google-cloud/storage';
1
+ import { BlobServiceClient } from "@azure/storage-blob";
2
+ import { Storage } from "@google-cloud/storage";
3
3
 
4
4
  async function createAzureContainer() {
5
- try {
6
- const blobServiceClient = BlobServiceClient.fromConnectionString(
7
- 'UseDevelopmentStorage=true',
8
- );
9
- const containerClient =
10
- blobServiceClient.getContainerClient('test-container');
5
+ try {
6
+ const blobServiceClient = BlobServiceClient.fromConnectionString(
7
+ "UseDevelopmentStorage=true",
8
+ );
9
+ const containerClient =
10
+ blobServiceClient.getContainerClient("test-container");
11
11
 
12
- console.log('Creating Azure container...');
13
- await containerClient.create();
14
- console.log('Azure container created successfully');
15
- } catch (error) {
12
+ console.log("Creating Azure container...");
13
+ await containerClient.create();
14
+ console.log("Azure container created successfully");
15
+ } catch (error) {
16
16
  // Ignore if container already exists
17
- if (error.statusCode === 409) {
18
- console.log('Azure container already exists');
19
- } else {
20
- console.error('Error creating Azure container:', error);
21
- process.exit(1);
22
- }
17
+ if (error.statusCode === 409) {
18
+ console.log("Azure container already exists");
19
+ } else {
20
+ console.error("Error creating Azure container:", error);
21
+ process.exit(1);
23
22
  }
23
+ }
24
24
  }
25
25
 
26
26
  async function createGCSBucket() {
27
- try {
28
- const storage = new Storage({
29
- projectId: 'test-project',
30
- apiEndpoint: 'http://localhost:4443',
31
- });
27
+ try {
28
+ const storage = new Storage({
29
+ projectId: "test-project",
30
+ apiEndpoint: "http://localhost:4443",
31
+ });
32
32
 
33
- console.log('Creating GCS bucket...');
34
- await storage.createBucket('cortextempfiles');
35
- console.log('GCS bucket created successfully');
36
- } catch (error) {
33
+ console.log("Creating GCS bucket...");
34
+ await storage.createBucket("cortextempfiles");
35
+ console.log("GCS bucket created successfully");
36
+ } catch (error) {
37
37
  // Ignore if bucket already exists
38
- if (error.code === 409) {
39
- console.log('GCS bucket already exists');
40
- } else {
41
- console.error('Error creating GCS bucket:', error);
42
- process.exit(1);
43
- }
38
+ if (error.code === 409) {
39
+ console.log("GCS bucket already exists");
40
+ } else {
41
+ console.error("Error creating GCS bucket:", error);
42
+ process.exit(1);
44
43
  }
44
+ }
45
45
  }
46
46
 
47
47
  async function setup() {
48
- await createAzureContainer();
49
- await createGCSBucket();
48
+ await createAzureContainer();
49
+ await createGCSBucket();
50
50
  }
51
51
 
52
52
  setup();