@aj-archipelago/cortex 1.3.57 → 1.3.59
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +6 -0
- package/config.js +22 -0
- package/helper-apps/cortex-file-handler/INTERFACE.md +20 -9
- package/helper-apps/cortex-file-handler/package-lock.json +2 -2
- package/helper-apps/cortex-file-handler/package.json +1 -1
- package/helper-apps/cortex-file-handler/scripts/setup-azure-container.js +17 -17
- package/helper-apps/cortex-file-handler/scripts/setup-test-containers.js +35 -35
- package/helper-apps/cortex-file-handler/src/blobHandler.js +1010 -909
- package/helper-apps/cortex-file-handler/src/constants.js +98 -98
- package/helper-apps/cortex-file-handler/src/docHelper.js +27 -27
- package/helper-apps/cortex-file-handler/src/fileChunker.js +224 -214
- package/helper-apps/cortex-file-handler/src/helper.js +93 -93
- package/helper-apps/cortex-file-handler/src/index.js +584 -550
- package/helper-apps/cortex-file-handler/src/localFileHandler.js +86 -86
- package/helper-apps/cortex-file-handler/src/redis.js +186 -90
- package/helper-apps/cortex-file-handler/src/services/ConversionService.js +301 -273
- package/helper-apps/cortex-file-handler/src/services/FileConversionService.js +55 -55
- package/helper-apps/cortex-file-handler/src/services/storage/AzureStorageProvider.js +174 -154
- package/helper-apps/cortex-file-handler/src/services/storage/GCSStorageProvider.js +239 -223
- package/helper-apps/cortex-file-handler/src/services/storage/LocalStorageProvider.js +161 -159
- package/helper-apps/cortex-file-handler/src/services/storage/StorageFactory.js +73 -71
- package/helper-apps/cortex-file-handler/src/services/storage/StorageProvider.js +46 -45
- package/helper-apps/cortex-file-handler/src/services/storage/StorageService.js +256 -213
- package/helper-apps/cortex-file-handler/src/start.js +4 -1
- package/helper-apps/cortex-file-handler/src/utils/filenameUtils.js +59 -25
- package/helper-apps/cortex-file-handler/tests/FileConversionService.test.js +119 -116
- package/helper-apps/cortex-file-handler/tests/blobHandler.test.js +257 -257
- package/helper-apps/cortex-file-handler/tests/cleanup.test.js +676 -0
- package/helper-apps/cortex-file-handler/tests/conversionResilience.test.js +124 -124
- package/helper-apps/cortex-file-handler/tests/fileChunker.test.js +249 -208
- package/helper-apps/cortex-file-handler/tests/fileUpload.test.js +439 -380
- package/helper-apps/cortex-file-handler/tests/getOperations.test.js +299 -263
- package/helper-apps/cortex-file-handler/tests/postOperations.test.js +265 -239
- package/helper-apps/cortex-file-handler/tests/start.test.js +1230 -1201
- package/helper-apps/cortex-file-handler/tests/storage/AzureStorageProvider.test.js +110 -105
- package/helper-apps/cortex-file-handler/tests/storage/GCSStorageProvider.test.js +201 -175
- package/helper-apps/cortex-file-handler/tests/storage/LocalStorageProvider.test.js +128 -125
- package/helper-apps/cortex-file-handler/tests/storage/StorageFactory.test.js +78 -73
- package/helper-apps/cortex-file-handler/tests/storage/StorageService.test.js +99 -99
- package/helper-apps/cortex-file-handler/tests/testUtils.helper.js +74 -70
- package/package.json +1 -1
- package/pathways/translate_apptek.js +33 -0
- package/pathways/translate_subtitle.js +15 -8
- package/server/plugins/apptekTranslatePlugin.js +46 -91
- package/tests/apptekTranslatePlugin.test.js +0 -2
- package/tests/integration/apptekTranslatePlugin.integration.test.js +159 -93
- package/tests/translate_apptek.test.js +16 -0
package/README.md
CHANGED
|
@@ -561,6 +561,12 @@ Each model configuration can include:
|
|
|
561
561
|
}
|
|
562
562
|
```
|
|
563
563
|
|
|
564
|
+
**Rate Limiting**: The `requestsPerSecond` parameter sets the rate limit for each model endpoint. If it is not specified, Cortex defaults to **100 requests per second** per endpoint. The limiter is implemented with the Bottleneck library using a token bucket algorithm, and it provides:
|
|
565
|
+
- Minimum time between requests (`minTime`)
|
|
566
|
+
- Maximum concurrent requests (`maxConcurrent`)
|
|
567
|
+
- Token reservoir that refreshes every second
|
|
568
|
+
- Optional Redis clustering support when `storageConnectionString` is configured
|
|
569
|
+
|
|
564
570
|
### API Compatibility
|
|
565
571
|
|
|
566
572
|
Cortex provides OpenAI-compatible REST endpoints that allow you to use various models through a standardized interface. When `enableRestEndpoints` is set to `true`, Cortex exposes the following endpoints:
|
package/config.js
CHANGED
|
@@ -435,6 +435,17 @@ var config = convict({
|
|
|
435
435
|
"maxReturnTokens": 4096,
|
|
436
436
|
"supportsStreaming": true
|
|
437
437
|
},
|
|
438
|
+
"apptek-translate": {
|
|
439
|
+
"type": "APPTEK-TRANSLATE",
|
|
440
|
+
"url": "{{APPTEK_API_ENDPOINT}}",
|
|
441
|
+
"headers": {
|
|
442
|
+
"x-token": "{{APPTEK_API_KEY}}",
|
|
443
|
+
"Accept": "application/json",
|
|
444
|
+
"Content-Type": "text/plain"
|
|
445
|
+
},
|
|
446
|
+
"requestsPerSecond": 10,
|
|
447
|
+
"maxTokenLength": 128000
|
|
448
|
+
},
|
|
438
449
|
},
|
|
439
450
|
env: 'CORTEX_MODELS'
|
|
440
451
|
},
|
|
@@ -533,6 +544,17 @@ var config = convict({
|
|
|
533
544
|
format: String,
|
|
534
545
|
default: null,
|
|
535
546
|
env: 'JINA_API_KEY'
|
|
547
|
+
},
|
|
548
|
+
apptekApiKey: {
|
|
549
|
+
format: String,
|
|
550
|
+
default: null,
|
|
551
|
+
env: 'APPTEK_API_KEY',
|
|
552
|
+
sensitive: true
|
|
553
|
+
},
|
|
554
|
+
apptekApiEndpoint: {
|
|
555
|
+
format: String,
|
|
556
|
+
default: null,
|
|
557
|
+
env: 'APPTEK_API_ENDPOINT'
|
|
536
558
|
}
|
|
537
559
|
});
|
|
538
560
|
|
|
@@ -1,18 +1,20 @@
|
|
|
1
1
|
# Cortex File Handler Interface Documentation
|
|
2
2
|
|
|
3
3
|
## Overview
|
|
4
|
+
|
|
4
5
|
The Cortex File Handler is a service that processes files through various operations including uploading, downloading, chunking, and document processing. It supports multiple storage backends (Azure Blob Storage, Google Cloud Storage, and Local File System).
|
|
5
6
|
|
|
6
7
|
## Request Methods
|
|
7
8
|
|
|
8
9
|
### POST
|
|
10
|
+
|
|
9
11
|
- **Purpose**: Upload a file
|
|
10
12
|
- **Content-Type**: `multipart/form-data`
|
|
11
13
|
- **Parameters**:
|
|
12
14
|
- `hash` (optional): Unique identifier for the file
|
|
13
15
|
- `requestId` (required): Unique identifier for the request
|
|
14
16
|
- File content must be included in the form data
|
|
15
|
-
- **Behavior**:
|
|
17
|
+
- **Behavior**:
|
|
16
18
|
- Uploads file to primary storage (Azure or Local)
|
|
17
19
|
- If GCS is configured, also uploads to GCS
|
|
18
20
|
- If hash is provided, stores file metadata in Redis
|
|
@@ -26,6 +28,7 @@ The Cortex File Handler is a service that processes files through various operat
|
|
|
26
28
|
- **Note**: The `save` parameter is not supported in POST requests. To convert and save a document as text, use GET with the `save` parameter.
|
|
27
29
|
|
|
28
30
|
### GET
|
|
31
|
+
|
|
29
32
|
- **Purpose**: Process or retrieve files
|
|
30
33
|
- **Parameters** (can be in query string or request body):
|
|
31
34
|
- `uri` (required if not using fetch/load/restore): URL of the file to process
|
|
@@ -48,13 +51,13 @@ The Cortex File Handler is a service that processes files through various operat
|
|
|
48
51
|
- Truncates long filenames
|
|
49
52
|
- **Behavior**:
|
|
50
53
|
- For documents (PDF, DOC, etc.):
|
|
51
|
-
- If `save=true`:
|
|
54
|
+
- If `save=true`:
|
|
52
55
|
- Converts document to text
|
|
53
56
|
- Saves text file to primary storage (Azure or Local)
|
|
54
57
|
- Deletes original document from storage
|
|
55
58
|
- Does not save to GCS
|
|
56
59
|
- Returns object with primary storage URL
|
|
57
|
-
- If `save=false`:
|
|
60
|
+
- If `save=false`:
|
|
58
61
|
- Converts document to text
|
|
59
62
|
- Returns array of text chunks
|
|
60
63
|
- Does not persist any files
|
|
@@ -72,6 +75,7 @@ The Cortex File Handler is a service that processes files through various operat
|
|
|
72
75
|
- Ensures correct file extension based on content type
|
|
73
76
|
|
|
74
77
|
### DELETE
|
|
78
|
+
|
|
75
79
|
- **Purpose**: Remove files from storage
|
|
76
80
|
- **Parameters** (can be in query string or request body):
|
|
77
81
|
- `requestId` (required): Unique identifier for the request
|
|
@@ -82,12 +86,14 @@ The Cortex File Handler is a service that processes files through various operat
|
|
|
82
86
|
- **Response**: Array of deleted file URLs
|
|
83
87
|
|
|
84
88
|
## Storage Configuration
|
|
89
|
+
|
|
85
90
|
- **Azure**: Enabled if `AZURE_STORAGE_CONNECTION_STRING` is set
|
|
86
91
|
- **GCS**: Enabled if `GCP_SERVICE_ACCOUNT_KEY_BASE64` or `GCP_SERVICE_ACCOUNT_KEY` is set
|
|
87
92
|
- **Local**: Used as fallback if Azure is not configured
|
|
88
93
|
|
|
89
94
|
## Response Format
|
|
90
|
-
|
|
95
|
+
|
|
96
|
+
- **Success**:
|
|
91
97
|
- Status: 200
|
|
92
98
|
- Body: Varies by operation (see specific methods above)
|
|
93
99
|
- **Error**:
|
|
@@ -95,6 +101,7 @@ The Cortex File Handler is a service that processes files through various operat
|
|
|
95
101
|
- Body: Error message string
|
|
96
102
|
|
|
97
103
|
## Progress Tracking
|
|
104
|
+
|
|
98
105
|
- Progress updates are published to Redis for each operation
|
|
99
106
|
- Progress includes:
|
|
100
107
|
- `progress`: Completion percentage (0-1)
|
|
@@ -105,8 +112,9 @@ The Cortex File Handler is a service that processes files through various operat
|
|
|
105
112
|
- Progress updates are published to Redis channel associated with `requestId`
|
|
106
113
|
|
|
107
114
|
## File Types
|
|
115
|
+
|
|
108
116
|
- **Documents**: Processed based on `DOC_EXTENSIONS` list
|
|
109
|
-
- Supported extensions:
|
|
117
|
+
- Supported extensions:
|
|
110
118
|
- Text: .txt, .json, .csv, .md, .xml, .js, .html, .css
|
|
111
119
|
- Office: .doc, .docx, .xls, .xlsx
|
|
112
120
|
- Document processing limitations:
|
|
@@ -135,6 +143,7 @@ The Cortex File Handler is a service that processes files through various operat
|
|
|
135
143
|
- Truncates filenames longer than 200 characters
|
|
136
144
|
|
|
137
145
|
## Storage Behavior
|
|
146
|
+
|
|
138
147
|
- **Primary Storage** (Azure or Local):
|
|
139
148
|
- Files are stored with UUID-based names
|
|
140
149
|
- Organized by requestId folders
|
|
@@ -151,6 +160,7 @@ The Cortex File Handler is a service that processes files through various operat
|
|
|
151
160
|
- Used for progress tracking
|
|
152
161
|
|
|
153
162
|
## Cleanup
|
|
163
|
+
|
|
154
164
|
- Automatic cleanup of inactive files
|
|
155
165
|
- Removes files from:
|
|
156
166
|
- Primary storage (Azure/Local)
|
|
@@ -163,16 +173,17 @@ The Cortex File Handler is a service that processes files through various operat
|
|
|
163
173
|
- On error conditions
|
|
164
174
|
|
|
165
175
|
## Error Handling
|
|
166
|
-
|
|
176
|
+
|
|
177
|
+
- **400 Bad Request**:
|
|
167
178
|
- Missing required parameters
|
|
168
179
|
- Invalid or inaccessible URL
|
|
169
180
|
- Unsupported file type
|
|
170
|
-
- **404 Not Found**:
|
|
181
|
+
- **404 Not Found**:
|
|
171
182
|
- File or hash not found
|
|
172
183
|
- File not found in storage
|
|
173
|
-
- **500 Internal Server Error**:
|
|
184
|
+
- **500 Internal Server Error**:
|
|
174
185
|
- Processing errors
|
|
175
186
|
- Storage errors
|
|
176
187
|
- Document conversion errors
|
|
177
188
|
- PDF processing errors (scanned, encrypted, password-protected)
|
|
178
|
-
- All errors include descriptive message in response body
|
|
189
|
+
- All errors include descriptive message in response body
|
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@aj-archipelago/cortex-file-handler",
|
|
3
|
-
"version": "
|
|
3
|
+
"version": "2.6.1",
|
|
4
4
|
"lockfileVersion": 3,
|
|
5
5
|
"requires": true,
|
|
6
6
|
"packages": {
|
|
7
7
|
"": {
|
|
8
8
|
"name": "@aj-archipelago/cortex-file-handler",
|
|
9
|
-
"version": "
|
|
9
|
+
"version": "2.6.1",
|
|
10
10
|
"dependencies": {
|
|
11
11
|
"@azure/storage-blob": "^12.13.0",
|
|
12
12
|
"@distube/ytdl-core": "^4.14.3",
|
|
@@ -1,25 +1,25 @@
|
|
|
1
|
-
import { BlobServiceClient } from
|
|
1
|
+
import { BlobServiceClient } from "@azure/storage-blob";
|
|
2
2
|
|
|
3
3
|
async function createContainer() {
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
blobServiceClient.getContainerClient(
|
|
4
|
+
try {
|
|
5
|
+
const blobServiceClient = BlobServiceClient.fromConnectionString(
|
|
6
|
+
"UseDevelopmentStorage=true",
|
|
7
|
+
);
|
|
8
|
+
const containerClient =
|
|
9
|
+
blobServiceClient.getContainerClient("test-container");
|
|
10
10
|
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
11
|
+
console.log("Creating container...");
|
|
12
|
+
await containerClient.create();
|
|
13
|
+
console.log("Container created successfully");
|
|
14
|
+
} catch (error) {
|
|
15
15
|
// Ignore if container already exists
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
}
|
|
16
|
+
if (error.statusCode === 409) {
|
|
17
|
+
console.log("Container already exists");
|
|
18
|
+
} else {
|
|
19
|
+
console.error("Error creating container:", error);
|
|
20
|
+
process.exit(1);
|
|
22
21
|
}
|
|
22
|
+
}
|
|
23
23
|
}
|
|
24
24
|
|
|
25
25
|
createContainer();
|
|
@@ -1,52 +1,52 @@
|
|
|
1
|
-
import { BlobServiceClient } from
|
|
2
|
-
import { Storage } from
|
|
1
|
+
import { BlobServiceClient } from "@azure/storage-blob";
|
|
2
|
+
import { Storage } from "@google-cloud/storage";
|
|
3
3
|
|
|
4
4
|
async function createAzureContainer() {
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
blobServiceClient.getContainerClient(
|
|
5
|
+
try {
|
|
6
|
+
const blobServiceClient = BlobServiceClient.fromConnectionString(
|
|
7
|
+
"UseDevelopmentStorage=true",
|
|
8
|
+
);
|
|
9
|
+
const containerClient =
|
|
10
|
+
blobServiceClient.getContainerClient("test-container");
|
|
11
11
|
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
12
|
+
console.log("Creating Azure container...");
|
|
13
|
+
await containerClient.create();
|
|
14
|
+
console.log("Azure container created successfully");
|
|
15
|
+
} catch (error) {
|
|
16
16
|
// Ignore if container already exists
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
}
|
|
17
|
+
if (error.statusCode === 409) {
|
|
18
|
+
console.log("Azure container already exists");
|
|
19
|
+
} else {
|
|
20
|
+
console.error("Error creating Azure container:", error);
|
|
21
|
+
process.exit(1);
|
|
23
22
|
}
|
|
23
|
+
}
|
|
24
24
|
}
|
|
25
25
|
|
|
26
26
|
async function createGCSBucket() {
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
27
|
+
try {
|
|
28
|
+
const storage = new Storage({
|
|
29
|
+
projectId: "test-project",
|
|
30
|
+
apiEndpoint: "http://localhost:4443",
|
|
31
|
+
});
|
|
32
32
|
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
33
|
+
console.log("Creating GCS bucket...");
|
|
34
|
+
await storage.createBucket("cortextempfiles");
|
|
35
|
+
console.log("GCS bucket created successfully");
|
|
36
|
+
} catch (error) {
|
|
37
37
|
// Ignore if bucket already exists
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
}
|
|
38
|
+
if (error.code === 409) {
|
|
39
|
+
console.log("GCS bucket already exists");
|
|
40
|
+
} else {
|
|
41
|
+
console.error("Error creating GCS bucket:", error);
|
|
42
|
+
process.exit(1);
|
|
44
43
|
}
|
|
44
|
+
}
|
|
45
45
|
}
|
|
46
46
|
|
|
47
47
|
async function setup() {
|
|
48
|
-
|
|
49
|
-
|
|
48
|
+
await createAzureContainer();
|
|
49
|
+
await createGCSBucket();
|
|
50
50
|
}
|
|
51
51
|
|
|
52
52
|
setup();
|