@aj-archipelago/cortex 1.3.14 → 1.3.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -131,6 +131,16 @@ Cortex pathway prompt enhancements include:
  ### Pathway Development
  To add a new pathway to Cortex, you create a new JavaScript file and define the prompts, properties, and functions that implement the desired functionality. Cortex provides defaults for almost everything, so in the simplest case a pathway can really just consist of a string prompt like the spelling example above. You can then save this file in the `pathways` directory in your Cortex project and it will be picked up and made available as a GraphQL query.
 
+ ### Specifying a Model
+ When determining which model to use for a pathway, Cortex follows this order of precedence:
+
+ 1. `pathway.model` - The model specified directly in the pathway definition
+ 2. `args.model` - The model passed in the request arguments
+ 3. `pathway.inputParameters.model` - The model specified in the pathway's input parameters
+ 4. `config.get('defaultModelName')` - The default model specified in the configuration
+
+ The first valid model found in this order will be used. If none of these models are found in the configured endpoints, Cortex will log a warning and use the default model defined in the configuration.
+
  ### Prompt
  When you define a new pathway, you need to at least specify a prompt that will be passed to the model for processing. In the simplest case, a prompt is really just a string, but the prompt is polymorphic - it can be a string or an object that contains information for the model API that you wish to call. Prompts can also be an array of strings or an array of objects for sequential operations. In this way Cortex aims to support the most simple to advanced prompting scenarios.
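To make the precedence list in the hunk above concrete, here is a minimal sketch of a pathway file that pins its model directly. The default-export shape, file name, model name `oai-gpt4o`, and `{{text}}` placeholder are illustrative assumptions rather than package contents; the model name must match one defined in your Cortex configuration.

```js
// pathways/spell_gpt4o.js — hypothetical pathway file saved in the `pathways` directory
export default {
    // pathway.model is checked first, before args.model, inputParameters.model,
    // and config.get('defaultModelName')
    model: 'oai-gpt4o', // assumed model name; must exist in the configured endpoints
    prompt: `Correct the spelling and grammar of the following text: {{text}}`,
};
```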
 
@@ -577,7 +587,7 @@ The following properties can be configured through environment variables or the
  - `subscriptionKeepAlive`: Keep-alive time for subscriptions in seconds. Default is 0.
 
  API-specific configuration:
- - `azureVideoTranslationApiUrl`: URL for Azure video translation API. Default is 'http://127.0.0.1:5005'.
+ - `azureVideoTranslationApiKey`: API key for Azure video translation API. Default is null.
  - `dalleImageApiUrl`: URL for DALL-E image API. Default is 'null'.
  - `neuralSpaceApiKey`: API key for NeuralSpace services. Default is null.
  - `whisperMediaApiUrl`: URL for Whisper media API. Default is 'null'.
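As the config.js and plugin diffs below show, the renamed setting is read through the convict config rather than passed per request. A minimal sketch of supplying and reading it (the environment variable and config key names come from config.js below; the import path mirrors the plugin's own import):

```js
// Supply the key via the environment before starting Cortex, e.g.
//   AZURE_VIDEO_TRANSLATION_API_KEY=<your-subscription-key>
import { config } from "../../config.js";

// The plugin reads the key in its constructor (see AzureVideoTranslatePlugin below)
const subscriptionKey = config.get("azureVideoTranslationApiKey");
if (!subscriptionKey) {
    throw new Error("Azure Video Translation subscription key is not set");
}
```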
package/config.js CHANGED
@@ -271,18 +271,19 @@ var config = convict({
  },
  "azure-video-translate": {
  "type": "AZURE-VIDEO-TRANSLATE",
+ "url": "https://eastus.api.cognitive.microsoft.com/videotranslation",
  "headers": {
  "Content-Type": "application/json"
  },
- "supportsStreaming": true,
  }
  },
  env: 'CORTEX_MODELS'
  },
- azureVideoTranslationApiUrl: {
+ azureVideoTranslationApiKey: {
  format: String,
- default: 'http://127.0.0.1:5005',
- env: 'AZURE_VIDEO_TRANSLATION_API_URL'
+ default: null,
+ env: 'AZURE_VIDEO_TRANSLATION_API_KEY',
+ sensitive: true
  },
  openaiApiKey: {
  format: String,
@@ -68,6 +68,7 @@ export const ACCEPTED_MIME_TYPES = {
  'image/webp': ['.webp'],
  'image/heic': ['.heic'],
  'image/heif': ['.heif'],
+ 'application/octet-stream': ['.jpg', '.jpeg', '.png', '.webp', '.heic', '.heif'],
  'application/pdf': ['.pdf'],
 
  // Audio types
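The new 'application/octet-stream' entry lets uploads whose server reports only a generic binary MIME type still be accepted when the file extension identifies a supported image. A rough sketch of how such an extension list can be applied; the helper function is illustrative only and not part of the package:

```js
// Illustrative slice of the exported ACCEPTED_MIME_TYPES constant from the diff above
const ACCEPTED_MIME_TYPES = {
    'application/octet-stream': ['.jpg', '.jpeg', '.png', '.webp', '.heic', '.heif'],
};

// Hypothetical helper: accept a generically typed upload when its extension matches
function isAcceptedFile(mimeType, fileName) {
    const extensions = ACCEPTED_MIME_TYPES[mimeType] || [];
    return extensions.some(ext => fileName.toLowerCase().endsWith(ext));
}

console.log(isAcceptedFile('application/octet-stream', 'photo.HEIC')); // true
```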
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "@aj-archipelago/cortex",
- "version": "1.3.14",
+ "version": "1.3.15",
  "description": "Cortex is a GraphQL API for AI. It provides a simple, extensible interface for using AI services from OpenAI, Azure and others.",
  "private": false,
  "repository": {
@@ -1,26 +1,223 @@
  // AzureVideoTranslatePlugin.js
  import ModelPlugin from "./modelPlugin.js";
  import logger from "../../lib/logger.js";
- import axios from "axios";
  import { publishRequestProgress } from "../../lib/redisSubscription.js";
- import { config } from "../../config.js";
-
- function isValidJSON(str) {
- try {
- JSON.parse(str);
- return true;
- } catch (e) {
- return false;
- }
- }
+ import crypto from 'crypto';
+ import axios from 'axios';
+ import {config} from "../../config.js";
+
+ // turn off any caching because we're polling the operation status
+ axios.defaults.cache = false;
 
  class AzureVideoTranslatePlugin extends ModelPlugin {
+ static lastProcessingRate = null; // bytes per second
+
  constructor(pathway, model) {
  super(pathway, model);
- this.apiUrl = config.get("azureVideoTranslationApiUrl");
- this.eventSource = null;
- this.jsonBuffer = '';
- this.jsonDepth = 0;
+ this.subscriptionKey = config.get("azureVideoTranslationApiKey");
+ this.apiVersion = "2024-05-20-preview";
+ this.baseUrl = "";
+ this.startTime = null;
+ this.videoContentLength = null;
+ }
+
+ async verifyVideoAccess(videoUrl) {
+ try {
+ const response = await axios.head(videoUrl);
+
+ const contentType = response.headers['content-type'];
+ const contentLength = parseInt(response.headers['content-length'], 10);
+
+ if (contentType && !contentType.includes('video/mp4')) {
+ logger.warn(`Warning: Video might not be in MP4 format. Content-Type: ${contentType}`);
+ }
+
+ const TYPICAL_BITRATE = 2.5 * 1024 * 1024; // 2.5 Mbps
+ const durationSeconds = Math.round((contentLength * 8) / TYPICAL_BITRATE);
+
+ return {
+ isAccessible: true,
+ contentLength,
+ durationSeconds: durationSeconds || 60
+ };
+ } catch (error) {
+ throw new Error(`Failed to access video: ${error.message}`);
+ }
+ }
+
+ async createTranslation(params) {
+ const { videoUrl, sourceLanguage, targetLanguage, voiceKind, translationId } = params;
+
+ const translation = {
+ id: translationId,
+ displayName: `${translationId}.mp4`,
+ description: `Translate video from ${sourceLanguage} to ${targetLanguage}`,
+ input: {
+ sourceLocale: sourceLanguage,
+ targetLocale: targetLanguage,
+ voiceKind: voiceKind,
+ videoFileUrl: videoUrl
+ }
+ };
+
+ const url = `${this.baseUrl}/translations/${translationId}?api-version=${this.apiVersion}`;
+ logger.debug(`Creating translation: ${url}`);
+
+ try {
+ const response = await axios.put(url, translation, {
+ headers: {
+ 'Content-Type': 'application/json',
+ 'Ocp-Apim-Subscription-Key': this.subscriptionKey,
+ }
+ });
+
+ const operationUrl = response.headers['operation-location'];
+ return { translation: response.data, operationUrl };
+ } catch (error) {
+ const errorText = error.response?.data || error.message;
+ throw new Error(`Failed to create translation: ${error.message}\nDetails: ${errorText}`);
+ }
+ }
+
+ async getTranslationStatus(translationId) {
+ const url = `${this.baseUrl}/translations/${translationId}?api-version=${this.apiVersion}`;
+ try {
+ const response = await axios.get(url, {
+ headers: {
+ 'Ocp-Apim-Subscription-Key': this.subscriptionKey,
+ }
+ });
+ return response.data;
+ } catch (error) {
+ throw new Error(`Failed to get translation status: ${error.message}`);
+ }
+ }
+
+ async getIterationStatus(translationId, iterationId) {
+ const url = `${this.baseUrl}/translations/${translationId}/iterations/${iterationId}?api-version=${this.apiVersion}`;
+
+ try {
+ const response = await axios.get(url, {
+ headers: {
+ 'Ocp-Apim-Subscription-Key': this.subscriptionKey,
+ }
+ });
+ return response.data;
+ } catch (error) {
+ const errorText = error.response?.data || error.message;
+ throw new Error(`Failed to get iteration status: ${error.message}\nDetails: ${errorText}`);
+ }
+ }
+
+ async pollOperation(operationUrl) {
+ try {
+ const response = await axios.get(operationUrl, {
+ headers: {
+ 'Ocp-Apim-Subscription-Key': this.subscriptionKey,
+ }
+ });
+ return response.data;
+ } catch (error) {
+ const errorText = error.response?.data || error.message;
+ throw new Error(`Failed to poll operation: ${error.message}\nDetails: ${errorText}`);
+ }
+ }
+
+ async monitorOperation(operationUrlOrConfig, entityType = 'operation') {
+
+ let estimatedTotalTime = 0;
+ if (AzureVideoTranslatePlugin.lastProcessingRate && this.videoContentLength) {
+ estimatedTotalTime = this.videoContentLength / AzureVideoTranslatePlugin.lastProcessingRate;
+ } else {
+ // First run: estimate based on 1x calculated video duration
+ estimatedTotalTime = (this.videoContentLength * 8) / (2.5 * 1024 * 1024);
+ }
+
+ // eslint-disable-next-line no-constant-condition
+ while (true) {
+ let status;
+ if (typeof operationUrlOrConfig === 'string') {
+ const operation = await this.pollOperation(operationUrlOrConfig);
+ status = operation;
+ } else {
+ const { translationId, iterationId } = operationUrlOrConfig;
+ const iteration = await this.getIterationStatus(translationId, iterationId);
+ status = iteration;
+ }
+
+ logger.debug(`${entityType} status: ${JSON.stringify(status, null, 2)}`);
+
+ let progress = 0;
+ let estimatedProgress = 0;
+ let progressMessage = '';
+ switch (entityType) {
+ case 'translation':
+ progressMessage = 'Getting ready to translate video...';
+ break;
+ case 'iteration':
+ if (status.status === 'NotStarted') {
+ progressMessage = 'Waiting for translation to start...';
+ } else if (status.status === 'Running') {
+ progressMessage = 'Translating video...';
+ if (this.startTime) {
+ // Calculate progress based on elapsed time
+ const elapsedSeconds = (Date.now() - this.startTime) / 1000;
+ estimatedProgress = Math.min(0.95, elapsedSeconds / estimatedTotalTime);
+ const remainingSeconds = Math.max(0, estimatedTotalTime - elapsedSeconds);
+ if (remainingSeconds > 0) {
+ if (remainingSeconds < 60) {
+ const roundedSeconds = Math.ceil(remainingSeconds);
+ progressMessage = `Translating video... ${roundedSeconds} second${roundedSeconds !== 1 ? 's' : ''} remaining`;
+ } else {
+ const remainingMinutes = Math.ceil(remainingSeconds / 60);
+ progressMessage = `Translating video... ${remainingMinutes} minute${remainingMinutes !== 1 ? 's' : ''} remaining`;
+ }
+ }
+ progress = status.percentComplete ? status.percentComplete / 100 : estimatedProgress;
+ } else {
+ this.startTime = Date.now();
+ estimatedProgress = 0;
+ }
+ } else if (status.status === 'Succeeded') {
+ progressMessage = 'Video translation complete.';
+ } else if (status.status === 'Failed') {
+ progressMessage = 'Video translation failed.';
+ }
+ break;
+ }
+
+ // Publish progress updates
+ publishRequestProgress({
+ requestId: this.requestId,
+ progress,
+ info: progressMessage
+ });
+
+ if (status.status === 'Succeeded') {
+ return status;
+ } else if (status.status === 'Failed') {
+ throw new Error(`${entityType} failed: ${status.error?.message || 'Unknown error'}`);
+ }
+ await new Promise(resolve => setTimeout(resolve, 5000));
+ }
+ }
+
+ async getTranslationOutput(translationId, iterationId) {
+ const iteration = await this.getIterationStatus(translationId, iterationId);
+ const translation = await this.getTranslationStatus(translationId);
+ if (iteration.result) {
+ const targetLocale = translation.input.targetLocale;
+ return {
+ outputVideoSubtitleWebVttFileUrl: iteration.result.sourceLocaleSubtitleWebvttFileUrl,
+ targetLocales: {
+ [targetLocale]: {
+ outputVideoFileUrl: iteration.result.translatedVideoFileUrl,
+ outputVideoSubtitleWebVttFileUrl: iteration.result.targetLocaleSubtitleWebvttFileUrl
+ }
+ }
+ };
+ }
+ return null;
  }
 
  getRequestParameters(_, parameters, __) {
@@ -37,150 +234,88 @@ class AzureVideoTranslatePlugin extends ModelPlugin {
  );
  }
 
- handleStream(stream, onData, onEnd, onError) {
- const timeout = setTimeout(() => {
- onError(new Error('Stream timeout'));
- }, 300000); // timeout
-
- stream.on('data', (chunk) => {
- clearTimeout(timeout);
- const lines = chunk.toString().split('\n\n');
- lines.forEach(line => {
- if (line.startsWith('data: ')) {
- const eventData = line.slice(6);
- try {
- this.handleEvent({ data: eventData }, onData);
- } catch (error) {
- onError(error);
- }
- }
- });
- });
- stream.on('end', () => {
- clearTimeout(timeout);
- this.cleanup();
- onEnd();
- });
- stream.on('error', (error) => {
- clearTimeout(timeout);
- console.error('Stream error:', error);
- this.cleanup();
- onError(error);
- });
- }
-
- handleEvent(event, onData) {
- const data = event.data;
- this.jsonBuffer += data;
- this.jsonDepth += (data.match(/{/g) || []).length - (data.match(/}/g) || []).length;
-
- if (this.jsonDepth === 0 && this.jsonBuffer.trim()) {
- logger.debug(this.jsonBuffer);
- if (this.jsonBuffer.includes('Failed to run with exception')) {
- this.cleanup();
- throw new Error(this.jsonBuffer);
- }
-
- onData(this.jsonBuffer);
- this.jsonBuffer = '';
- this.jsonDepth = 0;
- }
- }
-
  async execute(text, parameters, prompt, cortexRequest) {
- if (!this.apiUrl) {
- throw new Error("API URL is not set");
+ if (!this.subscriptionKey) {
+ throw new Error("Azure Video Translation subscription key is not set");
  }
+
  this.requestId = cortexRequest.requestId;
+ this.baseUrl = cortexRequest.url;
+
  const requestParameters = this.getRequestParameters(text, parameters, prompt);
+
  try {
- const response = await axios.post(this.apiUrl, requestParameters, {
- responseType: 'stream',
- headers: {
- 'Cache-Control': 'no-cache',
- 'Pragma': 'no-cache',
- 'Expires': '0',
- }
+ const translationId = `cortex-translation-${this.requestId}`;
+ const videoUrl = requestParameters.sourcevideooraudiofilepath;
+ const sourceLanguage = requestParameters.sourcelocale;
+ const targetLanguage = requestParameters.targetlocale;
+ const voiceKind = requestParameters.voicekind || 'PlatformVoice';
+ const embedSubtitles = requestParameters.withoutsubtitleintranslatedvideofile === "false" ? true : false;
+ const speakerCount = parseInt(requestParameters.speakercount) || 0;
+
+ // Verify video access and get duration
+ const videoInfo = await this.verifyVideoAccess(videoUrl);
+ this.videoContentLength = videoInfo.contentLength;
+ logger.debug(`Video info: ${JSON.stringify(videoInfo, null, 2)}`);
+
+ // Create translation
+ const { operationUrl } = await this.createTranslation({
+ videoUrl, sourceLanguage, targetLanguage, voiceKind, translationId
  });
 
- return new Promise((resolve, reject) => {
- let finalJson = '';
- this.handleStream(response.data,
- (data) => {
- let sent = false;
- if (isValidJSON(data)) {
- const parsedData = JSON.parse(data);
- if (parsedData.progress !== undefined) {
- let timeInfo = '';
- if (parsedData.estimated_time_remaining && parsedData.elapsed_time) {
- const minutes = Math.ceil(parsedData.estimated_time_remaining / 60);
- timeInfo = minutes <= 2
- ? `Should be done soon (${parsedData.elapsed_time} elapsed)`
- : `Estimated ${minutes} minutes remaining`;
- }
+ logger.debug(`Starting translation monitoring with operation URL: ${operationUrl}`);
+ // Monitor translation creation
+ const operationStatus = await this.monitorOperation(operationUrl, 'translation');
+ logger.debug(`Translation operation completed with status: ${JSON.stringify(operationStatus, null, 2)}`);
+
+ const updatedTranslation = await this.getTranslationStatus(translationId);
+ logger.debug(`Translation status after operation: ${JSON.stringify(updatedTranslation, null, 2)}`);
 
- publishRequestProgress({
- requestId: this.requestId,
- progress: parsedData.progress,
- info: timeInfo
- });
- sent = true;
- }
- }
- if (!sent) {
- publishRequestProgress({
- requestId: this.requestId,
- info: data
- });
- }
- logger.debug('Data:', data);
-
- // Extract JSON content if message contains targetLocales
- const jsonMatch = data.match(/{[\s\S]*"targetLocales"[\s\S]*}/);
- if (jsonMatch) {
- const extractedJson = jsonMatch[0];
- if (isValidJSON(extractedJson)) {
- finalJson = extractedJson;
- }
- }
- },
- () => {
- resolve(finalJson)
- },
- (error) => reject(error)
- );
- }).finally(() => this.cleanup());
+ // Create iteration
+ const iteration = {
+ id: crypto.randomUUID(),
+ displayName: translationId,
+ input: {
+ subtitleMaxCharCountPerSegment: 42,
+ exportSubtitleInVideo: embedSubtitles,
+ ...(speakerCount > 0 && { speakerCount })
+ }
+ };
 
- } catch (error) {
- this.cleanup();
- return error;
- }
- }
+ logger.debug(`Creating iteration: ${JSON.stringify(iteration, null, 2)}`);
+ const iterationUrl = `${this.baseUrl}/translations/${translationId}/iterations/${iteration.id}?api-version=${this.apiVersion}`;
+ try {
+ const iterationResponse = await axios.put(iterationUrl, iteration, {
+ headers: {
+ 'Content-Type': 'application/json',
+ 'Ocp-Apim-Subscription-Key': this.subscriptionKey,
+ 'Cache-Control': 'no-cache',
+ 'Pragma': 'no-cache'
+ }
+ });
 
- parseResponse(data) {
- const response = typeof data === 'object' ? JSON.stringify(data) : data;
- publishRequestProgress({
- requestId: this.requestId,
- progress: 1,
- data: response,
- });
- return response;
- }
+ const iterationOperationUrl = iterationResponse.headers['operation-location'];
+ await this.monitorOperation(iterationOperationUrl, 'iteration');
+
+ // Update processing rate for future estimates
+ const totalSeconds = (Date.now() - this.startTime) / 1000;
+ AzureVideoTranslatePlugin.lastProcessingRate = this.videoContentLength / totalSeconds;
+ logger.debug(`Updated processing rate: ${AzureVideoTranslatePlugin.lastProcessingRate} bytes/second`);
 
- logRequestData(data, responseData, prompt) {
- logger.verbose(`Request: ${JSON.stringify(data)}`);
- logger.verbose(`Response: ${this.parseResponse(responseData)}`);
- if (prompt?.debugInfo) {
- prompt.debugInfo += `\nRequest: ${JSON.stringify(data)}`;
- prompt.debugInfo += `\nResponse: ${this.parseResponse(responseData)}`;
+ const output = await this.getTranslationOutput(translationId, iteration.id);
+ return JSON.stringify(output);
+ } catch (error) {
+ const errorText = error.response?.data || error.message;
+ throw new Error(`Failed to create iteration: ${error.message}\nDetails: ${errorText}`);
+ }
+ } catch (error) {
+ logger.error(`Error in video translation: ${error.message}`);
+ throw error;
  }
  }
 
  cleanup() {
- if (this.eventSource) {
- this.eventSource.close();
- this.eventSource = null;
- }
+ // No cleanup needed for direct API implementation
  }
  }
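For reference, `getTranslationOutput` above resolves the request with a JSON string of roughly this shape; the locale key and URLs below are placeholders, not values produced by the package:

```js
// Approximate shape of the value returned by execute(), per getTranslationOutput above
const exampleOutput = {
    outputVideoSubtitleWebVttFileUrl: "https://<storage>/source-subtitles.vtt",
    targetLocales: {
        "es-ES": { // placeholder target locale
            outputVideoFileUrl: "https://<storage>/translated-video.mp4",
            outputVideoSubtitleWebVttFileUrl: "https://<storage>/target-subtitles.vtt"
        }
    }
};
```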