@aj-archipelago/cortex 1.1.9 → 1.1.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -66,7 +66,8 @@ async function downloadFile(url, outputPath) {
66
66
  }
67
67
  }
68
68
 
69
- async function splitMediaFile(inputPath, chunkDurationInSeconds = 600) {
69
+ // Split a media file into chunks of max 500 seconds
70
+ async function splitMediaFile(inputPath, chunkDurationInSeconds = 500) {
70
71
  try {
71
72
  // Create unique folder
72
73
  const uniqueOutputPath = generateUniqueFolderName();
@@ -1,12 +1,42 @@
1
1
  # For more information, please refer to https://aka.ms/vscode-docker-python
2
- FROM python:3.10-slim
2
+ #FROM python:3.10-slim
3
+ FROM nvidia/cuda:12.2.2-devel-ubuntu22.04
4
+
5
+ # Update system and install necessary packages, including python3.10
6
+ RUN apt-get update && apt-get install -y \
7
+ ffmpeg \
8
+ python3.10 \
9
+ python3-pip \
10
+ && apt-get clean \
11
+ && rm -rf /var/lib/apt/lists/* \
12
+ && ln -s /usr/bin/python3.10 /usr/bin/python
13
+
14
+ # # Update and install necessary packages.
15
+ # RUN apt-get update && apt-get install -y \
16
+ # ffmpeg \
17
+ # nvidia-cuda-toolkit \
18
+ # && apt-get clean \
19
+ # && rm -rf /var/lib/apt/lists/*
20
+
21
+ # Verify that the CUDA toolkit was installed correctly
22
+ RUN nvcc --version
23
+
24
+ # # Update system and install necessary packages
25
+ # RUN apt-get update && apt-get install -y \
26
+ # ffmpeg \
27
+ # && apt-get clean \
28
+ # && rm -rf /var/lib/apt/lists/*
29
+
3
30
 
4
31
  EXPOSE 8000
5
32
 
6
- ## following 3 lines are for installing ffmepg
7
- RUN apt-get -y update
8
- RUN apt-get -y upgrade
9
- RUN apt-get install -y ffmpeg
33
+ # ## following 3 lines are for installing ffmepg
34
+ # RUN apt-get -y update
35
+ # RUN apt-get -y upgrade
36
+ # RUN apt-get install -y ffmpeg
37
+
38
+ # # Install CUDA toolkit
39
+ # RUN apt-get install -y nvidia-cuda-toolkit
10
40
 
11
41
  # Keeps Python from generating .pyc files in the container
12
42
  ENV PYTHONDONTWRITEBYTECODE=1
@@ -9,7 +9,11 @@ from fastapi.encoders import jsonable_encoder
9
9
  import time
10
10
 
11
11
  model_download_root = './models'
12
- model = whisper.load_model("large", download_root=model_download_root) #large, tiny
12
+ try:
13
+ model = whisper.load_model("large", download_root=model_download_root) #large, tiny
14
+ except Exception as e:
15
+ print(f"Error loading model: {e}")
16
+ raise
13
17
 
14
18
  # Create a semaphore with a limit of 1
15
19
  semaphore = asyncio.Semaphore(1)
@@ -26,14 +30,14 @@ def delete_tmp_file(file_path):
26
30
  print(f"Temporary file '{file_path}' has been deleted.")
27
31
  except OSError as e:
28
32
  print(f"Error: {e.strerror}")
29
-
33
+
30
34
  def transcribe(params):
31
35
  if 'fileurl' not in params:
32
36
  raise HTTPException(status_code=400, detail="fileurl parameter is required")
33
37
 
34
38
  fileurl = params["fileurl"]
35
39
 
36
- #word_timestamps bool, default True
40
+ # word_timestamps bool, default True
37
41
  word_timestamps = True
38
42
  if 'word_timestamps' in params: #parse as bool
39
43
  word_timestamps = False if params['word_timestamps'] == 'False' else True
@@ -45,7 +49,11 @@ def transcribe(params):
45
49
 
46
50
  print(f"Transcribing file {fileurl} with word_timestamps={word_timestamps}")
47
51
  start_time = time.time()
48
- result = model.transcribe(fileurl, word_timestamps=word_timestamps, **decode_options)
52
+ try:
53
+ result = model.transcribe(fileurl, word_timestamps=word_timestamps, **decode_options)
54
+ except Exception as e:
55
+ print(f"Error during transcription: {e}")
56
+ raise
49
57
  end_time = time.time()
50
58
  execution_time = end_time - start_time
51
59
  print("Transcribe execution time:", execution_time, "seconds")
@@ -55,7 +63,6 @@ def transcribe(params):
55
63
  print(f"Saving transcription as : {srtpath}")
56
64
  writer = get_writer("srt", save_directory)
57
65
 
58
-
59
66
  writer_args = {'highlight_words': False, 'max_line_count': None, 'max_line_width': None, 'max_words_per_line': None}
60
67
  if 'highlight_words' in params: #parse as bool
61
68
  writer_args['highlight_words'] = params['highlight_words'] == 'True'
@@ -70,9 +77,11 @@ def transcribe(params):
70
77
  if fileurl and word_timestamps and len(params) <= 2:
71
78
  writer_args['max_words_per_line'] = 1
72
79
 
73
- # writer_args = {arg: args.pop(arg) for arg in word_options if arg in args}
74
- writer(result, srtpath, **writer_args)
75
-
80
+ try:
81
+ writer(result, srtpath, **writer_args)
82
+ except Exception as e:
83
+ print(f"Error while writing transcription: {e}")
84
+ raise
76
85
 
77
86
  with open(srtpath, "r") as f:
78
87
  srtstr = f.read()
@@ -101,9 +110,15 @@ async def root(request: Request):
101
110
 
102
111
  params = await get_params(request)
103
112
  async with semaphore:
104
- result = await asyncio.to_thread(transcribe, params)
105
- return result
113
+ try:
114
+ result = await asyncio.to_thread(transcribe, params)
115
+ return result
116
+ except HTTPException as e:
117
+ raise e
118
+ except Exception as e:
119
+ print(f"Internal Server Error: {e}")
120
+ raise HTTPException(status_code=500, detail="Internal Server Error")
106
121
 
107
122
  if __name__ == "__main__":
108
123
  print("Starting APP Whisper server", flush=True)
109
- uvicorn.run(app, host="0.0.0.0", port=8000)
124
+ uvicorn.run(app, host="0.0.0.0", port=8000)
@@ -287,7 +287,7 @@ const makeRequest = async (cortexRequest) => {
287
287
  //logger.info(`XXX [${requestId}] request ${index} was cancelled`);
288
288
  reject(error);
289
289
  } else {
290
- logger.error(`!!! [${requestId}] request ${index} failed with error: ${error?.response?.data?.error?.message || error}`);
290
+ logger.error(`!!! [${requestId}] request ${index} failed with error: ${error?.response?.data?.message || error?.response?.data?.error?.message || error?.message || error}`);
291
291
  reject(error);
292
292
  }
293
293
  } finally {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@aj-archipelago/cortex",
3
- "version": "1.1.9",
3
+ "version": "1.1.11",
4
4
  "description": "Cortex is a GraphQL API for AI. It provides a simple, extensible interface for using AI services from OpenAI, Azure and others.",
5
5
  "private": false,
6
6
  "repository": {
@@ -25,7 +25,7 @@ if(WHISPER_TS_API_URL){
25
25
  logger.warn(`WHISPER API URL not set using default OpenAI API Whisper`);
26
26
  }
27
27
 
28
- const OFFSET_CHUNK = 1000 * 60 * 10; // 10 minutes for each chunk
28
+ const OFFSET_CHUNK = 1000 * 500; // 500 seconds chunk offset
29
29
 
30
30
  async function deleteTempPath(path) {
31
31
  try {
@@ -205,37 +205,34 @@ class OpenAIWhisperPlugin extends ModelPlugin {
205
205
  }
206
206
 
207
207
  const processTS = async (uri) => {
208
- try {
209
- const tsparams = { fileurl:uri };
210
-
211
- const { language } = parameters;
212
- if(language) tsparams.language = language;
213
- if(highlightWords) tsparams.highlight_words = highlightWords ? "True" : "False";
214
- if(maxLineWidth) tsparams.max_line_width = maxLineWidth;
215
- if(maxLineCount) tsparams.max_line_count = maxLineCount;
216
- if(maxWordsPerLine) tsparams.max_words_per_line = maxWordsPerLine;
217
- if(wordTimestamped!=null) {
218
- if(!wordTimestamped) {
219
- tsparams.word_timestamps = "False";
220
- }else{
221
- tsparams.word_timestamps = wordTimestamped;
222
- }
208
+ const tsparams = { fileurl:uri };
209
+ const { language } = parameters;
210
+ if(language) tsparams.language = language;
211
+ if(highlightWords) tsparams.highlight_words = highlightWords ? "True" : "False";
212
+ if(maxLineWidth) tsparams.max_line_width = maxLineWidth;
213
+ if(maxLineCount) tsparams.max_line_count = maxLineCount;
214
+ if(maxWordsPerLine) tsparams.max_words_per_line = maxWordsPerLine;
215
+ if(wordTimestamped!=null) {
216
+ if(!wordTimestamped) {
217
+ tsparams.word_timestamps = "False";
218
+ }else{
219
+ tsparams.word_timestamps = wordTimestamped;
223
220
  }
221
+ }
224
222
 
225
- cortexRequest.url = WHISPER_TS_API_URL;
226
- cortexRequest.data = tsparams;
223
+ cortexRequest.url = WHISPER_TS_API_URL;
224
+ cortexRequest.data = tsparams;
227
225
 
228
- const res = await this.executeRequest(cortexRequest);
226
+ const res = await this.executeRequest(cortexRequest);
227
+ if (res.statusCode && res.statusCode >= 400) {
228
+ throw new Error(res.message || 'An error occurred.');
229
+ }
229
230
 
230
- if(!wordTimestamped && !responseFormat){
231
- //if no response format, convert to text
232
- return convertToText(res);
233
- }
234
- return res;
235
- } catch (err) {
236
- logger.error(`Error getting word timestamped data from api: ${err}`);
237
- throw err;
231
+ if(!wordTimestamped && !responseFormat){
232
+ //if no response format, convert to text
233
+ return convertToText(res);
238
234
  }
235
+ return res;
239
236
  }
240
237
 
241
238
  let result = [];
@@ -268,18 +265,32 @@ class OpenAIWhisperPlugin extends ModelPlugin {
268
265
  async function processURI(uri) {
269
266
  let result = null;
270
267
  let _promise = null;
271
- if(WHISPER_TS_API_URL){
272
- _promise = processTS
273
- }else {
268
+ let errorOccurred = false;
269
+
270
+ const useTS = WHISPER_TS_API_URL && (wordTimestamped || highlightWords);
271
+
272
+ if (useTS) {
273
+ _promise = processTS;
274
+ } else {
274
275
  _promise = processChunk;
275
276
  }
276
- _promise(uri).then((ts) => { result = ts;});
277
-
278
- //send updates while waiting for result
279
- while(!result) {
277
+
278
+ _promise(uri).then((ts) => {
279
+ result = ts;
280
+ }).catch((err) => {
281
+ logger.error(`Error occurred while processing URI: ${err}`);
282
+ errorOccurred = err;
283
+ });
284
+
285
+ while(result === null && !errorOccurred) {
280
286
  sendProgress(true);
281
287
  await new Promise(r => setTimeout(r, 3000));
282
288
  }
289
+
290
+ if(errorOccurred) {
291
+ throw errorOccurred;
292
+ }
293
+
283
294
  return result;
284
295
  }
285
296
 
@@ -0,0 +1,358 @@
1
+ // openAiWhisperPlugin.js
2
+ import ModelPlugin from './modelPlugin.js';
3
+ import { config } from '../../config.js';
4
+ import subsrt from 'subsrt';
5
+ import FormData from 'form-data';
6
+ import fs from 'fs';
7
+ import { axios } from '../../lib/requestExecutor.js';
8
+ import stream from 'stream';
9
+ import os from 'os';
10
+ import path from 'path';
11
+ import http from 'http';
12
+ import https from 'https';
13
+ import { URL } from 'url';
14
+ import { v4 as uuidv4 } from 'uuid';
15
+ import { promisify } from 'util';
16
+ import { publishRequestProgress } from '../../lib/redisSubscription.js';
17
+ import logger from '../../lib/logger.js';
18
+ const pipeline = promisify(stream.pipeline);
19
+
20
+ const API_URL = config.get('whisperMediaApiUrl');
21
+ const WHISPER_TS_API_URL = config.get('whisperTSApiUrl');
22
+ if(WHISPER_TS_API_URL){
23
+ logger.info(`WHISPER API URL using ${WHISPER_TS_API_URL}`);
24
+ }else{
25
+ logger.warn(`WHISPER API URL not set using default OpenAI API Whisper`);
26
+ }
27
+
28
+ const OFFSET_CHUNK = 1000 * 60 * 10; // 10 minutes for each chunk
29
+
30
+ async function deleteTempPath(path) {
31
+ try {
32
+ if (!path) {
33
+ logger.warn('Temporary path is not defined.');
34
+ return;
35
+ }
36
+ if (!fs.existsSync(path)) {
37
+ logger.warn(`Temporary path ${path} does not exist.`);
38
+ return;
39
+ }
40
+ const stats = fs.statSync(path);
41
+ if (stats.isFile()) {
42
+ fs.unlinkSync(path);
43
+ logger.info(`Temporary file ${path} deleted successfully.`);
44
+ } else if (stats.isDirectory()) {
45
+ fs.rmSync(path, { recursive: true });
46
+ logger.info(`Temporary folder ${path} and its contents deleted successfully.`);
47
+ }
48
+ } catch (err) {
49
+ logger.error(`Error occurred while deleting the temporary path: ${err}`);
50
+ }
51
+ }
52
+
53
+ function generateUniqueFilename(extension) {
54
+ return `${uuidv4()}.${extension}`;
55
+ }
56
+
57
+ const downloadFile = async (fileUrl) => {
58
+ const fileExtension = path.extname(fileUrl).slice(1);
59
+ const uniqueFilename = generateUniqueFilename(fileExtension);
60
+ const tempDir = os.tmpdir();
61
+ const localFilePath = `${tempDir}/${uniqueFilename}`;
62
+
63
+ // eslint-disable-next-line no-async-promise-executor
64
+ return new Promise(async (resolve, reject) => {
65
+ try {
66
+ const parsedUrl = new URL(fileUrl);
67
+ const protocol = parsedUrl.protocol === 'https:' ? https : http;
68
+
69
+ const response = await new Promise((resolve, reject) => {
70
+ protocol.get(parsedUrl, (res) => {
71
+ if (res.statusCode === 200) {
72
+ resolve(res);
73
+ } else {
74
+ reject(new Error(`HTTP request failed with status code ${res.statusCode}`));
75
+ }
76
+ }).on('error', reject);
77
+ });
78
+
79
+ await pipeline(response, fs.createWriteStream(localFilePath));
80
+ logger.info(`Downloaded file to ${localFilePath}`);
81
+ resolve(localFilePath);
82
+ } catch (error) {
83
+ fs.unlink(localFilePath, () => {
84
+ reject(error);
85
+ });
86
+ //throw error;
87
+ }
88
+ });
89
+ };
90
+
91
+ // convert srt format to text
92
+ function convertToText(str) {
93
+ return str
94
+ .split('\n')
95
+ .filter(line => !line.match(/^\d+$/) && !line.match(/^\d{2}:\d{2}:\d{2},\d{3} --> \d{2}:\d{2}:\d{2},\d{3}$/) && line !== '')
96
+ .join(' ');
97
+ }
98
+
99
+ function alignSubtitles(subtitles, format) {
100
+ const result = [];
101
+
102
+ function preprocessStr(str) {
103
+ try{
104
+ if(!str) return '';
105
+ return str.trim().replace(/(\n\n)(?!\n)/g, '\n\n\n');
106
+ }catch(e){
107
+ logger.error(`An error occurred in content text preprocessing: ${e}`);
108
+ return '';
109
+ }
110
+ }
111
+
112
+ function shiftSubtitles(subtitle, shiftOffset) {
113
+ const captions = subsrt.parse(preprocessStr(subtitle));
114
+ const resynced = subsrt.resync(captions, { offset: shiftOffset });
115
+ return resynced;
116
+ }
117
+
118
+ for (let i = 0; i < subtitles.length; i++) {
119
+ result.push(...shiftSubtitles(subtitles[i], i * OFFSET_CHUNK));
120
+ }
121
+
122
+ try {
123
+ //if content has needed html style tags, keep them
124
+ for(const obj of result) {
125
+ if(obj && obj.content){
126
+ obj.text = obj.content;
127
+ }
128
+ }
129
+ } catch (error) {
130
+ logger.error(`An error occurred in content text parsing: ${error}`);
131
+ }
132
+
133
+ return subsrt.build(result, { format: format === 'vtt' ? 'vtt' : 'srt' });
134
+ }
135
+
136
+
137
+ class OpenAIWhisperPlugin extends ModelPlugin {
138
+ constructor(pathway, model) {
139
+ super(pathway, model);
140
+ }
141
+
142
+ async getMediaChunks(file, requestId) {
143
+ try {
144
+ if (API_URL) {
145
+ //call helper api and get list of file uris
146
+ const res = await axios.get(API_URL, { params: { uri: file, requestId } });
147
+ return res.data;
148
+ } else {
149
+ logger.info(`No API_URL set, returning file as chunk`);
150
+ return [file];
151
+ }
152
+ } catch (err) {
153
+ logger.error(`Error getting media chunks list from api: ${err}`);
154
+ throw err;
155
+ }
156
+ }
157
+
158
+ async markCompletedForCleanUp(requestId) {
159
+ try {
160
+ if (API_URL) {
161
+ //call helper api to mark processing as completed
162
+ const res = await axios.delete(API_URL, { params: { requestId } });
163
+ logger.info(`Marked request ${requestId} as completed:`, res.data);
164
+ return res.data;
165
+ }
166
+ } catch (err) {
167
+ logger.error(`Error marking request ${requestId} as completed: ${err}`);
168
+ }
169
+ }
170
+
171
+ // Execute the request to the OpenAI Whisper API
172
+ async execute(text, parameters, prompt, cortexRequest) {
173
+ const { pathwayResolver } = cortexRequest;
174
+ const { responseFormat, wordTimestamped, highlightWords, maxLineWidth, maxLineCount, maxWordsPerLine } = parameters;
175
+ cortexRequest.url = this.requestUrl(text);
176
+
177
+ const chunks = [];
178
+ const processChunk = async (uri) => {
179
+ try {
180
+ const chunk = await downloadFile(uri);
181
+ chunks.push(chunk);
182
+
183
+ const { language, responseFormat } = parameters;
184
+ cortexRequest.url = this.requestUrl(text);
185
+ const params = {};
186
+ const { modelPromptText } = this.getCompiledPrompt(text, parameters, prompt);
187
+ const response_format = responseFormat || 'text';
188
+
189
+ const formData = new FormData();
190
+ formData.append('file', fs.createReadStream(chunk));
191
+ formData.append('model', this.model.params.model);
192
+ formData.append('response_format', response_format);
193
+ language && formData.append('language', language);
194
+ modelPromptText && formData.append('prompt', modelPromptText);
195
+
196
+ cortexRequest.data = formData;
197
+ cortexRequest.params = params;
198
+ cortexRequest.headers = { ...cortexRequest.headers, ...formData.getHeaders() };
199
+
200
+ return this.executeRequest(cortexRequest);
201
+ } catch (err) {
202
+ logger.error(`Error getting word timestamped data from api: ${err}`);
203
+ throw err;
204
+ }
205
+ }
206
+
207
+ const processTS = async (uri) => {
208
+ try {
209
+ const tsparams = { fileurl:uri };
210
+
211
+ const { language } = parameters;
212
+ if(language) tsparams.language = language;
213
+ if(highlightWords) tsparams.highlight_words = highlightWords ? "True" : "False";
214
+ if(maxLineWidth) tsparams.max_line_width = maxLineWidth;
215
+ if(maxLineCount) tsparams.max_line_count = maxLineCount;
216
+ if(maxWordsPerLine) tsparams.max_words_per_line = maxWordsPerLine;
217
+ if(wordTimestamped!=null) {
218
+ if(!wordTimestamped) {
219
+ tsparams.word_timestamps = "False";
220
+ }else{
221
+ tsparams.word_timestamps = wordTimestamped;
222
+ }
223
+ }
224
+
225
+ cortexRequest.url = WHISPER_TS_API_URL;
226
+ cortexRequest.data = tsparams;
227
+
228
+ const res = await this.executeRequest(cortexRequest);
229
+
230
+ if(!wordTimestamped && !responseFormat){
231
+ //if no response format, convert to text
232
+ return convertToText(res);
233
+ }
234
+ return res;
235
+ } catch (err) {
236
+ logger.error(`Error getting word timestamped data from api: ${err}`);
237
+ throw err;
238
+ }
239
+ }
240
+
241
+ let result = [];
242
+ let { file } = parameters;
243
+ let totalCount = 0;
244
+ let completedCount = 0;
245
+ let partialCount = 0;
246
+ const { requestId } = pathwayResolver;
247
+
248
+ const MAXPARTIALCOUNT = 60;
249
+ const sendProgress = (partial=false) => {
250
+ if(partial){
251
+ partialCount = Math.min(partialCount + 1, MAXPARTIALCOUNT-1);
252
+ }else {
253
+ partialCount = 0;
254
+ completedCount++;
255
+ }
256
+ if (completedCount >= totalCount) return;
257
+
258
+ const progress = (partialCount / MAXPARTIALCOUNT + completedCount) / totalCount;
259
+ logger.info(`Progress for ${requestId}: ${progress}`);
260
+
261
+ publishRequestProgress({
262
+ requestId,
263
+ progress,
264
+ data: null,
265
+ });
266
+ }
267
+
268
+ async function processInBatches(tasks, batchSize) {
269
+ const batches = chunkArray(tasks,batchSize);
270
+ const result = [];
271
+ for(let i=0; i < batches.length; i++){
272
+ let batch = batches[i];
273
+ // execute all tasks in current batch and wait for them to finish
274
+ const curBatchResults = await Promise.all(batch.map(task => task()));
275
+ // accumulate the results
276
+ result.push(...curBatchResults);
277
+ }
278
+ return result;
279
+ }
280
+
281
+ // helper function to chunk an array into smaller arrays of size n
282
+ function chunkArray(array, chunkSize) {
283
+ const results = [];
284
+ while (array.length) {
285
+ results.push(array.splice(0, chunkSize));
286
+ }
287
+ return results;
288
+ }
289
+
290
+ async function processURI(uri) {
291
+ let result = null;
292
+ let _promise = null;
293
+ if(WHISPER_TS_API_URL){
294
+ _promise = processTS
295
+ }else {
296
+ _promise = processChunk;
297
+ }
298
+ _promise(uri).then((ts) => { result = ts;});
299
+
300
+ //send updates while waiting for result
301
+ while(!result) {
302
+ sendProgress(true);
303
+ await new Promise(r => setTimeout(r, 3000));
304
+ }
305
+ return result;
306
+ }
307
+
308
+ try {
309
+ const uris = await this.getMediaChunks(file, requestId); // array of remote file uris
310
+ if (!uris || !uris.length) {
311
+ throw new Error(`Error in getting chunks from media helper for file ${file}`);
312
+ }
313
+ totalCount = uris.length + 1; // total number of chunks that will be processed
314
+
315
+ // parallel process of chunks with limit
316
+ const tasks = uris.map(uri => () => processURI(uri)); // map each uri to a function that returns a Promise
317
+ result = await processInBatches(tasks, 2); // wait for all Promises to resolve, 2 at a time
318
+
319
+
320
+ } catch (error) {
321
+ const errMsg = `Transcribe error: ${error?.response?.data || error?.message || error}`;
322
+ logger.error(errMsg);
323
+ return errMsg;
324
+ }
325
+ finally {
326
+ try {
327
+ for (const chunk of chunks) {
328
+ try {
329
+ await deleteTempPath(chunk);
330
+ } catch (error) {
331
+ //ignore error
332
+ }
333
+ }
334
+
335
+ await this.markCompletedForCleanUp(requestId);
336
+
337
+ //check cleanup for whisper temp uploaded files url
338
+ const regex = /whispertempfiles\/([a-z0-9-]+)/;
339
+ const match = file.match(regex);
340
+ if (match && match[1]) {
341
+ const extractedValue = match[1];
342
+ await this.markCompletedForCleanUp(extractedValue);
343
+ logger.info(`Cleaned temp whisper file ${file} with request id ${extractedValue}`);
344
+ }
345
+
346
+ } catch (error) {
347
+ logger.error(`An error occurred while deleting: ${error}`);
348
+ }
349
+ }
350
+
351
+ if (['srt','vtt'].includes(responseFormat) || wordTimestamped) { // align subtitles for formats
352
+ return alignSubtitles(result, responseFormat);
353
+ }
354
+ return result.join(` `);
355
+ }
356
+ }
357
+
358
+ export default OpenAIWhisperPlugin;
@@ -348,161 +348,3 @@ test('test translate endpoint with huge arabic text english translation and chec
348
348
  // check return only contains non-Arabic characters
349
349
  t.notRegex(response.body?.singleResult?.data?.translate.result, /[ء-ي]/);
350
350
  });
351
-
352
-
353
- test('vision test image', async t => {
354
- const response = await testServer.executeOperation({
355
- query: `query($text: String, $chatHistory: [MultiMessage]){
356
- vision(text: $text, chatHistory: $chatHistory) {
357
- result
358
- }
359
- }`,
360
-
361
- variables: {
362
- "chatHistory": [
363
- {
364
- "role": "user",
365
- "content": [
366
- "{\"type\": \"text\", \"text\": \"first tell me your name then describe the image shortly:\"}",
367
- "{\"type\":\"image_url\",\"image_url\":{\"url\":\"https://static.toiimg.com/thumb/msid-102827471,width-1280,height-720,resizemode-4/102827471.jpg\"}}"
368
- ],
369
- }],
370
- },
371
- });
372
-
373
- t.is(response.body?.singleResult?.errors, undefined);
374
- t.true(response.body?.singleResult?.data?.vision.result.length > 100);
375
- });
376
-
377
-
378
- test('vision test base64 image', async t => {
379
- const response = await testServer.executeOperation({
380
- query: `query($text: String, $chatHistory: [MultiMessage]){
381
- vision(text: $text, chatHistory: $chatHistory) {
382
- result
383
- }
384
- }`,
385
-
386
- variables: {
387
- "chatHistory": [
388
- {
389
- "role": "user",
390
- "content": [
391
- "{\"type\": \"text\", \"text\": \"first tell me your name then describe the image shortly:\"}",
392
- "{\"type\":\"image_url\",\"image_url\":{\"url\":\"\"}}"
393
- ],
394
- }],
395
- },
396
- });
397
-
398
- t.is(response.body?.singleResult?.errors, undefined);
399
- t.true(response.body?.singleResult?.data?.vision.result.length > 100);
400
- });
401
-
402
-
403
- const base64Img = ``;
404
-
405
- test('vision test chunking', async t => {
406
- t.timeout(400000);
407
- //generate text adem1 adem2 ... ademN
408
- // const testText = Array.from(Array(1000000).keys()).map(i => `adem${i}`).join(' ');
409
- //const testRow = { "role": "user", "content": [`{"type": "text", "text": "${testText}"}`] };
410
-
411
- const base64ImgRow = `{"type":"image_url","image_url":{"url":"${base64Img}"}}`;
412
-
413
- const response = await testServer.executeOperation({
414
- query: `query($text: String, $chatHistory: [MultiMessage]){
415
- vision(text: $text, chatHistory: $chatHistory) {
416
- result
417
- }
418
- }`,
419
-
420
- variables: {
421
- "chatHistory": [
422
- {
423
- "role": "user",
424
- "content": [
425
- "{\"type\": \"text\", \"text\": \"first tell me your name then describe the image shortly:\"}",
426
- ...Array.from(new Array(1),()=> base64ImgRow),
427
- ],
428
- }],
429
- },
430
- });
431
-
432
- t.is(response.body?.singleResult?.errors, undefined);
433
- t.true(response.body?.singleResult?.data?.vision.result.length > 100);
434
- });
435
-
436
-
437
- test('vision multi single long text', async t => {
438
- t.timeout(400000);
439
- //generate text adem1 adem2 ... ademN
440
- const testText = Array.from(Array(10).keys()).map(i => `adem${i}`).join(' ');
441
- const testRow = { "role": "user", "content": [`{"type": "text", "text": "${testText}"}`] };
442
-
443
- const base64ImgRow = `{"type":"image_url","image_url":{"url":"${base64Img}"}}`;
444
-
445
- const response = await testServer.executeOperation({
446
- query: `query($text: String, $chatHistory: [MultiMessage]){
447
- vision(text: $text, chatHistory: $chatHistory) {
448
- result
449
- }
450
- }`,
451
-
452
- variables: {
453
- "chatHistory": [
454
- ...Array.from(new Array(10),()=> testRow),
455
- {
456
- "role": "user",
457
- "content": [
458
- "{\"type\": \"text\", \"text\": \"first tell me your name then describe the image shortly:\"}",
459
- ...Array.from(new Array(10),()=> base64ImgRow),
460
- ],
461
- },
462
- ],
463
- },
464
- });
465
-
466
- t.is(response.body?.singleResult?.errors?.[0]?.message, 'Unable to process your request as your single message content is too long. Please try again with a shorter message.');
467
- });
468
-
469
-
470
- test('vision multi long text', async t => {
471
- t.timeout(400000);
472
- //generate text adem1 adem2 ... ademN
473
- const testText = Array.from(Array(10).keys()).map(i => `adem${i}`).join(' ');
474
- const testRow = { "role": "user", "content": [`{"type": "text", "text": "${testText}"}`] };
475
-
476
- const base64ImgRow = `{"type":"image_url","image_url":{"url":"${base64Img}"}}`;
477
-
478
- const response = await testServer.executeOperation({
479
- query: `query($text: String, $chatHistory: [MultiMessage]){
480
- vision(text: $text, chatHistory: $chatHistory) {
481
- result
482
- }
483
- }`,
484
-
485
- variables: {
486
- "chatHistory": [
487
- ...Array.from(new Array(10),()=> testRow),
488
- {
489
- "role": "user",
490
- "content": [
491
- "{\"type\": \"text\", \"text\": \"first tell me your name then describe the image shortly:\"}",
492
- ...Array.from(new Array(10),()=> base64ImgRow),
493
- ],
494
- },
495
- {
496
- "role": "user",
497
- "content": [
498
- "{\"type\": \"text\", \"text\": \"then tell me your name then describe the image shortly:\"}",
499
- ...Array.from(new Array(1),()=> base64ImgRow),
500
- ],
501
- },
502
- ],
503
- },
504
- });
505
-
506
- t.is(response.body?.singleResult?.errors, undefined);
507
- t.true(response.body?.singleResult?.data?.vision.result.length > 100);
508
- });
@@ -0,0 +1,176 @@
1
+ // vision.test.js
2
+ // This is where all the Cortex vision model tests go
3
+
4
+ import test from 'ava';
5
+ import serverFactory from '../index.js';
6
+
7
+ let testServer;
8
+
9
+ test.before(async () => {
10
+ const { server, startServer } = await serverFactory();
11
+ startServer && await startServer();
12
+ testServer = server;
13
+ });
14
+
15
+ test.after.always('cleanup', async () => {
16
+ if (testServer) {
17
+ await testServer.stop();
18
+ }
19
+ });
20
+
21
+ test('vision test image', async t => {
22
+ const response = await testServer.executeOperation({
23
+ query: `query($text: String, $chatHistory: [MultiMessage]){
24
+ vision(text: $text, chatHistory: $chatHistory) {
25
+ result
26
+ }
27
+ }`,
28
+
29
+ variables: {
30
+ "chatHistory": [
31
+ {
32
+ "role": "user",
33
+ "content": [
34
+ "{\"type\": \"text\", \"text\": \"first tell me your name then describe the image shortly:\"}",
35
+ "{\"type\":\"image_url\",\"image_url\":{\"url\":\"https://static.toiimg.com/thumb/msid-102827471,width-1280,height-720,resizemode-4/102827471.jpg\"}}"
36
+ ],
37
+ }],
38
+ },
39
+ });
40
+
41
+ t.is(response.body?.singleResult?.errors, undefined);
42
+ t.true(response.body?.singleResult?.data?.vision.result.length > 100);
43
+ });
44
+
45
+
46
+ test('vision test base64 image', async t => {
47
+ const response = await testServer.executeOperation({
48
+ query: `query($text: String, $chatHistory: [MultiMessage]){
49
+ vision(text: $text, chatHistory: $chatHistory) {
50
+ result
51
+ }
52
+ }`,
53
+
54
+ variables: {
55
+ "chatHistory": [
56
+ {
57
+ "role": "user",
58
+ "content": [
59
+ "{\"type\": \"text\", \"text\": \"first tell me your name then describe the image shortly:\"}",
60
+ "{\"type\":\"image_url\",\"image_url\":{\"url\":\"\"}}"
61
+ ],
62
+ }],
63
+ },
64
+ });
65
+
66
+ t.is(response.body?.singleResult?.errors, undefined);
67
+ t.true(response.body?.singleResult?.data?.vision.result.length > 100);
68
+ });
69
+
70
+
71
+ const base64Img = ``;
72
+
73
+ test('vision test chunking', async t => {
74
+ t.timeout(400000);
75
+ //generate text adem1 adem2 ... ademN
76
+ // const testText = Array.from(Array(1000000).keys()).map(i => `adem${i}`).join(' ');
77
+ //const testRow = { "role": "user", "content": [`{"type": "text", "text": "${testText}"}`] };
78
+
79
+ const base64ImgRow = `{"type":"image_url","image_url":{"url":"${base64Img}"}}`;
80
+
81
+ const response = await testServer.executeOperation({
82
+ query: `query($text: String, $chatHistory: [MultiMessage]){
83
+ vision(text: $text, chatHistory: $chatHistory) {
84
+ result
85
+ }
86
+ }`,
87
+
88
+ variables: {
89
+ "chatHistory": [
90
+ {
91
+ "role": "user",
92
+ "content": [
93
+ "{\"type\": \"text\", \"text\": \"first tell me your name then describe the image shortly:\"}",
94
+ ...Array.from(new Array(1),()=> base64ImgRow),
95
+ ],
96
+ }],
97
+ },
98
+ });
99
+
100
+ t.is(response.body?.singleResult?.errors, undefined);
101
+ t.true(response.body?.singleResult?.data?.vision.result.length > 100);
102
+ });
103
+
104
+
105
+ test('vision multi single long text', async t => {
106
+ t.timeout(400000);
107
+ //generate text adem1 adem2 ... ademN
108
+ const testText = Array.from(Array(10).keys()).map(i => `adem${i}`).join(' ');
109
+ const testRow = { "role": "user", "content": [`{"type": "text", "text": "${testText}"}`] };
110
+
111
+ const base64ImgRow = `{"type":"image_url","image_url":{"url":"${base64Img}"}}`;
112
+
113
+ const response = await testServer.executeOperation({
114
+ query: `query($text: String, $chatHistory: [MultiMessage]){
115
+ vision(text: $text, chatHistory: $chatHistory) {
116
+ result
117
+ }
118
+ }`,
119
+
120
+ variables: {
121
+ "chatHistory": [
122
+ ...Array.from(new Array(10),()=> testRow),
123
+ {
124
+ "role": "user",
125
+ "content": [
126
+ "{\"type\": \"text\", \"text\": \"first tell me your name then describe the image shortly:\"}",
127
+ ...Array.from(new Array(10),()=> base64ImgRow),
128
+ ],
129
+ },
130
+ ],
131
+ },
132
+ });
133
+
134
+ t.is(response.body?.singleResult?.errors?.[0]?.message, 'Unable to process your request as your single message content is too long. Please try again with a shorter message.');
135
+ });
136
+
137
+
138
+ test('vision multi long text', async t => {
139
+ t.timeout(400000);
140
+ //generate text adem1 adem2 ... ademN
141
+ const testText = Array.from(Array(10).keys()).map(i => `adem${i}`).join(' ');
142
+ const testRow = { "role": "user", "content": [`{"type": "text", "text": "${testText}"}`] };
143
+
144
+ const base64ImgRow = `{"type":"image_url","image_url":{"url":"${base64Img}"}}`;
145
+
146
+ const response = await testServer.executeOperation({
147
+ query: `query($text: String, $chatHistory: [MultiMessage]){
148
+ vision(text: $text, chatHistory: $chatHistory) {
149
+ result
150
+ }
151
+ }`,
152
+
153
+ variables: {
154
+ "chatHistory": [
155
+ ...Array.from(new Array(10),()=> testRow),
156
+ {
157
+ "role": "user",
158
+ "content": [
159
+ "{\"type\": \"text\", \"text\": \"first tell me your name then describe the image shortly:\"}",
160
+ ...Array.from(new Array(10),()=> base64ImgRow),
161
+ ],
162
+ },
163
+ {
164
+ "role": "user",
165
+ "content": [
166
+ "{\"type\": \"text\", \"text\": \"then tell me your name then describe the image shortly:\"}",
167
+ ...Array.from(new Array(1),()=> base64ImgRow),
168
+ ],
169
+ },
170
+ ],
171
+ },
172
+ });
173
+
174
+ t.is(response.body?.singleResult?.errors, undefined);
175
+ t.true(response.body?.singleResult?.data?.vision.result.length > 100);
176
+ });