@aj-archipelago/cortex 1.1.8 → 1.1.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/helper-apps/cortex-file-handler/fileChunker.js +1 -1
- package/helper-apps/cortex-whisper-wrapper/Dockerfile +35 -5
- package/helper-apps/cortex-whisper-wrapper/app.py +26 -11
- package/lib/requestExecutor.js +1 -1
- package/lib/util.js +36 -0
- package/package.json +1 -1
- package/server/plugins/openAiWhisperPlugin.js +44 -33
- package/server/plugins/openAiWhisperPlugin_parallel.js +358 -0
- package/tests/main.test.js +0 -158
- package/tests/vision.test.js +176 -0
|
@@ -66,7 +66,7 @@ async function downloadFile(url, outputPath) {
|
|
|
66
66
|
}
|
|
67
67
|
}
|
|
68
68
|
|
|
69
|
-
async function splitMediaFile(inputPath, chunkDurationInSeconds = 600) {
|
|
69
|
+
async function splitMediaFile(inputPath, chunkDurationInSeconds = 600 - 100) {
|
|
70
70
|
try {
|
|
71
71
|
// Create unique folder
|
|
72
72
|
const uniqueOutputPath = generateUniqueFolderName();
|
|
@@ -1,12 +1,42 @@
|
|
|
1
1
|
# For more information, please refer to https://aka.ms/vscode-docker-python
|
|
2
|
-
FROM python:3.10-slim
|
|
2
|
+
#FROM python:3.10-slim
|
|
3
|
+
FROM nvidia/cuda:12.2.2-devel-ubuntu22.04
|
|
4
|
+
|
|
5
|
+
# Update system and install necessary packages, including python3.10
|
|
6
|
+
RUN apt-get update && apt-get install -y \
|
|
7
|
+
ffmpeg \
|
|
8
|
+
python3.10 \
|
|
9
|
+
python3-pip \
|
|
10
|
+
&& apt-get clean \
|
|
11
|
+
&& rm -rf /var/lib/apt/lists/* \
|
|
12
|
+
&& ln -s /usr/bin/python3.10 /usr/bin/python
|
|
13
|
+
|
|
14
|
+
# # Update and install necessary packages.
|
|
15
|
+
# RUN apt-get update && apt-get install -y \
|
|
16
|
+
# ffmpeg \
|
|
17
|
+
# nvidia-cuda-toolkit \
|
|
18
|
+
# && apt-get clean \
|
|
19
|
+
# && rm -rf /var/lib/apt/lists/*
|
|
20
|
+
|
|
21
|
+
# Verify that the CUDA toolkit was installed correctly
|
|
22
|
+
RUN nvcc --version
|
|
23
|
+
|
|
24
|
+
# # Update system and install necessary packages
|
|
25
|
+
# RUN apt-get update && apt-get install -y \
|
|
26
|
+
# ffmpeg \
|
|
27
|
+
# && apt-get clean \
|
|
28
|
+
# && rm -rf /var/lib/apt/lists/*
|
|
29
|
+
|
|
3
30
|
|
|
4
31
|
EXPOSE 8000
|
|
5
32
|
|
|
6
|
-
## following 3 lines are for installing ffmepg
|
|
7
|
-
RUN apt-get -y update
|
|
8
|
-
RUN apt-get -y upgrade
|
|
9
|
-
RUN apt-get install -y ffmpeg
|
|
33
|
+
# ## following 3 lines are for installing ffmepg
|
|
34
|
+
# RUN apt-get -y update
|
|
35
|
+
# RUN apt-get -y upgrade
|
|
36
|
+
# RUN apt-get install -y ffmpeg
|
|
37
|
+
|
|
38
|
+
# # Install CUDA toolkit
|
|
39
|
+
# RUN apt-get install -y nvidia-cuda-toolkit
|
|
10
40
|
|
|
11
41
|
# Keeps Python from generating .pyc files in the container
|
|
12
42
|
ENV PYTHONDONTWRITEBYTECODE=1
|
|
@@ -9,7 +9,11 @@ from fastapi.encoders import jsonable_encoder
|
|
|
9
9
|
import time
|
|
10
10
|
|
|
11
11
|
model_download_root = './models'
|
|
12
|
-
|
|
12
|
+
try:
|
|
13
|
+
model = whisper.load_model("large", download_root=model_download_root) #large, tiny
|
|
14
|
+
except Exception as e:
|
|
15
|
+
print(f"Error loading model: {e}")
|
|
16
|
+
raise
|
|
13
17
|
|
|
14
18
|
# Create a semaphore with a limit of 1
|
|
15
19
|
semaphore = asyncio.Semaphore(1)
|
|
@@ -26,14 +30,14 @@ def delete_tmp_file(file_path):
|
|
|
26
30
|
print(f"Temporary file '{file_path}' has been deleted.")
|
|
27
31
|
except OSError as e:
|
|
28
32
|
print(f"Error: {e.strerror}")
|
|
29
|
-
|
|
33
|
+
|
|
30
34
|
def transcribe(params):
|
|
31
35
|
if 'fileurl' not in params:
|
|
32
36
|
raise HTTPException(status_code=400, detail="fileurl parameter is required")
|
|
33
37
|
|
|
34
38
|
fileurl = params["fileurl"]
|
|
35
39
|
|
|
36
|
-
#word_timestamps bool, default True
|
|
40
|
+
# word_timestamps bool, default True
|
|
37
41
|
word_timestamps = True
|
|
38
42
|
if 'word_timestamps' in params: #parse as bool
|
|
39
43
|
word_timestamps = False if params['word_timestamps'] == 'False' else True
|
|
@@ -45,7 +49,11 @@ def transcribe(params):
|
|
|
45
49
|
|
|
46
50
|
print(f"Transcribing file {fileurl} with word_timestamps={word_timestamps}")
|
|
47
51
|
start_time = time.time()
|
|
48
|
-
|
|
52
|
+
try:
|
|
53
|
+
result = model.transcribe(fileurl, word_timestamps=word_timestamps, **decode_options)
|
|
54
|
+
except Exception as e:
|
|
55
|
+
print(f"Error during transcription: {e}")
|
|
56
|
+
raise
|
|
49
57
|
end_time = time.time()
|
|
50
58
|
execution_time = end_time - start_time
|
|
51
59
|
print("Transcribe execution time:", execution_time, "seconds")
|
|
@@ -55,7 +63,6 @@ def transcribe(params):
|
|
|
55
63
|
print(f"Saving transcription as : {srtpath}")
|
|
56
64
|
writer = get_writer("srt", save_directory)
|
|
57
65
|
|
|
58
|
-
|
|
59
66
|
writer_args = {'highlight_words': False, 'max_line_count': None, 'max_line_width': None, 'max_words_per_line': None}
|
|
60
67
|
if 'highlight_words' in params: #parse as bool
|
|
61
68
|
writer_args['highlight_words'] = params['highlight_words'] == 'True'
|
|
@@ -70,9 +77,11 @@ def transcribe(params):
|
|
|
70
77
|
if fileurl and word_timestamps and len(params) <= 2:
|
|
71
78
|
writer_args['max_words_per_line'] = 1
|
|
72
79
|
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
80
|
+
try:
|
|
81
|
+
writer(result, srtpath, **writer_args)
|
|
82
|
+
except Exception as e:
|
|
83
|
+
print(f"Error while writing transcription: {e}")
|
|
84
|
+
raise
|
|
76
85
|
|
|
77
86
|
with open(srtpath, "r") as f:
|
|
78
87
|
srtstr = f.read()
|
|
@@ -101,9 +110,15 @@ async def root(request: Request):
|
|
|
101
110
|
|
|
102
111
|
params = await get_params(request)
|
|
103
112
|
async with semaphore:
|
|
104
|
-
|
|
105
|
-
|
|
113
|
+
try:
|
|
114
|
+
result = await asyncio.to_thread(transcribe, params)
|
|
115
|
+
return result
|
|
116
|
+
except HTTPException as e:
|
|
117
|
+
raise e
|
|
118
|
+
except Exception as e:
|
|
119
|
+
print(f"Internal Server Error: {e}")
|
|
120
|
+
raise HTTPException(status_code=500, detail="Internal Server Error")
|
|
106
121
|
|
|
107
122
|
if __name__ == "__main__":
|
|
108
123
|
print("Starting APP Whisper server", flush=True)
|
|
109
|
-
uvicorn.run(app, host="0.0.0.0", port=8000)
|
|
124
|
+
uvicorn.run(app, host="0.0.0.0", port=8000)
|
package/lib/requestExecutor.js
CHANGED
|
@@ -287,7 +287,7 @@ const makeRequest = async (cortexRequest) => {
|
|
|
287
287
|
//logger.info(`XXX [${requestId}] request ${index} was cancelled`);
|
|
288
288
|
reject(error);
|
|
289
289
|
} else {
|
|
290
|
-
logger.error(`!!! [${requestId}] request ${index} failed with error: ${error?.response?.data?.error?.message || error}`);
|
|
290
|
+
logger.error(`!!! [${requestId}] request ${index} failed with error: ${error?.response?.data?.message || error?.response?.data?.error?.message || error?.message || error}`);
|
|
291
291
|
reject(error);
|
|
292
292
|
}
|
|
293
293
|
} finally {
|
package/lib/util.js
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
function convertToSingleContentChatHistory(chatHistory){
|
|
2
|
+
for(let i=0; i<chatHistory.length; i++){
|
|
3
|
+
//if isarray make it single string
|
|
4
|
+
if (Array.isArray(chatHistory[i]?.content)) {
|
|
5
|
+
chatHistory[i].content = chatHistory[i].content.join("\n");
|
|
6
|
+
}
|
|
7
|
+
}
|
|
8
|
+
}
|
|
9
|
+
|
|
10
|
+
//check if args has a type in chatHistory
|
|
11
|
+
function chatArgsHasType(args, type){
|
|
12
|
+
const { chatHistory } = args;
|
|
13
|
+
for(const ch of chatHistory){
|
|
14
|
+
for(const content of ch.content){
|
|
15
|
+
try{
|
|
16
|
+
if(JSON.parse(content).type == type){
|
|
17
|
+
return true;
|
|
18
|
+
}
|
|
19
|
+
}catch(e){
|
|
20
|
+
continue;
|
|
21
|
+
}
|
|
22
|
+
}
|
|
23
|
+
}
|
|
24
|
+
return false;
|
|
25
|
+
}
|
|
26
|
+
|
|
27
|
+
//check if args has an image_url in chatHistory
|
|
28
|
+
function chatArgsHasImageUrl(args){
|
|
29
|
+
return chatArgsHasType(args, 'image_url');
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
export {
|
|
33
|
+
convertToSingleContentChatHistory,
|
|
34
|
+
chatArgsHasImageUrl,
|
|
35
|
+
chatArgsHasType
|
|
36
|
+
};
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@aj-archipelago/cortex",
|
|
3
|
-
"version": "1.1.8",
|
|
3
|
+
"version": "1.1.10",
|
|
4
4
|
"description": "Cortex is a GraphQL API for AI. It provides a simple, extensible interface for using AI services from OpenAI, Azure and others.",
|
|
5
5
|
"private": false,
|
|
6
6
|
"repository": {
|
|
@@ -205,37 +205,34 @@ class OpenAIWhisperPlugin extends ModelPlugin {
|
|
|
205
205
|
}
|
|
206
206
|
|
|
207
207
|
const processTS = async (uri) => {
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
if(
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
}else{
|
|
221
|
-
tsparams.word_timestamps = wordTimestamped;
|
|
222
|
-
}
|
|
208
|
+
const tsparams = { fileurl:uri };
|
|
209
|
+
const { language } = parameters;
|
|
210
|
+
if(language) tsparams.language = language;
|
|
211
|
+
if(highlightWords) tsparams.highlight_words = highlightWords ? "True" : "False";
|
|
212
|
+
if(maxLineWidth) tsparams.max_line_width = maxLineWidth;
|
|
213
|
+
if(maxLineCount) tsparams.max_line_count = maxLineCount;
|
|
214
|
+
if(maxWordsPerLine) tsparams.max_words_per_line = maxWordsPerLine;
|
|
215
|
+
if(wordTimestamped!=null) {
|
|
216
|
+
if(!wordTimestamped) {
|
|
217
|
+
tsparams.word_timestamps = "False";
|
|
218
|
+
}else{
|
|
219
|
+
tsparams.word_timestamps = wordTimestamped;
|
|
223
220
|
}
|
|
221
|
+
}
|
|
224
222
|
|
|
225
|
-
|
|
226
|
-
|
|
223
|
+
cortexRequest.url = WHISPER_TS_API_URL;
|
|
224
|
+
cortexRequest.data = tsparams;
|
|
227
225
|
|
|
228
|
-
|
|
226
|
+
const res = await this.executeRequest(cortexRequest);
|
|
227
|
+
if (res.statusCode && res.statusCode >= 400) {
|
|
228
|
+
throw new Error(res.message || 'An error occurred.');
|
|
229
|
+
}
|
|
229
230
|
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
}
|
|
234
|
-
return res;
|
|
235
|
-
} catch (err) {
|
|
236
|
-
logger.error(`Error getting word timestamped data from api: ${err}`);
|
|
237
|
-
throw err;
|
|
231
|
+
if(!wordTimestamped && !responseFormat){
|
|
232
|
+
//if no response format, convert to text
|
|
233
|
+
return convertToText(res);
|
|
238
234
|
}
|
|
235
|
+
return res;
|
|
239
236
|
}
|
|
240
237
|
|
|
241
238
|
let result = [];
|
|
@@ -268,18 +265,32 @@ class OpenAIWhisperPlugin extends ModelPlugin {
|
|
|
268
265
|
async function processURI(uri) {
|
|
269
266
|
let result = null;
|
|
270
267
|
let _promise = null;
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
268
|
+
let errorOccurred = false;
|
|
269
|
+
|
|
270
|
+
const useTS = WHISPER_TS_API_URL && (wordTimestamped || highlightWords);
|
|
271
|
+
|
|
272
|
+
if (useTS) {
|
|
273
|
+
_promise = processTS;
|
|
274
|
+
} else {
|
|
274
275
|
_promise = processChunk;
|
|
275
276
|
}
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
277
|
+
|
|
278
|
+
_promise(uri).then((ts) => {
|
|
279
|
+
result = ts;
|
|
280
|
+
}).catch((err) => {
|
|
281
|
+
logger.error(`Error occurred while processing URI: ${err}`);
|
|
282
|
+
errorOccurred = err;
|
|
283
|
+
});
|
|
284
|
+
|
|
285
|
+
while(result === null && !errorOccurred) {
|
|
280
286
|
sendProgress(true);
|
|
281
287
|
await new Promise(r => setTimeout(r, 3000));
|
|
282
288
|
}
|
|
289
|
+
|
|
290
|
+
if(errorOccurred) {
|
|
291
|
+
throw errorOccurred;
|
|
292
|
+
}
|
|
293
|
+
|
|
283
294
|
return result;
|
|
284
295
|
}
|
|
285
296
|
|
|
@@ -0,0 +1,358 @@
|
|
|
1
|
+
// openAiWhisperPlugin.js
|
|
2
|
+
import ModelPlugin from './modelPlugin.js';
|
|
3
|
+
import { config } from '../../config.js';
|
|
4
|
+
import subsrt from 'subsrt';
|
|
5
|
+
import FormData from 'form-data';
|
|
6
|
+
import fs from 'fs';
|
|
7
|
+
import { axios } from '../../lib/requestExecutor.js';
|
|
8
|
+
import stream from 'stream';
|
|
9
|
+
import os from 'os';
|
|
10
|
+
import path from 'path';
|
|
11
|
+
import http from 'http';
|
|
12
|
+
import https from 'https';
|
|
13
|
+
import { URL } from 'url';
|
|
14
|
+
import { v4 as uuidv4 } from 'uuid';
|
|
15
|
+
import { promisify } from 'util';
|
|
16
|
+
import { publishRequestProgress } from '../../lib/redisSubscription.js';
|
|
17
|
+
import logger from '../../lib/logger.js';
|
|
18
|
+
const pipeline = promisify(stream.pipeline);
|
|
19
|
+
|
|
20
|
+
const API_URL = config.get('whisperMediaApiUrl');
|
|
21
|
+
const WHISPER_TS_API_URL = config.get('whisperTSApiUrl');
|
|
22
|
+
if(WHISPER_TS_API_URL){
|
|
23
|
+
logger.info(`WHISPER API URL using ${WHISPER_TS_API_URL}`);
|
|
24
|
+
}else{
|
|
25
|
+
logger.warn(`WHISPER API URL not set using default OpenAI API Whisper`);
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
const OFFSET_CHUNK = 1000 * 60 * 10; // 10 minutes for each chunk
|
|
29
|
+
|
|
30
|
+
async function deleteTempPath(path) {
|
|
31
|
+
try {
|
|
32
|
+
if (!path) {
|
|
33
|
+
logger.warn('Temporary path is not defined.');
|
|
34
|
+
return;
|
|
35
|
+
}
|
|
36
|
+
if (!fs.existsSync(path)) {
|
|
37
|
+
logger.warn(`Temporary path ${path} does not exist.`);
|
|
38
|
+
return;
|
|
39
|
+
}
|
|
40
|
+
const stats = fs.statSync(path);
|
|
41
|
+
if (stats.isFile()) {
|
|
42
|
+
fs.unlinkSync(path);
|
|
43
|
+
logger.info(`Temporary file ${path} deleted successfully.`);
|
|
44
|
+
} else if (stats.isDirectory()) {
|
|
45
|
+
fs.rmSync(path, { recursive: true });
|
|
46
|
+
logger.info(`Temporary folder ${path} and its contents deleted successfully.`);
|
|
47
|
+
}
|
|
48
|
+
} catch (err) {
|
|
49
|
+
logger.error(`Error occurred while deleting the temporary path: ${err}`);
|
|
50
|
+
}
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
function generateUniqueFilename(extension) {
|
|
54
|
+
return `${uuidv4()}.${extension}`;
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
const downloadFile = async (fileUrl) => {
|
|
58
|
+
const fileExtension = path.extname(fileUrl).slice(1);
|
|
59
|
+
const uniqueFilename = generateUniqueFilename(fileExtension);
|
|
60
|
+
const tempDir = os.tmpdir();
|
|
61
|
+
const localFilePath = `${tempDir}/${uniqueFilename}`;
|
|
62
|
+
|
|
63
|
+
// eslint-disable-next-line no-async-promise-executor
|
|
64
|
+
return new Promise(async (resolve, reject) => {
|
|
65
|
+
try {
|
|
66
|
+
const parsedUrl = new URL(fileUrl);
|
|
67
|
+
const protocol = parsedUrl.protocol === 'https:' ? https : http;
|
|
68
|
+
|
|
69
|
+
const response = await new Promise((resolve, reject) => {
|
|
70
|
+
protocol.get(parsedUrl, (res) => {
|
|
71
|
+
if (res.statusCode === 200) {
|
|
72
|
+
resolve(res);
|
|
73
|
+
} else {
|
|
74
|
+
reject(new Error(`HTTP request failed with status code ${res.statusCode}`));
|
|
75
|
+
}
|
|
76
|
+
}).on('error', reject);
|
|
77
|
+
});
|
|
78
|
+
|
|
79
|
+
await pipeline(response, fs.createWriteStream(localFilePath));
|
|
80
|
+
logger.info(`Downloaded file to ${localFilePath}`);
|
|
81
|
+
resolve(localFilePath);
|
|
82
|
+
} catch (error) {
|
|
83
|
+
fs.unlink(localFilePath, () => {
|
|
84
|
+
reject(error);
|
|
85
|
+
});
|
|
86
|
+
//throw error;
|
|
87
|
+
}
|
|
88
|
+
});
|
|
89
|
+
};
|
|
90
|
+
|
|
91
|
+
// convert srt format to text
|
|
92
|
+
function convertToText(str) {
|
|
93
|
+
return str
|
|
94
|
+
.split('\n')
|
|
95
|
+
.filter(line => !line.match(/^\d+$/) && !line.match(/^\d{2}:\d{2}:\d{2},\d{3} --> \d{2}:\d{2}:\d{2},\d{3}$/) && line !== '')
|
|
96
|
+
.join(' ');
|
|
97
|
+
}
|
|
98
|
+
|
|
99
|
+
function alignSubtitles(subtitles, format) {
|
|
100
|
+
const result = [];
|
|
101
|
+
|
|
102
|
+
function preprocessStr(str) {
|
|
103
|
+
try{
|
|
104
|
+
if(!str) return '';
|
|
105
|
+
return str.trim().replace(/(\n\n)(?!\n)/g, '\n\n\n');
|
|
106
|
+
}catch(e){
|
|
107
|
+
logger.error(`An error occurred in content text preprocessing: ${e}`);
|
|
108
|
+
return '';
|
|
109
|
+
}
|
|
110
|
+
}
|
|
111
|
+
|
|
112
|
+
function shiftSubtitles(subtitle, shiftOffset) {
|
|
113
|
+
const captions = subsrt.parse(preprocessStr(subtitle));
|
|
114
|
+
const resynced = subsrt.resync(captions, { offset: shiftOffset });
|
|
115
|
+
return resynced;
|
|
116
|
+
}
|
|
117
|
+
|
|
118
|
+
for (let i = 0; i < subtitles.length; i++) {
|
|
119
|
+
result.push(...shiftSubtitles(subtitles[i], i * OFFSET_CHUNK));
|
|
120
|
+
}
|
|
121
|
+
|
|
122
|
+
try {
|
|
123
|
+
//if content has needed html style tags, keep them
|
|
124
|
+
for(const obj of result) {
|
|
125
|
+
if(obj && obj.content){
|
|
126
|
+
obj.text = obj.content;
|
|
127
|
+
}
|
|
128
|
+
}
|
|
129
|
+
} catch (error) {
|
|
130
|
+
logger.error(`An error occurred in content text parsing: ${error}`);
|
|
131
|
+
}
|
|
132
|
+
|
|
133
|
+
return subsrt.build(result, { format: format === 'vtt' ? 'vtt' : 'srt' });
|
|
134
|
+
}
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
class OpenAIWhisperPlugin extends ModelPlugin {
|
|
138
|
+
constructor(pathway, model) {
|
|
139
|
+
super(pathway, model);
|
|
140
|
+
}
|
|
141
|
+
|
|
142
|
+
async getMediaChunks(file, requestId) {
|
|
143
|
+
try {
|
|
144
|
+
if (API_URL) {
|
|
145
|
+
//call helper api and get list of file uris
|
|
146
|
+
const res = await axios.get(API_URL, { params: { uri: file, requestId } });
|
|
147
|
+
return res.data;
|
|
148
|
+
} else {
|
|
149
|
+
logger.info(`No API_URL set, returning file as chunk`);
|
|
150
|
+
return [file];
|
|
151
|
+
}
|
|
152
|
+
} catch (err) {
|
|
153
|
+
logger.error(`Error getting media chunks list from api: ${err}`);
|
|
154
|
+
throw err;
|
|
155
|
+
}
|
|
156
|
+
}
|
|
157
|
+
|
|
158
|
+
async markCompletedForCleanUp(requestId) {
|
|
159
|
+
try {
|
|
160
|
+
if (API_URL) {
|
|
161
|
+
//call helper api to mark processing as completed
|
|
162
|
+
const res = await axios.delete(API_URL, { params: { requestId } });
|
|
163
|
+
logger.info(`Marked request ${requestId} as completed:`, res.data);
|
|
164
|
+
return res.data;
|
|
165
|
+
}
|
|
166
|
+
} catch (err) {
|
|
167
|
+
logger.error(`Error marking request ${requestId} as completed: ${err}`);
|
|
168
|
+
}
|
|
169
|
+
}
|
|
170
|
+
|
|
171
|
+
// Execute the request to the OpenAI Whisper API
|
|
172
|
+
async execute(text, parameters, prompt, cortexRequest) {
|
|
173
|
+
const { pathwayResolver } = cortexRequest;
|
|
174
|
+
const { responseFormat, wordTimestamped, highlightWords, maxLineWidth, maxLineCount, maxWordsPerLine } = parameters;
|
|
175
|
+
cortexRequest.url = this.requestUrl(text);
|
|
176
|
+
|
|
177
|
+
const chunks = [];
|
|
178
|
+
const processChunk = async (uri) => {
|
|
179
|
+
try {
|
|
180
|
+
const chunk = await downloadFile(uri);
|
|
181
|
+
chunks.push(chunk);
|
|
182
|
+
|
|
183
|
+
const { language, responseFormat } = parameters;
|
|
184
|
+
cortexRequest.url = this.requestUrl(text);
|
|
185
|
+
const params = {};
|
|
186
|
+
const { modelPromptText } = this.getCompiledPrompt(text, parameters, prompt);
|
|
187
|
+
const response_format = responseFormat || 'text';
|
|
188
|
+
|
|
189
|
+
const formData = new FormData();
|
|
190
|
+
formData.append('file', fs.createReadStream(chunk));
|
|
191
|
+
formData.append('model', this.model.params.model);
|
|
192
|
+
formData.append('response_format', response_format);
|
|
193
|
+
language && formData.append('language', language);
|
|
194
|
+
modelPromptText && formData.append('prompt', modelPromptText);
|
|
195
|
+
|
|
196
|
+
cortexRequest.data = formData;
|
|
197
|
+
cortexRequest.params = params;
|
|
198
|
+
cortexRequest.headers = { ...cortexRequest.headers, ...formData.getHeaders() };
|
|
199
|
+
|
|
200
|
+
return this.executeRequest(cortexRequest);
|
|
201
|
+
} catch (err) {
|
|
202
|
+
logger.error(`Error getting word timestamped data from api: ${err}`);
|
|
203
|
+
throw err;
|
|
204
|
+
}
|
|
205
|
+
}
|
|
206
|
+
|
|
207
|
+
const processTS = async (uri) => {
|
|
208
|
+
try {
|
|
209
|
+
const tsparams = { fileurl:uri };
|
|
210
|
+
|
|
211
|
+
const { language } = parameters;
|
|
212
|
+
if(language) tsparams.language = language;
|
|
213
|
+
if(highlightWords) tsparams.highlight_words = highlightWords ? "True" : "False";
|
|
214
|
+
if(maxLineWidth) tsparams.max_line_width = maxLineWidth;
|
|
215
|
+
if(maxLineCount) tsparams.max_line_count = maxLineCount;
|
|
216
|
+
if(maxWordsPerLine) tsparams.max_words_per_line = maxWordsPerLine;
|
|
217
|
+
if(wordTimestamped!=null) {
|
|
218
|
+
if(!wordTimestamped) {
|
|
219
|
+
tsparams.word_timestamps = "False";
|
|
220
|
+
}else{
|
|
221
|
+
tsparams.word_timestamps = wordTimestamped;
|
|
222
|
+
}
|
|
223
|
+
}
|
|
224
|
+
|
|
225
|
+
cortexRequest.url = WHISPER_TS_API_URL;
|
|
226
|
+
cortexRequest.data = tsparams;
|
|
227
|
+
|
|
228
|
+
const res = await this.executeRequest(cortexRequest);
|
|
229
|
+
|
|
230
|
+
if(!wordTimestamped && !responseFormat){
|
|
231
|
+
//if no response format, convert to text
|
|
232
|
+
return convertToText(res);
|
|
233
|
+
}
|
|
234
|
+
return res;
|
|
235
|
+
} catch (err) {
|
|
236
|
+
logger.error(`Error getting word timestamped data from api: ${err}`);
|
|
237
|
+
throw err;
|
|
238
|
+
}
|
|
239
|
+
}
|
|
240
|
+
|
|
241
|
+
let result = [];
|
|
242
|
+
let { file } = parameters;
|
|
243
|
+
let totalCount = 0;
|
|
244
|
+
let completedCount = 0;
|
|
245
|
+
let partialCount = 0;
|
|
246
|
+
const { requestId } = pathwayResolver;
|
|
247
|
+
|
|
248
|
+
const MAXPARTIALCOUNT = 60;
|
|
249
|
+
const sendProgress = (partial=false) => {
|
|
250
|
+
if(partial){
|
|
251
|
+
partialCount = Math.min(partialCount + 1, MAXPARTIALCOUNT-1);
|
|
252
|
+
}else {
|
|
253
|
+
partialCount = 0;
|
|
254
|
+
completedCount++;
|
|
255
|
+
}
|
|
256
|
+
if (completedCount >= totalCount) return;
|
|
257
|
+
|
|
258
|
+
const progress = (partialCount / MAXPARTIALCOUNT + completedCount) / totalCount;
|
|
259
|
+
logger.info(`Progress for ${requestId}: ${progress}`);
|
|
260
|
+
|
|
261
|
+
publishRequestProgress({
|
|
262
|
+
requestId,
|
|
263
|
+
progress,
|
|
264
|
+
data: null,
|
|
265
|
+
});
|
|
266
|
+
}
|
|
267
|
+
|
|
268
|
+
async function processInBatches(tasks, batchSize) {
|
|
269
|
+
const batches = chunkArray(tasks,batchSize);
|
|
270
|
+
const result = [];
|
|
271
|
+
for(let i=0; i < batches.length; i++){
|
|
272
|
+
let batch = batches[i];
|
|
273
|
+
// execute all tasks in current batch and wait for them to finish
|
|
274
|
+
const curBatchResults = await Promise.all(batch.map(task => task()));
|
|
275
|
+
// accumulate the results
|
|
276
|
+
result.push(...curBatchResults);
|
|
277
|
+
}
|
|
278
|
+
return result;
|
|
279
|
+
}
|
|
280
|
+
|
|
281
|
+
// helper function to chunk an array into smaller arrays of size n
|
|
282
|
+
function chunkArray(array, chunkSize) {
|
|
283
|
+
const results = [];
|
|
284
|
+
while (array.length) {
|
|
285
|
+
results.push(array.splice(0, chunkSize));
|
|
286
|
+
}
|
|
287
|
+
return results;
|
|
288
|
+
}
|
|
289
|
+
|
|
290
|
+
async function processURI(uri) {
|
|
291
|
+
let result = null;
|
|
292
|
+
let _promise = null;
|
|
293
|
+
if(WHISPER_TS_API_URL){
|
|
294
|
+
_promise = processTS
|
|
295
|
+
}else {
|
|
296
|
+
_promise = processChunk;
|
|
297
|
+
}
|
|
298
|
+
_promise(uri).then((ts) => { result = ts;});
|
|
299
|
+
|
|
300
|
+
//send updates while waiting for result
|
|
301
|
+
while(!result) {
|
|
302
|
+
sendProgress(true);
|
|
303
|
+
await new Promise(r => setTimeout(r, 3000));
|
|
304
|
+
}
|
|
305
|
+
return result;
|
|
306
|
+
}
|
|
307
|
+
|
|
308
|
+
try {
|
|
309
|
+
const uris = await this.getMediaChunks(file, requestId); // array of remote file uris
|
|
310
|
+
if (!uris || !uris.length) {
|
|
311
|
+
throw new Error(`Error in getting chunks from media helper for file ${file}`);
|
|
312
|
+
}
|
|
313
|
+
totalCount = uris.length + 1; // total number of chunks that will be processed
|
|
314
|
+
|
|
315
|
+
// parallel process of chunks with limit
|
|
316
|
+
const tasks = uris.map(uri => () => processURI(uri)); // map each uri to a function that returns a Promise
|
|
317
|
+
result = await processInBatches(tasks, 2); // wait for all Promises to resolve, 2 at a time
|
|
318
|
+
|
|
319
|
+
|
|
320
|
+
} catch (error) {
|
|
321
|
+
const errMsg = `Transcribe error: ${error?.response?.data || error?.message || error}`;
|
|
322
|
+
logger.error(errMsg);
|
|
323
|
+
return errMsg;
|
|
324
|
+
}
|
|
325
|
+
finally {
|
|
326
|
+
try {
|
|
327
|
+
for (const chunk of chunks) {
|
|
328
|
+
try {
|
|
329
|
+
await deleteTempPath(chunk);
|
|
330
|
+
} catch (error) {
|
|
331
|
+
//ignore error
|
|
332
|
+
}
|
|
333
|
+
}
|
|
334
|
+
|
|
335
|
+
await this.markCompletedForCleanUp(requestId);
|
|
336
|
+
|
|
337
|
+
//check cleanup for whisper temp uploaded files url
|
|
338
|
+
const regex = /whispertempfiles\/([a-z0-9-]+)/;
|
|
339
|
+
const match = file.match(regex);
|
|
340
|
+
if (match && match[1]) {
|
|
341
|
+
const extractedValue = match[1];
|
|
342
|
+
await this.markCompletedForCleanUp(extractedValue);
|
|
343
|
+
logger.info(`Cleaned temp whisper file ${file} with request id ${extractedValue}`);
|
|
344
|
+
}
|
|
345
|
+
|
|
346
|
+
} catch (error) {
|
|
347
|
+
logger.error(`An error occurred while deleting: ${error}`);
|
|
348
|
+
}
|
|
349
|
+
}
|
|
350
|
+
|
|
351
|
+
if (['srt','vtt'].includes(responseFormat) || wordTimestamped) { // align subtitles for formats
|
|
352
|
+
return alignSubtitles(result, responseFormat);
|
|
353
|
+
}
|
|
354
|
+
return result.join(` `);
|
|
355
|
+
}
|
|
356
|
+
}
|
|
357
|
+
|
|
358
|
+
export default OpenAIWhisperPlugin;
|
package/tests/main.test.js
CHANGED
|
@@ -348,161 +348,3 @@ test('test translate endpoint with huge arabic text english translation and chec
|
|
|
348
348
|
// check return only contains non-Arabic characters
|
|
349
349
|
t.notRegex(response.body?.singleResult?.data?.translate.result, /[ء-ي]/);
|
|
350
350
|
});
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
test('vision test image', async t => {
|
|
354
|
-
const response = await testServer.executeOperation({
|
|
355
|
-
query: `query($text: String, $chatHistory: [MultiMessage]){
|
|
356
|
-
vision(text: $text, chatHistory: $chatHistory) {
|
|
357
|
-
result
|
|
358
|
-
}
|
|
359
|
-
}`,
|
|
360
|
-
|
|
361
|
-
variables: {
|
|
362
|
-
"chatHistory": [
|
|
363
|
-
{
|
|
364
|
-
"role": "user",
|
|
365
|
-
"content": [
|
|
366
|
-
"{\"type\": \"text\", \"text\": \"first tell me your name then describe the image shortly:\"}",
|
|
367
|
-
"{\"type\":\"image_url\",\"image_url\":{\"url\":\"https://static.toiimg.com/thumb/msid-102827471,width-1280,height-720,resizemode-4/102827471.jpg\"}}"
|
|
368
|
-
],
|
|
369
|
-
}],
|
|
370
|
-
},
|
|
371
|
-
});
|
|
372
|
-
|
|
373
|
-
t.is(response.body?.singleResult?.errors, undefined);
|
|
374
|
-
t.true(response.body?.singleResult?.data?.vision.result.length > 100);
|
|
375
|
-
});
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
test('vision test base64 image', async t => {
|
|
379
|
-
const response = await testServer.executeOperation({
|
|
380
|
-
query: `query($text: String, $chatHistory: [MultiMessage]){
|
|
381
|
-
vision(text: $text, chatHistory: $chatHistory) {
|
|
382
|
-
result
|
|
383
|
-
}
|
|
384
|
-
}`,
|
|
385
|
-
|
|
386
|
-
variables: {
|
|
387
|
-
"chatHistory": [
|
|
388
|
-
{
|
|
389
|
-
"role": "user",
|
|
390
|
-
"content": [
|
|
391
|
-
"{\"type\": \"text\", \"text\": \"first tell me your name then describe the image shortly:\"}",
|
|
392
|
-
"{\"type\":\"image_url\",\"image_url\":{\"url\":\"\"}}"
|
|
393
|
-
],
|
|
394
|
-
}],
|
|
395
|
-
},
|
|
396
|
-
});
|
|
397
|
-
|
|
398
|
-
t.is(response.body?.singleResult?.errors, undefined);
|
|
399
|
-
t.true(response.body?.singleResult?.data?.vision.result.length > 100);
|
|
400
|
-
});
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
const base64Img = ``;
|
|
404
|
-
|
|
405
|
-
test('vision test chunking', async t => {
|
|
406
|
-
t.timeout(400000);
|
|
407
|
-
//generate text adem1 adem2 ... ademN
|
|
408
|
-
// const testText = Array.from(Array(1000000).keys()).map(i => `adem${i}`).join(' ');
|
|
409
|
-
//const testRow = { "role": "user", "content": [`{"type": "text", "text": "${testText}"}`] };
|
|
410
|
-
|
|
411
|
-
const base64ImgRow = `{"type":"image_url","image_url":{"url":"${base64Img}"}}`;
|
|
412
|
-
|
|
413
|
-
const response = await testServer.executeOperation({
|
|
414
|
-
query: `query($text: String, $chatHistory: [MultiMessage]){
|
|
415
|
-
vision(text: $text, chatHistory: $chatHistory) {
|
|
416
|
-
result
|
|
417
|
-
}
|
|
418
|
-
}`,
|
|
419
|
-
|
|
420
|
-
variables: {
|
|
421
|
-
"chatHistory": [
|
|
422
|
-
{
|
|
423
|
-
"role": "user",
|
|
424
|
-
"content": [
|
|
425
|
-
"{\"type\": \"text\", \"text\": \"first tell me your name then describe the image shortly:\"}",
|
|
426
|
-
...Array.from(new Array(1),()=> base64ImgRow),
|
|
427
|
-
],
|
|
428
|
-
}],
|
|
429
|
-
},
|
|
430
|
-
});
|
|
431
|
-
|
|
432
|
-
t.is(response.body?.singleResult?.errors, undefined);
|
|
433
|
-
t.true(response.body?.singleResult?.data?.vision.result.length > 100);
|
|
434
|
-
});
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
test('vision multi single long text', async t => {
|
|
438
|
-
t.timeout(400000);
|
|
439
|
-
//generate text adem1 adem2 ... ademN
|
|
440
|
-
const testText = Array.from(Array(10).keys()).map(i => `adem${i}`).join(' ');
|
|
441
|
-
const testRow = { "role": "user", "content": [`{"type": "text", "text": "${testText}"}`] };
|
|
442
|
-
|
|
443
|
-
const base64ImgRow = `{"type":"image_url","image_url":{"url":"${base64Img}"}}`;
|
|
444
|
-
|
|
445
|
-
const response = await testServer.executeOperation({
|
|
446
|
-
query: `query($text: String, $chatHistory: [MultiMessage]){
|
|
447
|
-
vision(text: $text, chatHistory: $chatHistory) {
|
|
448
|
-
result
|
|
449
|
-
}
|
|
450
|
-
}`,
|
|
451
|
-
|
|
452
|
-
variables: {
|
|
453
|
-
"chatHistory": [
|
|
454
|
-
...Array.from(new Array(10),()=> testRow),
|
|
455
|
-
{
|
|
456
|
-
"role": "user",
|
|
457
|
-
"content": [
|
|
458
|
-
"{\"type\": \"text\", \"text\": \"first tell me your name then describe the image shortly:\"}",
|
|
459
|
-
...Array.from(new Array(10),()=> base64ImgRow),
|
|
460
|
-
],
|
|
461
|
-
},
|
|
462
|
-
],
|
|
463
|
-
},
|
|
464
|
-
});
|
|
465
|
-
|
|
466
|
-
t.is(response.body?.singleResult?.errors?.[0]?.message, 'Unable to process your request as your single message content is too long. Please try again with a shorter message.');
|
|
467
|
-
});
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
test('vision multi long text', async t => {
|
|
471
|
-
t.timeout(400000);
|
|
472
|
-
//generate text adem1 adem2 ... ademN
|
|
473
|
-
const testText = Array.from(Array(10).keys()).map(i => `adem${i}`).join(' ');
|
|
474
|
-
const testRow = { "role": "user", "content": [`{"type": "text", "text": "${testText}"}`] };
|
|
475
|
-
|
|
476
|
-
const base64ImgRow = `{"type":"image_url","image_url":{"url":"${base64Img}"}}`;
|
|
477
|
-
|
|
478
|
-
const response = await testServer.executeOperation({
|
|
479
|
-
query: `query($text: String, $chatHistory: [MultiMessage]){
|
|
480
|
-
vision(text: $text, chatHistory: $chatHistory) {
|
|
481
|
-
result
|
|
482
|
-
}
|
|
483
|
-
}`,
|
|
484
|
-
|
|
485
|
-
variables: {
|
|
486
|
-
"chatHistory": [
|
|
487
|
-
...Array.from(new Array(10),()=> testRow),
|
|
488
|
-
{
|
|
489
|
-
"role": "user",
|
|
490
|
-
"content": [
|
|
491
|
-
"{\"type\": \"text\", \"text\": \"first tell me your name then describe the image shortly:\"}",
|
|
492
|
-
...Array.from(new Array(10),()=> base64ImgRow),
|
|
493
|
-
],
|
|
494
|
-
},
|
|
495
|
-
{
|
|
496
|
-
"role": "user",
|
|
497
|
-
"content": [
|
|
498
|
-
"{\"type\": \"text\", \"text\": \"then tell me your name then describe the image shortly:\"}",
|
|
499
|
-
...Array.from(new Array(1),()=> base64ImgRow),
|
|
500
|
-
],
|
|
501
|
-
},
|
|
502
|
-
],
|
|
503
|
-
},
|
|
504
|
-
});
|
|
505
|
-
|
|
506
|
-
t.is(response.body?.singleResult?.errors, undefined);
|
|
507
|
-
t.true(response.body?.singleResult?.data?.vision.result.length > 100);
|
|
508
|
-
});
|
|
@@ -0,0 +1,176 @@
|
|
|
1
|
+
// vision.test.js
|
|
2
|
+
// This is where all the Cortex vision model tests go
|
|
3
|
+
|
|
4
|
+
import test from 'ava';
|
|
5
|
+
import serverFactory from '../index.js';
|
|
6
|
+
|
|
7
|
+
// Server handle shared by every test in this file; assigned in the before-hook.
let testServer;

// One-time setup: build the server via the factory imported from '../index.js',
// start it when the factory returned a starter function, then publish the handle.
test.before(async () => {
    const { server: builtServer, startServer: launch } = await serverFactory();
    if (launch) {
        await launch();
    }
    testServer = builtServer;
});
|
|
14
|
+
|
|
15
|
+
// Teardown hook registered with `.always`; stops the shared server if one was
// created, and does nothing when setup never assigned it.
test.after.always('cleanup', async () => {
    if (!testServer) {
        return;
    }
    await testServer.stop();
});
|
|
20
|
+
|
|
21
|
+
// Sends one user message containing a text prompt and a remote image URL to the
// `vision` query, and expects an error-free response whose `result` is longer
// than 100 characters.
test('vision test image', async t => {
    const response = await testServer.executeOperation({
        query: `query($text: String, $chatHistory: [MultiMessage]){
vision(text: $text, chatHistory: $chatHistory) {
result
}
}`,

        variables: {
            "chatHistory": [
                {
                    "role": "user",
                    // Each content entry is a JSON-encoded string, not an object.
                    "content": [
                        "{\"type\": \"text\", \"text\": \"first tell me your name then describe the image shortly:\"}",
                        "{\"type\":\"image_url\",\"image_url\":{\"url\":\"https://static.toiimg.com/thumb/msid-102827471,width-1280,height-720,resizemode-4/102827471.jpg\"}}"
                    ],
                }],
        },
    });

    t.is(response.body?.singleResult?.errors, undefined);
    // Chain optionally through `vision` and `result`: if either is null or
    // undefined this becomes an ordinary failed assertion instead of a TypeError.
    t.true((response.body?.singleResult?.data?.vision?.result?.length ?? 0) > 100);
});
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
// Sends one user message containing a text prompt and an inline image entry to
// the `vision` query, and expects an error-free response whose `result` is
// longer than 100 characters.
test('vision test base64 image', async t => {
    const response = await testServer.executeOperation({
        query: `query($text: String, $chatHistory: [MultiMessage]){
vision(text: $text, chatHistory: $chatHistory) {
result
}
}`,

        variables: {
            "chatHistory": [
                {
                    "role": "user",
                    "content": [
                        "{\"type\": \"text\", \"text\": \"first tell me your name then describe the image shortly:\"}",
                        // NOTE(review): the image url below is an empty string; presumably a
                        // base64 data URI belongs here - confirm against the real fixture.
                        "{\"type\":\"image_url\",\"image_url\":{\"url\":\"\"}}"
                    ],
                }],
        },
    });

    t.is(response.body?.singleResult?.errors, undefined);
    // Chain optionally through `vision` and `result`: if either is null or
    // undefined this becomes an ordinary failed assertion instead of a TypeError.
    t.true((response.body?.singleResult?.data?.vision?.result?.length ?? 0) > 100);
});
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
// Image payload interpolated into the image_url entries of the tests below.
// NOTE(review): the value is an empty string here - presumably the real base64
// data was elided; confirm before relying on the image-based assertions.
const base64Img = '';
|
|
72
|
+
|
|
73
|
+
// Sends one user message containing a text prompt and a single image entry
// built from base64Img to the `vision` query, and expects an error-free
// response whose `result` is longer than 100 characters.
test('vision test chunking', async t => {
    // Extend this test's timeout to 400000 ms.
    t.timeout(400000);

    // Disabled large-text variant (would generate "adem0 ... adem999999"):
    // const testText = Array.from(Array(1000000).keys()).map(i => `adem${i}`).join(' ');
    // const testRow = { "role": "user", "content": [`{"type": "text", "text": "${testText}"}`] };

    // JSON-encoded image_url content entry built from the file-level base64Img value.
    const base64ImgRow = `{"type":"image_url","image_url":{"url":"${base64Img}"}}`;

    const response = await testServer.executeOperation({
        query: `query($text: String, $chatHistory: [MultiMessage]){
vision(text: $text, chatHistory: $chatHistory) {
result
}
}`,

        variables: {
            "chatHistory": [
                {
                    "role": "user",
                    "content": [
                        "{\"type\": \"text\", \"text\": \"first tell me your name then describe the image shortly:\"}",
                        base64ImgRow,
                    ],
                }],
        },
    });

    t.is(response.body?.singleResult?.errors, undefined);
    // Chain optionally through `vision` and `result`: if either is null or
    // undefined this becomes an ordinary failed assertion instead of a TypeError.
    t.true((response.body?.singleResult?.data?.vision?.result?.length ?? 0) > 100);
});
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
// Sends ten identical short text messages followed by one message carrying a
// prompt plus ten image entries, and expects the first reported error to be the
// "single message content is too long" message.
test('vision multi single long text', async t => {
    // Extend this test's timeout to 400000 ms.
    t.timeout(400000);

    // Builds "adem0 adem1 ... adem9" (ten space-separated words).
    const words = Array.from({ length: 10 }, (_, i) => `adem${i}`);
    const testText = words.join(' ');
    const testRow = { "role": "user", "content": [`{"type": "text", "text": "${testText}"}`] };

    // JSON-encoded image_url content entry built from the file-level base64Img value.
    const base64ImgRow = `{"type":"image_url","image_url":{"url":"${base64Img}"}}`;

    // Ten references to the same text-only message, and ten copies of the image entry.
    const textMessages = Array.from({ length: 10 }, () => testRow);
    const imageEntries = Array.from({ length: 10 }, () => base64ImgRow);

    const response = await testServer.executeOperation({
        query: `query($text: String, $chatHistory: [MultiMessage]){
vision(text: $text, chatHistory: $chatHistory) {
result
}
}`,

        variables: {
            "chatHistory": [
                ...textMessages,
                {
                    "role": "user",
                    "content": [
                        "{\"type\": \"text\", \"text\": \"first tell me your name then describe the image shortly:\"}",
                        ...imageEntries,
                    ],
                },
            ],
        },
    });

    const firstErrorMessage = response.body?.singleResult?.errors?.[0]?.message;
    t.is(firstErrorMessage, 'Unable to process your request as your single message content is too long. Please try again with a shorter message.');
});
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
// Sends ten identical short text messages followed by two image-bearing
// messages (ten images, then one) to the `vision` query, and expects an
// error-free response whose `result` is longer than 100 characters.
test('vision multi long text', async t => {
    // Extend this test's timeout to 400000 ms.
    t.timeout(400000);

    // Generates "adem0 adem1 ... adem9" (ten space-separated words).
    const testText = Array.from({ length: 10 }, (_, i) => `adem${i}`).join(' ');
    const testRow = { "role": "user", "content": [`{"type": "text", "text": "${testText}"}`] };

    // JSON-encoded image_url content entry built from the file-level base64Img value.
    const base64ImgRow = `{"type":"image_url","image_url":{"url":"${base64Img}"}}`;

    const response = await testServer.executeOperation({
        query: `query($text: String, $chatHistory: [MultiMessage]){
vision(text: $text, chatHistory: $chatHistory) {
result
}
}`,

        variables: {
            "chatHistory": [
                // Ten references to the same text-only message.
                ...Array.from({ length: 10 }, () => testRow),
                {
                    "role": "user",
                    "content": [
                        "{\"type\": \"text\", \"text\": \"first tell me your name then describe the image shortly:\"}",
                        ...Array.from({ length: 10 }, () => base64ImgRow),
                    ],
                },
                {
                    "role": "user",
                    "content": [
                        "{\"type\": \"text\", \"text\": \"then tell me your name then describe the image shortly:\"}",
                        base64ImgRow,
                    ],
                },
            ],
        },
    });

    t.is(response.body?.singleResult?.errors, undefined);
    // Chain optionally through `vision` and `result`: if either is null or
    // undefined this becomes an ordinary failed assertion instead of a TypeError.
    t.true((response.body?.singleResult?.data?.vision?.result?.length ?? 0) > 100);
});
|