@aj-archipelago/cortex 1.0.23 → 1.0.24
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/config.js +5 -0
- package/helper_apps/WhisperX/Dockerfile +1 -0
- package/helper_apps/WhisperX/app.py +47 -58
- package/helper_apps/WhisperX/docker-compose.debug.yml +1 -1
- package/helper_apps/WhisperX/docker-compose.yml +2 -2
- package/helper_apps/WhisperX/requirements.txt +1 -2
- package/package.json +1 -1
- package/server/plugins/modelPlugin.js +1 -1
- package/server/plugins/openAiDallE3Plugin.js +36 -23
- package/helper_apps/WhisperX/app-ts.py +0 -76
package/config.js
CHANGED
```diff
@@ -164,6 +164,11 @@ var config = convict({
         default: 'null',
         env: 'WHISPER_TS_API_URL'
     },
+    subscriptionKeepAlive: {
+        format: Number,
+        default: 0,
+        env: 'SUBSCRIPTION_KEEP_ALIVE'
+    },
 });
 
 // Read in environment variables and set up service configuration
```
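The new `subscriptionKeepAlive` setting (exposed as `SUBSCRIPTION_KEEP_ALIVE`) adds a numeric knob for keeping long-lived GraphQL subscription connections alive; the default of `0` leaves the behavior off. How the server consumes the value is not part of this diff, so the following is only a minimal sketch, assuming the convict `config` object is exported from `config.js` and treating the value as an interval in seconds; the unit, the payload shape, and the wiring are assumptions for illustration:

```js
// Hypothetical sketch: only the 'subscriptionKeepAlive' key and the
// SUBSCRIPTION_KEEP_ALIVE variable come from this diff. The interval unit
// (seconds) and the ping payload are assumptions.
import { config } from './config.js';    // assumed export of the convict config
import pubsub from './server/pubsub.js'; // pubsub module referenced elsewhere in this diff

const keepAlive = config.get('subscriptionKeepAlive'); // 0 disables (default)

if (keepAlive > 0) {
    setInterval(() => {
        // Publish a no-op event so idle websocket connections see regular traffic.
        pubsub.publish('REQUEST_PROGRESS', {
            requestProgress: { requestId: 'keep-alive', progress: 1, data: null },
        });
    }, keepAlive * 1000);
}
```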
package/helper_apps/WhisperX/Dockerfile
CHANGED
```diff
@@ -28,4 +28,5 @@ RUN adduser -u 5678 --disabled-password --gecos "" appuser && chown -R appuser /
 USER appuser
 
 # During debugging, this entry point will be overridden. For more information, please refer to https://aka.ms/vscode-docker-python-debug
+# CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8000"]
 CMD ["gunicorn", "--bind", "0.0.0.0:8000", "--timeout", "0", "-k", "uvicorn.workers.UvicornWorker", "app:app"]
```
package/helper_apps/WhisperX/app.py
CHANGED
```diff
@@ -2,11 +2,11 @@ import uvicorn
 from fastapi import FastAPI, HTTPException, Request
 from uuid import uuid4
 import os
-import requests
 import asyncio
 import whisper
 from whisper.utils import get_writer
 from fastapi.encoders import jsonable_encoder
+import time
 
 model_download_root = './models'
 model = whisper.load_model("large", download_root=model_download_root) #large, tiny
```
```diff
@@ -20,96 +20,85 @@ save_directory = "./tmp" # folder for downloaded files
 os.makedirs(save_directory, exist_ok=True)
 
 
-def download_remote_file(url, save_directory):
-    # Generate a unique file name with a UUID
-    unique_name = str(uuid4()) + os.path.splitext(url)[-1]
-    save_path = os.path.join(save_directory, unique_name)
-
-    # Download the remote file
-    response = requests.get(url, stream=True)
-    response.raise_for_status()
-
-    # Save the downloaded file with the unique name
-    with open(save_path, 'wb') as file:
-        for chunk in response.iter_content(chunk_size=8192):
-            file.write(chunk)
-
-    return [unique_name, save_path]
-
 def delete_tmp_file(file_path):
     try:
         os.remove(file_path)
         print(f"Temporary file '{file_path}' has been deleted.")
     except OSError as e:
         print(f"Error: {e.strerror}")
-
-def modify_segments(result):
-    modified_segments = []
 
-    id = 0
-    for segment in result['segments']:
-        for word_info in segment['words']:
-            word = word_info['word']
-            start = word_info['start']
-            end = word_info['end']
-
-            modified_segment = {} #segment.copy()
-            modified_segment['id'] = id
-            modified_segment['text'] = word
-            modified_segment['start'] = start
-            modified_segment['end'] = end
-            modified_segments.append(modified_segment)
-            id+=1
+def transcribe(params):
+    if 'fileurl' not in params:
+        raise HTTPException(status_code=400, detail="fileurl parameter is required")
 
-
-
-def transcribe(fileurl):
-    print(f"Downloading file from: {fileurl}")
-    [unique_file_name, save_path] = download_remote_file(
-        fileurl, save_directory)
-    print(f"Downloaded file saved as: {unique_file_name}")
+    fileurl = params["fileurl"]
 
-
-
+    #word_timestamps bool, default True
+    word_timestamps = True
+    if 'word_timestamps' in params: #parse as bool
+        word_timestamps = False if params['word_timestamps'] == 'False' else True
 
-
+    print(f"Transcribing file {fileurl} with word_timestamps={word_timestamps}")
+    start_time = time.time()
+    result = model.transcribe(fileurl, word_timestamps=word_timestamps)
+    end_time = time.time()
+    execution_time = end_time - start_time
+    print("Transcribe execution time:", execution_time, "seconds")
 
     srtpath = os.path.join(save_directory, str(uuid4()) + ".srt")
 
     print(f"Saving transcription as : {srtpath}")
     writer = get_writer("srt", save_directory)
-
-
+
+
+    writer_args = {'highlight_words': False, 'max_line_count': None, 'max_line_width': None, 'max_words_per_line': None}
+    if 'highlight_words' in params: #parse as bool
+        writer_args['highlight_words'] = params['highlight_words'] == 'True'
+    if 'max_line_count' in params: #parse as int
+        writer_args['max_line_count'] = int(params['max_line_count'])
+    if 'max_line_width' in params: #parse as int
+        writer_args['max_line_width'] = int(params['max_line_width'])
+    if 'max_words_per_line' in params: #parse as int
+        writer_args['max_words_per_line'] = int(params['max_words_per_line'])
+
+    # if and only if fileurl and word_timestamps=True, max_words_per_line=1
+    if fileurl and word_timestamps and len(params) <= 2:
+        writer_args['max_words_per_line'] = 1
+
+    # writer_args = {arg: args.pop(arg) for arg in word_options if arg in args}
+    writer(result, srtpath, **writer_args)
+
 
     with open(srtpath, "r") as f:
         srtstr = f.read()
 
-    # clean up tmp files
-    delete_tmp_file(save_path)
+    # clean up tmp out files
    delete_tmp_file(srtpath)
 
-    print(f"Transcription done.")
+    print(f"Transcription of file {fileurl} completed")
     return srtstr
 
 
-@app.get("/")
-@app.post("/")
-async def root(request: Request):
+async def get_params(request: Request):
+    params = {}
     if request.method == "POST":
         body = jsonable_encoder(await request.json())
-        fileurl = body["fileurl"]
+        params = body
     else:
-        fileurl = request.query_params.get("fileurl")
-        if not fileurl:
-            return "No fileurl given!"
+        params = dict(request.query_params)
+    return params
 
+@app.get("/")
+@app.post("/")
+async def root(request: Request):
     if semaphore.locked():
         raise HTTPException(status_code=429, detail="Too Many Requests")
 
+    params = await get_params(request)
     async with semaphore:
-        result = await asyncio.to_thread(transcribe, fileurl)
+        result = await asyncio.to_thread(transcribe, params)
     return result
 
 if __name__ == "__main__":
-    print("Starting server", flush=True)
+    print("Starting APP Whisper server", flush=True)
     uvicorn.run(app, host="0.0.0.0", port=8000)
```
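The net effect of the `app.py` rewrite: the service no longer downloads the file itself (the URL is handed straight to Whisper, whose loader invokes ffmpeg, which can read remote URLs), and the endpoint now accepts a general `params` dict from either GET query parameters or a POST JSON body, with booleans and integers passed as strings and parsed server-side. A sketch of how a caller might exercise the new options; the host and file URL are placeholders:

```js
// Illustrative client for the endpoint above; URL values are placeholders.
const WHISPER_API = 'http://localhost:8000/';

// GET: options travel as string query parameters and are parsed by transcribe().
const qs = new URLSearchParams({
    fileurl: 'https://example.com/audio.mp3',
    word_timestamps: 'True',  // 'False' disables word-level timestamps
    max_words_per_line: '1',  // SRT writer option, parsed with int()
});
const srt = await fetch(`${WHISPER_API}?${qs}`).then((r) => r.text());

// POST: the same parameters as a JSON body, routed through get_params().
const srt2 = await fetch(WHISPER_API, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ fileurl: 'https://example.com/audio.mp3' }),
}).then((r) => r.text());
```

Note the special case in `transcribe`: when the request carries at most two parameters and word timestamps are enabled, the writer defaults to one word per SRT line.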
package/package.json
CHANGED
```diff
@@ -1,6 +1,6 @@
 {
   "name": "@aj-archipelago/cortex",
-  "version": "1.0.23",
+  "version": "1.0.24",
   "description": "Cortex is a GraphQL API for AI. It provides a simple, extensible interface for using AI services from OpenAI, Azure and others.",
   "private": false,
   "repository": {
```
package/server/plugins/modelPlugin.js
CHANGED
```diff
@@ -241,7 +241,7 @@ class ModelPlugin {
         console.log(`\x1b[36m${modelInput}\x1b[0m`);
     }
 
-    console.log(`\x1b[34m> ${this.parseResponse(responseData)}\x1b[0m`);
+    console.log(`\x1b[34m> ${JSON.stringify(this.parseResponse(responseData))}\x1b[0m`);
 
     prompt && prompt.debugInfo && (prompt.debugInfo += `\n${JSON.stringify(data)}`);
 }
```
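The `modelPlugin.js` change fixes response logging: interpolating a non-string into a template literal calls its `toString()`, so a parsed response object previously printed as `[object Object]`. A quick illustration (the object shape is an example only):

```js
const parsed = { data: [{ url: 'https://example.com/image.png' }] }; // example shape

console.log(`> ${parsed}`);                 // > [object Object]
console.log(`> ${JSON.stringify(parsed)}`); // > {"data":[{"url":"https://example.com/image.png"}]}
```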
package/server/plugins/openAiDallE3Plugin.js
CHANGED
```diff
@@ -1,6 +1,7 @@
 import RequestDurationEstimator from '../../lib/requestDurationEstimator.js';
 import pubsub from '../pubsub.js';
 import ModelPlugin from './modelPlugin.js';
+import { request } from '../../lib/request.js';
 
 const requestDurationEstimator = new RequestDurationEstimator(10);
 
```
```diff
@@ -17,6 +18,22 @@ class OpenAIDallE3Plugin extends ModelPlugin {
  * we keep the request open and send progress updates to the client
  * over a websocket.
  */
+
+    async executeRequest(url, data, params, headers, prompt, requestId, pathway) {
+        try {
+            this.aiRequestStartTime = new Date();
+            this.requestId = requestId;
+            this.logRequestStart(url, data);
+            const responseData = await request({ url, data, params, headers, cache: this.shouldCache }, this.modelName, this.requestId, pathway);
+
+            this.logRequestData(data, responseData, prompt);
+            return this.parseResponse(responseData);
+        } catch (error) {
+            // Log the error and continue
+            console.error(error);
+        }
+    }
+
     async execute(text, parameters, _, pathwayResolver) {
         const url = this.requestUrl(text);
         const data = JSON.stringify({ prompt: text });
```
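The new `executeRequest` override logs and parses the response locally, and on failure only logs the error, so the returned promise resolves with `undefined` instead of rejecting. That contract is what lets the polling code below route success and failure through a single handler. A standalone sketch of the pattern; the names here are illustrative, not from the diff:

```js
// Error-swallowing request wrapper: failures are logged and the promise
// resolves with undefined, so a caller's .then() always runs.
async function executeRequestLike(doRequest) {
    try {
        return await doRequest();
    } catch (error) {
        // Log the error and continue
        console.error(error);
    }
}

// Always lands in .then(), never .catch():
executeRequestLike(() => Promise.reject(new Error('network down')))
    .then((result) => console.log(result)); // prints: undefined
```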
```diff
@@ -49,29 +66,25 @@ class OpenAIDallE3Plugin extends ModelPlugin {
     let attemptCount = 0;
     let data = null;
 
-    requestPromise
-
-
-
-
-
-
-
-
-
-
-
-
+    requestPromise
+      .then((response) => handleResponse(response))
+      .catch((error) => handleResponse(error));
+
+    function handleResponse(response) {
+      const status = response?.error ? "failed" : "succeeded";
+      const data = JSON.stringify(response?.error ? response : response);
+
+      const requestProgress = {
+        requestId,
+        status,
+        progress: 1,
+        data,
+      };
+
+      state.status = status;
       requestDurationEstimator.endRequest();
-      pubsub.publish("REQUEST_PROGRESS", {
-        requestProgress: {
-          requestId,
-          status: "failed",
-          progress: 1,
-          data: JSON.stringify(error),
-        }
-      });
-    });
+      pubsub.publish("REQUEST_PROGRESS", { requestProgress });
+    }
 
     // publish an update every 2 seconds, using the request duration estimator to calculate
     // the percent complete
```
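`handleResponse` collapses the old separate success and failure paths into one publisher: both resolution and rejection of `requestPromise` emit a single `REQUEST_PROGRESS` event carrying `progress: 1`, a `status` of `"succeeded"` or `"failed"`, and the stringified response as `data`. A hedged sketch of a consumer on the other end, assuming the pubsub module exposes the graphql-subscriptions `asyncIterator` API (an assumption, not shown in this diff):

```js
// Illustrative subscription resolver; the resolver shape and the
// asyncIterator availability are assumptions for this sketch.
import pubsub from './server/pubsub.js';

const Subscription = {
    requestProgress: {
        subscribe: () => pubsub.asyncIterator(['REQUEST_PROGRESS']),
    },
};

// Each event carries { requestId, status, progress, data }, where data is the
// JSON-stringified model response and status is "succeeded" or "failed".
```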
```diff
@@ -96,7 +109,7 @@ class OpenAIDallE3Plugin extends ModelPlugin {
             await new Promise(resolve => setTimeout(resolve, 2000));
         }
         while (state.status !== "succeeded" && attemptCount++ < 30);
-
+
         return data;
     }
 }
```
package/helper_apps/WhisperX/app-ts.py
REMOVED
```diff
@@ -1,76 +0,0 @@
-import uvicorn
-from fastapi import FastAPI
-import stable_whisper
-from uuid import uuid4
-import requests
-import os
-
-model_download_root = './models'
-model = stable_whisper.load_model('large', download_root=model_download_root) #large, tiny
-
-app = FastAPI()
-
-save_directory = "./tmp" # folder for downloaded files
-os.makedirs(save_directory, exist_ok=True)
-
-
-def download_remote_file(url, save_directory):
-    # Generate a unique file name with a UUID
-    unique_name = str(uuid4()) + os.path.splitext(url)[-1]
-    save_path = os.path.join(save_directory, unique_name)
-
-    # Download the remote file
-    response = requests.get(url, stream=True)
-    response.raise_for_status()
-
-    # Save the downloaded file with the unique name
-    with open(save_path, 'wb') as file:
-        for chunk in response.iter_content(chunk_size=8192):
-            file.write(chunk)
-
-    return [unique_name, save_path]
-
-
-def delete_tmp_file(file_path):
-    try:
-        os.remove(file_path)
-        print(f"Temporary file '{file_path}' has been deleted.")
-    except OSError as e:
-        print(f"Error: {e.strerror}")
-
-
-async def transcribe(fileurl):
-    print(f"Downloading file from: {fileurl}")
-    [unique_file_name, save_path] = download_remote_file(
-        fileurl, save_directory)
-    print(f"Downloaded file saved as: {unique_file_name}")
-
-    print(f"Transcribing file")
-    result = model.transcribe(save_path)
-
-    srtpath = os.path.join(save_directory, str(uuid4()) + ".srt")
-
-    print(f"Saving transcription as : {srtpath}")
-    result.to_srt_vtt(srtpath, segment_level=False)
-
-    with open(srtpath, "r") as f:
-        srtstr = f.read()
-
-    # clean up tmp files
-    delete_tmp_file(save_path)
-    delete_tmp_file(srtpath)
-
-    print(f"Transcription done.")
-    return srtstr
-
-
-@app.get("/")
-async def root(fileurl: str):
-    if not fileurl:
-        return "No fileurl given!"
-    result = await transcribe(fileurl)
-    return result
-
-if __name__ == "__main__":
-    print("Starting server", flush=True)
-    uvicorn.run(app, host="0.0.0.0", port=8000)
```