@aj-archipelago/cortex 1.0.22 → 1.0.24

This diff shows the differences between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/config.js CHANGED
@@ -164,6 +164,11 @@ var config = convict({
164
164
  default: 'null',
165
165
  env: 'WHISPER_TS_API_URL'
166
166
  },
167
+ subscriptionKeepAlive: {
168
+ format: Number,
169
+ default: 0,
170
+ env: 'SUBSCRIPTION_KEEP_ALIVE'
171
+ },
167
172
  });
168
173
 
169
174
  // Read in environment variables and set up service configuration
@@ -28,4 +28,5 @@ RUN adduser -u 5678 --disabled-password --gecos "" appuser && chown -R appuser /
28
28
  USER appuser
29
29
 
30
30
  # During debugging, this entry point will be overridden. For more information, please refer to https://aka.ms/vscode-docker-python-debug
31
+ # CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8000"]
31
32
  CMD ["gunicorn", "--bind", "0.0.0.0:8000", "--timeout", "0", "-k", "uvicorn.workers.UvicornWorker", "app:app"]
@@ -2,11 +2,11 @@ import uvicorn
2
2
  from fastapi import FastAPI, HTTPException, Request
3
3
  from uuid import uuid4
4
4
  import os
5
- import requests
6
5
  import asyncio
7
6
  import whisper
8
7
  from whisper.utils import get_writer
9
8
  from fastapi.encoders import jsonable_encoder
9
+ import time
10
10
 
11
11
  model_download_root = './models'
12
12
  model = whisper.load_model("large", download_root=model_download_root) #large, tiny
@@ -20,96 +20,85 @@ save_directory = "./tmp" # folder for downloaded files
20
20
  os.makedirs(save_directory, exist_ok=True)
21
21
 
22
22
 
23
- def download_remote_file(url, save_directory):
24
- # Generate a unique file name with a UUID
25
- unique_name = str(uuid4()) + os.path.splitext(url)[-1]
26
- save_path = os.path.join(save_directory, unique_name)
27
-
28
- # Download the remote file
29
- response = requests.get(url, stream=True)
30
- response.raise_for_status()
31
-
32
- # Save the downloaded file with the unique name
33
- with open(save_path, 'wb') as file:
34
- for chunk in response.iter_content(chunk_size=8192):
35
- file.write(chunk)
36
-
37
- return [unique_name, save_path]
38
-
39
23
  def delete_tmp_file(file_path):
40
24
  try:
41
25
  os.remove(file_path)
42
26
  print(f"Temporary file '{file_path}' has been deleted.")
43
27
  except OSError as e:
44
28
  print(f"Error: {e.strerror}")
45
-
46
- def modify_segments(result):
47
- modified_segments = []
48
29
 
49
- id = 0
50
- for segment in result["segments"]:
51
- for word_info in segment['words']:
52
- word = word_info['word']
53
- start = word_info['start']
54
- end = word_info['end']
55
-
56
- modified_segment = {} #segment.copy()
57
- modified_segment['id'] = id
58
- modified_segment['text'] = word
59
- modified_segment['start'] = start
60
- modified_segment['end'] = end
61
- modified_segments.append(modified_segment)
62
- id+=1
30
+ def transcribe(params):
31
+ if 'fileurl' not in params:
32
+ raise HTTPException(status_code=400, detail="fileurl parameter is required")
63
33
 
64
- result["segments"] = modified_segments
65
-
66
- def transcribe(fileurl):
67
- print(f"Downloading file from: {fileurl}")
68
- [unique_file_name, save_path] = download_remote_file(
69
- fileurl, save_directory)
70
- print(f"Downloaded file saved as: {unique_file_name}")
34
+ fileurl = params["fileurl"]
71
35
 
72
- print(f"Transcribing file")
73
- result = model.transcribe(save_path, word_timestamps=True)
36
+ #word_timestamps bool, default True
37
+ word_timestamps = True
38
+ if 'word_timestamps' in params: #parse as bool
39
+ word_timestamps = False if params['word_timestamps'] == 'False' else True
74
40
 
75
- modify_segments(result)
41
+ print(f"Transcribing file {fileurl} with word_timestamps={word_timestamps}")
42
+ start_time = time.time()
43
+ result = model.transcribe(fileurl, word_timestamps=word_timestamps)
44
+ end_time = time.time()
45
+ execution_time = end_time - start_time
46
+ print("Transcribe execution time:", execution_time, "seconds")
76
47
 
77
48
  srtpath = os.path.join(save_directory, str(uuid4()) + ".srt")
78
49
 
79
50
  print(f"Saving transcription as : {srtpath}")
80
51
  writer = get_writer("srt", save_directory)
81
- with open(srtpath, 'w', encoding='utf-8') as file_obj :
82
- writer.write_result(result, file_obj)
52
+
53
+
54
+ writer_args = {'highlight_words': False, 'max_line_count': None, 'max_line_width': None, 'max_words_per_line': None}
55
+ if 'highlight_words' in params: #parse as bool
56
+ writer_args['highlight_words'] = params['highlight_words'] == 'True'
57
+ if 'max_line_count' in params: #parse as int
58
+ writer_args['max_line_count'] = int(params['max_line_count'])
59
+ if 'max_line_width' in params: #parse as int
60
+ writer_args['max_line_width'] = int(params['max_line_width'])
61
+ if 'max_words_per_line' in params: #parse as int
62
+ writer_args['max_words_per_line'] = int(params['max_words_per_line'])
63
+
64
+ # if and only if fileurl and word_timestamps=True, max_words_per_line=1
65
+ if fileurl and word_timestamps and len(params) <= 2:
66
+ writer_args['max_words_per_line'] = 1
67
+
68
+ # writer_args = {arg: args.pop(arg) for arg in word_options if arg in args}
69
+ writer(result, srtpath, **writer_args)
70
+
83
71
 
84
72
  with open(srtpath, "r") as f:
85
73
  srtstr = f.read()
86
74
 
87
- # clean up tmp files
88
- delete_tmp_file(save_path)
75
+ # clean up tmp out files
89
76
  delete_tmp_file(srtpath)
90
77
 
91
- print(f"Transcription done.")
78
+ print(f"Transcription of file {fileurl} completed")
92
79
  return srtstr
93
80
 
94
81
 
95
- @app.get("/")
96
- @app.post("/")
97
- async def root(request: Request):
82
+ async def get_params(request: Request):
83
+ params = {}
98
84
  if request.method == "POST":
99
85
  body = jsonable_encoder(await request.json())
100
- fileurl = body.get("fileurl")
86
+ params = body
101
87
  else:
102
- fileurl = request.query_params.get("fileurl")
103
- if not fileurl:
104
- return "No fileurl given!"
88
+ params = dict(request.query_params)
89
+ return params
105
90
 
91
+ @app.get("/")
92
+ @app.post("/")
93
+ async def root(request: Request):
106
94
  if semaphore.locked():
107
95
  raise HTTPException(status_code=429, detail="Too Many Requests")
108
96
 
97
+ params = await get_params(request)
109
98
  async with semaphore:
110
- result = await asyncio.to_thread(transcribe, fileurl)
99
+ result = await asyncio.to_thread(transcribe, params)
111
100
  return result
112
101
 
113
102
  if __name__ == "__main__":
114
- print("Starting APPWhisper server", flush=True)
103
+ print("Starting APP Whisper server", flush=True)
115
104
  uvicorn.run(app, host="0.0.0.0", port=8000)
@@ -2,7 +2,7 @@ version: '3.4'
2
2
 
3
3
  services:
4
4
  cortex:
5
- image: cortex
5
+ image: arc/whisper
6
6
  build:
7
7
  context: .
8
8
  dockerfile: ./Dockerfile
@@ -2,9 +2,9 @@ version: '3.4'
2
2
 
3
3
  services:
4
4
  cortex:
5
- image: cortex
5
+ image: arc/whisper
6
6
  build:
7
7
  context: .
8
8
  dockerfile: ./Dockerfile
9
9
  ports:
10
- - 8000:8000
10
+ - 8000:8000
@@ -2,5 +2,4 @@
2
2
  fastapi[all]==0.89.0
3
3
  uvicorn[standard]==0.20.0
4
4
  gunicorn==20.1.0
5
- whisper
6
- stable-ts
5
+ openai-whisper
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@aj-archipelago/cortex",
3
- "version": "1.0.22",
3
+ "version": "1.0.24",
4
4
  "description": "Cortex is a GraphQL API for AI. It provides a simple, extensible interface for using AI services from OpenAI, Azure and others.",
5
5
  "private": false,
6
6
  "repository": {
@@ -13,7 +13,7 @@ import CohereSummarizePlugin from './plugins/cohereSummarizePlugin.js';
13
13
  import AzureCognitivePlugin from './plugins/azureCognitivePlugin.js';
14
14
  import OpenAiEmbeddingsPlugin from './plugins/openAiEmbeddingsPlugin.js';
15
15
  import OpenAIImagePlugin from './plugins/openAiImagePlugin.js';
16
- import OpenAIDallE3Plugin from './plugins/openAiDalle3Plugin.js';
16
+ import OpenAIDallE3Plugin from './plugins/openAiDallE3Plugin.js';
17
17
 
18
18
  class PathwayPrompter {
19
19
  constructor(config, pathway, modelName, model) {
@@ -241,7 +241,7 @@ class ModelPlugin {
241
241
  console.log(`\x1b[36m${modelInput}\x1b[0m`);
242
242
  }
243
243
 
244
- console.log(`\x1b[34m> ${this.parseResponse(responseData)}\x1b[0m`);
244
+ console.log(`\x1b[34m> ${JSON.stringify(this.parseResponse(responseData))}\x1b[0m`);
245
245
 
246
246
  prompt && prompt.debugInfo && (prompt.debugInfo += `\n${JSON.stringify(data)}`);
247
247
  }
@@ -1,6 +1,7 @@
1
1
  import RequestDurationEstimator from '../../lib/requestDurationEstimator.js';
2
2
  import pubsub from '../pubsub.js';
3
3
  import ModelPlugin from './modelPlugin.js';
4
+ import { request } from '../../lib/request.js';
4
5
 
5
6
  const requestDurationEstimator = new RequestDurationEstimator(10);
6
7
 
@@ -17,6 +18,22 @@ class OpenAIDallE3Plugin extends ModelPlugin {
17
18
  * we keep the request open and send progress updates to the client
18
19
  * over a websocket.
19
20
  */
21
+
22
+ async executeRequest(url, data, params, headers, prompt, requestId, pathway) {
23
+ try {
24
+ this.aiRequestStartTime = new Date();
25
+ this.requestId = requestId;
26
+ this.logRequestStart(url, data);
27
+ const responseData = await request({ url, data, params, headers, cache: this.shouldCache }, this.modelName, this.requestId, pathway);
28
+
29
+ this.logRequestData(data, responseData, prompt);
30
+ return this.parseResponse(responseData);
31
+ } catch (error) {
32
+ // Log the error and continue
33
+ console.error(error);
34
+ }
35
+ }
36
+
20
37
  async execute(text, parameters, _, pathwayResolver) {
21
38
  const url = this.requestUrl(text);
22
39
  const data = JSON.stringify({ prompt: text });
@@ -49,29 +66,25 @@ class OpenAIDallE3Plugin extends ModelPlugin {
49
66
  let attemptCount = 0;
50
67
  let data = null;
51
68
 
52
- requestPromise.then((response) => {
53
- state.status = "succeeded";
54
- requestDurationEstimator.endRequest();
55
- pubsub.publish('REQUEST_PROGRESS', {
56
- requestProgress: {
57
- requestId,
58
- status: "succeeded",
59
- progress: 1,
60
- data: JSON.stringify(response),
61
- }
62
- });
63
- }).catch((error) => {
64
- state.status = "failed";
69
+ requestPromise
70
+ .then((response) => handleResponse(response))
71
+ .catch((error) => handleResponse(error));
72
+
73
+ function handleResponse(response) {
74
+ const status = response?.error ? "failed" : "succeeded";
75
+ const data = JSON.stringify(response?.error ? response : response);
76
+
77
+ const requestProgress = {
78
+ requestId,
79
+ status,
80
+ progress: 1,
81
+ data,
82
+ };
83
+
84
+ state.status = status;
65
85
  requestDurationEstimator.endRequest();
66
- pubsub.publish('REQUEST_PROGRESS', {
67
- requestProgress: {
68
- requestId,
69
- status: "failed",
70
- progress: 1,
71
- data: JSON.stringify(error),
72
- }
73
- });
74
- });
86
+ pubsub.publish("REQUEST_PROGRESS", { requestProgress });
87
+ }
75
88
 
76
89
  // publish an update every 2 seconds, using the request duration estimator to calculate
77
90
  // the percent complete
@@ -96,7 +109,7 @@ class OpenAIDallE3Plugin extends ModelPlugin {
96
109
  await new Promise(resolve => setTimeout(resolve, 2000));
97
110
  }
98
111
  while (state.status !== "succeeded" && attemptCount++ < 30);
99
-
112
+
100
113
  return data;
101
114
  }
102
115
  }
@@ -1,76 +0,0 @@
1
- import uvicorn
2
- from fastapi import FastAPI
3
- import stable_whisper
4
- from uuid import uuid4
5
- import requests
6
- import os
7
-
8
- model_download_root = './models'
9
- model = stable_whisper.load_model('large', download_root=model_download_root) #large, tiny
10
-
11
- app = FastAPI()
12
-
13
- save_directory = "./tmp" # folder for downloaded files
14
- os.makedirs(save_directory, exist_ok=True)
15
-
16
-
17
- def download_remote_file(url, save_directory):
18
- # Generate a unique file name with a UUID
19
- unique_name = str(uuid4()) + os.path.splitext(url)[-1]
20
- save_path = os.path.join(save_directory, unique_name)
21
-
22
- # Download the remote file
23
- response = requests.get(url, stream=True)
24
- response.raise_for_status()
25
-
26
- # Save the downloaded file with the unique name
27
- with open(save_path, 'wb') as file:
28
- for chunk in response.iter_content(chunk_size=8192):
29
- file.write(chunk)
30
-
31
- return [unique_name, save_path]
32
-
33
-
34
- def delete_tmp_file(file_path):
35
- try:
36
- os.remove(file_path)
37
- print(f"Temporary file '{file_path}' has been deleted.")
38
- except OSError as e:
39
- print(f"Error: {e.strerror}")
40
-
41
-
42
- async def transcribe(fileurl):
43
- print(f"Downloading file from: {fileurl}")
44
- [unique_file_name, save_path] = download_remote_file(
45
- fileurl, save_directory)
46
- print(f"Downloaded file saved as: {unique_file_name}")
47
-
48
- print(f"Transcribing file")
49
- result = model.transcribe(save_path)
50
-
51
- srtpath = os.path.join(save_directory, str(uuid4()) + ".srt")
52
-
53
- print(f"Saving transcription as : {srtpath}")
54
- result.to_srt_vtt(srtpath, segment_level=False)
55
-
56
- with open(srtpath, "r") as f:
57
- srtstr = f.read()
58
-
59
- # clean up tmp files
60
- delete_tmp_file(save_path)
61
- delete_tmp_file(srtpath)
62
-
63
- print(f"Transcription done.")
64
- return srtstr
65
-
66
-
67
- @app.get("/")
68
- async def root(fileurl: str):
69
- if not fileurl:
70
- return "No fileurl given!"
71
- result = await transcribe(fileurl)
72
- return result
73
-
74
- if __name__ == "__main__":
75
- print("Starting server", flush=True)
76
- uvicorn.run(app, host="0.0.0.0", port=8000)