@aj-archipelago/cortex 1.0.4 → 1.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. package/README.md +3 -3
  2. package/config/default.example.json +18 -0
  3. package/config.js +28 -8
  4. package/helper_apps/MediaFileChunker/Dockerfile +20 -0
  5. package/helper_apps/MediaFileChunker/package-lock.json +18 -18
  6. package/helper_apps/MediaFileChunker/package.json +1 -1
  7. package/helper_apps/WhisperX/.dockerignore +27 -0
  8. package/helper_apps/WhisperX/Dockerfile +31 -0
  9. package/helper_apps/WhisperX/app-ts.py +76 -0
  10. package/helper_apps/WhisperX/app.py +115 -0
  11. package/helper_apps/WhisperX/docker-compose.debug.yml +12 -0
  12. package/helper_apps/WhisperX/docker-compose.yml +10 -0
  13. package/helper_apps/WhisperX/requirements.txt +6 -0
  14. package/index.js +1 -1
  15. package/lib/gcpAuthTokenHelper.js +37 -0
  16. package/lib/redisSubscription.js +1 -1
  17. package/package.json +9 -7
  18. package/pathways/basePathway.js +2 -2
  19. package/pathways/index.js +8 -2
  20. package/pathways/summary.js +2 -2
  21. package/pathways/sys_openai_chat.js +19 -0
  22. package/pathways/sys_openai_completion.js +11 -0
  23. package/pathways/{lc_test.mjs → test_langchain.mjs} +1 -1
  24. package/pathways/test_palm_chat.js +31 -0
  25. package/pathways/transcribe.js +3 -1
  26. package/pathways/translate.js +2 -1
  27. package/{graphql → server}/graphql.js +64 -62
  28. package/{graphql → server}/pathwayPrompter.js +9 -1
  29. package/{graphql → server}/pathwayResolver.js +46 -47
  30. package/{graphql → server}/plugins/azureTranslatePlugin.js +22 -0
  31. package/{graphql → server}/plugins/modelPlugin.js +15 -42
  32. package/server/plugins/openAiChatPlugin.js +134 -0
  33. package/{graphql → server}/plugins/openAiCompletionPlugin.js +38 -2
  34. package/{graphql → server}/plugins/openAiWhisperPlugin.js +59 -7
  35. package/server/plugins/palmChatPlugin.js +229 -0
  36. package/server/plugins/palmCompletionPlugin.js +134 -0
  37. package/{graphql → server}/prompt.js +11 -4
  38. package/server/rest.js +321 -0
  39. package/{graphql → server}/typeDef.js +30 -13
  40. package/tests/chunkfunction.test.js +1 -1
  41. package/tests/config.test.js +1 -1
  42. package/tests/main.test.js +282 -43
  43. package/tests/mocks.js +1 -1
  44. package/tests/modelPlugin.test.js +3 -15
  45. package/tests/openAiChatPlugin.test.js +125 -0
  46. package/tests/openai_api.test.js +147 -0
  47. package/tests/palmChatPlugin.test.js +256 -0
  48. package/tests/palmCompletionPlugin.test.js +87 -0
  49. package/tests/pathwayResolver.test.js +1 -1
  50. package/tests/server.js +23 -0
  51. package/tests/truncateMessages.test.js +1 -1
  52. package/graphql/plugins/openAiChatPlugin.js +0 -46
  53. package/tests/chunking.test.js +0 -155
  54. package/tests/translate.test.js +0 -126
  55. /package/{graphql → server}/chunker.js +0 -0
  56. /package/{graphql → server}/parser.js +0 -0
  57. /package/{graphql → server}/pathwayResponseParser.js +0 -0
  58. /package/{graphql → server}/plugins/localModelPlugin.js +0 -0
  59. /package/{graphql → server}/pubsub.js +0 -0
  60. /package/{graphql → server}/requestState.js +0 -0
  61. /package/{graphql → server}/resolver.js +0 -0
  62. /package/{graphql → server}/subscriptions.js +0 -0
package/README.md CHANGED
@@ -17,7 +17,7 @@ Just about anything! It's kind of an LLM swiss army knife. Here are some ideas:
17
17
  ## Features
18
18
 
19
19
  * Simple architecture to build custom functional endpoints (called `pathways`), that implement common NL AI tasks. Default pathways include chat, summarization, translation, paraphrasing, completion, spelling and grammar correction, entity extraction, sentiment analysis, and bias analysis.
20
- * Allows for building multi-model, multi-tool, multi-vendor, and model-agnostic pathways (choose the right model or combination of models and tools for the job, implement redundancy) with built-in support for OpenAI GPT-3, GPT-3.5 (chatGPT), and GPT-4 models - both from OpenAI directly and through Azure OpenAI, OpenAI Whisper, Azure Translator, LangChain.js and more.
20
+ * Allows for building multi-model, multi-tool, multi-vendor, and model-agnostic pathways (choose the right model or combination of models and tools for the job, implement redundancy) with built-in support for OpenAI GPT-3, GPT-3.5 (chatGPT), and GPT-4 models - both from OpenAI directly and through Azure OpenAI, PaLM Text and PaLM Chat from Google, OpenAI Whisper, Azure Translator, LangChain.js and more.
21
21
  * Easy, templatized prompt definition with flexible support for most prompt engineering techniques and strategies ranging from simple single prompts to complex custom prompt chains with context continuity.
22
22
  * Built in support for long-running, asynchronous operations with progress updates or streaming responses
23
23
  * Integrated context persistence: have your pathways "remember" whatever you want and use it on the next request to the model
@@ -198,8 +198,8 @@ The core pathway `summary.js` below is implemented using custom pathway logic an
198
198
  // This module exports a prompt that takes an input text and generates a summary using a custom resolver.
199
199
 
200
200
  // Import required modules
201
- import { semanticTruncate } from '../graphql/chunker.js';
202
- import { PathwayResolver } from '../graphql/pathwayResolver.js';
201
+ import { semanticTruncate } from '../server/chunker.js';
202
+ import { PathwayResolver } from '../server/pathwayResolver.js';
203
203
 
204
204
  export default {
205
205
  // The main prompt function that takes the input text and asks to generate a summary.
@@ -51,6 +51,24 @@
51
51
  "requestsPerSecond": 10,
52
52
  "maxTokenLength": 8192
53
53
  },
54
+ "palm-text": {
55
+ "type": "PALM-COMPLETION",
56
+ "url": "https://us-central1-aiplatform.googleapis.com/v1/projects/project-id/locations/us-central1/publishers/google/models/text-bison@001:predict",
57
+ "headers": {
58
+ "Content-Type": "application/json"
59
+ },
60
+ "requestsPerSecond": 10,
61
+ "maxTokenLength": 2048
62
+ },
63
+ "palm-chat": {
64
+ "type": "PALM-CHAT",
65
+ "url": "https://us-central1-aiplatform.googleapis.com/v1/projects/project-id/locations/us-central1/publishers/google/models/chat-bison@001:predict",
66
+ "headers": {
67
+ "Content-Type": "application/json"
68
+ },
69
+ "requestsPerSecond": 10,
70
+ "maxTokenLength": 2048
71
+ },
54
72
  "local-llama13B": {
55
73
  "type": "LOCAL-CPP-MODEL",
56
74
  "executablePath": "../llm/llama.cpp/main",
package/config.js CHANGED
@@ -3,6 +3,7 @@ import convict from 'convict';
3
3
  import HandleBars from './lib/handleBars.js';
4
4
  import fs from 'fs';
5
5
  import { fileURLToPath, pathToFileURL } from 'url';
6
+ import GcpAuthTokenHelper from './lib/gcpAuthTokenHelper.js';
6
7
 
7
8
  const __dirname = path.dirname(fileURLToPath(import.meta.url));
8
9
 
@@ -57,7 +58,8 @@ var config = convict({
57
58
  cortexApiKey: {
58
59
  format: String,
59
60
  default: null,
60
- env: 'CORTEX_API_KEY'
61
+ env: 'CORTEX_API_KEY',
62
+ sensitive: true
61
63
  },
62
64
  defaultModelName: {
63
65
  format: String,
@@ -67,19 +69,21 @@ var config = convict({
67
69
  models: {
68
70
  format: Object,
69
71
  default: {
70
- "oai-td3": {
71
- "type": "OPENAI-COMPLETION",
72
- "url": "{{openaiApiUrl}}",
72
+ "oai-gpturbo": {
73
+ "type": "OPENAI-CHAT",
74
+ "url": "https://api.openai.com/v1/chat/completions",
73
75
  "headers": {
74
- "Authorization": "Bearer {{openaiApiKey}}",
76
+ "Authorization": "Bearer {{OPENAI_API_KEY}}",
75
77
  "Content-Type": "application/json"
76
78
  },
77
79
  "params": {
78
- "model": "{{openaiDefaultModel}}"
80
+ "model": "gpt-3.5-turbo"
79
81
  },
82
+ "requestsPerSecond": 10,
83
+ "maxTokenLength": 8192
80
84
  },
81
85
  "oai-whisper": {
82
- "type": "OPENAI_WHISPER",
86
+ "type": "OPENAI-WHISPER",
83
87
  "url": "https://api.openai.com/v1/audio/transcriptions",
84
88
  "headers": {
85
89
  "Authorization": "Bearer {{OPENAI_API_KEY}}"
@@ -93,7 +97,7 @@ var config = convict({
93
97
  },
94
98
  openaiDefaultModel: {
95
99
  format: String,
96
- default: 'text-davinci-003',
100
+ default: 'gpt-3.5-turbo',
97
101
  env: 'OPENAI_DEFAULT_MODEL'
98
102
  },
99
103
  openaiApiKey: {
@@ -117,6 +121,17 @@ var config = convict({
117
121
  default: 'null',
118
122
  env: 'WHISPER_MEDIA_API_URL'
119
123
  },
124
+ whisperTSApiUrl: {
125
+ format: String,
126
+ default: 'null',
127
+ env: 'WHISPER_TS_API_URL'
128
+ },
129
+ gcpServiceAccountKey: {
130
+ format: String,
131
+ default: null,
132
+ env: 'GCP_SERVICE_ACCOUNT_KEY',
133
+ sensitive: true
134
+ },
120
135
  });
121
136
 
122
137
  // Read in environment variables and set up service configuration
@@ -135,6 +150,11 @@ if (configFile && fs.existsSync(configFile)) {
135
150
  }
136
151
  }
137
152
 
153
+ if (config.get('gcpServiceAccountKey')) {
154
+ const gcpAuthTokenHelper = new GcpAuthTokenHelper(config.getProperties());
155
+ config.set('gcpAuthTokenHelper', gcpAuthTokenHelper);
156
+ }
157
+
138
158
  // Build and load pathways to config
139
159
  const buildPathways = async (config) => {
140
160
  const { pathwaysPath, corePathwaysPath, basePathwayPath } = config.getProperties();
@@ -0,0 +1,20 @@
1
+ FROM node:18-alpine
2
+
3
+ WORKDIR /usr/src/app
4
+
5
+ COPY package*.json ./
6
+
7
+ RUN npm install
8
+
9
+ ## following 3 lines are for installing ffmpeg
10
+ RUN apk update
11
+ RUN apk add
12
+ RUN apk add ffmpeg
13
+
14
+ COPY . .
15
+
16
+ EXPOSE 7071
17
+
18
+ # RUN npm run build
19
+
20
+ CMD [ "node", "start.js" ]
@@ -18,7 +18,7 @@
18
18
  "ioredis": "^5.3.1",
19
19
  "public-ip": "^6.0.1",
20
20
  "uuid": "^9.0.0",
21
- "ytdl-core": "git+ssh://git@github.com:khlevon/node-ytdl-core.git#v4.11.3-patch.1"
21
+ "ytdl-core": "github:khlevon/node-ytdl-core#v4.11.4-patch.2"
22
22
  }
23
23
  },
24
24
  "node_modules/@azure/abort-controller": {
@@ -45,9 +45,9 @@
45
45
  }
46
46
  },
47
47
  "node_modules/@azure/core-http": {
48
- "version": "3.0.0",
49
- "resolved": "https://registry.npmjs.org/@azure/core-http/-/core-http-3.0.0.tgz",
50
- "integrity": "sha512-BxI2SlGFPPz6J1XyZNIVUf0QZLBKFX+ViFjKOkzqD18J1zOINIQ8JSBKKr+i+v8+MB6LacL6Nn/sP/TE13+s2Q==",
48
+ "version": "3.0.1",
49
+ "resolved": "https://registry.npmjs.org/@azure/core-http/-/core-http-3.0.1.tgz",
50
+ "integrity": "sha512-A3x+um3cAPgQe42Lu7Iv/x8/fNjhL/nIoEfqFxfn30EyxK6zC13n+OUxzZBRC0IzQqssqIbt4INf5YG7lYYFtw==",
51
51
  "dependencies": {
52
52
  "@azure/abort-controller": "^1.0.0",
53
53
  "@azure/core-auth": "^1.3.0",
@@ -62,7 +62,7 @@
62
62
  "tslib": "^2.2.0",
63
63
  "tunnel": "^0.0.6",
64
64
  "uuid": "^8.3.0",
65
- "xml2js": "^0.4.19"
65
+ "xml2js": "^0.5.0"
66
66
  },
67
67
  "engines": {
68
68
  "node": ">=14.0.0"
@@ -1720,9 +1720,9 @@
1720
1720
  }
1721
1721
  },
1722
1722
  "node_modules/xml2js": {
1723
- "version": "0.4.23",
1724
- "resolved": "https://registry.npmjs.org/xml2js/-/xml2js-0.4.23.tgz",
1725
- "integrity": "sha512-ySPiMjM0+pLDftHgXY4By0uswI3SPKLDw/i3UXbnO8M/p28zqexCUoPmQFrYD+/1BzhGJSs2i1ERWKJAtiLrug==",
1723
+ "version": "0.5.0",
1724
+ "resolved": "https://registry.npmjs.org/xml2js/-/xml2js-0.5.0.tgz",
1725
+ "integrity": "sha512-drPFnkQJik/O+uPKpqSgr22mpuFHqKdbS835iAQrUC73L2F5WkboIRd63ai/2Yg6I1jzifPFKH2NTK+cfglkIA==",
1726
1726
  "dependencies": {
1727
1727
  "sax": ">=0.6.0",
1728
1728
  "xmlbuilder": "~11.0.0"
@@ -1741,7 +1741,7 @@
1741
1741
  },
1742
1742
  "node_modules/ytdl-core": {
1743
1743
  "version": "0.0.0-development",
1744
- "resolved": "git+ssh://git@github.com/khlevon/node-ytdl-core.git#586971bd9aeda1cbb4600851cfef82a809833ac2",
1744
+ "resolved": "git+ssh://git@github.com/khlevon/node-ytdl-core.git#87450450caabb91f81afa6e66758bf2f629664a1",
1745
1745
  "license": "MIT",
1746
1746
  "dependencies": {
1747
1747
  "m3u8stream": "^0.8.6",
@@ -1772,9 +1772,9 @@
1772
1772
  }
1773
1773
  },
1774
1774
  "@azure/core-http": {
1775
- "version": "3.0.0",
1776
- "resolved": "https://registry.npmjs.org/@azure/core-http/-/core-http-3.0.0.tgz",
1777
- "integrity": "sha512-BxI2SlGFPPz6J1XyZNIVUf0QZLBKFX+ViFjKOkzqD18J1zOINIQ8JSBKKr+i+v8+MB6LacL6Nn/sP/TE13+s2Q==",
1775
+ "version": "3.0.1",
1776
+ "resolved": "https://registry.npmjs.org/@azure/core-http/-/core-http-3.0.1.tgz",
1777
+ "integrity": "sha512-A3x+um3cAPgQe42Lu7Iv/x8/fNjhL/nIoEfqFxfn30EyxK6zC13n+OUxzZBRC0IzQqssqIbt4INf5YG7lYYFtw==",
1778
1778
  "requires": {
1779
1779
  "@azure/abort-controller": "^1.0.0",
1780
1780
  "@azure/core-auth": "^1.3.0",
@@ -1789,7 +1789,7 @@
1789
1789
  "tslib": "^2.2.0",
1790
1790
  "tunnel": "^0.0.6",
1791
1791
  "uuid": "^8.3.0",
1792
- "xml2js": "^0.4.19"
1792
+ "xml2js": "^0.5.0"
1793
1793
  },
1794
1794
  "dependencies": {
1795
1795
  "uuid": {
@@ -2948,9 +2948,9 @@
2948
2948
  }
2949
2949
  },
2950
2950
  "xml2js": {
2951
- "version": "0.4.23",
2952
- "resolved": "https://registry.npmjs.org/xml2js/-/xml2js-0.4.23.tgz",
2953
- "integrity": "sha512-ySPiMjM0+pLDftHgXY4By0uswI3SPKLDw/i3UXbnO8M/p28zqexCUoPmQFrYD+/1BzhGJSs2i1ERWKJAtiLrug==",
2951
+ "version": "0.5.0",
2952
+ "resolved": "https://registry.npmjs.org/xml2js/-/xml2js-0.5.0.tgz",
2953
+ "integrity": "sha512-drPFnkQJik/O+uPKpqSgr22mpuFHqKdbS835iAQrUC73L2F5WkboIRd63ai/2Yg6I1jzifPFKH2NTK+cfglkIA==",
2954
2954
  "requires": {
2955
2955
  "sax": ">=0.6.0",
2956
2956
  "xmlbuilder": "~11.0.0"
@@ -2962,8 +2962,8 @@
2962
2962
  "integrity": "sha512-fDlsI/kFEx7gLvbecc0/ohLG50fugQp8ryHzMTuW9vSa1GJ0XYWKnhsUx7oie3G98+r56aTQIUB4kht42R3JvA=="
2963
2963
  },
2964
2964
  "ytdl-core": {
2965
- "version": "git+ssh://git@github.com/khlevon/node-ytdl-core.git#586971bd9aeda1cbb4600851cfef82a809833ac2",
2966
- "from": "ytdl-core@git+ssh://git@github.com:khlevon/node-ytdl-core.git#v4.11.3-patch.1",
2965
+ "version": "git+ssh://git@github.com/khlevon/node-ytdl-core.git#87450450caabb91f81afa6e66758bf2f629664a1",
2966
+ "from": "ytdl-core@github:khlevon/node-ytdl-core#v4.11.4-patch.2",
2967
2967
  "requires": {
2968
2968
  "m3u8stream": "^0.8.6",
2969
2969
  "miniget": "^4.2.2",
@@ -18,6 +18,6 @@
18
18
  "ioredis": "^5.3.1",
19
19
  "public-ip": "^6.0.1",
20
20
  "uuid": "^9.0.0",
21
- "ytdl-core": "git+ssh://git@github.com:khlevon/node-ytdl-core.git#v4.11.3-patch.1"
21
+ "ytdl-core": "github:khlevon/node-ytdl-core#v4.11.4-patch.2"
22
22
  }
23
23
  }
@@ -0,0 +1,27 @@
1
+ **/__pycache__
2
+ **/.venv
3
+ **/.classpath
4
+ **/.dockerignore
5
+ **/.env
6
+ **/.git
7
+ **/.gitignore
8
+ **/.project
9
+ **/.settings
10
+ **/.toolstarget
11
+ **/.vs
12
+ **/.vscode
13
+ **/*.*proj.user
14
+ **/*.dbmdl
15
+ **/*.jfm
16
+ **/bin
17
+ **/charts
18
+ **/docker-compose*
19
+ **/compose*
20
+ **/Dockerfile*
21
+ **/node_modules
22
+ **/npm-debug.log
23
+ **/obj
24
+ **/secrets.dev.yaml
25
+ **/values.dev.yaml
26
+ LICENSE
27
+ README.md
@@ -0,0 +1,31 @@
1
+ # For more information, please refer to https://aka.ms/vscode-docker-python
2
+ FROM python:3.10-slim
3
+
4
+ EXPOSE 8000
5
+
6
+ ## following 3 lines are for installing ffmpeg
7
+ RUN apt-get -y update
8
+ RUN apt-get -y upgrade
9
+ RUN apt-get install -y ffmpeg
10
+
11
+ # Keeps Python from generating .pyc files in the container
12
+ ENV PYTHONDONTWRITEBYTECODE=1
13
+
14
+ # Turns off buffering for easier container logging
15
+ ENV PYTHONUNBUFFERED=1
16
+
17
+ # Install pip requirements
18
+ COPY requirements.txt .
19
+ RUN python -m pip install -r requirements.txt
20
+
21
+ WORKDIR /app
22
+ COPY ./models /app/models
23
+ COPY . /app
24
+
25
+ # Creates a non-root user with an explicit UID and adds permission to access the /app folder
26
+ # For more info, please refer to https://aka.ms/vscode-docker-python-configure-containers
27
+ RUN adduser -u 5678 --disabled-password --gecos "" appuser && chown -R appuser /app
28
+ USER appuser
29
+
30
+ # During debugging, this entry point will be overridden. For more information, please refer to https://aka.ms/vscode-docker-python-debug
31
+ CMD ["gunicorn", "--bind", "0.0.0.0:8000", "--timeout", "0", "-k", "uvicorn.workers.UvicornWorker", "app:app"]
@@ -0,0 +1,76 @@
1
+ import uvicorn
2
+ from fastapi import FastAPI
3
+ import stable_whisper
4
+ from uuid import uuid4
5
+ import requests
6
+ import os
7
+
8
+ model_download_root = './models'
9
+ model = stable_whisper.load_model('large', download_root=model_download_root) #large, tiny
10
+
11
+ app = FastAPI()
12
+
13
+ save_directory = "./tmp" # folder for downloaded files
14
+ os.makedirs(save_directory, exist_ok=True)
15
+
16
+
17
+ def download_remote_file(url, save_directory):
18
+ # Generate a unique file name with a UUID
19
+ unique_name = str(uuid4()) + os.path.splitext(url)[-1]
20
+ save_path = os.path.join(save_directory, unique_name)
21
+
22
+ # Download the remote file
23
+ response = requests.get(url, stream=True)
24
+ response.raise_for_status()
25
+
26
+ # Save the downloaded file with the unique name
27
+ with open(save_path, 'wb') as file:
28
+ for chunk in response.iter_content(chunk_size=8192):
29
+ file.write(chunk)
30
+
31
+ return [unique_name, save_path]
32
+
33
+
34
+ def delete_tmp_file(file_path):
35
+ try:
36
+ os.remove(file_path)
37
+ print(f"Temporary file '{file_path}' has been deleted.")
38
+ except OSError as e:
39
+ print(f"Error: {e.strerror}")
40
+
41
+
42
+ async def transcribe(fileurl):
43
+ print(f"Downloading file from: {fileurl}")
44
+ [unique_file_name, save_path] = download_remote_file(
45
+ fileurl, save_directory)
46
+ print(f"Downloaded file saved as: {unique_file_name}")
47
+
48
+ print(f"Transcribing file")
49
+ result = model.transcribe(save_path)
50
+
51
+ srtpath = os.path.join(save_directory, str(uuid4()) + ".srt")
52
+
53
+ print(f"Saving transcription as : {srtpath}")
54
+ result.to_srt_vtt(srtpath, segment_level=False)
55
+
56
+ with open(srtpath, "r") as f:
57
+ srtstr = f.read()
58
+
59
+ # clean up tmp files
60
+ delete_tmp_file(save_path)
61
+ delete_tmp_file(srtpath)
62
+
63
+ print(f"Transcription done.")
64
+ return srtstr
65
+
66
+
67
+ @app.get("/")
68
+ async def root(fileurl: str):
69
+ if not fileurl:
70
+ return "No fileurl given!"
71
+ result = await transcribe(fileurl)
72
+ return result
73
+
74
+ if __name__ == "__main__":
75
+ print("Starting server", flush=True)
76
+ uvicorn.run(app, host="0.0.0.0", port=8000)
@@ -0,0 +1,115 @@
1
+ import uvicorn
2
+ from fastapi import FastAPI, HTTPException, Request
3
+ from uuid import uuid4
4
+ import os
5
+ import requests
6
+ import asyncio
7
+ import whisper
8
+ from whisper.utils import get_writer
9
+ from fastapi.encoders import jsonable_encoder
10
+
11
+ model_download_root = './models'
12
+ model = whisper.load_model("large", download_root=model_download_root) #large, tiny
13
+
14
+ # Create a semaphore with a limit of 1
15
+ semaphore = asyncio.Semaphore(1)
16
+
17
+ app = FastAPI()
18
+
19
+ save_directory = "./tmp" # folder for downloaded files
20
+ os.makedirs(save_directory, exist_ok=True)
21
+
22
+
23
+ def download_remote_file(url, save_directory):
24
+ # Generate a unique file name with a UUID
25
+ unique_name = str(uuid4()) + os.path.splitext(url)[-1]
26
+ save_path = os.path.join(save_directory, unique_name)
27
+
28
+ # Download the remote file
29
+ response = requests.get(url, stream=True)
30
+ response.raise_for_status()
31
+
32
+ # Save the downloaded file with the unique name
33
+ with open(save_path, 'wb') as file:
34
+ for chunk in response.iter_content(chunk_size=8192):
35
+ file.write(chunk)
36
+
37
+ return [unique_name, save_path]
38
+
39
+ def delete_tmp_file(file_path):
40
+ try:
41
+ os.remove(file_path)
42
+ print(f"Temporary file '{file_path}' has been deleted.")
43
+ except OSError as e:
44
+ print(f"Error: {e.strerror}")
45
+
46
+ def modify_segments(result):
47
+ modified_segments = []
48
+
49
+ id = 0
50
+ for segment in result["segments"]:
51
+ for word_info in segment['words']:
52
+ word = word_info['word']
53
+ start = word_info['start']
54
+ end = word_info['end']
55
+
56
+ modified_segment = {} #segment.copy()
57
+ modified_segment['id'] = id
58
+ modified_segment['text'] = word
59
+ modified_segment['start'] = start
60
+ modified_segment['end'] = end
61
+ modified_segments.append(modified_segment)
62
+ id+=1
63
+
64
+ result["segments"] = modified_segments
65
+
66
+ def transcribe(fileurl):
67
+ print(f"Downloading file from: {fileurl}")
68
+ [unique_file_name, save_path] = download_remote_file(
69
+ fileurl, save_directory)
70
+ print(f"Downloaded file saved as: {unique_file_name}")
71
+
72
+ print(f"Transcribing file")
73
+ result = model.transcribe(save_path, word_timestamps=True)
74
+
75
+ modify_segments(result)
76
+
77
+ srtpath = os.path.join(save_directory, str(uuid4()) + ".srt")
78
+
79
+ print(f"Saving transcription as : {srtpath}")
80
+ writer = get_writer("srt", save_directory)
81
+ with open(srtpath, 'w', encoding='utf-8') as file_obj :
82
+ writer.write_result(result, file_obj)
83
+
84
+ with open(srtpath, "r") as f:
85
+ srtstr = f.read()
86
+
87
+ # clean up tmp files
88
+ delete_tmp_file(save_path)
89
+ delete_tmp_file(srtpath)
90
+
91
+ print(f"Transcription done.")
92
+ return srtstr
93
+
94
+
95
+ @app.get("/")
96
+ @app.post("/")
97
+ async def root(request: Request):
98
+ if request.method == "POST":
99
+ body = jsonable_encoder(await request.json())
100
+ fileurl = body.get("fileurl")
101
+ else:
102
+ fileurl = request.query_params.get("fileurl")
103
+ if not fileurl:
104
+ return "No fileurl given!"
105
+
106
+ if semaphore.locked():
107
+ raise HTTPException(status_code=429, detail="Too Many Requests")
108
+
109
+ async with semaphore:
110
+ result = await asyncio.to_thread(transcribe, fileurl)
111
+ return result
112
+
113
+ if __name__ == "__main__":
114
+ print("Starting APPWhisper server", flush=True)
115
+ uvicorn.run(app, host="0.0.0.0", port=8000)
@@ -0,0 +1,12 @@
1
+ version: '3.4'
2
+
3
+ services:
4
+ cortex:
5
+ image: cortex
6
+ build:
7
+ context: .
8
+ dockerfile: ./Dockerfile
9
+ command: ["sh", "-c", "pip install debugpy -t /tmp && python /tmp/debugpy --wait-for-client --listen 0.0.0.0:5678 -m uvicorn helper_apps.WhisperX/app:app --host 0.0.0.0 --port 8000"]
10
+ ports:
11
+ - 8000:8000
12
+ - 5678:5678
@@ -0,0 +1,10 @@
1
+ version: '3.4'
2
+
3
+ services:
4
+ cortex:
5
+ image: cortex
6
+ build:
7
+ context: .
8
+ dockerfile: ./Dockerfile
9
+ ports:
10
+ - 8000:8000
@@ -0,0 +1,6 @@
1
+ # To ensure app dependencies are ported from your virtual environment/host machine into your container, run 'pip freeze > requirements.txt' in the terminal to overwrite this file
2
+ fastapi[all]==0.89.0
3
+ uvicorn[standard]==0.20.0
4
+ gunicorn==20.1.0
5
+ whisper
6
+ stable-ts
package/index.js CHANGED
@@ -1,5 +1,5 @@
1
1
  import { config } from './config.js';
2
- import { build } from './graphql/graphql.js';
2
+ import { build } from './server/graphql.js';
3
3
 
4
4
  export default async (configParams) => {
5
5
  configParams && config.load(configParams);
@@ -0,0 +1,37 @@
1
+ import { GoogleAuth } from 'google-auth-library';
2
+
3
+ class GcpAuthTokenHelper {
4
+ constructor(config) {
5
+ const creds = config.gcpServiceAccountKey ? JSON.parse(config.gcpServiceAccountKey) : null;
6
+ if (!creds) {
7
+ throw new Error('GCP_SERVICE_ACCOUNT_KEY is missing or undefined');
8
+ }
9
+ this.authClient = new GoogleAuth({
10
+ credentials: creds,
11
+ scopes: ['https://www.googleapis.com/auth/cloud-platform'],
12
+ });
13
+ this.token = null;
14
+ this.expiry = null;
15
+ }
16
+
17
+ async getAccessToken() {
18
+ if (!this.token || !this.isTokenValid()) {
19
+ await this.refreshToken();
20
+ }
21
+ return this.token;
22
+ }
23
+
24
+ isTokenValid() {
25
+ // Check if token is still valid with a 5-minute buffer
26
+ return this.expiry && Date.now() < this.expiry.getTime() - 5 * 60 * 1000;
27
+ }
28
+
29
+ async refreshToken() {
30
+ const authClient = await this.authClient.getClient();
31
+ const accessTokenResponse = await authClient.getAccessToken();
32
+ this.token = accessTokenResponse.token;
33
+ this.expiry = new Date(accessTokenResponse.expirationTime);
34
+ }
35
+ }
36
+
37
+ export default GcpAuthTokenHelper;
@@ -1,6 +1,6 @@
1
1
  import Redis from 'ioredis';
2
2
  import { config } from '../config.js';
3
- import pubsub from '../graphql/pubsub.js';
3
+ import pubsub from '../server/pubsub.js';
4
4
 
5
5
  const connectionString = config.get('storageConnectionString');
6
6
  const client = new Redis(connectionString);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@aj-archipelago/cortex",
3
- "version": "1.0.4",
3
+ "version": "1.0.6",
4
4
  "description": "Cortex is a GraphQL API for AI. It provides a simple, extensible interface for using AI services from OpenAI, Azure and others.",
5
5
  "repository": {
6
6
  "type": "git",
@@ -28,13 +28,11 @@
28
28
  "type": "module",
29
29
  "homepage": "https://github.com/aj-archipelago/cortex#readme",
30
30
  "dependencies": {
31
- "@apollo/utils.keyvadapter": "^1.1.2",
31
+ "@apollo/server": "^4.7.3",
32
+ "@apollo/server-plugin-response-cache": "^4.1.2",
33
+ "@apollo/utils.keyvadapter": "^3.0.0",
32
34
  "@graphql-tools/schema": "^9.0.12",
33
35
  "@keyv/redis": "^2.5.4",
34
- "apollo-server": "^3.12.0",
35
- "apollo-server-core": "^3.11.1",
36
- "apollo-server-express": "^3.11.1",
37
- "apollo-server-plugin-response-cache": "^3.8.1",
38
36
  "axios": "^1.3.4",
39
37
  "axios-cache-interceptor": "^1.0.1",
40
38
  "bottleneck": "^2.19.5",
@@ -43,6 +41,7 @@
43
41
  "convict": "^6.2.3",
44
42
  "express": "^4.18.2",
45
43
  "form-data": "^4.0.0",
44
+ "google-auth-library": "^8.8.0",
46
45
  "gpt-3-encoder": "^1.1.4",
47
46
  "graphql": "^16.6.0",
48
47
  "graphql-subscriptions": "^2.0.0",
@@ -51,6 +50,7 @@
51
50
  "ioredis": "^5.3.1",
52
51
  "keyv": "^4.5.2",
53
52
  "langchain": "^0.0.47",
53
+ "subsrt": "^1.1.1",
54
54
  "uuid": "^9.0.0",
55
55
  "ws": "^8.12.0"
56
56
  },
@@ -59,6 +59,7 @@
59
59
  "dotenv": "^16.0.3",
60
60
  "eslint": "^8.38.0",
61
61
  "eslint-plugin-import": "^2.27.5",
62
+ "got": "^13.0.0",
62
63
  "sinon": "^15.0.3"
63
64
  },
64
65
  "publishConfig": {
@@ -70,6 +71,7 @@
70
71
  ],
71
72
  "require": [
72
73
  "dotenv/config"
73
- ]
74
+ ],
75
+ "concurrency": 1
74
76
  }
75
77
  }
@@ -1,5 +1,5 @@
1
- import { rootResolver, resolver } from '../graphql/resolver.js';
2
- import { typeDef } from '../graphql/typeDef.js';
1
+ import { rootResolver, resolver } from '../server/resolver.js';
2
+ import { typeDef } from '../server/typeDef.js';
3
3
 
4
4
  // all default definitions of a single pathway
5
5
  export default {