@aj-archipelago/cortex 1.0.4 → 1.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -3
- package/config/default.example.json +18 -0
- package/config.js +28 -8
- package/helper_apps/MediaFileChunker/Dockerfile +20 -0
- package/helper_apps/MediaFileChunker/package-lock.json +18 -18
- package/helper_apps/MediaFileChunker/package.json +1 -1
- package/helper_apps/WhisperX/.dockerignore +27 -0
- package/helper_apps/WhisperX/Dockerfile +31 -0
- package/helper_apps/WhisperX/app-ts.py +76 -0
- package/helper_apps/WhisperX/app.py +115 -0
- package/helper_apps/WhisperX/docker-compose.debug.yml +12 -0
- package/helper_apps/WhisperX/docker-compose.yml +10 -0
- package/helper_apps/WhisperX/requirements.txt +6 -0
- package/index.js +1 -1
- package/lib/gcpAuthTokenHelper.js +37 -0
- package/lib/redisSubscription.js +1 -1
- package/package.json +9 -7
- package/pathways/basePathway.js +2 -2
- package/pathways/index.js +8 -2
- package/pathways/summary.js +2 -2
- package/pathways/sys_openai_chat.js +19 -0
- package/pathways/sys_openai_completion.js +11 -0
- package/pathways/{lc_test.mjs → test_langchain.mjs} +1 -1
- package/pathways/test_palm_chat.js +31 -0
- package/pathways/transcribe.js +3 -1
- package/pathways/translate.js +2 -1
- package/{graphql → server}/graphql.js +64 -62
- package/{graphql → server}/pathwayPrompter.js +9 -1
- package/{graphql → server}/pathwayResolver.js +46 -47
- package/{graphql → server}/plugins/azureTranslatePlugin.js +22 -0
- package/{graphql → server}/plugins/modelPlugin.js +15 -42
- package/server/plugins/openAiChatPlugin.js +134 -0
- package/{graphql → server}/plugins/openAiCompletionPlugin.js +38 -2
- package/{graphql → server}/plugins/openAiWhisperPlugin.js +59 -7
- package/server/plugins/palmChatPlugin.js +229 -0
- package/server/plugins/palmCompletionPlugin.js +134 -0
- package/{graphql → server}/prompt.js +11 -4
- package/server/rest.js +321 -0
- package/{graphql → server}/typeDef.js +30 -13
- package/tests/chunkfunction.test.js +1 -1
- package/tests/config.test.js +1 -1
- package/tests/main.test.js +282 -43
- package/tests/mocks.js +1 -1
- package/tests/modelPlugin.test.js +3 -15
- package/tests/openAiChatPlugin.test.js +125 -0
- package/tests/openai_api.test.js +147 -0
- package/tests/palmChatPlugin.test.js +256 -0
- package/tests/palmCompletionPlugin.test.js +87 -0
- package/tests/pathwayResolver.test.js +1 -1
- package/tests/server.js +23 -0
- package/tests/truncateMessages.test.js +1 -1
- package/graphql/plugins/openAiChatPlugin.js +0 -46
- package/tests/chunking.test.js +0 -155
- package/tests/translate.test.js +0 -126
- /package/{graphql → server}/chunker.js +0 -0
- /package/{graphql → server}/parser.js +0 -0
- /package/{graphql → server}/pathwayResponseParser.js +0 -0
- /package/{graphql → server}/plugins/localModelPlugin.js +0 -0
- /package/{graphql → server}/pubsub.js +0 -0
- /package/{graphql → server}/requestState.js +0 -0
- /package/{graphql → server}/resolver.js +0 -0
- /package/{graphql → server}/subscriptions.js +0 -0
package/README.md
CHANGED
|
@@ -17,7 +17,7 @@ Just about anything! It's kind of an LLM swiss army knife. Here are some ideas:
|
|
|
17
17
|
## Features
|
|
18
18
|
|
|
19
19
|
* Simple architecture to build custom functional endpoints (called `pathways`), that implement common NL AI tasks. Default pathways include chat, summarization, translation, paraphrasing, completion, spelling and grammar correction, entity extraction, sentiment analysis, and bias analysis.
|
|
20
|
-
* Allows for building multi-model, multi-tool, multi-vendor, and model-agnostic pathways (choose the right model or combination of models and tools for the job, implement redundancy) with built-in support for OpenAI GPT-3, GPT-3.5 (chatGPT), and GPT-4 models - both from OpenAI directly and through Azure OpenAI, OpenAI Whisper, Azure Translator, LangChain.js and more.
|
|
20
|
+
* Allows for building multi-model, multi-tool, multi-vendor, and model-agnostic pathways (choose the right model or combination of models and tools for the job, implement redundancy) with built-in support for OpenAI GPT-3, GPT-3.5 (chatGPT), and GPT-4 models - both from OpenAI directly and through Azure OpenAI, PaLM Text and PaLM Chat from Google, OpenAI Whisper, Azure Translator, LangChain.js and more.
|
|
21
21
|
* Easy, templatized prompt definition with flexible support for most prompt engineering techniques and strategies ranging from simple single prompts to complex custom prompt chains with context continuity.
|
|
22
22
|
* Built in support for long-running, asynchronous operations with progress updates or streaming responses
|
|
23
23
|
* Integrated context persistence: have your pathways "remember" whatever you want and use it on the next request to the model
|
|
@@ -198,8 +198,8 @@ The core pathway `summary.js` below is implemented using custom pathway logic an
|
|
|
198
198
|
// This module exports a prompt that takes an input text and generates a summary using a custom resolver.
|
|
199
199
|
|
|
200
200
|
// Import required modules
|
|
201
|
-
import { semanticTruncate } from '../
|
|
202
|
-
import { PathwayResolver } from '../
|
|
201
|
+
import { semanticTruncate } from '../server/chunker.js';
|
|
202
|
+
import { PathwayResolver } from '../server/pathwayResolver.js';
|
|
203
203
|
|
|
204
204
|
export default {
|
|
205
205
|
// The main prompt function that takes the input text and asks to generate a summary.
|
|
@@ -51,6 +51,24 @@
|
|
|
51
51
|
"requestsPerSecond": 10,
|
|
52
52
|
"maxTokenLength": 8192
|
|
53
53
|
},
|
|
54
|
+
"palm-text": {
|
|
55
|
+
"type": "PALM-COMPLETION",
|
|
56
|
+
"url": "https://us-central1-aiplatform.googleapis.com/v1/projects/project-id/locations/us-central1/publishers/google/models/text-bison@001:predict",
|
|
57
|
+
"headers": {
|
|
58
|
+
"Content-Type": "application/json"
|
|
59
|
+
},
|
|
60
|
+
"requestsPerSecond": 10,
|
|
61
|
+
"maxTokenLength": 2048
|
|
62
|
+
},
|
|
63
|
+
"palm-chat": {
|
|
64
|
+
"type": "PALM-CHAT",
|
|
65
|
+
"url": "https://us-central1-aiplatform.googleapis.com/v1/projects/project-id/locations/us-central1/publishers/google/models/chat-bison@001:predict",
|
|
66
|
+
"headers": {
|
|
67
|
+
"Content-Type": "application/json"
|
|
68
|
+
},
|
|
69
|
+
"requestsPerSecond": 10,
|
|
70
|
+
"maxTokenLength": 2048
|
|
71
|
+
},
|
|
54
72
|
"local-llama13B": {
|
|
55
73
|
"type": "LOCAL-CPP-MODEL",
|
|
56
74
|
"executablePath": "../llm/llama.cpp/main",
|
package/config.js
CHANGED
|
@@ -3,6 +3,7 @@ import convict from 'convict';
|
|
|
3
3
|
import HandleBars from './lib/handleBars.js';
|
|
4
4
|
import fs from 'fs';
|
|
5
5
|
import { fileURLToPath, pathToFileURL } from 'url';
|
|
6
|
+
import GcpAuthTokenHelper from './lib/gcpAuthTokenHelper.js';
|
|
6
7
|
|
|
7
8
|
const __dirname = path.dirname(fileURLToPath(import.meta.url));
|
|
8
9
|
|
|
@@ -57,7 +58,8 @@ var config = convict({
|
|
|
57
58
|
cortexApiKey: {
|
|
58
59
|
format: String,
|
|
59
60
|
default: null,
|
|
60
|
-
env: 'CORTEX_API_KEY'
|
|
61
|
+
env: 'CORTEX_API_KEY',
|
|
62
|
+
sensitive: true
|
|
61
63
|
},
|
|
62
64
|
defaultModelName: {
|
|
63
65
|
format: String,
|
|
@@ -67,19 +69,21 @@ var config = convict({
|
|
|
67
69
|
models: {
|
|
68
70
|
format: Object,
|
|
69
71
|
default: {
|
|
70
|
-
"oai-
|
|
71
|
-
"type": "OPENAI-
|
|
72
|
-
"url": "
|
|
72
|
+
"oai-gpturbo": {
|
|
73
|
+
"type": "OPENAI-CHAT",
|
|
74
|
+
"url": "https://api.openai.com/v1/chat/completions",
|
|
73
75
|
"headers": {
|
|
74
|
-
"Authorization": "Bearer {{
|
|
76
|
+
"Authorization": "Bearer {{OPENAI_API_KEY}}",
|
|
75
77
|
"Content-Type": "application/json"
|
|
76
78
|
},
|
|
77
79
|
"params": {
|
|
78
|
-
"model": "
|
|
80
|
+
"model": "gpt-3.5-turbo"
|
|
79
81
|
},
|
|
82
|
+
"requestsPerSecond": 10,
|
|
83
|
+
"maxTokenLength": 8192
|
|
80
84
|
},
|
|
81
85
|
"oai-whisper": {
|
|
82
|
-
"type": "
|
|
86
|
+
"type": "OPENAI-WHISPER",
|
|
83
87
|
"url": "https://api.openai.com/v1/audio/transcriptions",
|
|
84
88
|
"headers": {
|
|
85
89
|
"Authorization": "Bearer {{OPENAI_API_KEY}}"
|
|
@@ -93,7 +97,7 @@ var config = convict({
|
|
|
93
97
|
},
|
|
94
98
|
openaiDefaultModel: {
|
|
95
99
|
format: String,
|
|
96
|
-
default: '
|
|
100
|
+
default: 'gpt-3.5-turbo',
|
|
97
101
|
env: 'OPENAI_DEFAULT_MODEL'
|
|
98
102
|
},
|
|
99
103
|
openaiApiKey: {
|
|
@@ -117,6 +121,17 @@ var config = convict({
|
|
|
117
121
|
default: 'null',
|
|
118
122
|
env: 'WHISPER_MEDIA_API_URL'
|
|
119
123
|
},
|
|
124
|
+
whisperTSApiUrl: {
|
|
125
|
+
format: String,
|
|
126
|
+
default: 'null',
|
|
127
|
+
env: 'WHISPER_TS_API_URL'
|
|
128
|
+
},
|
|
129
|
+
gcpServiceAccountKey: {
|
|
130
|
+
format: String,
|
|
131
|
+
default: null,
|
|
132
|
+
env: 'GCP_SERVICE_ACCOUNT_KEY',
|
|
133
|
+
sensitive: true
|
|
134
|
+
},
|
|
120
135
|
});
|
|
121
136
|
|
|
122
137
|
// Read in environment variables and set up service configuration
|
|
@@ -135,6 +150,11 @@ if (configFile && fs.existsSync(configFile)) {
|
|
|
135
150
|
}
|
|
136
151
|
}
|
|
137
152
|
|
|
153
|
+
if (config.get('gcpServiceAccountKey')) {
|
|
154
|
+
const gcpAuthTokenHelper = new GcpAuthTokenHelper(config.getProperties());
|
|
155
|
+
config.set('gcpAuthTokenHelper', gcpAuthTokenHelper);
|
|
156
|
+
}
|
|
157
|
+
|
|
138
158
|
// Build and load pathways to config
|
|
139
159
|
const buildPathways = async (config) => {
|
|
140
160
|
const { pathwaysPath, corePathwaysPath, basePathwayPath } = config.getProperties();
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
FROM node:18-alpine
|
|
2
|
+
|
|
3
|
+
WORKDIR /usr/src/app
|
|
4
|
+
|
|
5
|
+
COPY package*.json ./
|
|
6
|
+
|
|
7
|
+
RUN npm install
|
|
8
|
+
|
|
9
|
+
# Install ffmpeg in a single layer. --no-cache fetches a fresh package index
# and leaves no apk cache behind, so no separate "apk update" step is needed.
RUN apk add --no-cache ffmpeg
|
|
13
|
+
|
|
14
|
+
COPY . .
|
|
15
|
+
|
|
16
|
+
EXPOSE 7071
|
|
17
|
+
|
|
18
|
+
# RUN npm run build
|
|
19
|
+
|
|
20
|
+
CMD [ "node", "start.js" ]
|
|
@@ -18,7 +18,7 @@
|
|
|
18
18
|
"ioredis": "^5.3.1",
|
|
19
19
|
"public-ip": "^6.0.1",
|
|
20
20
|
"uuid": "^9.0.0",
|
|
21
|
-
"ytdl-core": "
|
|
21
|
+
"ytdl-core": "github:khlevon/node-ytdl-core#v4.11.4-patch.2"
|
|
22
22
|
}
|
|
23
23
|
},
|
|
24
24
|
"node_modules/@azure/abort-controller": {
|
|
@@ -45,9 +45,9 @@
|
|
|
45
45
|
}
|
|
46
46
|
},
|
|
47
47
|
"node_modules/@azure/core-http": {
|
|
48
|
-
"version": "3.0.
|
|
49
|
-
"resolved": "https://registry.npmjs.org/@azure/core-http/-/core-http-3.0.
|
|
50
|
-
"integrity": "sha512-
|
|
48
|
+
"version": "3.0.1",
|
|
49
|
+
"resolved": "https://registry.npmjs.org/@azure/core-http/-/core-http-3.0.1.tgz",
|
|
50
|
+
"integrity": "sha512-A3x+um3cAPgQe42Lu7Iv/x8/fNjhL/nIoEfqFxfn30EyxK6zC13n+OUxzZBRC0IzQqssqIbt4INf5YG7lYYFtw==",
|
|
51
51
|
"dependencies": {
|
|
52
52
|
"@azure/abort-controller": "^1.0.0",
|
|
53
53
|
"@azure/core-auth": "^1.3.0",
|
|
@@ -62,7 +62,7 @@
|
|
|
62
62
|
"tslib": "^2.2.0",
|
|
63
63
|
"tunnel": "^0.0.6",
|
|
64
64
|
"uuid": "^8.3.0",
|
|
65
|
-
"xml2js": "^0.
|
|
65
|
+
"xml2js": "^0.5.0"
|
|
66
66
|
},
|
|
67
67
|
"engines": {
|
|
68
68
|
"node": ">=14.0.0"
|
|
@@ -1720,9 +1720,9 @@
|
|
|
1720
1720
|
}
|
|
1721
1721
|
},
|
|
1722
1722
|
"node_modules/xml2js": {
|
|
1723
|
-
"version": "0.
|
|
1724
|
-
"resolved": "https://registry.npmjs.org/xml2js/-/xml2js-0.
|
|
1725
|
-
"integrity": "sha512-
|
|
1723
|
+
"version": "0.5.0",
|
|
1724
|
+
"resolved": "https://registry.npmjs.org/xml2js/-/xml2js-0.5.0.tgz",
|
|
1725
|
+
"integrity": "sha512-drPFnkQJik/O+uPKpqSgr22mpuFHqKdbS835iAQrUC73L2F5WkboIRd63ai/2Yg6I1jzifPFKH2NTK+cfglkIA==",
|
|
1726
1726
|
"dependencies": {
|
|
1727
1727
|
"sax": ">=0.6.0",
|
|
1728
1728
|
"xmlbuilder": "~11.0.0"
|
|
@@ -1741,7 +1741,7 @@
|
|
|
1741
1741
|
},
|
|
1742
1742
|
"node_modules/ytdl-core": {
|
|
1743
1743
|
"version": "0.0.0-development",
|
|
1744
|
-
"resolved": "git+ssh://git@github.com/khlevon/node-ytdl-core.git#
|
|
1744
|
+
"resolved": "git+ssh://git@github.com/khlevon/node-ytdl-core.git#87450450caabb91f81afa6e66758bf2f629664a1",
|
|
1745
1745
|
"license": "MIT",
|
|
1746
1746
|
"dependencies": {
|
|
1747
1747
|
"m3u8stream": "^0.8.6",
|
|
@@ -1772,9 +1772,9 @@
|
|
|
1772
1772
|
}
|
|
1773
1773
|
},
|
|
1774
1774
|
"@azure/core-http": {
|
|
1775
|
-
"version": "3.0.
|
|
1776
|
-
"resolved": "https://registry.npmjs.org/@azure/core-http/-/core-http-3.0.
|
|
1777
|
-
"integrity": "sha512-
|
|
1775
|
+
"version": "3.0.1",
|
|
1776
|
+
"resolved": "https://registry.npmjs.org/@azure/core-http/-/core-http-3.0.1.tgz",
|
|
1777
|
+
"integrity": "sha512-A3x+um3cAPgQe42Lu7Iv/x8/fNjhL/nIoEfqFxfn30EyxK6zC13n+OUxzZBRC0IzQqssqIbt4INf5YG7lYYFtw==",
|
|
1778
1778
|
"requires": {
|
|
1779
1779
|
"@azure/abort-controller": "^1.0.0",
|
|
1780
1780
|
"@azure/core-auth": "^1.3.0",
|
|
@@ -1789,7 +1789,7 @@
|
|
|
1789
1789
|
"tslib": "^2.2.0",
|
|
1790
1790
|
"tunnel": "^0.0.6",
|
|
1791
1791
|
"uuid": "^8.3.0",
|
|
1792
|
-
"xml2js": "^0.
|
|
1792
|
+
"xml2js": "^0.5.0"
|
|
1793
1793
|
},
|
|
1794
1794
|
"dependencies": {
|
|
1795
1795
|
"uuid": {
|
|
@@ -2948,9 +2948,9 @@
|
|
|
2948
2948
|
}
|
|
2949
2949
|
},
|
|
2950
2950
|
"xml2js": {
|
|
2951
|
-
"version": "0.
|
|
2952
|
-
"resolved": "https://registry.npmjs.org/xml2js/-/xml2js-0.
|
|
2953
|
-
"integrity": "sha512-
|
|
2951
|
+
"version": "0.5.0",
|
|
2952
|
+
"resolved": "https://registry.npmjs.org/xml2js/-/xml2js-0.5.0.tgz",
|
|
2953
|
+
"integrity": "sha512-drPFnkQJik/O+uPKpqSgr22mpuFHqKdbS835iAQrUC73L2F5WkboIRd63ai/2Yg6I1jzifPFKH2NTK+cfglkIA==",
|
|
2954
2954
|
"requires": {
|
|
2955
2955
|
"sax": ">=0.6.0",
|
|
2956
2956
|
"xmlbuilder": "~11.0.0"
|
|
@@ -2962,8 +2962,8 @@
|
|
|
2962
2962
|
"integrity": "sha512-fDlsI/kFEx7gLvbecc0/ohLG50fugQp8ryHzMTuW9vSa1GJ0XYWKnhsUx7oie3G98+r56aTQIUB4kht42R3JvA=="
|
|
2963
2963
|
},
|
|
2964
2964
|
"ytdl-core": {
|
|
2965
|
-
"version": "git+ssh://git@github.com/khlevon/node-ytdl-core.git#
|
|
2966
|
-
"from": "ytdl-core@
|
|
2965
|
+
"version": "git+ssh://git@github.com/khlevon/node-ytdl-core.git#87450450caabb91f81afa6e66758bf2f629664a1",
|
|
2966
|
+
"from": "ytdl-core@github:khlevon/node-ytdl-core#v4.11.4-patch.2",
|
|
2967
2967
|
"requires": {
|
|
2968
2968
|
"m3u8stream": "^0.8.6",
|
|
2969
2969
|
"miniget": "^4.2.2",
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
**/__pycache__
|
|
2
|
+
**/.venv
|
|
3
|
+
**/.classpath
|
|
4
|
+
**/.dockerignore
|
|
5
|
+
**/.env
|
|
6
|
+
**/.git
|
|
7
|
+
**/.gitignore
|
|
8
|
+
**/.project
|
|
9
|
+
**/.settings
|
|
10
|
+
**/.toolstarget
|
|
11
|
+
**/.vs
|
|
12
|
+
**/.vscode
|
|
13
|
+
**/*.*proj.user
|
|
14
|
+
**/*.dbmdl
|
|
15
|
+
**/*.jfm
|
|
16
|
+
**/bin
|
|
17
|
+
**/charts
|
|
18
|
+
**/docker-compose*
|
|
19
|
+
**/compose*
|
|
20
|
+
**/Dockerfile*
|
|
21
|
+
**/node_modules
|
|
22
|
+
**/npm-debug.log
|
|
23
|
+
**/obj
|
|
24
|
+
**/secrets.dev.yaml
|
|
25
|
+
**/values.dev.yaml
|
|
26
|
+
LICENSE
|
|
27
|
+
README.md
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
# For more information, please refer to https://aka.ms/vscode-docker-python
|
|
2
|
+
FROM python:3.10-slim
|
|
3
|
+
|
|
4
|
+
EXPOSE 8000
|
|
5
|
+
|
|
6
|
+
# Install ffmpeg. The index update and the install share one layer so a cached,
# stale "apt-get update" layer can never be combined with a later install, and
# the downloaded package lists are removed to keep the image small.
RUN apt-get -y update \
    && apt-get -y upgrade \
    && apt-get install -y ffmpeg \
    && rm -rf /var/lib/apt/lists/*
|
|
10
|
+
|
|
11
|
+
# Keeps Python from generating .pyc files in the container
|
|
12
|
+
ENV PYTHONDONTWRITEBYTECODE=1
|
|
13
|
+
|
|
14
|
+
# Turns off buffering for easier container logging
|
|
15
|
+
ENV PYTHONUNBUFFERED=1
|
|
16
|
+
|
|
17
|
+
# Install pip requirements
|
|
18
|
+
COPY requirements.txt .
|
|
19
|
+
RUN python -m pip install -r requirements.txt
|
|
20
|
+
|
|
21
|
+
WORKDIR /app
|
|
22
|
+
COPY ./models /app/models
|
|
23
|
+
COPY . /app
|
|
24
|
+
|
|
25
|
+
# Creates a non-root user with an explicit UID and adds permission to access the /app folder
|
|
26
|
+
# For more info, please refer to https://aka.ms/vscode-docker-python-configure-containers
|
|
27
|
+
RUN adduser -u 5678 --disabled-password --gecos "" appuser && chown -R appuser /app
|
|
28
|
+
USER appuser
|
|
29
|
+
|
|
30
|
+
# During debugging, this entry point will be overridden. For more information, please refer to https://aka.ms/vscode-docker-python-debug
|
|
31
|
+
CMD ["gunicorn", "--bind", "0.0.0.0:8000", "--timeout", "0", "-k", "uvicorn.workers.UvicornWorker", "app:app"]
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
import uvicorn
|
|
2
|
+
from fastapi import FastAPI
|
|
3
|
+
import stable_whisper
|
|
4
|
+
from uuid import uuid4
|
|
5
|
+
import requests
|
|
6
|
+
import os
|
|
7
|
+
|
|
8
|
+
model_download_root = './models'
|
|
9
|
+
model = stable_whisper.load_model('large', download_root=model_download_root) #large, tiny
|
|
10
|
+
|
|
11
|
+
app = FastAPI()
|
|
12
|
+
|
|
13
|
+
save_directory = "./tmp" # folder for downloaded files
|
|
14
|
+
os.makedirs(save_directory, exist_ok=True)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def download_remote_file(url, save_directory):
    """Download ``url`` into ``save_directory`` under a unique, UUID-based name.

    The file extension is taken from the path component of the URL only, so a
    query string or fragment (for example a signed-URL token, which may even
    contain ``/``) never ends up in the local file name.

    Args:
        url: Address of the remote file to fetch.
        save_directory: Directory the downloaded file is written into.

    Returns:
        A two-item list ``[unique_name, save_path]``.

    Raises:
        Whatever ``response.raise_for_status()`` raises for an error status.
    """
    # Function-scope import keeps this fix self-contained.
    from urllib.parse import urlsplit

    # Generate a unique file name with a UUID, keeping only the path's extension
    extension = os.path.splitext(urlsplit(url).path)[-1]
    unique_name = str(uuid4()) + extension
    save_path = os.path.join(save_directory, unique_name)

    # Download the remote file; the context manager releases the streamed
    # connection on every exit path, including errors.
    with requests.get(url, stream=True) as response:
        response.raise_for_status()

        # Save the downloaded file with the unique name
        with open(save_path, 'wb') as file:
            for chunk in response.iter_content(chunk_size=8192):
                file.write(chunk)

    return [unique_name, save_path]
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def delete_tmp_file(file_path):
    """Remove ``file_path`` on a best-effort basis.

    A failed removal is reported on stdout and never raised to the caller.
    """
    try:
        os.remove(file_path)
    except OSError as err:
        print(f"Error: {err.strerror}")
    else:
        print(f"Temporary file '{file_path}' has been deleted.")
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
async def transcribe(fileurl):
    """Download the media at ``fileurl``, transcribe it and return SRT text.

    The media file and the intermediate ``.srt`` file are both written to
    ``save_directory`` and are removed again on every exit path, including
    when transcription or SRT export raises.

    Args:
        fileurl: URL of the media file to transcribe.

    Returns:
        The contents of the generated SRT file as a string.
    """
    print(f"Downloading file from: {fileurl}")
    [unique_file_name, save_path] = download_remote_file(
        fileurl, save_directory)
    print(f"Downloaded file saved as: {unique_file_name}")

    srtpath = os.path.join(save_directory, str(uuid4()) + ".srt")

    try:
        print(f"Transcribing file")
        result = model.transcribe(save_path)

        print(f"Saving transcription as : {srtpath}")
        result.to_srt_vtt(srtpath, segment_level=False)

        # Read with an explicit encoding so the result does not depend on the
        # process locale.
        with open(srtpath, "r", encoding="utf-8") as f:
            srtstr = f.read()
    finally:
        # clean up tmp files, even when a step above failed; the .srt file may
        # not exist yet in that case.
        for tmp_path in (save_path, srtpath):
            if os.path.exists(tmp_path):
                delete_tmp_file(tmp_path)

    print(f"Transcription done.")
    return srtstr
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
@app.get("/")
async def root(fileurl: str):
    """GET / handler: transcribe the media at ``fileurl`` and return the result.

    An empty ``fileurl`` yields the plain message ``"No fileurl given!"``.
    """
    if fileurl:
        return await transcribe(fileurl)
    return "No fileurl given!"
|
|
73
|
+
|
|
74
|
+
if __name__ == "__main__":
|
|
75
|
+
print("Starting server", flush=True)
|
|
76
|
+
uvicorn.run(app, host="0.0.0.0", port=8000)
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
import uvicorn
|
|
2
|
+
from fastapi import FastAPI, HTTPException, Request
|
|
3
|
+
from uuid import uuid4
|
|
4
|
+
import os
|
|
5
|
+
import requests
|
|
6
|
+
import asyncio
|
|
7
|
+
import whisper
|
|
8
|
+
from whisper.utils import get_writer
|
|
9
|
+
from fastapi.encoders import jsonable_encoder
|
|
10
|
+
|
|
11
|
+
model_download_root = './models'
|
|
12
|
+
model = whisper.load_model("large", download_root=model_download_root) #large, tiny
|
|
13
|
+
|
|
14
|
+
# Create a semaphore with a limit of 1
|
|
15
|
+
semaphore = asyncio.Semaphore(1)
|
|
16
|
+
|
|
17
|
+
app = FastAPI()
|
|
18
|
+
|
|
19
|
+
save_directory = "./tmp" # folder for downloaded files
|
|
20
|
+
os.makedirs(save_directory, exist_ok=True)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def download_remote_file(url, save_directory):
    """Download ``url`` into ``save_directory`` under a unique, UUID-based name.

    The file extension is taken from the path component of the URL only, so a
    query string or fragment (for example a signed-URL token, which may even
    contain ``/``) never ends up in the local file name.

    Args:
        url: Address of the remote file to fetch.
        save_directory: Directory the downloaded file is written into.

    Returns:
        A two-item list ``[unique_name, save_path]``.

    Raises:
        Whatever ``response.raise_for_status()`` raises for an error status.
    """
    # Function-scope import keeps this fix self-contained.
    from urllib.parse import urlsplit

    # Generate a unique file name with a UUID, keeping only the path's extension
    extension = os.path.splitext(urlsplit(url).path)[-1]
    unique_name = str(uuid4()) + extension
    save_path = os.path.join(save_directory, unique_name)

    # Download the remote file; the context manager releases the streamed
    # connection on every exit path, including errors.
    with requests.get(url, stream=True) as response:
        response.raise_for_status()

        # Save the downloaded file with the unique name
        with open(save_path, 'wb') as file:
            for chunk in response.iter_content(chunk_size=8192):
                file.write(chunk)

    return [unique_name, save_path]
|
|
38
|
+
|
|
39
|
+
def delete_tmp_file(file_path):
    """Remove ``file_path`` on a best-effort basis.

    A failed removal is reported on stdout and never raised to the caller.
    """
    try:
        os.remove(file_path)
    except OSError as err:
        print(f"Error: {err.strerror}")
    else:
        print(f"Temporary file '{file_path}' has been deleted.")
|
|
45
|
+
|
|
46
|
+
def modify_segments(result):
    """Replace ``result["segments"]`` with one entry per word, in place.

    Every word of every original segment becomes its own segment dict with a
    running ``id`` (starting at 0), the word as ``text`` and the word's own
    ``start`` / ``end`` values. Nothing is returned; ``result`` is mutated.
    """
    every_word = (
        word_info
        for segment in result["segments"]
        for word_info in segment['words']
    )
    result["segments"] = [
        {
            'id': position,
            'text': word_info['word'],
            'start': word_info['start'],
            'end': word_info['end'],
        }
        for position, word_info in enumerate(every_word)
    ]
|
|
65
|
+
|
|
66
|
+
def transcribe(fileurl):
    """Download the media at ``fileurl``, transcribe it and return SRT text.

    Transcription runs with word timestamps, and ``modify_segments`` turns the
    result into one segment per word before it is written out as SRT. The
    media file and the intermediate ``.srt`` file live in ``save_directory``
    and are removed on every exit path, including when a step raises.

    Args:
        fileurl: URL of the media file to transcribe.

    Returns:
        The contents of the generated SRT file as a string.
    """
    print(f"Downloading file from: {fileurl}")
    [unique_file_name, save_path] = download_remote_file(
        fileurl, save_directory)
    print(f"Downloaded file saved as: {unique_file_name}")

    srtpath = os.path.join(save_directory, str(uuid4()) + ".srt")

    try:
        print(f"Transcribing file")
        result = model.transcribe(save_path, word_timestamps=True)

        modify_segments(result)

        print(f"Saving transcription as : {srtpath}")
        writer = get_writer("srt", save_directory)
        with open(srtpath, 'w', encoding='utf-8') as file_obj:
            writer.write_result(result, file_obj)

        # Read back with the same encoding the file was written in, so the
        # result does not depend on the process locale.
        with open(srtpath, "r", encoding='utf-8') as f:
            srtstr = f.read()
    finally:
        # clean up tmp files, even when a step above failed; the .srt file may
        # not exist yet in that case.
        for tmp_path in (save_path, srtpath):
            if os.path.exists(tmp_path):
                delete_tmp_file(tmp_path)

    print(f"Transcription done.")
    return srtstr
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
@app.get("/")
@app.post("/")
async def root(request: Request):
    """Handle GET and POST on ``/``: transcribe the media named by ``fileurl``.

    ``fileurl`` comes from the JSON body for POST and from the query string
    otherwise. A missing or empty value yields ``"No fileurl given!"``. While
    the module-level semaphore is held by another request, the call is
    rejected with HTTP 429 instead of waiting.
    """
    if request.method == "POST":
        params = jsonable_encoder(await request.json())
    else:
        params = request.query_params

    fileurl = params.get("fileurl")
    if not fileurl:
        return "No fileurl given!"

    # Reject right away rather than queueing behind the running transcription
    if semaphore.locked():
        raise HTTPException(status_code=429, detail="Too Many Requests")

    async with semaphore:
        # transcribe is blocking, so it runs in a worker thread
        return await asyncio.to_thread(transcribe, fileurl)
|
|
112
|
+
|
|
113
|
+
if __name__ == "__main__":
|
|
114
|
+
print("Starting APPWhisper server", flush=True)
|
|
115
|
+
uvicorn.run(app, host="0.0.0.0", port=8000)
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
version: '3.4'
|
|
2
|
+
|
|
3
|
+
services:
|
|
4
|
+
cortex:
|
|
5
|
+
image: cortex
|
|
6
|
+
build:
|
|
7
|
+
context: .
|
|
8
|
+
dockerfile: ./Dockerfile
|
|
9
|
+
# The image copies app.py into WORKDIR /app, so the uvicorn import target is "app:app"
command: ["sh", "-c", "pip install debugpy -t /tmp && python /tmp/debugpy --wait-for-client --listen 0.0.0.0:5678 -m uvicorn app:app --host 0.0.0.0 --port 8000"]
|
|
10
|
+
ports:
|
|
11
|
+
- 8000:8000
|
|
12
|
+
- 5678:5678
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
# To ensure app dependencies are ported from your virtual environment/host machine into your container, run 'pip freeze > requirements.txt' in the terminal to overwrite this file
fastapi[all]==0.89.0
uvicorn[standard]==0.20.0
gunicorn==20.1.0
# OpenAI Whisper is published on PyPI as "openai-whisper"; the project named "whisper" is an unrelated library
openai-whisper
stable-ts
# Imported directly by app.py and app-ts.py
requests
|
package/index.js
CHANGED
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
import { GoogleAuth } from 'google-auth-library';
|
|
2
|
+
|
|
3
|
+
/**
 * Obtains a Google Cloud access token for a service account and caches it
 * until shortly before it expires.
 */
class GcpAuthTokenHelper {
    /**
     * @param {object} config - Configuration properties.
     * @param {string} config.gcpServiceAccountKey - Service account key as a JSON string.
     * @throws {Error} If the service account key is missing or empty.
     * @throws {SyntaxError} If the key is not valid JSON (raised by JSON.parse).
     */
    constructor(config) {
        const creds = config.gcpServiceAccountKey ? JSON.parse(config.gcpServiceAccountKey) : null;
        if (!creds) {
            throw new Error('GCP_SERVICE_ACCOUNT_KEY is missing or undefined');
        }
        this.authClient = new GoogleAuth({
            credentials: creds,
            scopes: ['https://www.googleapis.com/auth/cloud-platform'],
        });
        this.token = null;
        this.expiry = null;
    }

    /**
     * Returns the cached access token, refreshing it first when there is none
     * or when it is no longer considered valid.
     * @returns {Promise<string>} The access token.
     */
    async getAccessToken() {
        if (!this.token || !this.isTokenValid()) {
            await this.refreshToken();
        }
        return this.token;
    }

    /**
     * @returns {boolean} True when an expiry time is known and lies more than
     *   five minutes in the future.
     */
    isTokenValid() {
        if (!(this.expiry instanceof Date)) {
            return false;
        }
        const expiresAt = this.expiry.getTime();
        // Check if token is still valid with a 5-minute buffer; an Invalid Date
        // (NaN timestamp) is treated as expired.
        return Number.isFinite(expiresAt) && Date.now() < expiresAt - 5 * 60 * 1000;
    }

    /**
     * Fetches a new access token and records when it expires.
     * @returns {Promise<void>}
     */
    async refreshToken() {
        const authClient = await this.authClient.getClient();
        const accessTokenResponse = await authClient.getAccessToken();
        // The getAccessToken() response carries the token but no expiry; the
        // client keeps the expiry (epoch milliseconds) on credentials.expiry_date.
        const expiryMs = authClient.credentials ? authClient.credentials.expiry_date : undefined;
        this.token = accessTokenResponse.token;
        // Without a usable expiry the token is never treated as cached.
        this.expiry = Number.isFinite(expiryMs) ? new Date(expiryMs) : null;
    }
}
|
|
36
|
+
|
|
37
|
+
export default GcpAuthTokenHelper;
|
package/lib/redisSubscription.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import Redis from 'ioredis';
|
|
2
2
|
import { config } from '../config.js';
|
|
3
|
-
import pubsub from '../
|
|
3
|
+
import pubsub from '../server/pubsub.js';
|
|
4
4
|
|
|
5
5
|
const connectionString = config.get('storageConnectionString');
|
|
6
6
|
const client = new Redis(connectionString);
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@aj-archipelago/cortex",
|
|
3
|
-
"version": "1.0.
|
|
3
|
+
"version": "1.0.6",
|
|
4
4
|
"description": "Cortex is a GraphQL API for AI. It provides a simple, extensible interface for using AI services from OpenAI, Azure and others.",
|
|
5
5
|
"repository": {
|
|
6
6
|
"type": "git",
|
|
@@ -28,13 +28,11 @@
|
|
|
28
28
|
"type": "module",
|
|
29
29
|
"homepage": "https://github.com/aj-archipelago/cortex#readme",
|
|
30
30
|
"dependencies": {
|
|
31
|
-
"@apollo/
|
|
31
|
+
"@apollo/server": "^4.7.3",
|
|
32
|
+
"@apollo/server-plugin-response-cache": "^4.1.2",
|
|
33
|
+
"@apollo/utils.keyvadapter": "^3.0.0",
|
|
32
34
|
"@graphql-tools/schema": "^9.0.12",
|
|
33
35
|
"@keyv/redis": "^2.5.4",
|
|
34
|
-
"apollo-server": "^3.12.0",
|
|
35
|
-
"apollo-server-core": "^3.11.1",
|
|
36
|
-
"apollo-server-express": "^3.11.1",
|
|
37
|
-
"apollo-server-plugin-response-cache": "^3.8.1",
|
|
38
36
|
"axios": "^1.3.4",
|
|
39
37
|
"axios-cache-interceptor": "^1.0.1",
|
|
40
38
|
"bottleneck": "^2.19.5",
|
|
@@ -43,6 +41,7 @@
|
|
|
43
41
|
"convict": "^6.2.3",
|
|
44
42
|
"express": "^4.18.2",
|
|
45
43
|
"form-data": "^4.0.0",
|
|
44
|
+
"google-auth-library": "^8.8.0",
|
|
46
45
|
"gpt-3-encoder": "^1.1.4",
|
|
47
46
|
"graphql": "^16.6.0",
|
|
48
47
|
"graphql-subscriptions": "^2.0.0",
|
|
@@ -51,6 +50,7 @@
|
|
|
51
50
|
"ioredis": "^5.3.1",
|
|
52
51
|
"keyv": "^4.5.2",
|
|
53
52
|
"langchain": "^0.0.47",
|
|
53
|
+
"subsrt": "^1.1.1",
|
|
54
54
|
"uuid": "^9.0.0",
|
|
55
55
|
"ws": "^8.12.0"
|
|
56
56
|
},
|
|
@@ -59,6 +59,7 @@
|
|
|
59
59
|
"dotenv": "^16.0.3",
|
|
60
60
|
"eslint": "^8.38.0",
|
|
61
61
|
"eslint-plugin-import": "^2.27.5",
|
|
62
|
+
"got": "^13.0.0",
|
|
62
63
|
"sinon": "^15.0.3"
|
|
63
64
|
},
|
|
64
65
|
"publishConfig": {
|
|
@@ -70,6 +71,7 @@
|
|
|
70
71
|
],
|
|
71
72
|
"require": [
|
|
72
73
|
"dotenv/config"
|
|
73
|
-
]
|
|
74
|
+
],
|
|
75
|
+
"concurrency": 1
|
|
74
76
|
}
|
|
75
77
|
}
|
package/pathways/basePathway.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { rootResolver, resolver } from '../
|
|
2
|
-
import { typeDef } from '../
|
|
1
|
+
import { rootResolver, resolver } from '../server/resolver.js';
|
|
2
|
+
import { typeDef } from '../server/typeDef.js';
|
|
3
3
|
|
|
4
4
|
// all default definitions of a single pathway
|
|
5
5
|
export default {
|