quintessentia 1.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. quintessentia-1.1/LICENSE +21 -0
  2. quintessentia-1.1/MANIFEST.in +3 -0
  3. quintessentia-1.1/PKG-INFO +279 -0
  4. quintessentia-1.1/README.md +223 -0
  5. quintessentia-1.1/pyproject.toml +16 -0
  6. quintessentia-1.1/quint/__init__.py +8 -0
  7. quintessentia-1.1/quint/api/__init__.py +0 -0
  8. quintessentia-1.1/quint/api/fast.py +207 -0
  9. quintessentia-1.1/quint/chunking/__init__.py +8 -0
  10. quintessentia-1.1/quint/chunking/generate.py +46 -0
  11. quintessentia-1.1/quint/chunking/similarities.py +54 -0
  12. quintessentia-1.1/quint/data/__init__.py +0 -0
  13. quintessentia-1.1/quint/data/youtube.py +40 -0
  14. quintessentia-1.1/quint/highlighting/__init__.py +8 -0
  15. quintessentia-1.1/quint/highlighting/highlights.py +25 -0
  16. quintessentia-1.1/quint/highlighting/words_outline.py +14 -0
  17. quintessentia-1.1/quint/params.py +1 -0
  18. quintessentia-1.1/quint/preprocessing/__init__.py +0 -0
  19. quintessentia-1.1/quint/preprocessing/audio.py +48 -0
  20. quintessentia-1.1/quint/summarizing/__init__.py +0 -0
  21. quintessentia-1.1/quint/summarizing/summarizer.py +55 -0
  22. quintessentia-1.1/quint/tools/__init__.py +8 -0
  23. quintessentia-1.1/quint/tools/activations.py +5 -0
  24. quintessentia-1.1/quint/tools/embedding.py +28 -0
  25. quintessentia-1.1/quint/tools/time.py +17 -0
  26. quintessentia-1.1/quint/transcription/__init__.py +8 -0
  27. quintessentia-1.1/quint/transcription/transcriber.py +100 -0
  28. quintessentia-1.1/quintessentia.egg-info/PKG-INFO +279 -0
  29. quintessentia-1.1/quintessentia.egg-info/SOURCES.txt +35 -0
  30. quintessentia-1.1/quintessentia.egg-info/dependency_links.txt +1 -0
  31. quintessentia-1.1/quintessentia.egg-info/requires.txt +22 -0
  32. quintessentia-1.1/quintessentia.egg-info/top_level.txt +1 -0
  33. quintessentia-1.1/requirements.txt +35 -0
  34. quintessentia-1.1/setup.cfg +4 -0
  35. quintessentia-1.1/setup.py +43 -0
  36. quintessentia-1.1/tests/test_jax_device.py +20 -0
  37. quintessentia-1.1/tests/test_youtube_downloader.py +31 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2023-2026 Polovinkin Nikita
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,3 @@
1
+ include requirements.txt
2
+ include README.md
3
+ include LICENSE
@@ -0,0 +1,279 @@
1
+ Metadata-Version: 2.4
2
+ Name: quintessentia
3
+ Version: 1.1
4
+ Summary: Transcribe, chunk and summarize podcasts (FastAPI + Whisper + OpenAI)
5
+ Home-page: https://github.com/poloniki/quint
6
+ Author: Polovinkin Nikita
7
+ License: MIT
8
+ Project-URL: Source, https://github.com/poloniki/quint
9
+ Project-URL: Issues, https://github.com/poloniki/quint/issues
10
+ Keywords: whisper,transcription,summarization,podcast,fastapi,nlp
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: Framework :: FastAPI
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: License :: OSI Approved :: MIT License
15
+ Classifier: Programming Language :: Python :: 3
16
+ Classifier: Programming Language :: Python :: 3.10
17
+ Classifier: Topic :: Multimedia :: Sound/Audio :: Speech
18
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
19
+ Requires-Python: >=3.10
20
+ Description-Content-Type: text/markdown
21
+ License-File: LICENSE
22
+ Requires-Dist: numpy
23
+ Requires-Dist: pandas
24
+ Requires-Dist: scikit-learn
25
+ Requires-Dist: pytest
26
+ Requires-Dist: fastapi
27
+ Requires-Dist: uvicorn
28
+ Requires-Dist: sentence_transformers
29
+ Requires-Dist: python-multipart
30
+ Requires-Dist: pydub
31
+ Requires-Dist: soundfile
32
+ Requires-Dist: protobuf==3.20.2
33
+ Requires-Dist: spacy~=3.4.4
34
+ Requires-Dist: scipy~=1.9.3
35
+ Requires-Dist: pydantic
36
+ Requires-Dist: nltk~=3.8.1
37
+ Requires-Dist: ml-dtypes==0.2.0
38
+ Requires-Dist: pytube
39
+ Requires-Dist: cached_property
40
+ Requires-Dist: gunicorn
41
+ Requires-Dist: mock
42
+ Requires-Dist: pysbd
43
+ Requires-Dist: openai~=0.28.1
44
+ Dynamic: author
45
+ Dynamic: classifier
46
+ Dynamic: description
47
+ Dynamic: description-content-type
48
+ Dynamic: home-page
49
+ Dynamic: keywords
50
+ Dynamic: license
51
+ Dynamic: license-file
52
+ Dynamic: project-url
53
+ Dynamic: requires-dist
54
+ Dynamic: requires-python
55
+ Dynamic: summary
56
+
57
+ # Quint: transcribe | chunk | summarize
58
+
59
+ <p align="center">
60
+ <img src="https://raw.githubusercontent.com/poloniki/quint/master/frontend/logo.png" alt="Quint logo">
61
+ </p>
62
+
63
+ <p align="center">
64
+ <a href="https://github.com/poloniki/quint/actions/workflows/build.yml">
65
+ <img src="https://img.shields.io/github/actions/workflow/status/poloniki/quint/build.yml?branch=master&style=for-the-badge&logo=github&label=CI" alt="CI">
66
+ </a>
67
+ <a href="LICENSE">
68
+ <img src="https://img.shields.io/badge/License-MIT-green?style=for-the-badge" alt="License: MIT">
69
+ </a>
70
+ <a href="https://fastapi.tiangolo.com">
71
+ <img src="https://img.shields.io/badge/FastAPI-005571?style=for-the-badge&logo=fastapi" alt="FastAPI">
72
+ </a>
73
+ <a href="https://www.python.org/downloads/release/python-3100/">
74
+ <img src="https://img.shields.io/badge/python-3.10-3670A0?style=for-the-badge&logo=python&logoColor=ffdd54" alt="Python 3.10">
75
+ </a>
76
+ <a href="https://hub.docker.com/r/poloniki/quint">
77
+ <img src="https://img.shields.io/badge/docker-%230db7ed.svg?style=for-the-badge&logo=docker&logoColor=white" alt="Docker">
78
+ </a>
79
+ </p>
80
+
81
+ "Quint" is designed to enhance the podcast experience. It simplifies the process for users, making it easier for them to understand and navigate podcasts by providing concise summaries, highlights, and transcripts.
82
+
83
+ ## Table of Contents
84
+
85
+ - [Main Functionality](#-main-functionality)
86
+ - [Quickstart](#-quickstart)
87
+ - [License](#-license)
88
+ - [Deploy on a GPU cloud](#-how-to-deploy-this-api-on-cloud)
89
+
90
+ ## 🚀 Main Functionality
91
+
92
+ Below is a list of the core API endpoints offered by Quint:
93
+
94
+ Once the API is running (see [Quickstart](#-quickstart)), interactive docs are available at `/docs`.
95
+
96
+ ### 🎥 YouTube Video Transcription
97
+
98
+ Provide a YouTube video ID. Quint fetches the video, extracts its audio, and returns a transcription.
99
+
100
+ ```http
101
+ GET /youtube_transcript?video_id=YOUR_YOUTUBE_VIDEO_ID
102
+ ```
103
+
104
+ ```json
105
+ { "transcript": "The transcribed text of the video goes here..." }
106
+ ```
107
+
108
+ ### 🎙️ Transcription from Audio File
109
+
110
+ Upload an audio file and receive its transcription in text format.
111
+
112
+ ```http
113
+ POST /file_transcript
114
+ ```
115
+
116
+ ```json
117
+ { "transcript": "The transcribed text of the audio goes here..." }
118
+ ```
119
+
120
+ ### 📜 Text Chunking
121
+
122
+ Submit a lengthy text and get it divided into semantically meaningful chunks or paragraphs.
123
+
124
+ ```http
125
+ POST /chunk
126
+ { "body": "Your lengthy continuous text here..." }
127
+ ```
128
+
129
+ ```json
130
+ { "output": ["Chunk 1", "Chunk 2", "..."] }
131
+ ```
132
+
133
+ ### 🌟 Highlight the Best Sentence
134
+
135
+ Submit a text and Quint returns the index of the most descriptive sentence based on the embeddings.
136
+
137
+ ```http
138
+ POST /best_sentence
139
+ { "body": "Your raw text here..." }
140
+ ```
141
+
142
+ ```json
143
+ { "best_sentence_index": 5 }
144
+ ```
145
+
146
+ ### πŸ“ YouTube Summary
147
+
148
+ Provide a YouTube video ID to get back a list of chunked summaries of the video.
149
+
150
+ ```http
151
+ GET /youtube_summarize?video_id=YOUR_YOUTUBE_VIDEO_ID
152
+ ```
153
+
154
+ ```json
155
+ { "summary": ["Summary of part 1", "Summary of part 2", "..."] }
156
+ ```
157
+
158
+ ## πŸ§‘β€πŸ’» Quickstart
159
+
160
+ Run the API locally — CPU is fine for chunking and summarization; transcription is far faster on a GPU (see [deploy](#-how-to-deploy-this-api-on-cloud)).
161
+
162
+ ```shell
163
+ git clone https://github.com/poloniki/quint.git
164
+ cd quint
165
+ make install # pip install -e .
166
+ cp env.sample .env # then set OPENAI_API_KEY
167
+ make run_api # serves on http://localhost:8083
168
+ ```
169
+
170
+ Then open `http://localhost:8083/docs` for the interactive API docs.
171
+
172
+ ### Web UI (optional)
173
+
174
+ A small [Streamlit](https://streamlit.io) frontend lives in [`frontend/`](frontend/app.py). With the API running:
175
+
176
+ ```shell
177
+ pip install -r frontend/requirements.txt
178
+ streamlit run frontend/app.py
179
+ ```
180
+
181
+ Set `QUINT_API_URL` if the API isn't on `http://localhost:8083`.
182
+
183
+ ## 📖 License
184
+
185
+ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
186
+
187
+ ## 🛜 How to deploy this API on cloud
188
+
189
+ Important note: I highly recommend using the JAX solution, as it is much faster than the OpenAI-proposed way. Please refer to this repo [Whisper JAX](https://github.com/sanchit-gandhi/whisper-jax) for more details. I will attach one of the tables from that repo:
190
+
191
+ **Table 1:** Average inference time in seconds for audio files of increasing length. GPU device is a single A100 40GB GPU.
192
+ TPU device is a single TPU v4-8.
193
+
194
+ <div align="center">
195
+
196
+ | | OpenAI | Transformers | Whisper JAX | Whisper JAX |
197
+ | --------- | ------- | ------------ | ----------- | ----------- |
198
+ | | | | | |
199
+ | Framework | PyTorch | PyTorch | JAX | JAX |
200
+ | Backend | GPU | GPU | GPU | TPU |
201
+ | | | | | |
202
+ | 1 min | 13.8 | 4.54 | 1.72 | 0.45 |
203
+ | 10 min | 108.3 | 20.2 | 9.38 | 2.01 |
204
+ | 1 hour | 1001.0 | 126.1 | 75.3 | 13.8 |
205
+ | | | | | |
206
+
207
+ </div>
208
+
209
+ ### Choosing a GPU cloud provider
210
+
211
+ Quint runs on any machine with an NVIDIA GPU, so you are free to use whichever cloud provider (AWS, GCP, Azure, Lambda, Paperspace, RunPod, …) or on-prem hardware you prefer. For the best price/performance on transcription, look for an **Ada- or Ampere-generation workstation card** such as the RTX 6000 Ada or RTX A6000 — these are typically far cheaper than A100-class GPUs while offering comparable [CUDA compute capability](https://developer.nvidia.com/cuda-gpus).
212
+
213
+ Whatever you pick, you only need an instance that provides:
214
+
215
+ - An **NVIDIA GPU** (Ampere/Ada or newer recommended)
216
+ - **Ubuntu 22.04** (or similar) with **CUDA 12** and **Docker**
217
+ - **SSH access** (root or sudo)
218
+
219
+ The steps below are provider-neutral: provision the instance however your provider requires, then follow along.
220
+
221
+ ### 1. Configure your environment
222
+
223
+ ```shell
224
+ cp env.sample .env # then edit .env
225
+ direnv reload # or: source .env
226
+ ```
227
+
228
+ Set the following in `.env`:
229
+
230
+ | Variable | Used by | Purpose |
231
+ | --- | --- | --- |
232
+ | `OPENAI_API_KEY` | API (summarization) | Key for the summarization step |
233
+ | `GPU_TYPE` | API (optional) | Set to `A100` to enable bfloat16 on the JAX backend; any other value (or unset) uses float16 |
234
+ | `EMAIL` | deploy helper | Labels / generates your SSH key |
235
+ | `HOST` | deploy helper | Public IP or hostname of your GPU instance |
236
+ | `SSH_USER` | deploy helper | SSH login user for your image (often `root`, but `ubuntu` on AWS, your username on GCP, `azureuser` on Azure) |
237
+
238
+ ### 2. Provision and connect to the instance
239
+
240
+ Create a GPU instance with your provider using an **Ubuntu 22.04 + CUDA 12 + Docker** image and your SSH public key. Once it is running, note its public IP (set it as `HOST` in `.env`) and connect:
241
+
242
+ ```shell
243
+ ssh $SSH_USER@$HOST -i ~/.ssh/<your_key>
244
+ ```
245
+
246
+ > Use the login user your provider specifies for the image. `root` works on many bare-VM providers, but AWS Ubuntu AMIs use `ubuntu`, GCP uses your username, Azure uses `azureuser`, etc. Set it as `SSH_USER` in `.env`.
247
+
248
+ The notebook [`notebooks/Deploy_gpu_instance.ipynb`](notebooks/Deploy_gpu_instance.ipynb) automates the provider-neutral parts: generating an SSH key, copying the code to the host, and building/running the container.
249
+
250
+ ### 3. Install NVIDIA drivers (if your image doesn't include them)
251
+
252
+ If the instance image already ships with working drivers, skip this. Otherwise run the bundled script on the instance and reboot to load them:
253
+
254
+ ```shell
255
+ bash scripts/install_nvidia_driver.sh
256
+ sudo reboot
257
+ ```
258
+
259
+ ### 4. Get the code onto the instance
260
+
261
+ Clone it directly:
262
+
263
+ ```shell
264
+ git clone https://github.com/poloniki/quint.git
265
+ cd quint
266
+ ```
267
+
268
+ …or copy your local checkout up with `scp` (the deploy notebook does this for you).
269
+
270
+ ### 5. Build and run
271
+
272
+ ```shell
273
+ docker build -t quint --file Dockerfile.jax .
274
+ docker run --gpus all -p 80:80 --shm-size=1g --env-file .env quint
275
+ ```
276
+
277
+ > The `--env-file .env` flag passes `OPENAI_API_KEY` (and optional `GPU_TYPE`) into the container, so make sure `.env` is present on the instance. Also ensure your provider's firewall / security group allows inbound TCP on port **80** — most clouds only open SSH (port 22) by default.
278
+
279
+ Your API is now available on the instance's public IP (port 80).
@@ -0,0 +1,223 @@
1
+ # Quint: transcribe | chunk | summarize
2
+
3
+ <p align="center">
4
+ <img src="https://raw.githubusercontent.com/poloniki/quint/master/frontend/logo.png" alt="Quint logo">
5
+ </p>
6
+
7
+ <p align="center">
8
+ <a href="https://github.com/poloniki/quint/actions/workflows/build.yml">
9
+ <img src="https://img.shields.io/github/actions/workflow/status/poloniki/quint/build.yml?branch=master&style=for-the-badge&logo=github&label=CI" alt="CI">
10
+ </a>
11
+ <a href="LICENSE">
12
+ <img src="https://img.shields.io/badge/License-MIT-green?style=for-the-badge" alt="License: MIT">
13
+ </a>
14
+ <a href="https://fastapi.tiangolo.com">
15
+ <img src="https://img.shields.io/badge/FastAPI-005571?style=for-the-badge&logo=fastapi" alt="FastAPI">
16
+ </a>
17
+ <a href="https://www.python.org/downloads/release/python-3100/">
18
+ <img src="https://img.shields.io/badge/python-3.10-3670A0?style=for-the-badge&logo=python&logoColor=ffdd54" alt="Python 3.10">
19
+ </a>
20
+ <a href="https://hub.docker.com/r/poloniki/quint">
21
+ <img src="https://img.shields.io/badge/docker-%230db7ed.svg?style=for-the-badge&logo=docker&logoColor=white" alt="Docker">
22
+ </a>
23
+ </p>
24
+
25
+ "Quint" is designed to enhance the podcast experience. It simplifies the process for users, making it easier for them to understand and navigate podcasts by providing concise summaries, highlights, and transcripts.
26
+
27
+ ## Table of Contents
28
+
29
+ - [Main Functionality](#-main-functionality)
30
+ - [Quickstart](#-quickstart)
31
+ - [License](#-license)
32
+ - [Deploy on a GPU cloud](#-how-to-deploy-this-api-on-cloud)
33
+
34
+ ## 🚀 Main Functionality
35
+
36
+ Below is a list of the core API endpoints offered by Quint:
37
+
38
+ Once the API is running (see [Quickstart](#-quickstart)), interactive docs are available at `/docs`.
39
+
40
+ ### 🎥 YouTube Video Transcription
41
+
42
+ Provide a YouTube video ID. Quint fetches the video, extracts its audio, and returns a transcription.
43
+
44
+ ```http
45
+ GET /youtube_transcript?video_id=YOUR_YOUTUBE_VIDEO_ID
46
+ ```
47
+
48
+ ```json
49
+ { "transcript": "The transcribed text of the video goes here..." }
50
+ ```
51
+
52
+ ### 🎙️ Transcription from Audio File
53
+
54
+ Upload an audio file and receive its transcription in text format.
55
+
56
+ ```http
57
+ POST /file_transcript
58
+ ```
59
+
60
+ ```json
61
+ { "transcript": "The transcribed text of the audio goes here..." }
62
+ ```
63
+
64
+ ### 📜 Text Chunking
65
+
66
+ Submit a lengthy text and get it divided into semantically meaningful chunks or paragraphs.
67
+
68
+ ```http
69
+ POST /chunk
70
+ { "body": "Your lengthy continuous text here..." }
71
+ ```
72
+
73
+ ```json
74
+ { "output": ["Chunk 1", "Chunk 2", "..."] }
75
+ ```
76
+
77
+ ### 🌟 Highlight the Best Sentence
78
+
79
+ Submit a text and Quint returns the index of the most descriptive sentence based on the embeddings.
80
+
81
+ ```http
82
+ POST /best_sentence
83
+ { "body": "Your raw text here..." }
84
+ ```
85
+
86
+ ```json
87
+ { "best_sentence_index": 5 }
88
+ ```
89
+
90
+ ### πŸ“ YouTube Summary
91
+
92
+ Provide a YouTube video ID to get back a list of chunked summaries of the video.
93
+
94
+ ```http
95
+ GET /youtube_summarize?video_id=YOUR_YOUTUBE_VIDEO_ID
96
+ ```
97
+
98
+ ```json
99
+ { "summary": ["Summary of part 1", "Summary of part 2", "..."] }
100
+ ```
101
+
102
+ ## πŸ§‘β€πŸ’» Quickstart
103
+
104
+ Run the API locally — CPU is fine for chunking and summarization; transcription is far faster on a GPU (see [deploy](#-how-to-deploy-this-api-on-cloud)).
105
+
106
+ ```shell
107
+ git clone https://github.com/poloniki/quint.git
108
+ cd quint
109
+ make install # pip install -e .
110
+ cp env.sample .env # then set OPENAI_API_KEY
111
+ make run_api # serves on http://localhost:8083
112
+ ```
113
+
114
+ Then open `http://localhost:8083/docs` for the interactive API docs.
115
+
116
+ ### Web UI (optional)
117
+
118
+ A small [Streamlit](https://streamlit.io) frontend lives in [`frontend/`](frontend/app.py). With the API running:
119
+
120
+ ```shell
121
+ pip install -r frontend/requirements.txt
122
+ streamlit run frontend/app.py
123
+ ```
124
+
125
+ Set `QUINT_API_URL` if the API isn't on `http://localhost:8083`.
126
+
127
+ ## 📖 License
128
+
129
+ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
130
+
131
+ ## 🛜 How to deploy this API on cloud
132
+
133
+ Important note: I highly recommend using the JAX solution, as it is much faster than the OpenAI-proposed way. Please refer to this repo [Whisper JAX](https://github.com/sanchit-gandhi/whisper-jax) for more details. I will attach one of the tables from that repo:
134
+
135
+ **Table 1:** Average inference time in seconds for audio files of increasing length. GPU device is a single A100 40GB GPU.
136
+ TPU device is a single TPU v4-8.
137
+
138
+ <div align="center">
139
+
140
+ | | OpenAI | Transformers | Whisper JAX | Whisper JAX |
141
+ | --------- | ------- | ------------ | ----------- | ----------- |
142
+ | | | | | |
143
+ | Framework | PyTorch | PyTorch | JAX | JAX |
144
+ | Backend | GPU | GPU | GPU | TPU |
145
+ | | | | | |
146
+ | 1 min | 13.8 | 4.54 | 1.72 | 0.45 |
147
+ | 10 min | 108.3 | 20.2 | 9.38 | 2.01 |
148
+ | 1 hour | 1001.0 | 126.1 | 75.3 | 13.8 |
149
+ | | | | | |
150
+
151
+ </div>
152
+
153
+ ### Choosing a GPU cloud provider
154
+
155
+ Quint runs on any machine with an NVIDIA GPU, so you are free to use whichever cloud provider (AWS, GCP, Azure, Lambda, Paperspace, RunPod, …) or on-prem hardware you prefer. For the best price/performance on transcription, look for an **Ada- or Ampere-generation workstation card** such as the RTX 6000 Ada or RTX A6000 — these are typically far cheaper than A100-class GPUs while offering comparable [CUDA compute capability](https://developer.nvidia.com/cuda-gpus).
156
+
157
+ Whatever you pick, you only need an instance that provides:
158
+
159
+ - An **NVIDIA GPU** (Ampere/Ada or newer recommended)
160
+ - **Ubuntu 22.04** (or similar) with **CUDA 12** and **Docker**
161
+ - **SSH access** (root or sudo)
162
+
163
+ The steps below are provider-neutral: provision the instance however your provider requires, then follow along.
164
+
165
+ ### 1. Configure your environment
166
+
167
+ ```shell
168
+ cp env.sample .env # then edit .env
169
+ direnv reload # or: source .env
170
+ ```
171
+
172
+ Set the following in `.env`:
173
+
174
+ | Variable | Used by | Purpose |
175
+ | --- | --- | --- |
176
+ | `OPENAI_API_KEY` | API (summarization) | Key for the summarization step |
177
+ | `GPU_TYPE` | API (optional) | Set to `A100` to enable bfloat16 on the JAX backend; any other value (or unset) uses float16 |
178
+ | `EMAIL` | deploy helper | Labels / generates your SSH key |
179
+ | `HOST` | deploy helper | Public IP or hostname of your GPU instance |
180
+ | `SSH_USER` | deploy helper | SSH login user for your image (often `root`, but `ubuntu` on AWS, your username on GCP, `azureuser` on Azure) |
181
+
182
+ ### 2. Provision and connect to the instance
183
+
184
+ Create a GPU instance with your provider using an **Ubuntu 22.04 + CUDA 12 + Docker** image and your SSH public key. Once it is running, note its public IP (set it as `HOST` in `.env`) and connect:
185
+
186
+ ```shell
187
+ ssh $SSH_USER@$HOST -i ~/.ssh/<your_key>
188
+ ```
189
+
190
+ > Use the login user your provider specifies for the image. `root` works on many bare-VM providers, but AWS Ubuntu AMIs use `ubuntu`, GCP uses your username, Azure uses `azureuser`, etc. Set it as `SSH_USER` in `.env`.
191
+
192
+ The notebook [`notebooks/Deploy_gpu_instance.ipynb`](notebooks/Deploy_gpu_instance.ipynb) automates the provider-neutral parts: generating an SSH key, copying the code to the host, and building/running the container.
193
+
194
+ ### 3. Install NVIDIA drivers (if your image doesn't include them)
195
+
196
+ If the instance image already ships with working drivers, skip this. Otherwise run the bundled script on the instance and reboot to load them:
197
+
198
+ ```shell
199
+ bash scripts/install_nvidia_driver.sh
200
+ sudo reboot
201
+ ```
202
+
203
+ ### 4. Get the code onto the instance
204
+
205
+ Clone it directly:
206
+
207
+ ```shell
208
+ git clone https://github.com/poloniki/quint.git
209
+ cd quint
210
+ ```
211
+
212
+ …or copy your local checkout up with `scp` (the deploy notebook does this for you).
213
+
214
+ ### 5. Build and run
215
+
216
+ ```shell
217
+ docker build -t quint --file Dockerfile.jax .
218
+ docker run --gpus all -p 80:80 --shm-size=1g --env-file .env quint
219
+ ```
220
+
221
+ > The `--env-file .env` flag passes `OPENAI_API_KEY` (and optional `GPU_TYPE`) into the container, so make sure `.env` is present on the instance. Also ensure your provider's firewall / security group allows inbound TCP on port **80** — most clouds only open SSH (port 22) by default.
222
+
223
+ Your API is now available on the instance's public IP (port 80).
@@ -0,0 +1,16 @@
1
+ # Build backend + tooling config. Package metadata lives in setup.py and
2
+ # dependencies in requirements.txt.
3
+
4
+ [build-system]
5
+ requires = ["setuptools>=61", "wheel"]
6
+ build-backend = "setuptools.build_meta"
7
+
8
+ [tool.ruff]
9
+ target-version = "py310"
10
+ line-length = 88
11
+ # Notebooks are exploratory; skip them.
12
+ extend-exclude = ["notebooks"]
13
+
14
+ [tool.ruff.lint]
15
+ # Mirror ruff's default rule set explicitly so CI is deterministic across versions.
16
+ select = ["E4", "E7", "E9", "F"]
@@ -0,0 +1,8 @@
1
+ from os.path import isfile
2
+ from os.path import dirname
3
+
4
+ version_file = '{}/version.txt'.format(dirname(__file__))
5
+
6
+ if isfile(version_file):
7
+ with open(version_file) as version_file:
8
+ __version__ = version_file.read().strip()
File without changes