gst-python-ml 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gst_python_ml-0.1.0/COPYING +16 -0
- gst_python_ml-0.1.0/MANIFEST.in +8 -0
- gst_python_ml-0.1.0/PKG-INFO +382 -0
- gst_python_ml-0.1.0/README.md +328 -0
- gst_python_ml-0.1.0/plugins/python/__init__.py +0 -0
- gst_python_ml-0.1.0/plugins/python/analytics_utils.py +72 -0
- gst_python_ml-0.1.0/plugins/python/caption.py +222 -0
- gst_python_ml-0.1.0/plugins/python/coquitts.py +90 -0
- gst_python_ml-0.1.0/plugins/python/engine/__init__.py +0 -0
- gst_python_ml-0.1.0/plugins/python/engine/gst_device_queue_pool.py +121 -0
- gst_python_ml-0.1.0/plugins/python/engine/gst_engine_factory.py +130 -0
- gst_python_ml-0.1.0/plugins/python/engine/gst_ml_engine.py +75 -0
- gst_python_ml-0.1.0/plugins/python/engine/gst_onnx_engine.py +99 -0
- gst_python_ml-0.1.0/plugins/python/engine/gst_openvino_engine.py +150 -0
- gst_python_ml-0.1.0/plugins/python/engine/gst_pytorch_engine.py +376 -0
- gst_python_ml-0.1.0/plugins/python/engine/gst_pytorch_yolo_engine.py +74 -0
- gst_python_ml-0.1.0/plugins/python/engine/gst_tensorflow_engine.py +72 -0
- gst_python_ml-0.1.0/plugins/python/engine/gst_tflite_engine.py +105 -0
- gst_python_ml-0.1.0/plugins/python/gst_aggregator.py +236 -0
- gst_python_ml-0.1.0/plugins/python/gst_base_transform.py +232 -0
- gst_python_ml-0.1.0/plugins/python/gst_feature_manager.py +109 -0
- gst_python_ml-0.1.0/plugins/python/gst_llm.py +108 -0
- gst_python_ml-0.1.0/plugins/python/gst_object_detector.py +308 -0
- gst_python_ml-0.1.0/plugins/python/gst_python_ml.egg-info/PKG-INFO +382 -0
- gst_python_ml-0.1.0/plugins/python/gst_python_ml.egg-info/SOURCES.txt +51 -0
- gst_python_ml-0.1.0/plugins/python/gst_python_ml.egg-info/dependency_links.txt +1 -0
- gst_python_ml-0.1.0/plugins/python/gst_python_ml.egg-info/requires.txt +31 -0
- gst_python_ml-0.1.0/plugins/python/gst_python_ml.egg-info/top_level.txt +2 -0
- gst_python_ml-0.1.0/plugins/python/gst_transcribe.py +266 -0
- gst_python_ml-0.1.0/plugins/python/gst_translate.py +165 -0
- gst_python_ml-0.1.0/plugins/python/gst_tts.py +202 -0
- gst_python_ml-0.1.0/plugins/python/gst_video_transform.py +84 -0
- gst_python_ml-0.1.0/plugins/python/kafkasink.py +330 -0
- gst_python_ml-0.1.0/plugins/python/llm.py +46 -0
- gst_python_ml-0.1.0/plugins/python/mariantranslate.py +48 -0
- gst_python_ml-0.1.0/plugins/python/maskrcnn.py +115 -0
- gst_python_ml-0.1.0/plugins/python/objectdetector.py +58 -0
- gst_python_ml-0.1.0/plugins/python/overlay.py +197 -0
- gst_python_ml-0.1.0/plugins/python/overlay_counter.py +70 -0
- gst_python_ml-0.1.0/plugins/python/overlay_skia.py +351 -0
- gst_python_ml-0.1.0/plugins/python/overlay_utils.py +316 -0
- gst_python_ml-0.1.0/plugins/python/stablediffusion.py +163 -0
- gst_python_ml-0.1.0/plugins/python/streamdemux.py +128 -0
- gst_python_ml-0.1.0/plugins/python/streammux.py +143 -0
- gst_python_ml-0.1.0/plugins/python/utils.py +44 -0
- gst_python_ml-0.1.0/plugins/python/whisperlive.py +219 -0
- gst_python_ml-0.1.0/plugins/python/whisperspeechtts.py +106 -0
- gst_python_ml-0.1.0/plugins/python/whispertranscribe.py +79 -0
- gst_python_ml-0.1.0/plugins/python/yolo.py +227 -0
- gst_python_ml-0.1.0/pyproject.toml +3 -0
- gst_python_ml-0.1.0/requirements.txt +12 -0
- gst_python_ml-0.1.0/setup.cfg +4 -0
- gst_python_ml-0.1.0/setup.py +41 -0
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
Copyright (C) 2024-2025 Collabora Ltd.
|
|
2
|
+
|
|
3
|
+
This library is free software; you can redistribute it and/or
|
|
4
|
+
modify it under the terms of the GNU Library General Public
|
|
5
|
+
License as published by the Free Software Foundation; either
|
|
6
|
+
version 2 of the License, or (at your option) any later version.
|
|
7
|
+
|
|
8
|
+
This library is distributed in the hope that it will be useful,
|
|
9
|
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
10
|
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
11
|
+
Library General Public License for more details.
|
|
12
|
+
|
|
13
|
+
You should have received a copy of the GNU Library General Public
|
|
14
|
+
License along with this library; if not, write to the
|
|
15
|
+
Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
|
|
16
|
+
Boston, MA 02110-1301, USA.
|
|
@@ -0,0 +1,382 @@
|
|
|
1
|
+
Metadata-Version: 2.2
|
|
2
|
+
Name: gst-python-ml
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: An ML package for GStreamer
|
|
5
|
+
Home-page: https://github.com/collabora/gst-python-ml
|
|
6
|
+
Author: Aaron Boxer
|
|
7
|
+
Author-email: aaron.boxer@collabora.com
|
|
8
|
+
License: LGPL-3.0
|
|
9
|
+
Classifier: Programming Language :: Python :: 3
|
|
10
|
+
Classifier: License :: OSI Approved :: GNU Lesser General Public License v3 (LGPLv3)
|
|
11
|
+
Classifier: Operating System :: OS Independent
|
|
12
|
+
Requires-Python: >=3.6
|
|
13
|
+
Description-Content-Type: text/markdown
|
|
14
|
+
License-File: COPYING
|
|
15
|
+
Requires-Dist: pygobject
|
|
16
|
+
Requires-Dist: torch
|
|
17
|
+
Requires-Dist: torchvision
|
|
18
|
+
Requires-Dist: torchaudio
|
|
19
|
+
Requires-Dist: transformers
|
|
20
|
+
Requires-Dist: numpy
|
|
21
|
+
Requires-Dist: huggingface-hub
|
|
22
|
+
Requires-Dist: lap
|
|
23
|
+
Requires-Dist: ultralytics
|
|
24
|
+
Requires-Dist: pycairo
|
|
25
|
+
Provides-Extra: kafka
|
|
26
|
+
Requires-Dist: confluent-kafka; extra == "kafka"
|
|
27
|
+
Provides-Extra: captioning
|
|
28
|
+
Requires-Dist: ninja; extra == "captioning"
|
|
29
|
+
Requires-Dist: flash_attn; extra == "captioning"
|
|
30
|
+
Provides-Extra: language
|
|
31
|
+
Requires-Dist: nvidia-cudnn-cu11==8.9.6.50; extra == "language"
|
|
32
|
+
Requires-Dist: sentencepiece; extra == "language"
|
|
33
|
+
Requires-Dist: protobuf; extra == "language"
|
|
34
|
+
Requires-Dist: accelerate; extra == "language"
|
|
35
|
+
Requires-Dist: pysilero; extra == "language"
|
|
36
|
+
Requires-Dist: faster_whisper; extra == "language"
|
|
37
|
+
Requires-Dist: pysilero-vad; extra == "language"
|
|
38
|
+
Requires-Dist: diffusers; extra == "language"
|
|
39
|
+
Requires-Dist: whisperspeech; extra == "language"
|
|
40
|
+
Requires-Dist: webdataset; extra == "language"
|
|
41
|
+
Requires-Dist: sacremoses; extra == "language"
|
|
42
|
+
Requires-Dist: coqui-tts; extra == "language"
|
|
43
|
+
Dynamic: author
|
|
44
|
+
Dynamic: author-email
|
|
45
|
+
Dynamic: classifier
|
|
46
|
+
Dynamic: description
|
|
47
|
+
Dynamic: description-content-type
|
|
48
|
+
Dynamic: home-page
|
|
49
|
+
Dynamic: license
|
|
50
|
+
Dynamic: provides-extra
|
|
51
|
+
Dynamic: requires-dist
|
|
52
|
+
Dynamic: requires-python
|
|
53
|
+
Dynamic: summary
|
|
54
|
+
|
|
55
|
+
# GStreamer Python ML
|
|
56
|
+
|
|
57
|
+
This project provides Python base classes and GStreamer elements supporting a broad range
|
|
58
|
+
of ML features.
|
|
59
|
+
|
|
60
|
+
Supported functionality includes:
|
|
61
|
+
|
|
62
|
+
1. object detection
|
|
63
|
+
1. tracking
|
|
64
|
+
1. video captioning
|
|
65
|
+
1. translation
|
|
66
|
+
1. transcription
|
|
67
|
+
1. speech to text
|
|
68
|
+
1. text to speech
|
|
69
|
+
1. text to image
|
|
70
|
+
1. LLMs
|
|
71
|
+
1. serializing model metadata to a Kafka server
|
|
72
|
+
|
|
73
|
+
ML toolkits are supported via the `MLEngine` abstraction - we have nominal support for
|
|
74
|
+
TensorFlow, LiteRT and OpenVINO, but all testing thus far has been done with PyTorch.
|
|
75
|
+
|
|
76
|
+
These elements will work with your distribution's GStreamer packages. They have been tested on Ubuntu 24 with GStreamer 1.24.
|
|
77
|
+
|
|
78
|
+
## Python Version
|
|
79
|
+
|
|
80
|
+
All elements have been tested with Python 3.12, the default Python version on Ubuntu 24.
|
|
81
|
+
|
|
82
|
+
## Install
|
|
83
|
+
|
|
84
|
+
There are two installation options described below: installing on your host machine,
|
|
85
|
+
or installing with a Docker container:
|
|
86
|
+
|
|
87
|
+
### Host Install (Ubuntu 24)
|
|
88
|
+
|
|
89
|
+
#### Install packages
|
|
90
|
+
|
|
91
|
+
```
|
|
92
|
+
sudo apt update && sudo apt -y upgrade
|
|
93
|
+
sudo apt install -y python3-pip python3-venv \
|
|
94
|
+
gstreamer1.0-plugins-base gstreamer1.0-plugins-base-apps \
|
|
95
|
+
gstreamer1.0-plugins-good gstreamer1.0-plugins-bad \
|
|
96
|
+
gir1.2-gst-plugins-bad-1.0 python3-gst-1.0 gstreamer1.0-python3-plugin-loader \
|
|
97
|
+
libcairo2 libcairo2-dev git
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
#### Install venv
|
|
101
|
+
|
|
102
|
+
`python3 -m venv --system-site-packages ~/venv`
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
#### Clone repo (host)
|
|
106
|
+
|
|
107
|
+
`git clone https://github.com/collabora/gst-python-ml.git ~/src/gst-python-ml`
|
|
108
|
+
|
|
109
|
+
#### Update .bashrc
|
|
110
|
+
|
|
111
|
+
```
|
|
112
|
+
export VIRTUAL_ENV=$HOME/venv
|
|
113
|
+
export PATH=$VIRTUAL_ENV/bin:$PATH
|
|
114
|
+
export GST_PLUGIN_PATH=$HOME/src/gst-python-ml/plugins
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
and then
|
|
118
|
+
|
|
119
|
+
`source ~/.bashrc`
|
|
120
|
+
|
|
121
|
+
#### Activate venv and install basic pip packages
|
|
122
|
+
|
|
123
|
+
```
|
|
124
|
+
source $VIRTUAL_ENV/bin/activate
|
|
125
|
+
pip install --upgrade pip
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
#### Install pip requirements
|
|
129
|
+
|
|
130
|
+
```
|
|
131
|
+
cd ~/src/gst-python-ml
|
|
132
|
+
pip install -r requirements.txt
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
### Docker Install
|
|
136
|
+
|
|
137
|
+
#### Build Docker Container
|
|
138
|
+
|
|
139
|
+
Important Note:
|
|
140
|
+
|
|
141
|
+
This Dockerfile maps a local `gst-python-ml` repository to the container,
|
|
142
|
+
and expects this repository to be located in `~/src` i.e. `~/src/gst-python-ml`.
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
#### Enable Docker GPU Support on Host
|
|
146
|
+
|
|
147
|
+
To use the host GPU in a Docker container, you will need to install the NVIDIA Container Toolkit. If running on CPU, these steps can be skipped.
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
Add the NVIDIA repository (Ubuntu)
|
|
151
|
+
|
|
152
|
+
```
|
|
153
|
+
curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey | sudo gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg \
|
|
154
|
+
&& curl -s -L https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list | \
|
|
155
|
+
sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' | \
|
|
156
|
+
sudo tee /etc/apt/sources.list.d/nvidia-container-toolkit.list
|
|
157
|
+
```
|
|
158
|
+
|
|
159
|
+
Then
|
|
160
|
+
|
|
161
|
+
```
|
|
162
|
+
sudo apt update
|
|
163
|
+
sudo apt install -y nvidia-container-toolkit
|
|
164
|
+
sudo systemctl restart docker
|
|
165
|
+
```
|
|
166
|
+
|
|
167
|
+
#### Build Ubuntu 24.04 Container
|
|
168
|
+
`docker build -f ./Dockerfile -t ubuntu24:latest .`
|
|
169
|
+
|
|
170
|
+
#### Run Docker Container
|
|
171
|
+
|
|
172
|
+
a) If running on CPU, just remove `--gpus all` from command below
|
|
173
|
+
b) This command assumes you have set up a Kafka network as described below
|
|
174
|
+
|
|
175
|
+
`docker run -v ~/src/gst-python-ml/:/root/gst-python-ml -it --rm --gpus all --name ubuntu24 ubuntu24:latest /bin/bash`
|
|
176
|
+
|
|
177
|
+
In the container shell, run
|
|
178
|
+
|
|
179
|
+
`pip install -r requirements.txt`
|
|
180
|
+
|
|
181
|
+
to install base requirements, and then
|
|
182
|
+
|
|
183
|
+
`cd gst-python-ml` to run the pipelines below. After installing requirements,
|
|
184
|
+
it is recommended to open another terminal on host and run
|
|
185
|
+
|
|
186
|
+
`docker ps` to get the container id, and then run
|
|
187
|
+
|
|
188
|
+
`docker commit $CONTAINER_ID` to commit the changes, where `$CONTAINER_ID`
|
|
189
|
+
is the id for your docker instance.
|
|
190
|
+
|
|
191
|
+
#### Docker Cleanup
|
|
192
|
+
|
|
193
|
+
If you want to purge existing docker containers and images:
|
|
194
|
+
|
|
195
|
+
```
|
|
196
|
+
docker container prune -f
|
|
197
|
+
docker image prune -a -f
|
|
198
|
+
```
|
|
199
|
+
|
|
200
|
+
## IMPORTANT NOTE
|
|
201
|
+
|
|
202
|
+
To use the language elements included in this project, the `nvidia-cuda-toolkit`
|
|
203
|
+
Ubuntu package must be installed, and additional pip requirements must be installed from
|
|
204
|
+
`requirements/language_requirements.txt`
|
|
205
|
+
|
|
206
|
+
## Post Install
|
|
207
|
+
|
|
208
|
+
Run `gst-inspect-1.0 python` to see all of the pyml elements listed.
|
|
209
|
+
|
|
210
|
+
# Building PyPI Package
|
|
211
|
+
|
|
212
|
+
1. `pip install setuptools wheel twine`
|
|
213
|
+
2. `python setup.py sdist bdist_wheel`
|
|
214
|
+
3. `ls dist/`
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
## Using GStreamer Python ML Elements
|
|
218
|
+
|
|
219
|
+
## Pipelines
|
|
220
|
+
|
|
221
|
+
Below are some sample pipelines for the various elements in this project.
|
|
222
|
+
|
|
223
|
+
### kafkasink
|
|
224
|
+
|
|
225
|
+
#### Setting up kafka network
|
|
226
|
+
|
|
227
|
+
`docker network create kafka-network`
|
|
228
|
+
|
|
229
|
+
and list networks
|
|
230
|
+
|
|
231
|
+
`docker network ls`
|
|
232
|
+
|
|
233
|
+
#### docker launch
|
|
234
|
+
|
|
235
|
+
To launch a docker instance with the kafka network, add ` --network kafka-network `
|
|
236
|
+
to the docker launch command above.
|
|
237
|
+
|
|
238
|
+
#### Set up kafka and zookeeper
|
|
239
|
+
|
|
240
|
+
Note: setup below assumes you are running your pipeline in a docker container.
|
|
241
|
+
If running pipeline from host, then the port changes from `9092` to `29092`,
|
|
242
|
+
and the broker changes from `kafka` to `localhost`.
|
|
243
|
+
|
|
244
|
+
```
|
|
245
|
+
docker stop kafka zookeeper
|
|
246
|
+
docker rm kafka zookeeper
|
|
247
|
+
docker run -d --name zookeeper --network kafka-network -e ZOOKEEPER_CLIENT_PORT=2181 confluentinc/cp-zookeeper:latest
|
|
248
|
+
docker run -d --name kafka --network kafka-network \
|
|
249
|
+
-e KAFKA_ZOOKEEPER_CONNECT=zookeeper:2181 \
|
|
250
|
+
-e KAFKA_ADVERTISED_LISTENERS=INSIDE://kafka:9092,OUTSIDE://localhost:29092 \
|
|
251
|
+
-e KAFKA_LISTENER_SECURITY_PROTOCOL_MAP=INSIDE:PLAINTEXT,OUTSIDE:PLAINTEXT \
|
|
252
|
+
-e KAFKA_LISTENERS=INSIDE://0.0.0.0:9092,OUTSIDE://0.0.0.0:29092 \
|
|
253
|
+
-e KAFKA_INTER_BROKER_LISTENER_NAME=INSIDE \
|
|
254
|
+
-e KAFKA_BROKER_ID=1 \
|
|
255
|
+
-e KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR=1 \
|
|
256
|
+
-p 9092:9092 \
|
|
257
|
+
-p 29092:29092 \
|
|
258
|
+
confluentinc/cp-kafka:latest
|
|
259
|
+
```
|
|
260
|
+
|
|
261
|
+
#### Create test topic
|
|
262
|
+
```
|
|
263
|
+
docker exec kafka kafka-topics --create --topic test-kafkasink-topic --bootstrap-server kafka:9092 --partitions 1 --replication-factor 1
|
|
264
|
+
```
|
|
265
|
+
|
|
266
|
+
#### list topics
|
|
267
|
+
|
|
268
|
+
`docker exec -it kafka kafka-topics --list --bootstrap-server kafka:9092`
|
|
269
|
+
|
|
270
|
+
|
|
271
|
+
#### delete topic
|
|
272
|
+
|
|
273
|
+
`docker exec -it kafka kafka-topics --delete --topic test-kafkasink-topic --bootstrap-server kafka:9092`
|
|
274
|
+
|
|
275
|
+
|
|
276
|
+
#### consume topic
|
|
277
|
+
|
|
278
|
+
`docker exec -it kafka kafka-console-consumer --bootstrap-server kafka:9092 --topic test-kafkasink-topic --from-beginning`
|
|
279
|
+
|
|
280
|
+
|
|
281
|
+
### non ML
|
|
282
|
+
|
|
283
|
+
`GST_DEBUG=4 gst-launch-1.0 videotestsrc ! video/x-raw,width=1280,height=720 ! pyml_overlay meta-path=data/sample_metadata.json tracking=true ! videoconvert ! autovideosink`
|
|
284
|
+
|
|
285
|
+
Note: make sure to set the following in your `.bashrc` file:
|
|
286
|
+
|
|
287
|
+
`export GST_PLUGIN_PATH=/home/$USER/src/gst-python-ml/plugins:$GST_PLUGIN_PATH`
|
|
288
|
+
|
|
289
|
+
|
|
290
|
+
### Bird's Eye View
|
|
291
|
+
|
|
292
|
+
`GST_DEBUG=4 gst-launch-1.0 filesrc location=data/soccer_single_camera.mp4 ! decodebin ! videoconvert ! pyml_birdseye ! videoconvert ! autovideosink`
|
|
293
|
+
|
|
294
|
+
`GST_DEBUG=4 gst-launch-1.0 filesrc location=data/soccer_single_camera.mp4 ! decodebin ! videorate ! video/x-raw,framerate=30/1 ! videoconvert ! pyml_birdseye ! videoconvert ! openh264enc ! h264parse ! matroskamux ! filesink location=output.mkv`
|
|
295
|
+
|
|
296
|
+
### Object Detection
|
|
297
|
+
|
|
298
|
+
Possible model names:
|
|
299
|
+
`fasterrcnn_resnet50_fpn`
|
|
300
|
+
`retinanet_resnet50_fpn`
|
|
301
|
+
|
|
302
|
+
#### fasterrcnn/kafka
|
|
303
|
+
|
|
304
|
+
`GST_DEBUG=4 gst-launch-1.0 multifilesrc location=data/000015.jpg ! jpegdec ! videoconvert ! videoscale ! pyml_objectdetector model-name=fasterrcnn_resnet50_fpn device=cuda batch-size=4 ! pyml_kafkasink schema-file=data/pyml_object_detector.json broker=kafka:9092 topic=test-kafkasink-topic 2>&1 | grep pyml_kafkasink`
|
|
305
|
+
|
|
306
|
+
#### maskrcnn
|
|
307
|
+
|
|
308
|
+
`GST_DEBUG=4 gst-launch-1.0 filesrc location=data/people.mp4 ! decodebin ! videoconvert ! videoscale ! pyml_maskrcnn device=cuda batch-size=4 model-name=maskrcnn_resnet50_fpn ! videoconvert ! objectdetectionoverlay labels-color=0xFFFF0000 object-detection-outline-color=0xFFFF0000 ! autovideosink`
|
|
309
|
+
|
|
310
|
+
|
|
311
|
+
#### yolo with tracking
|
|
312
|
+
|
|
313
|
+
`gst-launch-1.0 filesrc location=data/soccer_tracking.mp4 ! decodebin ! videoconvert ! videoscale ! video/x-raw,width=640,height=480 ! pyml_yolo model-name=yolo11m device=cuda:0 track=True ! videoconvert ! pyml_overlay labels-color=0xFFFF0000 object-detection-outline-color=0xFFFF0000 ! autovideosink`
|
|
314
|
+
|
|
315
|
+
#### yolo with overlay
|
|
316
|
+
|
|
317
|
+
`gst-launch-1.0 filesrc location=data/soccer_tracking.mp4 ! decodebin ! videoconvert ! videoscale ! video/x-raw,width=640,height=480 ! pyml_yolo model-name=yolo11m device=cuda:0 track=True ! pyml_overlay ! videoconvert ! autovideosink`
|
|
318
|
+
|
|
319
|
+
|
|
320
|
+
### streammux pipeline
|
|
321
|
+
|
|
322
|
+
`GST_DEBUG=4 gst-launch-1.0 pyml_streammux name=mux ! videoconvert ! fakesink videotestsrc ! mux. videotestsrc pattern=ball ! mux. videotestsrc pattern=snow ! mux.`
|
|
323
|
+
|
|
324
|
+
|
|
325
|
+
### Transcription
|
|
326
|
+
|
|
327
|
+
#### transcription with initial prompt set
|
|
328
|
+
|
|
329
|
+
`GST_DEBUG=4 gst-launch-1.0 filesrc location=data/air_traffic_korean_with_english.wav ! decodebin ! audioconvert ! pyml_whispertranscribe device=cuda language=ko initial_prompt = "Air Traffic Control은, radar systems를, weather conditions에, flight paths를, communication은, unexpected weather conditions가, continuous training을, dedication과, professionalism" ! fakesink`
|
|
330
|
+
|
|
331
|
+
#### translation to English
|
|
332
|
+
|
|
333
|
+
`GST_DEBUG=4 gst-launch-1.0 filesrc location=data/air_traffic_korean_with_english.wav ! decodebin ! audioconvert ! pyml_whispertranscribe device=cuda language=ko translate=yes ! fakesink`
|
|
334
|
+
|
|
335
|
+
#### coquitts
|
|
336
|
+
|
|
337
|
+
`GST_DEBUG=4 gst-launch-1.0 filesrc location=data/air_traffic_korean_with_english.wav ! decodebin ! audioconvert ! pyml_whispertranscribe device=cuda language=ko translate=yes ! pyml_coquitts device=cuda ! audioconvert ! wavenc ! filesink location=output_audio.wav`
|
|
338
|
+
|
|
339
|
+
|
|
340
|
+
#### whisperspeechtts
|
|
341
|
+
|
|
342
|
+
`GST_DEBUG=4 gst-launch-1.0 filesrc location=data/air_traffic_korean_with_english.wav ! decodebin ! audioconvert ! pyml_whispertranscribe device=cuda language=ko translate=yes ! pyml_whisperspeechtts device=cuda ! audioconvert ! wavenc ! filesink location=output_audio.wav`
|
|
343
|
+
|
|
344
|
+
|
|
345
|
+
#### mariantranslate
|
|
346
|
+
|
|
347
|
+
`GST_DEBUG=4 gst-launch-1.0 filesrc location=data/air_traffic_korean_with_english.wav ! decodebin ! audioconvert ! pyml_whispertranscribe device=cuda language=ko translate=yes ! pyml_mariantranslate device=cuda src=en target=fr ! fakesink`
|
|
348
|
+
|
|
349
|
+
Supported src/target languages:
|
|
350
|
+
|
|
351
|
+
https://huggingface.co/models?sort=trending&search=Helsinki
|
|
352
|
+
|
|
353
|
+
|
|
354
|
+
#### whisperlive
|
|
355
|
+
|
|
356
|
+
`GST_DEBUG=4 gst-launch-1.0 filesrc location=data/air_traffic_korean_with_english.wav ! decodebin ! audioconvert ! pyml_whisperlive device=cuda language=ko translate=yes llm-model-name="microsoft/phi-2" ! audioconvert ! wavenc ! filesink location=output_audio.wav`
|
|
357
|
+
|
|
358
|
+
### LLM
|
|
359
|
+
|
|
360
|
+
1. generate HuggingFace token
|
|
361
|
+
|
|
362
|
+
2. `huggingface-cli login`
|
|
363
|
+
and pass in token
|
|
364
|
+
|
|
365
|
+
3. LLM pipeline (in this case, we use phi-2)
|
|
366
|
+
|
|
367
|
+
`GST_DEBUG=4 gst-launch-1.0 filesrc location=data/prompt_for_llm.txt ! pyml_llm device=cuda model-name="microsoft/phi-2" ! fakesink`
|
|
368
|
+
|
|
369
|
+
### stablediffusion
|
|
370
|
+
|
|
371
|
+
`GST_DEBUG=4 gst-launch-1.0 filesrc location=data/prompt_for_stable_diffusion.txt ! pyml_stablediffusion device=cuda ! pngenc ! filesink location=output_image.png`
|
|
372
|
+
|
|
373
|
+
### Caption
|
|
374
|
+
|
|
375
|
+
#### caption + yolo
|
|
376
|
+
|
|
377
|
+
`GST_DEBUG=4 gst-launch-1.0 filesrc location=data/soccer_tracking.mp4 ! decodebin ! videoconvert ! videoscale ! video/x-raw,width=640,height=480 ! pyml_yolo model-name=yolo11m device=cuda:0 track=True ! pyml_caption device=cuda:0 ! textoverlay ! pyml_overlay ! videoconvert ! autovideosink`
|
|
378
|
+
|
|
379
|
+
|
|
380
|
+
#### caption
|
|
381
|
+
|
|
382
|
+
`GST_DEBUG=4 gst-launch-1.0 filesrc location=data/soccer_tracking.mp4 ! decodebin ! videoconvert ! pyml_caption device=cuda:0 downsampled_width=320 downsampled_height=240 prompt="What is the name of the game being played?" ! textoverlay ! autovideosink`
|