gst-python-ml 0.1.0__tar.gz → 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78) hide show
  1. {gst_python_ml-0.1.0/plugins/python/gst_python_ml.egg-info → gst_python_ml-0.3.0}/PKG-INFO +88 -24
  2. {gst_python_ml-0.1.0 → gst_python_ml-0.3.0}/README.md +84 -22
  3. gst_python_ml-0.1.0/plugins/python/gst_aggregator.py → gst_python_ml-0.3.0/plugins/python/aggregator_base.py +50 -68
  4. {gst_python_ml-0.1.0 → gst_python_ml-0.3.0}/plugins/python/analytics_utils.py +31 -11
  5. {gst_python_ml-0.1.0 → gst_python_ml-0.3.0}/plugins/python/caption.py +24 -16
  6. gst_python_ml-0.3.0/plugins/python/classifier.py +62 -0
  7. gst_python_ml-0.3.0/plugins/python/classifier_base.py +119 -0
  8. {gst_python_ml-0.1.0 → gst_python_ml-0.3.0}/plugins/python/coquitts.py +9 -6
  9. gst_python_ml-0.1.0/plugins/python/engine/gst_device_queue_pool.py → gst_python_ml-0.3.0/plugins/python/engine/device_queue_pool.py +8 -12
  10. gst_python_ml-0.1.0/plugins/python/engine/gst_engine_factory.py → gst_python_ml-0.3.0/plugins/python/engine/engine_factory.py +26 -28
  11. gst_python_ml-0.1.0/plugins/python/engine/gst_tflite_engine.py → gst_python_ml-0.3.0/plugins/python/engine/litert_engine.py +9 -13
  12. gst_python_ml-0.1.0/plugins/python/engine/gst_ml_engine.py → gst_python_ml-0.3.0/plugins/python/engine/ml_engine.py +13 -8
  13. gst_python_ml-0.1.0/plugins/python/engine/gst_onnx_engine.py → gst_python_ml-0.3.0/plugins/python/engine/onnx_engine.py +8 -12
  14. gst_python_ml-0.1.0/plugins/python/engine/gst_openvino_engine.py → gst_python_ml-0.3.0/plugins/python/engine/openvino_engine.py +8 -10
  15. gst_python_ml-0.3.0/plugins/python/engine/pytorch_engine.py +321 -0
  16. gst_python_ml-0.3.0/plugins/python/engine/pytorch_yolo_engine.py +71 -0
  17. gst_python_ml-0.1.0/plugins/python/engine/gst_tensorflow_engine.py → gst_python_ml-0.3.0/plugins/python/engine/tensorflow_engine.py +10 -14
  18. gst_python_ml-0.3.0/plugins/python/format_converter.py +153 -0
  19. gst_python_ml-0.3.0/plugins/python/global_logger.py +46 -0
  20. {gst_python_ml-0.1.0 → gst_python_ml-0.3.0}/plugins/python/gst_feature_manager.py +19 -0
  21. {gst_python_ml-0.1.0 → gst_python_ml-0.3.0/plugins/python/gst_python_ml.egg-info}/PKG-INFO +88 -24
  22. gst_python_ml-0.3.0/plugins/python/gst_python_ml.egg-info/SOURCES.txt +68 -0
  23. {gst_python_ml-0.1.0 → gst_python_ml-0.3.0}/plugins/python/gst_python_ml.egg-info/requires.txt +1 -0
  24. gst_python_ml-0.3.0/plugins/python/gst_python_ml.egg-info/top_level.txt +4 -0
  25. {gst_python_ml-0.1.0 → gst_python_ml-0.3.0}/plugins/python/kafkasink.py +40 -29
  26. {gst_python_ml-0.1.0 → gst_python_ml-0.3.0}/plugins/python/llm.py +6 -4
  27. gst_python_ml-0.1.0/plugins/python/gst_llm.py → gst_python_ml-0.3.0/plugins/python/llm_base.py +12 -11
  28. gst_python_ml-0.3.0/plugins/python/log/__init__.py +0 -0
  29. gst_python_ml-0.3.0/plugins/python/log/gst_logger.py +54 -0
  30. gst_python_ml-0.3.0/plugins/python/log/logger.py +59 -0
  31. gst_python_ml-0.3.0/plugins/python/log/logger_factory.py +60 -0
  32. {gst_python_ml-0.1.0 → gst_python_ml-0.3.0}/plugins/python/mariantranslate.py +6 -4
  33. {gst_python_ml-0.1.0 → gst_python_ml-0.3.0}/plugins/python/maskrcnn.py +40 -38
  34. gst_python_ml-0.3.0/plugins/python/metadata.py +186 -0
  35. gst_python_ml-0.3.0/plugins/python/ml_helper.py +17 -0
  36. gst_python_ml-0.3.0/plugins/python/model_engine_helper.py +78 -0
  37. {gst_python_ml-0.1.0 → gst_python_ml-0.3.0}/plugins/python/objectdetector.py +10 -6
  38. gst_python_ml-0.3.0/plugins/python/objectdetector_base.py +235 -0
  39. gst_python_ml-0.3.0/plugins/python/overlay.py +452 -0
  40. {gst_python_ml-0.1.0 → gst_python_ml-0.3.0}/plugins/python/overlay_counter.py +9 -4
  41. gst_python_ml-0.3.0/plugins/python/overlay_helper/__init__.py +0 -0
  42. gst_python_ml-0.3.0/plugins/python/overlay_helper/overlay_cairo.py +119 -0
  43. gst_python_ml-0.3.0/plugins/python/overlay_helper/overlay_opengl.py +280 -0
  44. {gst_python_ml-0.1.0/plugins/python → gst_python_ml-0.3.0/plugins/python/overlay_helper}/overlay_skia.py +14 -9
  45. gst_python_ml-0.3.0/plugins/python/overlay_helper/overlay_utils.py +64 -0
  46. gst_python_ml-0.1.0/plugins/python/overlay_utils.py → gst_python_ml-0.3.0/plugins/python/overlay_helper/overlay_utils_interface.py +15 -145
  47. gst_python_ml-0.3.0/plugins/python/overlay_helper/overlay_vulkan.py +384 -0
  48. {gst_python_ml-0.1.0 → gst_python_ml-0.3.0}/plugins/python/stablediffusion.py +17 -13
  49. gst_python_ml-0.3.0/plugins/python/streamdemux.py +233 -0
  50. gst_python_ml-0.3.0/plugins/python/streammux.py +214 -0
  51. gst_python_ml-0.1.0/plugins/python/gst_transcribe.py → gst_python_ml-0.3.0/plugins/python/transcribe_base.py +14 -13
  52. gst_python_ml-0.1.0/plugins/python/gst_base_transform.py → gst_python_ml-0.3.0/plugins/python/transform_base.py +20 -62
  53. gst_python_ml-0.1.0/plugins/python/gst_translate.py → gst_python_ml-0.3.0/plugins/python/translate_base.py +15 -14
  54. gst_python_ml-0.1.0/plugins/python/gst_tts.py → gst_python_ml-0.3.0/plugins/python/tts_base.py +12 -13
  55. gst_python_ml-0.1.0/plugins/python/gst_video_transform.py → gst_python_ml-0.3.0/plugins/python/video_transform.py +5 -4
  56. {gst_python_ml-0.1.0 → gst_python_ml-0.3.0}/plugins/python/whisperlive.py +26 -18
  57. {gst_python_ml-0.1.0 → gst_python_ml-0.3.0}/plugins/python/whisperspeechtts.py +12 -8
  58. {gst_python_ml-0.1.0 → gst_python_ml-0.3.0}/plugins/python/whispertranscribe.py +10 -8
  59. {gst_python_ml-0.1.0 → gst_python_ml-0.3.0}/plugins/python/yolo.py +85 -67
  60. {gst_python_ml-0.1.0 → gst_python_ml-0.3.0}/requirements.txt +3 -0
  61. {gst_python_ml-0.1.0 → gst_python_ml-0.3.0}/setup.py +1 -1
  62. gst_python_ml-0.3.0/tests/test_pipelines.py +191 -0
  63. gst_python_ml-0.1.0/plugins/python/engine/gst_pytorch_engine.py +0 -376
  64. gst_python_ml-0.1.0/plugins/python/engine/gst_pytorch_yolo_engine.py +0 -74
  65. gst_python_ml-0.1.0/plugins/python/gst_object_detector.py +0 -308
  66. gst_python_ml-0.1.0/plugins/python/gst_python_ml.egg-info/SOURCES.txt +0 -51
  67. gst_python_ml-0.1.0/plugins/python/gst_python_ml.egg-info/top_level.txt +0 -2
  68. gst_python_ml-0.1.0/plugins/python/overlay.py +0 -197
  69. gst_python_ml-0.1.0/plugins/python/streamdemux.py +0 -128
  70. gst_python_ml-0.1.0/plugins/python/streammux.py +0 -143
  71. {gst_python_ml-0.1.0 → gst_python_ml-0.3.0}/COPYING +0 -0
  72. {gst_python_ml-0.1.0 → gst_python_ml-0.3.0}/MANIFEST.in +0 -0
  73. {gst_python_ml-0.1.0 → gst_python_ml-0.3.0}/plugins/python/__init__.py +0 -0
  74. {gst_python_ml-0.1.0 → gst_python_ml-0.3.0}/plugins/python/engine/__init__.py +0 -0
  75. {gst_python_ml-0.1.0 → gst_python_ml-0.3.0}/plugins/python/gst_python_ml.egg-info/dependency_links.txt +0 -0
  76. {gst_python_ml-0.1.0 → gst_python_ml-0.3.0}/plugins/python/utils.py +0 -0
  77. {gst_python_ml-0.1.0 → gst_python_ml-0.3.0}/pyproject.toml +0 -0
  78. {gst_python_ml-0.1.0 → gst_python_ml-0.3.0}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.2
1
+ Metadata-Version: 2.4
2
2
  Name: gst-python-ml
3
- Version: 0.1.0
3
+ Version: 0.3.0
4
4
  Summary: An ML package for GStreamer
5
5
  Home-page: https://github.com/collabora/gst-python-ml
6
6
  Author: Aaron Boxer
@@ -22,6 +22,7 @@ Requires-Dist: huggingface-hub
22
22
  Requires-Dist: lap
23
23
  Requires-Dist: ultralytics
24
24
  Requires-Dist: pycairo
25
+ Requires-Dist: pytest>=7.0
25
26
  Provides-Extra: kafka
26
27
  Requires-Dist: confluent-kafka; extra == "kafka"
27
28
  Provides-Extra: captioning
@@ -47,6 +48,7 @@ Dynamic: description
47
48
  Dynamic: description-content-type
48
49
  Dynamic: home-page
49
50
  Dynamic: license
51
+ Dynamic: license-file
50
52
  Dynamic: provides-extra
51
53
  Dynamic: requires-dist
52
54
  Dynamic: requires-python
@@ -209,9 +211,24 @@ Run `gst-inspect-1.0 python` to see all of the pyml elements listed.
209
211
 
210
212
  # Building PyPI Package
211
213
 
212
- 1. `pip install setuptools wheel twine`
213
- 2. `python setup.py sdist bdist_wheel`
214
- 3. ls dist/
214
+ ## Setup
215
+ 1. Generate an API token on PyPI and add it to `.pypirc`:
216
+
217
+ ```
218
+ [pypi]
219
+ username = __token__
220
+ password = FOOBAR
221
+ ```
222
+
223
+ 2. `pip install build setuptools wheel twine`
224
+
225
+ ## Build
226
+
227
+ `python -m build`
228
+
229
+ ## Upload
230
+
231
+ `twine upload dist/*`
215
232
 
216
233
 
217
234
  ## Using GStreamer Python ML Elements
@@ -293,58 +310,105 @@ Note: make sure to set the following in `.bashrc` file :
293
310
 
294
311
  `GST_DEBUG=4 gst-launch-1.0 filesrc location=data/soccer_single_camera.mp4 ! decodebin ! videorate ! video/x-raw,framerate=30/1 ! videoconvert ! pyml_birdseye ! videoconvert ! openh264enc ! h264parse ! matroskamux ! filesink location=output.mkv`
295
312
 
313
+ ### Classification
314
+
315
+ ```
316
+ GST_DEBUG=4 gst-launch-1.0 filesrc location=data/people.mp4 ! decodebin ! videoconvert ! videoscale ! video/x-raw,width=640,height=480 ! pyml_classifier model-name=resnet18 device=cuda ! videoconvert ! autovideosink
317
+ ```
318
+
319
+
296
320
  ### Object Detection
297
321
 
298
- Possible model names:
299
- `fasterrcnn_resnet50_fpn`
300
- `retinanet_resnet50_fpn`
322
+ #### TorchVision
301
323
 
302
- #### fasterrcnn/kafka
324
+ `pyml_objectdetector` supports all TorchVision object detection models.
325
+ Simply choose a suitable model name and set it on the `model-name` property.
326
+ A few possible model names:
303
327
 
304
- `GST_DEBUG=4 gst-launch-1.0 multifilesrc location=data/000015.jpg ! jpegdec ! videoconvert ! videoscale ! pyml_objectdetector model-name=fasterrcnn_resnet50_fpn device=cuda batch-size=4 ! pyml_kafkasink schema-file=data/pyml_object_detector.json broker=kafka:9092 topic=test-kafkasink-topic 2>&1 | grep pyml_kafkasink`
328
+ ```
329
+ fasterrcnn_resnet50_fpn
330
+ ssdlite320_mobilenet_v3_large
331
+ ```
305
332
 
306
- #### maskrcnn
333
+ ##### fasterrcnn
334
+
335
+ `GST_DEBUG=4 gst-launch-1.0 filesrc location=data/people.mp4 ! decodebin ! videoconvert ! videoscale ! video/x-raw,width=640,height=480 ! pyml_objectdetector model-name=fasterrcnn_resnet50_fpn device=cuda batch-size=4 ! videoconvert ! pyml_overlay ! videoconvert ! autovideosink`
336
+
337
+ ##### fasterrcnn/kafka
338
+
339
+ a) run pipeline from host
340
+
341
+ ```
342
+ GST_DEBUG=4 gst-launch-1.0 filesrc location=data/people.mp4 ! decodebin ! videoconvert ! videoscale ! video/x-raw,width=640,height=480 ! pyml_objectdetector model-name=fasterrcnn_resnet50_fpn device=cuda batch-size=4 ! pyml_kafkasink schema-file=data/pyml_object_detector.json broker=localhost:29092 topic=test-kafkasink-topic
343
+ ```
344
+
345
+ b) run pipeline from docker
346
+
347
+ ```
348
+ GST_DEBUG=4 gst-launch-1.0 filesrc location=data/people.mp4 ! decodebin ! videoconvert ! videoscale ! video/x-raw,width=640,height=480 ! pyml_objectdetector model-name=fasterrcnn_resnet50_fpn device=cuda batch-size=4 ! pyml_kafkasink schema-file=data/pyml_object_detector.json broker=kafka:9092 topic=test-kafkasink-topic
349
+ ```
307
350
 
308
- `GST_DEBUG=4 gst-launch-1.0 filesrc location=data/people.mp4 ! decodebin ! videoconvert ! videoscale ! pyml_maskrcnn device=cuda batch-size=4 model-name=maskrcnn_resnet50_fpn ! videoconvert ! objectdetectionoverlay labels-color=0xFFFF0000 object-detection-outline-color=0xFFFF0000 ! autovideosink`
309
351
 
352
+ #### maskrcnn
353
+
354
+ ```
355
+ GST_DEBUG=4 gst-launch-1.0 filesrc location=data/people.mp4 ! decodebin ! videoconvert ! videoscale ! pyml_maskrcnn device=cuda batch-size=4 model-name=maskrcnn_resnet50_fpn ! videoconvert ! pyml_overlay ! videoconvert ! autovideosink
356
+ ```
310
357
 
311
358
  #### yolo with tracking
312
359
 
313
- `gst-launch-1.0 filesrc location=data/soccer_tracking.mp4 ! decodebin ! videoconvert ! videoscale ! video/x-raw,width=640,height=480 ! pyml_yolo model-name=yolo11m device=cuda:0 track=True ! videoconvert ! pyml_overlay labels-color=0xFFFF0000 object-detection-outline-color=0xFFFF0000 ! autovideosink`
360
+ ```
361
+ GST_DEBUG=4 gst-launch-1.0 filesrc location=data/soccer_tracking.mp4 ! decodebin ! videoconvertscale ! video/x-raw,width=640,height=480 ! pyml_yolo model-name=yolo11m device=cuda:0 track=True ! pyml_overlay ! videoconvert ! autovideosink
362
+ ```
314
363
 
315
- #### yolo with overlay
364
+ ```
365
+ GST_DEBUG=4 gst-launch-1.0 filesrc location=data/soccer_tracking.mp4 ! decodebin ! videoconvertscale ! video/x-raw,width=640,height=480,format=RGB ! pyml_streammux name=mux filesrc location=data/soccer_tracking.mp4 ! decodebin ! videoconvertscale ! video/x-raw,width=640,height=480,format=RGB ! mux. mux. ! pyml_yolo model-name=yolo11m device=cuda:0 track=True ! pyml_streamdemux name=demux demux. ! queue ! videoconvert ! pyml_overlay ! videoconvert ! autovideosink sync=false demux. ! queue ! videoconvert ! pyml_overlay ! videoconvert ! autovideosink sync=false
316
366
 
317
- `gst-launch-1.0 filesrc location=data/soccer_tracking.mp4 ! decodebin ! videoconvert ! videoscale ! video/x-raw,width=640,height=480 ! pyml_yolo model-name=yolo11m device=cuda:0 track=True ! pyml_overlay ! videoconvert ! autovideosink`
367
+ ```
318
368
 
369
+ #### yolo with overlay
319
370
 
320
- ### streammux pipeline
371
+ ```
372
+ GST_DEBUG=4 gst-launch-1.0 filesrc location=data/soccer_tracking.mp4 ! decodebin ! videoconvert ! videoscale ! video/x-raw,width=640,height=480 ! pyml_yolo model-name=yolo11m device=cuda:0 track=True ! pyml_overlay ! videoconvert ! autovideosink
373
+ ```
321
374
 
322
- `GST_DEBUG=4 gst-launch-1.0 pyml_streammux name=mux ! videoconvert ! fakesink videotestsrc ! mux. videotestsrc pattern=ball ! mux. videotestsrc pattern=snow ! mux.`
375
+ ### streammux/streamdemux pipeline
323
376
 
377
+ ```
378
+ GST_DEBUG=4 gst-launch-1.0 videotestsrc pattern=ball ! video/x-raw, width=320, height=240 ! queue ! pyml_streammux name=mux videotestsrc pattern=smpte ! video/x-raw, width=320, height=240 ! queue ! mux.sink_1 videotestsrc pattern=smpte ! video/x-raw, width=320, height=240 ! queue ! mux.sink_2 mux.src ! queue ! pyml_streamdemux name=demux demux.src_0 ! queue ! glimagesink demux.src_1 ! queue ! glimagesink demux.src_2 ! queue ! glimagesink
379
+ ```
324
380
 
325
381
  ### Transcription
326
382
 
327
383
  #### transcription with initial prompt set
328
384
 
329
- `GST_DEBUG=4 gst-launch-1.0 filesrc location=data/air_traffic_korean_with_english.wav ! decodebin ! audioconvert ! pyml_whispertranscribe device=cuda language=ko initial_prompt = "Air Traffic Control은, radar systems를, weather conditions에, flight paths를, communication은, unexpected weather conditions가, continuous training을, dedication과, professionalism" ! fakesink`
385
+ ```
386
+ GST_DEBUG=4 gst-launch-1.0 filesrc location=data/air_traffic_korean_with_english.wav ! decodebin ! audioconvert ! pyml_whispertranscribe device=cuda language=ko initial_prompt = "Air Traffic Control은, radar systems를, weather conditions에, flight paths를, communication은, unexpected weather conditions가, continuous training을, dedication과, professionalism" ! fakesink
387
+ ```
330
388
 
331
389
  #### translation to English
332
390
 
333
- `GST_DEBUG=4 gst-launch-1.0 filesrc location=data/air_traffic_korean_with_english.wav ! decodebin ! audioconvert ! pyml_whispertranscribe device=cuda language=ko translate=yes ! fakesink`
391
+ ```
392
+ GST_DEBUG=4 gst-launch-1.0 filesrc location=data/air_traffic_korean_with_english.wav ! decodebin ! audioconvert ! pyml_whispertranscribe device=cuda language=ko translate=yes ! fakesink
393
+ ```
334
394
 
335
395
  #### coquitts
336
396
 
337
- `GST_DEBUG=4 gst-launch-1.0 filesrc location=data/air_traffic_korean_with_english.wav ! decodebin ! audioconvert ! pyml_whispertranscribe device=cuda language=ko translate=yes ! pyml_coquitts device=cuda ! audioconvert ! wavenc ! filesink location=output_audio.wav`
338
-
397
+ ```
398
+ GST_DEBUG=4 gst-launch-1.0 filesrc location=data/air_traffic_korean_with_english.wav ! decodebin ! audioconvert ! pyml_whispertranscribe device=cuda language=ko translate=yes ! pyml_coquitts device=cuda ! audioconvert ! wavenc ! filesink location=output_audio.wav
399
+ ```
339
400
 
340
401
  #### whisperspeechtts
341
402
 
342
- `GST_DEBUG=4 gst-launch-1.0 filesrc location=data/air_traffic_korean_with_english.wav ! decodebin ! audioconvert ! pyml_whispertranscribe device=cuda language=ko translate=yes ! pyml_whisperspeechtts device=cuda ! audioconvert ! wavenc ! filesink location=output_audio.wav`
343
-
403
+ ```
404
+ GST_DEBUG=4 gst-launch-1.0 filesrc location=data/air_traffic_korean_with_english.wav ! decodebin ! audioconvert ! pyml_whispertranscribe device=cuda language=ko translate=yes ! pyml_whisperspeechtts device=cuda ! audioconvert ! wavenc ! filesink location=output_audio.wav
405
+ ```
344
406
 
345
407
  #### mariantranslate
346
408
 
347
- `GST_DEBUG=4 gst-launch-1.0 filesrc location=data/air_traffic_korean_with_english.wav ! decodebin ! audioconvert ! pyml_whispertranscribe device=cuda language=ko translate=yes ! pyml_mariantranslate device=cuda src=en target=fr ! fakesink`
409
+ ```
410
+ GST_DEBUG=4 gst-launch-1.0 filesrc location=data/air_traffic_korean_with_english.wav ! decodebin ! audioconvert ! pyml_whispertranscribe device=cuda language=ko translate=yes ! pyml_mariantranslate device=cuda src=en target=fr ! fakesink
411
+ ```
348
412
 
349
413
  Supported src/target languages:
350
414
 
@@ -155,9 +155,24 @@ Run `gst-inspect-1.0 python` to see all of the pyml elements listed.
155
155
 
156
156
  # Building PyPI Package
157
157
 
158
- 1. `pip install setuptools wheel twine`
159
- 2. `python setup.py sdist bdist_wheel`
160
- 3. ls dist/
158
+ ## Setup
159
+ 1. Generate an API token on PyPI and add it to `.pypirc`:
160
+
161
+ ```
162
+ [pypi]
163
+ username = __token__
164
+ password = FOOBAR
165
+ ```
166
+
167
+ 2. `pip install build setuptools wheel twine`
168
+
169
+ ## Build
170
+
171
+ `python -m build`
172
+
173
+ ## Upload
174
+
175
+ `twine upload dist/*`
161
176
 
162
177
 
163
178
  ## Using GStreamer Python ML Elements
@@ -239,58 +254,105 @@ Note: make sure to set the following in `.bashrc` file :
239
254
 
240
255
  `GST_DEBUG=4 gst-launch-1.0 filesrc location=data/soccer_single_camera.mp4 ! decodebin ! videorate ! video/x-raw,framerate=30/1 ! videoconvert ! pyml_birdseye ! videoconvert ! openh264enc ! h264parse ! matroskamux ! filesink location=output.mkv`
241
256
 
257
+ ### Classification
258
+
259
+ ```
260
+ GST_DEBUG=4 gst-launch-1.0 filesrc location=data/people.mp4 ! decodebin ! videoconvert ! videoscale ! video/x-raw,width=640,height=480 ! pyml_classifier model-name=resnet18 device=cuda ! videoconvert ! autovideosink
261
+ ```
262
+
263
+
242
264
  ### Object Detection
243
265
 
244
- Possible model names:
245
- `fasterrcnn_resnet50_fpn`
246
- `retinanet_resnet50_fpn`
266
+ #### TorchVision
247
267
 
248
- #### fasterrcnn/kafka
268
+ `pyml_objectdetector` supports all TorchVision object detection models.
269
+ Simply choose a suitable model name and set it on the `model-name` property.
270
+ A few possible model names:
249
271
 
250
- `GST_DEBUG=4 gst-launch-1.0 multifilesrc location=data/000015.jpg ! jpegdec ! videoconvert ! videoscale ! pyml_objectdetector model-name=fasterrcnn_resnet50_fpn device=cuda batch-size=4 ! pyml_kafkasink schema-file=data/pyml_object_detector.json broker=kafka:9092 topic=test-kafkasink-topic 2>&1 | grep pyml_kafkasink`
272
+ ```
273
+ fasterrcnn_resnet50_fpn
274
+ ssdlite320_mobilenet_v3_large
275
+ ```
251
276
 
252
- #### maskrcnn
277
+ ##### fasterrcnn
278
+
279
+ `GST_DEBUG=4 gst-launch-1.0 filesrc location=data/people.mp4 ! decodebin ! videoconvert ! videoscale ! video/x-raw,width=640,height=480 ! pyml_objectdetector model-name=fasterrcnn_resnet50_fpn device=cuda batch-size=4 ! videoconvert ! pyml_overlay ! videoconvert ! autovideosink`
280
+
281
+ ##### fasterrcnn/kafka
282
+
283
+ a) run pipeline from host
284
+
285
+ ```
286
+ GST_DEBUG=4 gst-launch-1.0 filesrc location=data/people.mp4 ! decodebin ! videoconvert ! videoscale ! video/x-raw,width=640,height=480 ! pyml_objectdetector model-name=fasterrcnn_resnet50_fpn device=cuda batch-size=4 ! pyml_kafkasink schema-file=data/pyml_object_detector.json broker=localhost:29092 topic=test-kafkasink-topic
287
+ ```
288
+
289
+ b) run pipeline from docker
290
+
291
+ ```
292
+ GST_DEBUG=4 gst-launch-1.0 filesrc location=data/people.mp4 ! decodebin ! videoconvert ! videoscale ! video/x-raw,width=640,height=480 ! pyml_objectdetector model-name=fasterrcnn_resnet50_fpn device=cuda batch-size=4 ! pyml_kafkasink schema-file=data/pyml_object_detector.json broker=kafka:9092 topic=test-kafkasink-topic
293
+ ```
253
294
 
254
- `GST_DEBUG=4 gst-launch-1.0 filesrc location=data/people.mp4 ! decodebin ! videoconvert ! videoscale ! pyml_maskrcnn device=cuda batch-size=4 model-name=maskrcnn_resnet50_fpn ! videoconvert ! objectdetectionoverlay labels-color=0xFFFF0000 object-detection-outline-color=0xFFFF0000 ! autovideosink`
255
295
 
296
+ #### maskrcnn
297
+
298
+ ```
299
+ GST_DEBUG=4 gst-launch-1.0 filesrc location=data/people.mp4 ! decodebin ! videoconvert ! videoscale ! pyml_maskrcnn device=cuda batch-size=4 model-name=maskrcnn_resnet50_fpn ! videoconvert ! pyml_overlay ! videoconvert ! autovideosink
300
+ ```
256
301
 
257
302
  #### yolo with tracking
258
303
 
259
- `gst-launch-1.0 filesrc location=data/soccer_tracking.mp4 ! decodebin ! videoconvert ! videoscale ! video/x-raw,width=640,height=480 ! pyml_yolo model-name=yolo11m device=cuda:0 track=True ! videoconvert ! pyml_overlay labels-color=0xFFFF0000 object-detection-outline-color=0xFFFF0000 ! autovideosink`
304
+ ```
305
+ GST_DEBUG=4 gst-launch-1.0 filesrc location=data/soccer_tracking.mp4 ! decodebin ! videoconvertscale ! video/x-raw,width=640,height=480 ! pyml_yolo model-name=yolo11m device=cuda:0 track=True ! pyml_overlay ! videoconvert ! autovideosink
306
+ ```
260
307
 
261
- #### yolo with overlay
308
+ ```
309
+ GST_DEBUG=4 gst-launch-1.0 filesrc location=data/soccer_tracking.mp4 ! decodebin ! videoconvertscale ! video/x-raw,width=640,height=480,format=RGB ! pyml_streammux name=mux filesrc location=data/soccer_tracking.mp4 ! decodebin ! videoconvertscale ! video/x-raw,width=640,height=480,format=RGB ! mux. mux. ! pyml_yolo model-name=yolo11m device=cuda:0 track=True ! pyml_streamdemux name=demux demux. ! queue ! videoconvert ! pyml_overlay ! videoconvert ! autovideosink sync=false demux. ! queue ! videoconvert ! pyml_overlay ! videoconvert ! autovideosink sync=false
262
310
 
263
- `gst-launch-1.0 filesrc location=data/soccer_tracking.mp4 ! decodebin ! videoconvert ! videoscale ! video/x-raw,width=640,height=480 ! pyml_yolo model-name=yolo11m device=cuda:0 track=True ! pyml_overlay ! videoconvert ! autovideosink`
311
+ ```
264
312
 
313
+ #### yolo with overlay
265
314
 
266
- ### streammux pipeline
315
+ ```
316
+ GST_DEBUG=4 gst-launch-1.0 filesrc location=data/soccer_tracking.mp4 ! decodebin ! videoconvert ! videoscale ! video/x-raw,width=640,height=480 ! pyml_yolo model-name=yolo11m device=cuda:0 track=True ! pyml_overlay ! videoconvert ! autovideosink
317
+ ```
267
318
 
268
- `GST_DEBUG=4 gst-launch-1.0 pyml_streammux name=mux ! videoconvert ! fakesink videotestsrc ! mux. videotestsrc pattern=ball ! mux. videotestsrc pattern=snow ! mux.`
319
+ ### streammux/streamdemux pipeline
269
320
 
321
+ ```
322
+ GST_DEBUG=4 gst-launch-1.0 videotestsrc pattern=ball ! video/x-raw, width=320, height=240 ! queue ! pyml_streammux name=mux videotestsrc pattern=smpte ! video/x-raw, width=320, height=240 ! queue ! mux.sink_1 videotestsrc pattern=smpte ! video/x-raw, width=320, height=240 ! queue ! mux.sink_2 mux.src ! queue ! pyml_streamdemux name=demux demux.src_0 ! queue ! glimagesink demux.src_1 ! queue ! glimagesink demux.src_2 ! queue ! glimagesink
323
+ ```
270
324
 
271
325
  ### Transcription
272
326
 
273
327
  #### transcription with initial prompt set
274
328
 
275
- `GST_DEBUG=4 gst-launch-1.0 filesrc location=data/air_traffic_korean_with_english.wav ! decodebin ! audioconvert ! pyml_whispertranscribe device=cuda language=ko initial_prompt = "Air Traffic Control은, radar systems를, weather conditions에, flight paths를, communication은, unexpected weather conditions가, continuous training을, dedication과, professionalism" ! fakesink`
329
+ ```
330
+ GST_DEBUG=4 gst-launch-1.0 filesrc location=data/air_traffic_korean_with_english.wav ! decodebin ! audioconvert ! pyml_whispertranscribe device=cuda language=ko initial_prompt = "Air Traffic Control은, radar systems를, weather conditions에, flight paths를, communication은, unexpected weather conditions가, continuous training을, dedication과, professionalism" ! fakesink
331
+ ```
276
332
 
277
333
  #### translation to English
278
334
 
279
- `GST_DEBUG=4 gst-launch-1.0 filesrc location=data/air_traffic_korean_with_english.wav ! decodebin ! audioconvert ! pyml_whispertranscribe device=cuda language=ko translate=yes ! fakesink`
335
+ ```
336
+ GST_DEBUG=4 gst-launch-1.0 filesrc location=data/air_traffic_korean_with_english.wav ! decodebin ! audioconvert ! pyml_whispertranscribe device=cuda language=ko translate=yes ! fakesink
337
+ ```
280
338
 
281
339
  #### coquitts
282
340
 
283
- `GST_DEBUG=4 gst-launch-1.0 filesrc location=data/air_traffic_korean_with_english.wav ! decodebin ! audioconvert ! pyml_whispertranscribe device=cuda language=ko translate=yes ! pyml_coquitts device=cuda ! audioconvert ! wavenc ! filesink location=output_audio.wav`
284
-
341
+ ```
342
+ GST_DEBUG=4 gst-launch-1.0 filesrc location=data/air_traffic_korean_with_english.wav ! decodebin ! audioconvert ! pyml_whispertranscribe device=cuda language=ko translate=yes ! pyml_coquitts device=cuda ! audioconvert ! wavenc ! filesink location=output_audio.wav
343
+ ```
285
344
 
286
345
  #### whisperspeechtts
287
346
 
288
- `GST_DEBUG=4 gst-launch-1.0 filesrc location=data/air_traffic_korean_with_english.wav ! decodebin ! audioconvert ! pyml_whispertranscribe device=cuda language=ko translate=yes ! pyml_whisperspeechtts device=cuda ! audioconvert ! wavenc ! filesink location=output_audio.wav`
289
-
347
+ ```
348
+ GST_DEBUG=4 gst-launch-1.0 filesrc location=data/air_traffic_korean_with_english.wav ! decodebin ! audioconvert ! pyml_whispertranscribe device=cuda language=ko translate=yes ! pyml_whisperspeechtts device=cuda ! audioconvert ! wavenc ! filesink location=output_audio.wav
349
+ ```
290
350
 
291
351
  #### mariantranslate
292
352
 
293
- `GST_DEBUG=4 gst-launch-1.0 filesrc location=data/air_traffic_korean_with_english.wav ! decodebin ! audioconvert ! pyml_whispertranscribe device=cuda language=ko translate=yes ! pyml_mariantranslate device=cuda src=en target=fr ! fakesink`
353
+ ```
354
+ GST_DEBUG=4 gst-launch-1.0 filesrc location=data/air_traffic_korean_with_english.wav ! decodebin ! audioconvert ! pyml_whispertranscribe device=cuda language=ko translate=yes ! pyml_mariantranslate device=cuda src=en target=fr ! fakesink
355
+ ```
294
356
 
295
357
  Supported src/target languages:
296
358
 
@@ -1,4 +1,4 @@
1
- # GstAggregator
1
+ # AggregatorBase
2
2
  # Copyright (C) 2024-2025 Collabora Ltd.
3
3
  #
4
4
  # This library is free software; you can redistribute it and/or
@@ -18,15 +18,18 @@
18
18
 
19
19
  from abc import abstractmethod
20
20
  import gi
21
- from engine.gst_engine_factory import GstEngineFactory
22
21
 
23
22
  gi.require_version("Gst", "1.0")
24
23
  gi.require_version("GstBase", "1.0")
25
24
  gi.require_version("GLib", "2.0")
26
25
  from gi.repository import Gst, GObject, GstBase # noqa: E402
27
26
 
27
+ from engine.engine_factory import EngineFactory
28
+ from log.logger_factory import LoggerFactory
29
+ from model_engine_helper import ModelEngineHelper
28
30
 
29
- class GstAggregator(GstBase.Aggregator):
31
+
32
+ class AggregatorBase(GstBase.Aggregator):
30
33
  """
31
34
  Base class for GStreamer aggregator elements that perform inference
32
35
  with a machine learning model. This class manages shared properties
@@ -34,7 +37,7 @@ class GstAggregator(GstBase.Aggregator):
34
37
  """
35
38
 
36
39
  __gstmetadata__ = (
37
- "GstAggregator",
40
+ "AggregatorBase",
38
41
  "Aggregator",
39
42
  "Generic machine learning model aggregator element",
40
43
  "Aaron Boxer <aaron.boxer@collabora.com>",
@@ -74,7 +77,7 @@ class GstAggregator(GstBase.Aggregator):
74
77
  blurb="Name of the pre-trained model or local model path",
75
78
  flags=GObject.ParamFlags.READWRITE,
76
79
  )
77
- ml_engine = GObject.Property(
80
+ engine_name = GObject.Property(
78
81
  type=str,
79
82
  default=None,
80
83
  nick="ML Engine",
@@ -84,9 +87,9 @@ class GstAggregator(GstBase.Aggregator):
84
87
 
85
88
  device_queue_id = GObject.Property(
86
89
  type=int,
87
- default=0, # Default to queue ID 0
90
+ default=0,
88
91
  minimum=0,
89
- maximum=32, # You can adjust the maximum depending on the size of your pool
92
+ maximum=32,
90
93
  nick="Device Queue ID",
91
94
  blurb="ID of the DeviceQueue from the pool to use",
92
95
  flags=GObject.ParamFlags.READWRITE,
@@ -94,8 +97,9 @@ class GstAggregator(GstBase.Aggregator):
94
97
 
95
98
  def __init__(self):
96
99
  super().__init__()
97
- self.ml_engine = GstEngineFactory.PYTORCH_ENGINE
98
- self.engine = None
100
+ self.logger = LoggerFactory.get(LoggerFactory.LOGGER_TYPE_GST)
101
+ self.engine_helper = ModelEngineHelper(self.logger)
102
+ self.engine_name = self.engine_helper.engine_name
99
103
  self.kwargs = {}
100
104
  self.segment_pushed = False
101
105
 
@@ -107,12 +111,9 @@ class GstAggregator(GstBase.Aggregator):
107
111
  elif prop.name == "model-name":
108
112
  return self.model_name
109
113
  elif prop.name == "device":
110
- if self.engine:
111
- return self.engine.get_device()
112
- else:
113
- return None
114
- elif prop.name == "ml-engine":
115
- return self.ml_engine
114
+ return self.device # Return from AggregatorBase, not from helper
115
+ elif prop.name == "engine-name":
116
+ return self.engine_name
116
117
  elif prop.name == "device-queue-id":
117
118
  return self.device_queue_id
118
119
  else:
@@ -121,86 +122,72 @@ class GstAggregator(GstBase.Aggregator):
121
122
  def do_set_property(self, prop: GObject.ParamSpec, value):
122
123
  if prop.name == "batch-size":
123
124
  self.batch_size = value
124
- if self.engine:
125
- self.engine.batch_size = value
125
+ if self.engine_helper.engine:
126
+ self.engine_helper.engine.batch_size = value
126
127
  elif prop.name == "frame-stride":
127
128
  self.frame_stride = value
128
- if self.engine:
129
- self.engine.frame_stride = value
129
+ if self.engine_helper.engine:
130
+ self.engine_helper.engine.frame_stride = value
130
131
  elif prop.name == "model-name":
131
132
  self.model_name = value
132
- self.do_load_model()
133
+ self.engine_helper.load_model(value)
133
134
  elif prop.name == "device":
134
135
  self.device = value
135
- # Only set the device if the engine is initialized
136
- if self.engine:
137
- self.engine.set_device(value)
138
- self.do_load_model()
139
- elif prop.name == "ml-engine":
140
- if self.device:
141
- self.ml_engine = GstEngineFactory.create_engine(value, self.device)
142
- self.initialize_engine()
143
- self.do_load_model()
136
+ self.engine_helper.set_device(value) # Update device in helper
137
+ self.engine_helper.initialize_engine(self.engine_name)
138
+ self.engine_helper.load_model(self.model_name)
139
+ elif prop.name == "engine-name":
140
+ self.engine_name = value
141
+ self.engine_helper.initialize_engine(value)
142
+ self.engine_helper.load_model(self.model_name)
144
143
  elif prop.name == "device-queue-id":
145
144
  self.device_queue_id = value
146
- if self.engine:
147
- self.engine.device_queue_id = value
145
+ if self.engine_helper.engine:
146
+ self.engine_helper.engine.device_queue_id = value
148
147
  else:
149
148
  raise AttributeError(f"Unknown property {prop.name}")
150
149
 
151
150
  def _initialize_engine_if_needed(self):
152
- """Initialize the engine if it hasn't been initialized yet."""
153
- if not self.engine and self.ml_engine:
154
- self.initialize_engine()
151
+ if not self.engine_helper.engine and self.engine_name:
152
+ self.engine_helper.initialize_engine(self.engine_name)
155
153
 
156
154
  def initialize_engine(self):
157
- """Initialize the machine learning engine based on the ml_engine property."""
158
- if self.ml_engine is not None:
159
- self.engine = GstEngineFactory.create_engine(self.ml_engine, self.device)
160
- self.engine.batch_size = self.batch_size
161
- self.engine.frame_stride = self.frame_stride
155
+ if self.engine_name is not None:
156
+ self.engine_helper.initialize_engine(self.engine_name)
157
+ self.engine_helper.engine.batch_size = self.batch_size
158
+ self.engine_helper.engine.frame_stride = self.frame_stride
162
159
  if self.device_queue_id:
163
- self.engine.device_queue_id = self.device_queue_id
160
+ self.engine_helper.engine.device_queue_id = self.device_queue_id
164
161
  else:
165
- Gst.error(f"Unsupported ML engine: {self.ml_engine}")
166
- return
162
+ self.logger.error(f"Unsupported ML engine: {self.engine_name}")
167
163
 
168
164
  def do_load_model(self):
169
- """Loads the model using the current engine."""
170
- if self.engine and self.model_name:
171
- self.engine.load_model(self.model_name, **self.kwargs)
165
+ if self.engine_helper.engine and self.model_name:
166
+ self.engine_helper.load_model(self.model_name)
172
167
  else:
173
- Gst.warning("Engine is not present, unable to load the model.")
168
+ self.logger.warning("Engine is not present, unable to load the model.")
174
169
 
175
170
  def get_model(self):
176
- """Gets the model from the engine."""
177
171
  self._initialize_engine_if_needed()
178
- """Gets the model from the engine."""
179
- if self.engine:
180
- return self.engine.get_model()
172
+ if self.engine_helper.engine:
173
+ return self.engine_helper.get_model()
181
174
  else:
182
- Gst.warning("Engine is not present, unable to get the model.")
175
+ self.logger.warning("Engine is not present, unable to get the model.")
183
176
  return None
184
177
 
185
178
  def set_model(self, model):
186
- """Gets the model from the engine."""
187
179
  self._initialize_engine_if_needed()
188
- """Sets the model in the engine."""
189
- if self.engine:
190
- self.engine.set_model(model)
180
+ if self.engine_helper.engine:
181
+ self.engine_helper.set_model(model)
191
182
  else:
192
- Gst.warning("Engine is not present, unable to set the model.")
183
+ self.logger.warning("Engine is not present, unable to set the model.")
193
184
 
194
185
  def get_tokenizer(self):
195
- """Gets the model from the engine."""
196
186
  self._initialize_engine_if_needed()
197
- if self.get_model() is None:
198
- self.do_load_model()
199
- """Gets the model from the engine."""
200
- if self.engine:
201
- return self.engine.tokenizer
187
+ if self.engine_helper.engine:
188
+ return self.engine_helper.get_tokenizer()
202
189
  else:
203
- Gst.warning("Engine is not present, unable to get the tokenizer.")
190
+ self.logger.warning("Engine is not present, unable to get the tokenizer.")
204
191
  return None
205
192
 
206
193
  def push_segment_if_needed(self):
@@ -215,10 +202,6 @@ class GstAggregator(GstBase.Aggregator):
215
202
  self.segment_pushed = True
216
203
 
217
204
  def do_aggregate(self, timeout):
218
- """
219
- Aggregates the buffers from the sink pads,
220
- processes with the model, and pushes the result downstream.
221
- """
222
205
  self.push_segment_if_needed()
223
206
  self.process_all_sink_pads()
224
207
  return Gst.FlowReturn.OK
@@ -232,5 +215,4 @@ class GstAggregator(GstBase.Aggregator):
232
215
 
233
216
  @abstractmethod
234
217
  def do_process(self, buf):
235
- """Process a buffer using the loaded model."""
236
218
  pass
@@ -25,27 +25,36 @@ try:
25
25
  gi.require_version("GLib", "2.0")
26
26
  gi.require_version("GstAnalytics", "1.0")
27
27
  from gi.repository import Gst, GstAnalytics, GLib # noqa: E402
28
+
29
+ from log.logger_factory import LoggerFactory
28
30
  except ImportError:
29
31
  ANALYTICS_UTILS_AVAILABLE = False
30
32
 
31
33
 
32
34
  class AnalyticsUtils:
35
+ def __init__(self):
36
+ super().__init__()
37
+ self.logger = LoggerFactory.get(LoggerFactory.LOGGER_TYPE_GST)
38
+
33
39
  def extract_analytics_metadata(self, buffer):
34
40
  metadata = []
35
41
  meta = GstAnalytics.buffer_get_analytics_relation_meta(buffer)
36
42
  if not meta:
43
+ self.logger.info("No analytics relation metadata found on buffer")
37
44
  return metadata
38
45
 
39
46
  try:
40
47
  count = GstAnalytics.relation_get_length(meta)
48
+ self.logger.info(f"Found {count} analytics relations in metadata")
41
49
  for index in range(count):
42
50
  ret, od_mtd = meta.get_od_mtd(index)
43
51
  if not ret or od_mtd is None:
52
+ # self.logger.warning(f"Failed to get od_mtd at index {index}")
44
53
  continue
45
-
46
54
  label_quark = od_mtd.get_obj_type()
47
- label = GLib.quark_to_string(label_quark)
48
- track_id = self.extract_id_from_label(label)
55
+ full_label = GLib.quark_to_string(label_quark)
56
+ self.logger.debug(f"Index {index}: quark={full_label}")
57
+ track_id, label = self.extract_id_from_label(full_label)
49
58
  location = od_mtd.get_location()
50
59
  presence, x, y, w, h, loc_conf_lvl = location
51
60
  if presence:
@@ -57,16 +66,27 @@ class AnalyticsUtils:
57
66
  "box": {"x1": x, "y1": y, "x2": x + w, "y2": y + h},
58
67
  }
59
68
  )
69
+ self.logger.debug(f"Added metadata entry: {metadata[-1]}")
60
70
  except Exception as e:
61
- Gst.error(f"Error while extracting analytics metadata: {e}")
71
+ self.logger.error(f"Error while extracting analytics metadata: {e}")
62
72
  return metadata
63
73
 
64
- def extract_id_from_label(self, label):
65
- """Extracts the numeric ID from a label formatted as 'id_<number>'."""
66
- match = re.match(r"id_(\d+)", label)
74
+ def extract_id_from_label(self, full_label):
75
+ match = re.match(r"stream_\d+_id_(\d+)", full_label)
67
76
  if match:
68
77
  track_id = int(match.group(1))
69
- return track_id
70
- else:
71
- print("No ID found in label") # Optional debug message for unmatched format
72
- return None # Return None if the ID format is not found
78
+ label = f"id_{track_id}"
79
+ self.logger.debug(
80
+ f"Extracted track_id {track_id} and label '{label}' from '{full_label}'"
81
+ )
82
+ return track_id, label
83
+ match = re.match(
84
+ r"stream_\d+_(.+)", full_label
85
+ ) # Match class name after stream_<idx>_
86
+ if match:
87
+ class_name = match.group(1)
88
+ label = class_name # Use class name directly
89
+ self.logger.debug(f"Extracted class label '{label}' from '{full_label}'")
90
+ return None, label
91
+ self.logger.info(f"No recognizable format in label '{full_label}', using as-is")
92
+ return None, full_label