ai-vision-tool 0.4.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ai_vision_tool-0.4.2/PKG-INFO +2470 -0
- ai_vision_tool-0.4.2/README.md +2404 -0
- ai_vision_tool-0.4.2/ai_vision_tool/__init__.py +264 -0
- ai_vision_tool-0.4.2/ai_vision_tool/__main__.py +5 -0
- ai_vision_tool-0.4.2/ai_vision_tool/augmentation/__init__.py +100 -0
- ai_vision_tool-0.4.2/ai_vision_tool/augmentation/blur.py +42 -0
- ai_vision_tool-0.4.2/ai_vision_tool/augmentation/blur_artifact.py +527 -0
- ai_vision_tool-0.4.2/ai_vision_tool/augmentation/brightness.py +39 -0
- ai_vision_tool-0.4.2/ai_vision_tool/augmentation/camera_gain.py +44 -0
- ai_vision_tool-0.4.2/ai_vision_tool/augmentation/common.py +152 -0
- ai_vision_tool-0.4.2/ai_vision_tool/augmentation/composite.py +320 -0
- ai_vision_tool-0.4.2/ai_vision_tool/augmentation/crop.py +63 -0
- ai_vision_tool-0.4.2/ai_vision_tool/augmentation/cutout.py +62 -0
- ai_vision_tool-0.4.2/ai_vision_tool/augmentation/exposure.py +44 -0
- ai_vision_tool-0.4.2/ai_vision_tool/augmentation/flip.py +52 -0
- ai_vision_tool-0.4.2/ai_vision_tool/augmentation/geometric_random.py +635 -0
- ai_vision_tool-0.4.2/ai_vision_tool/augmentation/grayscale.py +41 -0
- ai_vision_tool-0.4.2/ai_vision_tool/augmentation/hue.py +41 -0
- ai_vision_tool-0.4.2/ai_vision_tool/augmentation/mosaic.py +70 -0
- ai_vision_tool-0.4.2/ai_vision_tool/augmentation/motion_blur.py +54 -0
- ai_vision_tool-0.4.2/ai_vision_tool/augmentation/noise.py +89 -0
- ai_vision_tool-0.4.2/ai_vision_tool/augmentation/noise_dropout.py +336 -0
- ai_vision_tool-0.4.2/ai_vision_tool/augmentation/rotate90.py +38 -0
- ai_vision_tool-0.4.2/ai_vision_tool/augmentation/rotation.py +84 -0
- ai_vision_tool-0.4.2/ai_vision_tool/augmentation/saturation.py +42 -0
- ai_vision_tool-0.4.2/ai_vision_tool/augmentation/shear.py +67 -0
- ai_vision_tool-0.4.2/ai_vision_tool/augmentation/weather_light.py +476 -0
- ai_vision_tool-0.4.2/ai_vision_tool/capture/__init__.py +1 -0
- ai_vision_tool-0.4.2/ai_vision_tool/capture/burst_image_capture.py +68 -0
- ai_vision_tool-0.4.2/ai_vision_tool/capture/frame_grabber.py +75 -0
- ai_vision_tool-0.4.2/ai_vision_tool/capture/image_capture.py +73 -0
- ai_vision_tool-0.4.2/ai_vision_tool/capture/image_template.py +65 -0
- ai_vision_tool-0.4.2/ai_vision_tool/capture/motion_detector.py +63 -0
- ai_vision_tool-0.4.2/ai_vision_tool/capture/roi_capture.py +67 -0
- ai_vision_tool-0.4.2/ai_vision_tool/capture/screen_capture.py +30 -0
- ai_vision_tool-0.4.2/ai_vision_tool/capture/time_lapse.py +64 -0
- ai_vision_tool-0.4.2/ai_vision_tool/capture/time_lapse_capture.py +3 -0
- ai_vision_tool-0.4.2/ai_vision_tool/capture/video_capture.py +90 -0
- ai_vision_tool-0.4.2/ai_vision_tool/capture/video_recorder.py +130 -0
- ai_vision_tool-0.4.2/ai_vision_tool/capture/video_template.py +206 -0
- ai_vision_tool-0.4.2/ai_vision_tool/cli/__init__.py +0 -0
- ai_vision_tool-0.4.2/ai_vision_tool/cli/main.py +1300 -0
- ai_vision_tool-0.4.2/ai_vision_tool/config/__init__.py +1 -0
- ai_vision_tool-0.4.2/ai_vision_tool/config/env_config.py +71 -0
- ai_vision_tool-0.4.2/ai_vision_tool/config/json_config.py +87 -0
- ai_vision_tool-0.4.2/ai_vision_tool/config/profile_loader.py +79 -0
- ai_vision_tool-0.4.2/ai_vision_tool/config/registry.py +65 -0
- ai_vision_tool-0.4.2/ai_vision_tool/config/yaml_config.py +75 -0
- ai_vision_tool-0.4.2/ai_vision_tool/core/__init__.py +1 -0
- ai_vision_tool-0.4.2/ai_vision_tool/core/base.py +111 -0
- ai_vision_tool-0.4.2/ai_vision_tool/core/batch_processor.py +65 -0
- ai_vision_tool-0.4.2/ai_vision_tool/core/data_types.py +110 -0
- ai_vision_tool-0.4.2/ai_vision_tool/core/device.py +73 -0
- ai_vision_tool-0.4.2/ai_vision_tool/core/memory_manager.py +76 -0
- ai_vision_tool-0.4.2/ai_vision_tool/core/scheduler.py +69 -0
- ai_vision_tool-0.4.2/ai_vision_tool/detection/__init__.py +1 -0
- ai_vision_tool-0.4.2/ai_vision_tool/detection/anomaly_detector.py +127 -0
- ai_vision_tool-0.4.2/ai_vision_tool/detection/face_detector.py +76 -0
- ai_vision_tool-0.4.2/ai_vision_tool/detection/keypoint_detector.py +90 -0
- ai_vision_tool-0.4.2/ai_vision_tool/detection/object_detector.py +133 -0
- ai_vision_tool-0.4.2/ai_vision_tool/detection/text_detector.py +90 -0
- ai_vision_tool-0.4.2/ai_vision_tool/enhancement/__init__.py +1 -0
- ai_vision_tool-0.4.2/ai_vision_tool/enhancement/denoiser.py +80 -0
- ai_vision_tool-0.4.2/ai_vision_tool/enhancement/frame_enhancer.py +58 -0
- ai_vision_tool-0.4.2/ai_vision_tool/enhancement/low_light.py +119 -0
- ai_vision_tool-0.4.2/ai_vision_tool/enhancement/models/__init__.py +0 -0
- ai_vision_tool-0.4.2/ai_vision_tool/enhancement/models/colorization.py +96 -0
- ai_vision_tool-0.4.2/ai_vision_tool/enhancement/models/deblurring.py +100 -0
- ai_vision_tool-0.4.2/ai_vision_tool/enhancement/models/super_resolution.py +77 -0
- ai_vision_tool-0.4.2/ai_vision_tool/integrations/__init__.py +0 -0
- ai_vision_tool-0.4.2/ai_vision_tool/integrations/cloud/__init__.py +0 -0
- ai_vision_tool-0.4.2/ai_vision_tool/integrations/cloud/gcs_source.py +72 -0
- ai_vision_tool-0.4.2/ai_vision_tool/integrations/cloud/s3_source.py +72 -0
- ai_vision_tool-0.4.2/ai_vision_tool/integrations/labeling/__init__.py +0 -0
- ai_vision_tool-0.4.2/ai_vision_tool/integrations/labeling/auto_labeller.py +42 -0
- ai_vision_tool-0.4.2/ai_vision_tool/integrations/labeling/darknet_auto_labeler.py +265 -0
- ai_vision_tool-0.4.2/ai_vision_tool/integrations/labeling/tensorflow_auto_labeler.py +172 -0
- ai_vision_tool-0.4.2/ai_vision_tool/integrations/streaming/__init__.py +0 -0
- ai_vision_tool-0.4.2/ai_vision_tool/integrations/streaming/kafka_io.py +157 -0
- ai_vision_tool-0.4.2/ai_vision_tool/integrations/streaming/websocket_sink.py +167 -0
- ai_vision_tool-0.4.2/ai_vision_tool/io/__init__.py +1 -0
- ai_vision_tool-0.4.2/ai_vision_tool/io/camera_source.py +66 -0
- ai_vision_tool-0.4.2/ai_vision_tool/io/dataset_collector.py +79 -0
- ai_vision_tool-0.4.2/ai_vision_tool/io/dataset_exporter.py +125 -0
- ai_vision_tool-0.4.2/ai_vision_tool/io/image_exporter.py +78 -0
- ai_vision_tool-0.4.2/ai_vision_tool/io/image_io.py +99 -0
- ai_vision_tool-0.4.2/ai_vision_tool/io/video_io.py +129 -0
- ai_vision_tool-0.4.2/ai_vision_tool/models/__init__.py +7 -0
- ai_vision_tool-0.4.2/ai_vision_tool/models/backends/__init__.py +0 -0
- ai_vision_tool-0.4.2/ai_vision_tool/models/backends/onnx_model.py +81 -0
- ai_vision_tool-0.4.2/ai_vision_tool/models/backends/tflite_model.py +74 -0
- ai_vision_tool-0.4.2/ai_vision_tool/models/backends/torch_model.py +87 -0
- ai_vision_tool-0.4.2/ai_vision_tool/models/benchmark.py +83 -0
- ai_vision_tool-0.4.2/ai_vision_tool/models/downloader.py +73 -0
- ai_vision_tool-0.4.2/ai_vision_tool/models/registry.py +74 -0
- ai_vision_tool-0.4.2/ai_vision_tool/pipelines/__init__.py +5 -0
- ai_vision_tool-0.4.2/ai_vision_tool/pipelines/async_pipeline.py +54 -0
- ai_vision_tool-0.4.2/ai_vision_tool/pipelines/parallel_pipeline.py +110 -0
- ai_vision_tool-0.4.2/ai_vision_tool/pipelines/prebuilt.py +90 -0
- ai_vision_tool-0.4.2/ai_vision_tool/pipelines/serializer.py +91 -0
- ai_vision_tool-0.4.2/ai_vision_tool/pipelines/vision_pipeline.py +51 -0
- ai_vision_tool-0.4.2/ai_vision_tool/preprocessing/__init__.py +93 -0
- ai_vision_tool-0.4.2/ai_vision_tool/preprocessing/auto_adjust_contrast.py +158 -0
- ai_vision_tool-0.4.2/ai_vision_tool/preprocessing/auto_orient.py +125 -0
- ai_vision_tool-0.4.2/ai_vision_tool/preprocessing/classical_segmentation.py +87 -0
- ai_vision_tool-0.4.2/ai_vision_tool/preprocessing/frame_resizer.py +62 -0
- ai_vision_tool-0.4.2/ai_vision_tool/preprocessing/geometry.py +677 -0
- ai_vision_tool-0.4.2/ai_vision_tool/preprocessing/intensity.py +851 -0
- ai_vision_tool-0.4.2/ai_vision_tool/preprocessing/quality.py +369 -0
- ai_vision_tool-0.4.2/ai_vision_tool/segmentation/__init__.py +1 -0
- ai_vision_tool-0.4.2/ai_vision_tool/segmentation/instance_segmenter.py +98 -0
- ai_vision_tool-0.4.2/ai_vision_tool/segmentation/mask_post_processor.py +95 -0
- ai_vision_tool-0.4.2/ai_vision_tool/segmentation/panoptic_segmenter.py +94 -0
- ai_vision_tool-0.4.2/ai_vision_tool/segmentation/sam_segmenter.py +120 -0
- ai_vision_tool-0.4.2/ai_vision_tool/segmentation/semantic_segmenter.py +116 -0
- ai_vision_tool-0.4.2/ai_vision_tool/streaming/__init__.py +1 -0
- ai_vision_tool-0.4.2/ai_vision_tool/streaming/buffered_stream.py +99 -0
- ai_vision_tool-0.4.2/ai_vision_tool/streaming/frame_stream.py +110 -0
- ai_vision_tool-0.4.2/ai_vision_tool/streaming/rtsp_client.py +128 -0
- ai_vision_tool-0.4.2/ai_vision_tool/tracking/__init__.py +1 -0
- ai_vision_tool-0.4.2/ai_vision_tool/tracking/byte_tracker.py +144 -0
- ai_vision_tool-0.4.2/ai_vision_tool/tracking/deepsort_tracker.py +87 -0
- ai_vision_tool-0.4.2/ai_vision_tool/tracking/kalman_filter.py +81 -0
- ai_vision_tool-0.4.2/ai_vision_tool/tracking/reid_extractor.py +111 -0
- ai_vision_tool-0.4.2/ai_vision_tool/tracking/track_manager.py +131 -0
- ai_vision_tool-0.4.2/ai_vision_tool/utils/__init__.py +1 -0
- ai_vision_tool-0.4.2/ai_vision_tool/utils/color_palette.py +41 -0
- ai_vision_tool-0.4.2/ai_vision_tool/utils/draw_utils.py +86 -0
- ai_vision_tool-0.4.2/ai_vision_tool/utils/frame_sampler.py +56 -0
- ai_vision_tool-0.4.2/ai_vision_tool/utils/image_hash.py +74 -0
- ai_vision_tool-0.4.2/ai_vision_tool/utils/image_utils.py +147 -0
- ai_vision_tool-0.4.2/ai_vision_tool/utils/metrics_logger.py +70 -0
- ai_vision_tool-0.4.2/ai_vision_tool/visualization/__init__.py +1 -0
- ai_vision_tool-0.4.2/ai_vision_tool/visualization/bbox_renderer.py +90 -0
- ai_vision_tool-0.4.2/ai_vision_tool/visualization/dashboard_view.py +109 -0
- ai_vision_tool-0.4.2/ai_vision_tool/visualization/frame_annotator.py +82 -0
- ai_vision_tool-0.4.2/ai_vision_tool/visualization/frame_viewer.py +81 -0
- ai_vision_tool-0.4.2/ai_vision_tool/visualization/heatmap_renderer.py +94 -0
- ai_vision_tool-0.4.2/ai_vision_tool/visualization/video_annotation_exporter.py +110 -0
- ai_vision_tool-0.4.2/pyproject.toml +171 -0
|
@@ -0,0 +1,2470 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: ai-vision-tool
|
|
3
|
+
Version: 0.4.2
|
|
4
|
+
Summary: Composable computer-vision pipeline components for image enhancement, motion analysis, capture, and dataset collection.
|
|
5
|
+
Keywords: computer-vision,opencv,image-processing,dataset,pipeline
|
|
6
|
+
Author: AI Vision Flow Maintainers
|
|
7
|
+
Requires-Python: >=3.10,<4.0
|
|
8
|
+
Classifier: Development Status :: 3 - Alpha
|
|
9
|
+
Classifier: Intended Audience :: Developers
|
|
10
|
+
Classifier: Intended Audience :: Science/Research
|
|
11
|
+
Classifier: Programming Language :: Python :: 3
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
15
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
16
|
+
Classifier: Topic :: Scientific/Engineering :: Image Processing
|
|
17
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
18
|
+
Provides-Extra: all
|
|
19
|
+
Provides-Extra: api
|
|
20
|
+
Provides-Extra: cloud
|
|
21
|
+
Provides-Extra: detection
|
|
22
|
+
Provides-Extra: kafka
|
|
23
|
+
Provides-Extra: onnx
|
|
24
|
+
Provides-Extra: segmentation
|
|
25
|
+
Provides-Extra: streaming
|
|
26
|
+
Provides-Extra: tflite
|
|
27
|
+
Provides-Extra: torch
|
|
28
|
+
Provides-Extra: tracking
|
|
29
|
+
Provides-Extra: websocket
|
|
30
|
+
Requires-Dist: boto3 (>=1.34) ; extra == "all"
|
|
31
|
+
Requires-Dist: boto3 (>=1.34) ; extra == "cloud"
|
|
32
|
+
Requires-Dist: confluent-kafka (>=2.3.0) ; extra == "all"
|
|
33
|
+
Requires-Dist: confluent-kafka (>=2.3.0) ; extra == "kafka"
|
|
34
|
+
Requires-Dist: confluent-kafka (>=2.3.0) ; extra == "streaming"
|
|
35
|
+
Requires-Dist: fastapi (>=0.115) ; extra == "all"
|
|
36
|
+
Requires-Dist: fastapi (>=0.115) ; extra == "api"
|
|
37
|
+
Requires-Dist: google-cloud-storage (>=2.16) ; extra == "all"
|
|
38
|
+
Requires-Dist: google-cloud-storage (>=2.16) ; extra == "cloud"
|
|
39
|
+
Requires-Dist: mediapipe (>=0.10) ; extra == "all"
|
|
40
|
+
Requires-Dist: mediapipe (>=0.10) ; extra == "detection"
|
|
41
|
+
Requires-Dist: numpy (>=1.26)
|
|
42
|
+
Requires-Dist: onnxruntime (>=1.18) ; extra == "all"
|
|
43
|
+
Requires-Dist: onnxruntime (>=1.18) ; extra == "onnx"
|
|
44
|
+
Requires-Dist: onnxruntime (>=1.18) ; extra == "tracking"
|
|
45
|
+
Requires-Dist: opencv-python (>=4.8)
|
|
46
|
+
Requires-Dist: pyyaml (>=6.0)
|
|
47
|
+
Requires-Dist: segment-anything (>=1.0) ; extra == "all"
|
|
48
|
+
Requires-Dist: segment-anything (>=1.0) ; extra == "segmentation"
|
|
49
|
+
Requires-Dist: tflite-runtime (>=2.14) ; extra == "tflite"
|
|
50
|
+
Requires-Dist: torch (>=2.3) ; extra == "all"
|
|
51
|
+
Requires-Dist: torch (>=2.3) ; extra == "segmentation"
|
|
52
|
+
Requires-Dist: torch (>=2.3) ; extra == "torch"
|
|
53
|
+
Requires-Dist: torchvision (>=0.18) ; extra == "all"
|
|
54
|
+
Requires-Dist: torchvision (>=0.18) ; extra == "segmentation"
|
|
55
|
+
Requires-Dist: torchvision (>=0.18) ; extra == "torch"
|
|
56
|
+
Requires-Dist: ultralytics (>=8.0) ; extra == "all"
|
|
57
|
+
Requires-Dist: ultralytics (>=8.0) ; extra == "detection"
|
|
58
|
+
Requires-Dist: ultralytics (>=8.0) ; extra == "segmentation"
|
|
59
|
+
Requires-Dist: uvicorn (>=0.30) ; extra == "all"
|
|
60
|
+
Requires-Dist: uvicorn (>=0.30) ; extra == "api"
|
|
61
|
+
Requires-Dist: websockets (>=12.0) ; extra == "all"
|
|
62
|
+
Requires-Dist: websockets (>=12.0) ; extra == "streaming"
|
|
63
|
+
Requires-Dist: websockets (>=12.0) ; extra == "websocket"
|
|
64
|
+
Description-Content-Type: text/markdown
|
|
65
|
+
|
|
66
|
+
# AI Vision Tool
|
|
67
|
+
### Build Scalable, Real-Time Computer Vision Systems with OpenCV, AI Models, and Hybrid Pipelines
|
|
68
|
+
|
|
69
|
+
<p align="center">
|
|
70
|
+
<a href="https://pypi.org/project/ai-vision-tool/"><img src="https://img.shields.io/pypi/v/ai-vision-tool?style=flat-square&color=blue&label=PyPI" alt="PyPI version"></a>
|
|
71
|
+
<a href="https://pypi.org/project/ai-vision-tool/"><img src="https://img.shields.io/pypi/pyversions/ai-vision-tool?style=flat-square" alt="Python"></a>
|
|
72
|
+
<a href="https://pypi.org/project/ai-vision-tool/"><img src="https://img.shields.io/pypi/l/ai-vision-tool?style=flat-square&color=green" alt="License"></a>
|
|
73
|
+
<a href="https://pypi.org/project/ai-vision-tool/"><img src="https://img.shields.io/pypi/dm/ai-vision-tool?style=flat-square&color=orange" alt="Downloads"></a>
|
|
74
|
+
</p>
|
|
75
|
+
|
|
76
|
+
<p align="center">
|
|
77
|
+
<img src="images/github/ai-vision-tool.png" alt="AI Vision Tool" width="100%">
|
|
78
|
+
</p>
|
|
79
|
+
|
|
80
|
+
---
|
|
81
|
+
|
|
82
|
+
**AI Vision Tool** is a modular, extensible, and production-ready computer vision framework designed for modern AI-powered image and video processing workflows.
|
|
83
|
+
|
|
84
|
+
Built with a **lightweight OpenCV-first architecture**, it provides a unified ecosystem for preprocessing, augmentation, enhancement, visualization, streaming, capture pipelines, and AI model integration — enabling developers to rapidly build scalable vision applications ranging from classical computer vision systems to advanced deep learning pipelines.
|
|
85
|
+
|
|
86
|
+
```python
|
|
87
|
+
from ai_vision_tool.pipelines import AIVisionPipeline, PrebuiltPipelines
|
|
88
|
+
from ai_vision_tool.preprocessing import AutoOrient, LetterboxResize
|
|
89
|
+
from ai_vision_tool.detection import ObjectDetector
|
|
90
|
+
from ai_vision_tool.tracking import ByteTracker
|
|
91
|
+
from ai_vision_tool.visualization import BBoxRenderer
|
|
92
|
+
|
|
93
|
+
pipeline = (
|
|
94
|
+
AIVisionPipeline()
|
|
95
|
+
.add(AutoOrient())
|
|
96
|
+
.add(LetterboxResize(width=640, height=640))
|
|
97
|
+
.add(ObjectDetector(model_path="yolov8n.pt", conf_threshold=0.25))
|
|
98
|
+
.add(ByteTracker(track_thresh=0.5))
|
|
99
|
+
.add(BBoxRenderer(show_track_id=True))
|
|
100
|
+
)
|
|
101
|
+
|
|
102
|
+
result = pipeline.execute(initial_data={"frame": frame}, global_config={})
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
---
|
|
106
|
+
|
|
107
|
+
## Why AI Vision Tool?
|
|
108
|
+
|
|
109
|
+
| Concern | How it's solved |
|
|
110
|
+
|---------|----------------|
|
|
111
|
+
| **Complexity** | One unified `.run(data)` interface across 130+ components |
|
|
112
|
+
| **Dependencies** | Lightweight core (`numpy + opencv-python + pyyaml`), heavy deps are opt-in extras |
|
|
113
|
+
| **Scalability** | Async, parallel, and fan-out pipelines built-in |
|
|
114
|
+
| **Deployment** | CPU / CUDA / MPS / Edge — auto-detected at runtime |
|
|
115
|
+
| **Extensibility** | Subclass `AIVisionComponent`, plug in anywhere |
|
|
116
|
+
|
|
117
|
+
### Supported Implementation Strategies
|
|
118
|
+
|
|
119
|
+
```
|
|
120
|
+
Classical Computer Vision → Pre-trained AI Models → Custom Deep Learning
|
|
121
|
+
↕ ↕ ↕
|
|
122
|
+
Edge AI Inference ←→ Hybrid CV + AI Architectures ←→ Cloud Streaming
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
The framework follows a **core + optional extensions** philosophy:
|
|
126
|
+
|
|
127
|
+
- **Lightweight core** — fast install, minimal footprint, no heavy deps
|
|
128
|
+
- **Optional AI runtimes** — ONNX, PyTorch, TensorFlow Lite via extras
|
|
129
|
+
- **Plugin-style integrations** — cloud storage, Kafka, WebSocket, Gradio dashboards
|
|
130
|
+
- **Edge and cloud deployment** — runs on Raspberry Pi through multi-GPU servers
|
|
131
|
+
|
|
132
|
+
> **Build once. Deploy anywhere. Scale from classical vision pipelines to state-of-the-art AI systems.**
|
|
133
|
+
|
|
134
|
+
---
|
|
135
|
+
|
|
136
|
+
## Table of Contents
|
|
137
|
+
|
|
138
|
+
- [Features](#features)
|
|
139
|
+
- [Installation](#installation)
|
|
140
|
+
- [Quickstart](#quickstart)
|
|
141
|
+
- [Preprocessing](#preprocessing)
|
|
142
|
+
- [Augmentation](#augmentation)
|
|
143
|
+
- [Pipeline](#pipeline)
|
|
144
|
+
- [Detection](#detection)
|
|
145
|
+
- [Tracking](#tracking)
|
|
146
|
+
- [Segmentation](#segmentation)
|
|
147
|
+
- [Enhancement](#enhancement)
|
|
148
|
+
- [I/O](#io)
|
|
149
|
+
- [Streaming](#streaming)
|
|
150
|
+
- [Visualization](#visualization)
|
|
151
|
+
- [Capture Components](#capture-components)
|
|
152
|
+
- [Utilities](#utilities)
|
|
153
|
+
- [Core](#core)
|
|
154
|
+
- [Configuration](#configuration)
|
|
155
|
+
- [Models](#models)
|
|
156
|
+
- [Prebuilt Pipelines](#prebuilt-pipelines)
|
|
157
|
+
- [Capture Templates](#capture-templates)
|
|
158
|
+
- [CLI Reference](#cli-reference)
|
|
159
|
+
- [Component Index](#component-index)
|
|
160
|
+
- [Output Structure](#output-structure)
|
|
161
|
+
- [Testing](#testing)
|
|
162
|
+
- [Build and Publish](#build-and-publish)
|
|
163
|
+
|
|
164
|
+
---
|
|
165
|
+
|
|
166
|
+
## Features
|
|
167
|
+
|
|
168
|
+
<details open>
|
|
169
|
+
<summary><strong>Pipelines & Architecture</strong></summary>
|
|
170
|
+
|
|
171
|
+
- Composable `AIVisionPipeline` — Chain of Responsibility, one interface for all components
|
|
172
|
+
- Async execution via `AsyncPipeline` (`asyncio` + `run_in_executor`)
|
|
173
|
+
- Parallel branches via `ParallelPipeline` and `FanOutPipeline` (`ThreadPoolExecutor`)
|
|
174
|
+
- Pipeline serialization to/from YAML/JSON via `PipelineSerializer`
|
|
175
|
+
- Prebuilt factory pipelines for detection, tracking, enhancement, augmentation
|
|
176
|
+
|
|
177
|
+
</details>
|
|
178
|
+
|
|
179
|
+
<details open>
|
|
180
|
+
<summary><strong>Preprocessing & Augmentation</strong></summary>
|
|
181
|
+
|
|
182
|
+
- **40+ preprocessing transforms** — geometry, intensity, color space, quality gates
|
|
183
|
+
- **70+ augmentation components** — geometric, weather, blur, noise, dropout, multi-image composition
|
|
184
|
+
- Batch processing: `component.run([img_a, img_b, img_c])` → list of results
|
|
185
|
+
- JSON augmentation profiles for CLI-driven training pipelines
|
|
186
|
+
|
|
187
|
+
</details>
|
|
188
|
+
|
|
189
|
+
<details open>
|
|
190
|
+
<summary><strong>Detection, Tracking & Segmentation</strong></summary>
|
|
191
|
+
|
|
192
|
+
- Object detection: YOLO (ultralytics) + ONNX with greedy NMS fallback
|
|
193
|
+
- Face detection: OpenCV Haar cascade or MediaPipe
|
|
194
|
+
- Keypoint/pose detection: MediaPipe 33-landmark or YOLO-pose
|
|
195
|
+
- OCR/text detection: EasyOCR, PaddleOCR
|
|
196
|
+
- Anomaly detection: statistical z-score, PatchCore (HOG + kNN), PCA
|
|
197
|
+
- Multi-object tracking: ByteTracker (two-stage), DeepSORT (HOG + cosine distance)
|
|
198
|
+
- Semantic, instance, and panoptic segmentation: ONNX / YOLO-seg / TorchScript
|
|
199
|
+
- SAM (Segment Anything Model): point, box, and auto-everything prompts
|
|
200
|
+
- Mask post-processing: erode / dilate / fill holes / largest-component / remove-small
|
|
201
|
+
|
|
202
|
+
</details>
|
|
203
|
+
|
|
204
|
+
<details open>
|
|
205
|
+
<summary><strong>Enhancement & Restoration</strong></summary>
|
|
206
|
+
|
|
207
|
+
- Super-resolution: `cv2.dnn_superres`, ONNX, bicubic fallback
|
|
208
|
+
- Denoising: Non-local means, bilateral, Gaussian, DnCNN-ONNX
|
|
209
|
+
- Deblurring: Wiener FFT, Richardson-Lucy, NAFNet-ONNX
|
|
210
|
+
- Low-light enhancement: CLAHE, gamma LUT, multi-scale Retinex, Zero-DCE
|
|
211
|
+
- Colorization: Zhang 2016 LAB-AB, pseudo-color, thermal
|
|
212
|
+
|
|
213
|
+
</details>
|
|
214
|
+
|
|
215
|
+
<details open>
|
|
216
|
+
<summary><strong>I/O, Streaming & Cloud</strong></summary>
|
|
217
|
+
|
|
218
|
+
- Flexible I/O: local images/video, webcam, RTSP, HTTP, AWS S3, GCS
|
|
219
|
+
- Dataset export: YOLO, COCO JSON, VOC XML
|
|
220
|
+
- Real-time streaming: RTSP client, WebSocket sink/source, Kafka producer/consumer
|
|
221
|
+
- Buffered queues with configurable drop policy and sliding window
|
|
222
|
+
|
|
223
|
+
</details>
|
|
224
|
+
|
|
225
|
+
<details open>
|
|
226
|
+
<summary><strong>Visualization & Dashboards</strong></summary>
|
|
227
|
+
|
|
228
|
+
- Live frame viewer with rolling FPS overlay (headless-safe)
|
|
229
|
+
- BBox renderer with consistent per-class colors and semi-transparent fill
|
|
230
|
+
- Heatmap renderer: detection density, anomaly maps, motion, attention
|
|
231
|
+
- Dashboard sink: Gradio or MJPEG HTTP fallback
|
|
232
|
+
- Annotated video export with JSON sidecar
|
|
233
|
+
|
|
234
|
+
</details>
|
|
235
|
+
|
|
236
|
+
<details open>
|
|
237
|
+
<summary><strong>Model Management</strong></summary>
|
|
238
|
+
|
|
239
|
+
- ONNX, TorchScript, TFLite runners as pipeline components
|
|
240
|
+
- Model registry with JSON cache and HuggingFace download support
|
|
241
|
+
- SHA256-verified downloader with progress callbacks
|
|
242
|
+
- Latency benchmarking: p50 / p95 / p99 + tracemalloc memory profiling
|
|
243
|
+
|
|
244
|
+
</details>
|
|
245
|
+
|
|
246
|
+
---
|
|
247
|
+
|
|
248
|
+
## Installation
|
|
249
|
+
|
|
250
|
+
### pip
|
|
251
|
+
|
|
252
|
+
```bash
|
|
253
|
+
pip install ai-vision-tool
|
|
254
|
+
```
|
|
255
|
+
|
|
256
|
+
With optional extras:
|
|
257
|
+
|
|
258
|
+
```bash
|
|
259
|
+
# ONNX inference
|
|
260
|
+
pip install "ai-vision-tool[onnx]"
|
|
261
|
+
|
|
262
|
+
# YOLO detection + MediaPipe face/pose
|
|
263
|
+
pip install "ai-vision-tool[detection]"
|
|
264
|
+
|
|
265
|
+
# Everything except TFLite (the `tflite` extra is not part of `all`; add it explicitly if needed)
|
|
266
|
+
pip install "ai-vision-tool[all]"
|
|
267
|
+
```
|
|
268
|
+
|
|
269
|
+
### uv
|
|
270
|
+
|
|
271
|
+
```bash
|
|
272
|
+
uv add ai-vision-tool
|
|
273
|
+
uv add "ai-vision-tool[detection]"
|
|
274
|
+
```
|
|
275
|
+
|
|
276
|
+
### Poetry
|
|
277
|
+
|
|
278
|
+
```bash
|
|
279
|
+
poetry add ai-vision-tool
|
|
280
|
+
poetry add "ai-vision-tool[detection]"
|
|
281
|
+
```
|
|
282
|
+
|
|
283
|
+
### Optional extras
|
|
284
|
+
|
|
285
|
+
The base install (`numpy + opencv-python + pyyaml`) has no heavy deps.
|
|
286
|
+
Optional extras install only the libraries each feature needs.
|
|
287
|
+
|
|
288
|
+
| Extra | Installs | Enables |
|
|
289
|
+
|-------|----------|---------|
|
|
290
|
+
| `onnx` | `onnxruntime>=1.18` | `ONNXModel`, ONNX-backed detectors and enhancement |
|
|
291
|
+
| `torch` | `torch>=2.3`, `torchvision>=0.18` | `TorchModel`, TorchScript inference |
|
|
292
|
+
| `tflite` | `tflite-runtime>=2.14` | `TFLiteModel` inference |
|
|
293
|
+
| `detection` | `ultralytics>=8.0`, `mediapipe>=0.10` | `ObjectDetector` (YOLO), `FaceDetector`/`KeypointDetector` (MediaPipe) |
|
|
294
|
+
| `segmentation` | `ultralytics>=8.0`, `segment-anything>=1.0`, `torch>=2.3`, `torchvision>=0.18` | `InstanceSegmenter` (YOLO-seg), `SAMSegmenter` |
|
|
295
|
+
| `tracking` | `onnxruntime>=1.18` | ONNX-backed ReID embeddings in `ReIDExtractor` |
|
|
296
|
+
| `websocket` | `websockets>=12.0` | `WebSocketSink`, `WebSocketSource` |
|
|
297
|
+
| `kafka` | `confluent-kafka>=2.3.0` | `KafkaSink`, `KafkaSource` |
|
|
298
|
+
| `streaming` | websocket + kafka | All real-time streaming components |
|
|
299
|
+
| `cloud` | `boto3>=1.34`, `google-cloud-storage>=2.16` | `S3Source`, `GCSSource` |
|
|
300
|
+
| `api` | `fastapi>=0.115`, `uvicorn>=0.30` | FastAPI REST server |
|
|
301
|
+
| `all` | all of the above except `tflite` (install `tflite` separately) | Full feature set apart from `TFLiteModel` |
|
|
302
|
+
|
|
303
|
+
### Development Setup
|
|
304
|
+
|
|
305
|
+
```bash
|
|
306
|
+
git clone https://github.com/your-org/ai-vision-tool.git
|
|
307
|
+
cd ai-vision-tool
|
|
308
|
+
|
|
309
|
+
# Using uv
|
|
310
|
+
uv sync --dev
|
|
311
|
+
|
|
312
|
+
# Using Poetry
|
|
313
|
+
poetry install --with dev
|
|
314
|
+
```
|
|
315
|
+
|
|
316
|
+
Install pre-commit hooks:
|
|
317
|
+
|
|
318
|
+
```bash
|
|
319
|
+
pre-commit install
|
|
320
|
+
pre-commit install --hook-type pre-push
|
|
321
|
+
pre-commit install --hook-type commit-msg
|
|
322
|
+
pre-commit run --all-files
|
|
323
|
+
```
|
|
324
|
+
|
|
325
|
+
---
|
|
326
|
+
|
|
327
|
+
## Quickstart
|
|
328
|
+
|
|
329
|
+
```python
|
|
330
|
+
import cv2
|
|
331
|
+
from ai_vision_tool.pipelines import AIVisionPipeline
|
|
332
|
+
from ai_vision_tool.preprocessing import AutoOrient, AutoAdjustContrast
|
|
333
|
+
from ai_vision_tool.augmentation import Flip, GaussianBlur
|
|
334
|
+
|
|
335
|
+
image = cv2.imread("images/github/sample.jpg")
|
|
336
|
+
|
|
337
|
+
pipeline = AIVisionPipeline()
|
|
338
|
+
pipeline.add(AutoOrient(rotation=90))
|
|
339
|
+
pipeline.add(AutoAdjustContrast(method="adaptive_equalization", clip_limit=2.0))
|
|
340
|
+
pipeline.add(Flip(horizontal=True))
|
|
341
|
+
pipeline.add(GaussianBlur(kernel_size=5, sigma_x=1.0))
|
|
342
|
+
|
|
343
|
+
result = pipeline.execute(initial_data={"frame": image}, global_config={})
|
|
344
|
+
print(result["frame"].shape) # (height, width, 3)
|
|
345
|
+
```
|
|
346
|
+
|
|
347
|
+
You can also import any component directly from the top-level namespace:
|
|
348
|
+
|
|
349
|
+
```python
|
|
350
|
+
from ai_vision_tool import AutoOrient, Flip, GaussianBlur, AIVisionPipeline
|
|
351
|
+
```
|
|
352
|
+
|
|
353
|
+
All imports use lazy loading — only modules you actually use are loaded.
|
|
354
|
+
|
|
355
|
+
---
|
|
356
|
+
|
|
357
|
+
## Preprocessing
|
|
358
|
+
|
|
359
|
+
Preprocessing transforms prepare raw images for downstream model inference, quality gating,
|
|
360
|
+
or dataset ingestion. Every component accepts either a NumPy array or a payload dictionary
|
|
361
|
+
`{"frame": ndarray, ...}`.
|
|
362
|
+
|
|
363
|
+
```python
|
|
364
|
+
import cv2
|
|
365
|
+
image = cv2.imread("images/github/sample.jpg")
|
|
366
|
+
```
|
|
367
|
+
|
|
368
|
+
### Import Path
|
|
369
|
+
|
|
370
|
+
```python
|
|
371
|
+
from ai_vision_tool.preprocessing import (
|
|
372
|
+
AutoOrient,
|
|
373
|
+
AutoAdjustContrast,
|
|
374
|
+
Resize,
|
|
375
|
+
LetterboxResize,
|
|
376
|
+
CenterCrop,
|
|
377
|
+
PadToSquare,
|
|
378
|
+
Normalize,
|
|
379
|
+
Standardize,
|
|
380
|
+
RescalePixels,
|
|
381
|
+
ConvertColorSpace,
|
|
382
|
+
BGRToRGB,
|
|
383
|
+
RGBToBGR,
|
|
384
|
+
CLAHE,
|
|
385
|
+
HistogramEqualization,
|
|
386
|
+
GammaCorrection,
|
|
387
|
+
WhiteBalance,
|
|
388
|
+
Denoise,
|
|
389
|
+
Sharpen,
|
|
390
|
+
Deblur,
|
|
391
|
+
RemoveBackground,
|
|
392
|
+
Threshold,
|
|
393
|
+
AdaptiveThreshold,
|
|
394
|
+
EdgeDetection,
|
|
395
|
+
ContourExtraction,
|
|
396
|
+
PerspectiveCorrection,
|
|
397
|
+
Deskew,
|
|
398
|
+
AutoCrop,
|
|
399
|
+
FaceAlign,
|
|
400
|
+
ObjectCrop,
|
|
401
|
+
BoundingBoxClamp,
|
|
402
|
+
BoundingBoxNormalize,
|
|
403
|
+
MaskResize,
|
|
404
|
+
ImageQualityCheck,
|
|
405
|
+
BlurDetection,
|
|
406
|
+
BrightnessCheck,
|
|
407
|
+
DuplicateImageCheck,
|
|
408
|
+
CorruptImageCheck,
|
|
409
|
+
AspectRatioFilter,
|
|
410
|
+
MinSizeFilter,
|
|
411
|
+
MaxSizeFilter,
|
|
412
|
+
)
|
|
413
|
+
```
|
|
414
|
+
|
|
415
|
+
---
|
|
416
|
+
|
|
417
|
+
### Geometry
|
|
418
|
+
|
|
419
|
+
**`AutoOrient`** — Correct EXIF orientation metadata or apply an explicit rotation and flip.
|
|
420
|
+
|
|
421
|
+
```python
|
|
422
|
+
from ai_vision_tool.preprocessing import AutoOrient
|
|
423
|
+
|
|
424
|
+
result = AutoOrient(rotation=90).run(image)
|
|
425
|
+
result = AutoOrient(flip_horizontal=True).run(image)
|
|
426
|
+
result = AutoOrient(use_exif=True, exif_key="exif_orientation").run(
|
|
427
|
+
{"frame": image, "exif_orientation": 6}
|
|
428
|
+
)
|
|
429
|
+
```
|
|
430
|
+
|
|
431
|
+
**`Resize`** — Resize to an exact target size.
|
|
432
|
+
|
|
433
|
+
```python
|
|
434
|
+
from ai_vision_tool.preprocessing import Resize
|
|
435
|
+
|
|
436
|
+
result = Resize(width=640, height=640).run(image)
|
|
437
|
+
```
|
|
438
|
+
|
|
439
|
+
**`LetterboxResize`** — Resize preserving aspect ratio, padding the shorter axis.
|
|
440
|
+
|
|
441
|
+
```python
|
|
442
|
+
from ai_vision_tool.preprocessing import LetterboxResize
|
|
443
|
+
|
|
444
|
+
result = LetterboxResize(width=640, height=640, pad_value=(114, 114, 114)).run(image)
|
|
445
|
+
```
|
|
446
|
+
|
|
447
|
+
**`CenterCrop`** — Crop the centre region.
|
|
448
|
+
|
|
449
|
+
```python
|
|
450
|
+
from ai_vision_tool.preprocessing import CenterCrop
|
|
451
|
+
|
|
452
|
+
result = CenterCrop(width=224, height=224).run(image)
|
|
453
|
+
```
|
|
454
|
+
|
|
455
|
+
**`PadToSquare`** — Pad a rectangular image to a square canvas.
|
|
456
|
+
|
|
457
|
+
```python
|
|
458
|
+
from ai_vision_tool.preprocessing import PadToSquare
|
|
459
|
+
|
|
460
|
+
result = PadToSquare(pad_value=(0, 0, 0)).run(image)
|
|
461
|
+
```
|
|
462
|
+
|
|
463
|
+
**`PerspectiveCorrection`** — Rectify a quadrilateral document or planar surface.
|
|
464
|
+
|
|
465
|
+
```python
|
|
466
|
+
import numpy as np
|
|
467
|
+
from ai_vision_tool.preprocessing import PerspectiveCorrection
|
|
468
|
+
|
|
469
|
+
source_points = np.float32([[30, 20], [310, 10], [320, 240], [20, 250]])
|
|
470
|
+
result = PerspectiveCorrection(source_points=source_points, output_size=(300, 200)).run(image)
|
|
471
|
+
```
|
|
472
|
+
|
|
473
|
+
**`Deskew`** — Detect the skew angle of a document image and rotate it back to level.
|
|
474
|
+
|
|
475
|
+
```python
|
|
476
|
+
from ai_vision_tool.preprocessing import Deskew
|
|
477
|
+
|
|
478
|
+
result = Deskew().run(image)
|
|
479
|
+
```
|
|
480
|
+
|
|
481
|
+
**`AutoCrop`** — Trim empty or near-black borders.
|
|
482
|
+
|
|
483
|
+
```python
|
|
484
|
+
from ai_vision_tool.preprocessing import AutoCrop
|
|
485
|
+
|
|
486
|
+
result = AutoCrop(threshold=10, padding=4).run(image)
|
|
487
|
+
```
|
|
488
|
+
|
|
489
|
+
**`FaceAlign`** — Align a face using eye landmark coordinates from a payload dict.
|
|
490
|
+
|
|
491
|
+
```python
|
|
492
|
+
from ai_vision_tool.preprocessing import FaceAlign
|
|
493
|
+
|
|
494
|
+
payload = {"frame": image, "metadata": {"left_eye": (40, 50), "right_eye": (90, 50)}}
|
|
495
|
+
result = FaceAlign(output_size=(112, 112)).run(payload)
|
|
496
|
+
```
|
|
497
|
+
|
|
498
|
+
**`ObjectCrop`** — Crop the region described by bounding boxes.
|
|
499
|
+
|
|
500
|
+
```python
|
|
501
|
+
from ai_vision_tool.preprocessing import ObjectCrop
|
|
502
|
+
|
|
503
|
+
payload = {"frame": image, "bboxes": [(10, 20, 120, 80)]}
|
|
504
|
+
result = ObjectCrop().run(payload)
|
|
505
|
+
```
|
|
506
|
+
|
|
507
|
+
**`BoundingBoxClamp`** — Clamp bounding boxes that extend outside image boundaries.
|
|
508
|
+
|
|
509
|
+
```python
|
|
510
|
+
from ai_vision_tool.preprocessing import BoundingBoxClamp
|
|
511
|
+
|
|
512
|
+
payload = {"frame": image, "bboxes": [(-5, -5, 80, 90)]}
|
|
513
|
+
result = BoundingBoxClamp().run(payload)
|
|
514
|
+
```
|
|
515
|
+
|
|
516
|
+
**`BoundingBoxNormalize`** — Normalise absolute pixel bounding boxes to relative coordinates.
|
|
517
|
+
|
|
518
|
+
```python
|
|
519
|
+
from ai_vision_tool.preprocessing import BoundingBoxNormalize
|
|
520
|
+
|
|
521
|
+
payload = {"frame": image, "bboxes": [(10, 20, 120, 80)]}
|
|
522
|
+
result = BoundingBoxNormalize().run(payload)
|
|
523
|
+
```
|
|
524
|
+
|
|
525
|
+
**`MaskResize`** — Resize a payload mask to match a target spatial size.
|
|
526
|
+
|
|
527
|
+
```python
|
|
528
|
+
import numpy as np
|
|
529
|
+
from ai_vision_tool.preprocessing import MaskResize
|
|
530
|
+
|
|
531
|
+
mask = np.zeros((image.shape[0], image.shape[1]), dtype=np.uint8)
|
|
532
|
+
payload = {"frame": image, "mask": mask}
|
|
533
|
+
result = MaskResize(width=640, height=640).run(payload)
|
|
534
|
+
```
|
|
535
|
+
|
|
536
|
+
---
|
|
537
|
+
|
|
538
|
+
### Intensity and Color
|
|
539
|
+
|
|
540
|
+
**`AutoAdjustContrast`** — Adaptive equalization, histogram equalization, or contrast stretching.
|
|
541
|
+
|
|
542
|
+
```python
|
|
543
|
+
from ai_vision_tool.preprocessing import AutoAdjustContrast
|
|
544
|
+
|
|
545
|
+
result = AutoAdjustContrast(method="adaptive_equalization", clip_limit=2.0).run(image)
|
|
546
|
+
result = AutoAdjustContrast(method="histogram_equalization").run(image)
|
|
547
|
+
result = AutoAdjustContrast(
|
|
548
|
+
method="contrast_stretching", lower_percentile=2.0, upper_percentile=98.0
|
|
549
|
+
).run(image)
|
|
550
|
+
```
|
|
551
|
+
|
|
552
|
+
**`Normalize`** — Map pixel values into [0, 1].
|
|
553
|
+
|
|
554
|
+
```python
|
|
555
|
+
from ai_vision_tool.preprocessing import Normalize
|
|
556
|
+
|
|
557
|
+
result = Normalize().run(image)
|
|
558
|
+
```
|
|
559
|
+
|
|
560
|
+
**`Standardize`** — Z-score standardisation, optionally computed per channel.
|
|
561
|
+
|
|
562
|
+
```python
|
|
563
|
+
from ai_vision_tool.preprocessing import Standardize
|
|
564
|
+
|
|
565
|
+
result = Standardize(per_channel=True).run(image)
|
|
566
|
+
```
|
|
567
|
+
|
|
568
|
+
**`CLAHE`** — Contrast-Limited Adaptive Histogram Equalisation.
|
|
569
|
+
|
|
570
|
+
```python
|
|
571
|
+
from ai_vision_tool.preprocessing import CLAHE
|
|
572
|
+
|
|
573
|
+
result = CLAHE(clip_limit=2.0, tile_grid_size=(8, 8)).run(image)
|
|
574
|
+
```
|
|
575
|
+
|
|
576
|
+
**`GammaCorrection`** — Gamma-based exposure tuning.
|
|
577
|
+
|
|
578
|
+
```python
|
|
579
|
+
from ai_vision_tool.preprocessing import GammaCorrection
|
|
580
|
+
|
|
581
|
+
result = GammaCorrection(gamma=1.4).run(image) # brighten
|
|
582
|
+
result = GammaCorrection(gamma=0.7).run(image) # darken
|
|
583
|
+
```
|
|
584
|
+
|
|
585
|
+
**`WhiteBalance`** — Correct per-channel colour casts.
|
|
586
|
+
|
|
587
|
+
```python
|
|
588
|
+
from ai_vision_tool.preprocessing import WhiteBalance
|
|
589
|
+
|
|
590
|
+
result = WhiteBalance(method="gray_world").run(image)
|
|
591
|
+
```
|
|
592
|
+
|
|
593
|
+
**`EdgeDetection`** — Extract edges via Canny, Sobel, or Laplacian.
|
|
594
|
+
|
|
595
|
+
```python
|
|
596
|
+
from ai_vision_tool.preprocessing import EdgeDetection
|
|
597
|
+
|
|
598
|
+
result = EdgeDetection(method="canny", threshold1=100, threshold2=200).run(image)
|
|
599
|
+
```
|
|
600
|
+
|
|
601
|
+
---
|
|
602
|
+
|
|
603
|
+
### Quality Checks
|
|
604
|
+
|
|
605
|
+
**`ImageQualityCheck`** — Compute blur and brightness quality flags.
|
|
606
|
+
|
|
607
|
+
```python
|
|
608
|
+
from ai_vision_tool.preprocessing import ImageQualityCheck
|
|
609
|
+
|
|
610
|
+
result = ImageQualityCheck().run({"frame": image})
|
|
611
|
+
# result["is_blurry"], result["brightness"]
|
|
612
|
+
```
|
|
613
|
+
|
|
614
|
+
**`BlurDetection`** — Flag frames below a Laplacian variance threshold.
|
|
615
|
+
|
|
616
|
+
```python
|
|
617
|
+
from ai_vision_tool.preprocessing import BlurDetection
|
|
618
|
+
|
|
619
|
+
result = BlurDetection().run({"frame": image})
|
|
620
|
+
```
|
|
621
|
+
|
|
622
|
+
**`MinSizeFilter`** / **`MaxSizeFilter`** — Enforce pixel dimension bounds.
|
|
623
|
+
|
|
624
|
+
```python
|
|
625
|
+
from ai_vision_tool.preprocessing import MinSizeFilter, MaxSizeFilter
|
|
626
|
+
|
|
627
|
+
result = MinSizeFilter(min_width=320, min_height=320).run({"frame": image})
|
|
628
|
+
result = MaxSizeFilter(max_width=2048, max_height=2048).run({"frame": image})
|
|
629
|
+
```
|
|
630
|
+
|
|
631
|
+
---
|
|
632
|
+
|
|
633
|
+
## Augmentation
|
|
634
|
+
|
|
635
|
+
Augmentation components apply stochastic or deterministic transforms for training-time
|
|
636
|
+
variation. Every component exposes the same `.run(input)` interface.
|
|
637
|
+
|
|
638
|
+
```python
|
|
639
|
+
import cv2
|
|
640
|
+
image = cv2.imread("images/github/sample.jpg")
|
|
641
|
+
```
|
|
642
|
+
|
|
643
|
+
### Import Path
|
|
644
|
+
|
|
645
|
+
```python
|
|
646
|
+
from ai_vision_tool.augmentation import (
|
|
647
|
+
Flip, Rotate90, Crop, Rotation, Shear, Translate,
|
|
648
|
+
RandomResize, RandomScale, RandomCrop, RandomResizedCrop, RandomPadding,
|
|
649
|
+
AffineTransform, PerspectiveTransform, ElasticTransform,
|
|
650
|
+
GridDistortion, OpticalDistortion,
|
|
651
|
+
Brightness, Exposure, Hue, Saturation, Greyscale,
|
|
652
|
+
ColorJitter, RandomGamma, RandomBrightnessContrast,
|
|
653
|
+
RandomShadow, RandomSunFlare, RandomFog, RandomRain, RandomSnow,
|
|
654
|
+
ChannelShuffle, RGBShift, HSVShift, ToSepia, InvertImage,
|
|
655
|
+
Blur, GaussianBlur, MedianBlur, GlassBlur, DefocusBlur,
|
|
656
|
+
ZoomBlur, MotionBlur, CameraGain,
|
|
657
|
+
Emboss, Posterize, Solarize, Equalize,
|
|
658
|
+
CompressionArtifacts, JPEGCompression, Downscale, Superpixel,
|
|
659
|
+
Noise, ISONoise, MultiplicativeNoise, SaltPepperNoise,
|
|
660
|
+
CoarseDropout, GridDropout, RandomErasing, PixelDropout, MaskDropout,
|
|
661
|
+
Cutout, Mosaic, Mosaic9, MixUp, CutMix,
|
|
662
|
+
CopyPaste, ObjectPaste, RandomOcclusion, BoundingBoxJitter,
|
|
663
|
+
)
|
|
664
|
+
```
|
|
665
|
+
|
|
666
|
+
### Geometric and Spatial
|
|
667
|
+
|
|
668
|
+
```python
|
|
669
|
+
from ai_vision_tool.augmentation import Flip, Rotate90, Rotation, Shear
|
|
670
|
+
|
|
671
|
+
result = Flip(horizontal=True).run(image)
|
|
672
|
+
result = Rotate90(k=1).run(image)
|
|
673
|
+
result = Rotation(angle=12.0, expand=False, border_mode="constant").run(image)
|
|
674
|
+
result = Shear(shear_x=0.15).run(image)
|
|
675
|
+
```
|
|
676
|
+
|
|
677
|
+
**`RandomResizedCrop`** — Random crop followed by a resize (the equivalent of torchvision's `RandomResizedCrop`).
|
|
678
|
+
|
|
679
|
+
```python
|
|
680
|
+
from ai_vision_tool.augmentation import RandomResizedCrop
|
|
681
|
+
|
|
682
|
+
result = RandomResizedCrop(
|
|
683
|
+
output_width=224, output_height=224, scale_min=0.08, scale_max=1.0
|
|
684
|
+
).run(image)
|
|
685
|
+
```
|
|
686
|
+
|
|
687
|
+
**`AffineTransform`** — Combined rotate/scale/translate/shear in one pass.
|
|
688
|
+
|
|
689
|
+
```python
|
|
690
|
+
from ai_vision_tool.augmentation import AffineTransform
|
|
691
|
+
|
|
692
|
+
result = AffineTransform(angle=8.0, scale=1.0, translate_x=10.0, shear_x=0.05).run(image)
|
|
693
|
+
```
|
|
694
|
+
|
|
695
|
+
**`ElasticTransform`** / **`GridDistortion`** / **`OpticalDistortion`** — Spatial warping.
|
|
696
|
+
|
|
697
|
+
```python
|
|
698
|
+
from ai_vision_tool.augmentation import ElasticTransform, GridDistortion, OpticalDistortion
|
|
699
|
+
|
|
700
|
+
result = ElasticTransform(alpha=3.0, sigma=1.0).run(image)
|
|
701
|
+
result = GridDistortion(num_steps=5, distort_limit=0.2).run(image)
|
|
702
|
+
result = OpticalDistortion(k=0.00001).run(image)
|
|
703
|
+
```
|
|
704
|
+
|
|
705
|
+
### Lighting, Color, and Weather
|
|
706
|
+
|
|
707
|
+
```python
|
|
708
|
+
from ai_vision_tool.augmentation import (
|
|
709
|
+
ColorJitter, RandomShadow, RandomFog, RandomRain
|
|
710
|
+
)
|
|
711
|
+
|
|
712
|
+
result = ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=8).run(image)
|
|
713
|
+
result = RandomShadow(shadow_dimension=0.5, intensity=0.5).run(image)
|
|
714
|
+
result = RandomFog(alpha=0.2).run(image)
|
|
715
|
+
result = RandomRain(drops=40, drop_length=12, intensity=0.25).run(image)
|
|
716
|
+
```
|
|
717
|
+
|
|
718
|
+
### Blur, Compression, and Texture
|
|
719
|
+
|
|
720
|
+
```python
|
|
721
|
+
from ai_vision_tool.augmentation import (
|
|
722
|
+
GaussianBlur, MotionBlur, DefocusBlur, JPEGCompression, Superpixel
|
|
723
|
+
)
|
|
724
|
+
|
|
725
|
+
result = GaussianBlur(kernel_size=5, sigma_x=1.0).run(image)
|
|
726
|
+
result = MotionBlur(kernel_size=11, angle=25.0).run(image)
|
|
727
|
+
result = DefocusBlur(radius=5).run(image)
|
|
728
|
+
result = JPEGCompression(quality=40).run(image)
|
|
729
|
+
result = Superpixel(region_size=10).run(image)
|
|
730
|
+
```
|
|
731
|
+
|
|
732
|
+
### Noise and Dropout
|
|
733
|
+
|
|
734
|
+
```python
|
|
735
|
+
from ai_vision_tool.augmentation import (
|
|
736
|
+
Noise, ISONoise, CoarseDropout, GridDropout
|
|
737
|
+
)
|
|
738
|
+
|
|
739
|
+
result = Noise(mode="gaussian", mean=0.0, stddev=8.0).run(image)
|
|
740
|
+
result = ISONoise(color_shift=0.01, intensity=0.5).run(image)
|
|
741
|
+
result = CoarseDropout(holes=8, max_height=8, max_width=8).run(image)
|
|
742
|
+
result = GridDropout(ratio=0.5, unit_size=8).run(image)
|
|
743
|
+
```
|
|
744
|
+
|
|
745
|
+
### Multi-Image and Annotation-Aware
|
|
746
|
+
|
|
747
|
+
```python
|
|
748
|
+
import cv2
|
|
749
|
+
from ai_vision_tool.augmentation import MixUp, CutMix, Mosaic, BoundingBoxJitter
|
|
750
|
+
|
|
751
|
+
image_b = cv2.imread("images/github/sample.jpg")
|
|
752
|
+
|
|
753
|
+
result = MixUp(alpha=0.5).run({"frame": image, "mix_image": image_b})
|
|
754
|
+
result = CutMix(alpha=0.5).run({"frame": image, "mix_image": image_b})
|
|
755
|
+
|
|
756
|
+
tiles = [image] * 3
|
|
757
|
+
result = Mosaic(output_size=(640, 640), mosaic_images=tiles).run(image)
|
|
758
|
+
|
|
759
|
+
payload = {"frame": image, "bboxes": [(10, 10, 100, 60)]}
|
|
760
|
+
result = BoundingBoxJitter(x_jitter=0.05, y_jitter=0.05, size_jitter=0.1).run(payload)
|
|
761
|
+
```
|
|
762
|
+
|
|
763
|
+
### Batch Processing
|
|
764
|
+
|
|
765
|
+
```python
|
|
766
|
+
from ai_vision_tool.augmentation import Flip
|
|
767
|
+
|
|
768
|
+
results = Flip(horizontal=True).run([image, image, image]) # list → list
|
|
769
|
+
```
|
|
770
|
+
|
|
771
|
+
### Augmentation Profile (JSON)
|
|
772
|
+
|
|
773
|
+
```json
|
|
774
|
+
[
|
|
775
|
+
{"name": "RandomResizedCrop", "params": {"output_width": 256, "output_height": 256}},
|
|
776
|
+
{"name": "ColorJitter", "params": {"brightness": 0.2, "contrast": 0.2}},
|
|
777
|
+
{"name": "GaussianBlur", "params": {"kernel_size": 5, "sigma_x": 1.0}}
|
|
778
|
+
]
|
|
779
|
+
```
|
|
780
|
+
|
|
781
|
+
```bash
|
|
782
|
+
ai-vision-tool --augmentation-config examples/augmentation_profile.json
|
|
783
|
+
```
|
|
784
|
+
|
|
785
|
+
---
|
|
786
|
+
|
|
787
|
+
## Pipeline
|
|
788
|
+
|
|
789
|
+
`AIVisionPipeline` implements the Chain of Responsibility pattern.
|
|
790
|
+
|
|
791
|
+
```python
|
|
792
|
+
import cv2
|
|
793
|
+
from ai_vision_tool.pipelines import AIVisionPipeline
|
|
794
|
+
from ai_vision_tool.preprocessing import AutoOrient, Resize
|
|
795
|
+
from ai_vision_tool.augmentation import Flip, ColorJitter
|
|
796
|
+
from ai_vision_tool.visualization import FrameAnnotator
|
|
797
|
+
from ai_vision_tool.capture import MotionDetector
|
|
798
|
+
|
|
799
|
+
image = cv2.imread("images/github/sample.jpg")
|
|
800
|
+
|
|
801
|
+
pipeline = (
|
|
802
|
+
AIVisionPipeline()
|
|
803
|
+
.add(AutoOrient(rotation=90))
|
|
804
|
+
.add(Resize(width=640, height=640))
|
|
805
|
+
.add(Flip(horizontal=True))
|
|
806
|
+
.add(ColorJitter(brightness=0.15, contrast=0.15, saturation=0.15, hue=5))
|
|
807
|
+
.add(MotionDetector())
|
|
808
|
+
.add(FrameAnnotator())
|
|
809
|
+
)
|
|
810
|
+
|
|
811
|
+
result = pipeline.execute(
|
|
812
|
+
initial_data={"frame": image, "annotations": []},
|
|
813
|
+
global_config={"min_area": 800},
|
|
814
|
+
)
|
|
815
|
+
output_frame = result["frame"]
|
|
816
|
+
```
|
|
817
|
+
|
|
818
|
+
---
|
|
819
|
+
|
|
820
|
+
## Detection
|
|
821
|
+
|
|
822
|
+
Detection components output `data["bboxes"]` (list of dicts with `x1/y1/x2/y2/label/conf`).
|
|
823
|
+
|
|
824
|
+
```python
|
|
825
|
+
import cv2
|
|
826
|
+
image = cv2.imread("images/github/sample.jpg")
|
|
827
|
+
```
|
|
828
|
+
|
|
829
|
+
### ObjectDetector
|
|
830
|
+
|
|
831
|
+
YOLO (ultralytics) or ONNX backend with greedy NMS fallback.
|
|
832
|
+
|
|
833
|
+
```python
|
|
834
|
+
from ai_vision_tool.detection import ObjectDetector
|
|
835
|
+
|
|
836
|
+
detector = ObjectDetector(
|
|
837
|
+
model_path="yolov8n.pt", # or "model.onnx"
|
|
838
|
+
conf_threshold=0.25,
|
|
839
|
+
iou_threshold=0.45,
|
|
840
|
+
backend="yolo", # "yolo" | "onnx"
|
|
841
|
+
class_names=None, # auto-loaded from ultralytics
|
|
842
|
+
)
|
|
843
|
+
result = detector.run({"frame": image})
|
|
844
|
+
print(result["bboxes"]) # [{"x1": ..., "y1": ..., "x2": ..., "y2": ..., "label": ..., "conf": ...}]
|
|
845
|
+
print(result["detection_count"])
|
|
846
|
+
```
|
|
847
|
+
|
|
848
|
+
### FaceDetector
|
|
849
|
+
|
|
850
|
+
OpenCV Haar cascade (bundled with OpenCV) or MediaPipe.
|
|
851
|
+
|
|
852
|
+
```python
|
|
853
|
+
from ai_vision_tool.detection import FaceDetector
|
|
854
|
+
|
|
855
|
+
detector = FaceDetector(
|
|
856
|
+
backend="opencv", # "opencv" | "mediapipe"
|
|
857
|
+
conf_threshold=0.5,
|
|
858
|
+
min_face_size=20,
|
|
859
|
+
)
|
|
860
|
+
result = detector.run({"frame": image})
|
|
861
|
+
print(result["faces"]) # same schema as bboxes + "face_id" key
|
|
862
|
+
print(result["bboxes"]) # unified bbox list
|
|
863
|
+
```
|
|
864
|
+
|
|
865
|
+
### KeypointDetector
|
|
866
|
+
|
|
867
|
+
MediaPipe 33-landmark pose with pixel coordinates, or YOLO-pose.
|
|
868
|
+
|
|
869
|
+
```python
|
|
870
|
+
from ai_vision_tool.detection import KeypointDetector
|
|
871
|
+
|
|
872
|
+
detector = KeypointDetector(
|
|
873
|
+
backend="mediapipe", # "mediapipe" | "yolo_pose"
|
|
874
|
+
model_complexity=1,
|
|
875
|
+
)
|
|
876
|
+
result = detector.run({"frame": image})
|
|
877
|
+
print(result["poses"]) # list of {"keypoints": [{x, y, z, visibility, name}, ...]}
|
|
878
|
+
```
|
|
879
|
+
|
|
880
|
+
### TextDetector
|
|
881
|
+
|
|
882
|
+
EasyOCR, PaddleOCR, or EAST (placeholder) backend.
|
|
883
|
+
|
|
884
|
+
```python
|
|
885
|
+
from ai_vision_tool.detection import TextDetector
|
|
886
|
+
|
|
887
|
+
detector = TextDetector(
|
|
888
|
+
backend="easyocr", # "easyocr" | "paddleocr" | "east"
|
|
889
|
+
conf_threshold=0.5,
|
|
890
|
+
languages=["en"],
|
|
891
|
+
)
|
|
892
|
+
result = detector.run({"frame": image})
|
|
893
|
+
print(result["text_regions"]) # [{"x1", "y1", "x2", "y2", "text", "conf"}]
|
|
894
|
+
```
|
|
895
|
+
|
|
896
|
+
### AnomalyDetector
|
|
897
|
+
|
|
898
|
+
Statistical z-score histogram, PatchCore (HOG + NearestNeighbors), or PCA approximation.
|
|
899
|
+
|
|
900
|
+
```python
|
|
901
|
+
from ai_vision_tool.detection import AnomalyDetector
|
|
902
|
+
|
|
903
|
+
detector = AnomalyDetector(
|
|
904
|
+
method="statistical", # "statistical" | "patchcore" | "pca"
|
|
905
|
+
window=30, # warmup frames for baseline
|
|
906
|
+
threshold=2.0,
|
|
907
|
+
)
|
|
908
|
+
# Feed frames sequentially — detector builds baseline during warmup
|
|
909
|
+
result = detector.run({"frame": image})
|
|
910
|
+
print(result["anomaly_score"])
|
|
911
|
+
print(result["is_anomaly"]) # bool
|
|
912
|
+
print(result["anomaly_map"]) # spatial heatmap (numpy array)
|
|
913
|
+
```
|
|
914
|
+
|
|
915
|
+
---
|
|
916
|
+
|
|
917
|
+
## Tracking
|
|
918
|
+
|
|
919
|
+
Tracking components extend detection output with a persistent `track_id` per object.
|
|
920
|
+
Input: `data["bboxes"]` from a detector. Output: `data["tracks"]`.
|
|
921
|
+
|
|
922
|
+
### ByteTracker
|
|
923
|
+
|
|
924
|
+
State-of-the-art two-stage association: high-confidence detections first, then
|
|
925
|
+
low-confidence detections vs. unmatched tracks (Zhang et al. 2022).
|
|
926
|
+
|
|
927
|
+
```python
|
|
928
|
+
from ai_vision_tool.detection import ObjectDetector
|
|
929
|
+
from ai_vision_tool.tracking import ByteTracker
|
|
930
|
+
from ai_vision_tool.pipelines import AIVisionPipeline
|
|
931
|
+
|
|
932
|
+
pipeline = (
|
|
933
|
+
AIVisionPipeline()
|
|
934
|
+
.add(ObjectDetector(model_path="yolov8n.pt", conf_threshold=0.25))
|
|
935
|
+
.add(ByteTracker(
|
|
936
|
+
track_thresh=0.5,
|
|
937
|
+
track_buffer=30, # frames to keep a lost track
|
|
938
|
+
match_thresh=0.8,
|
|
939
|
+
))
|
|
940
|
+
)
|
|
941
|
+
|
|
942
|
+
result = pipeline.execute(initial_data={"frame": image}, global_config={})
|
|
943
|
+
for track in result["tracks"]:
|
|
944
|
+
print(track["track_id"], track["label"], track["x1"], track["y1"])
|
|
945
|
+
```
|
|
946
|
+
|
|
947
|
+
### DeepSORTTracker
|
|
948
|
+
|
|
949
|
+
HOG-based re-identification embedding with cosine distance. Drop-in replacement for
|
|
950
|
+
ByteTracker; use when identity consistency across long occlusions matters.
|
|
951
|
+
|
|
952
|
+
```python
|
|
953
|
+
from ai_vision_tool.tracking import DeepSORTTracker
|
|
954
|
+
|
|
955
|
+
tracker = DeepSORTTracker(
|
|
956
|
+
max_age=30,
|
|
957
|
+
min_hits=3,
|
|
958
|
+
iou_threshold=0.3,
|
|
959
|
+
embedding_method="hog", # "hog" | "osnet_onnx"
|
|
960
|
+
)
|
|
961
|
+
result = tracker.run({"frame": image, "bboxes": [...]})
|
|
962
|
+
print(result["tracks"])
|
|
963
|
+
```
|
|
964
|
+
|
|
965
|
+
### ReIDExtractor
|
|
966
|
+
|
|
967
|
+
Extract appearance embeddings for gallery-matching workflows.
|
|
968
|
+
|
|
969
|
+
```python
|
|
970
|
+
from ai_vision_tool.tracking import ReIDExtractor
|
|
971
|
+
|
|
972
|
+
extractor = ReIDExtractor(method="hog", embedding_dim=128)
|
|
973
|
+
result = extractor.run({"frame": image, "bboxes": [...]})
|
|
974
|
+
print(result["embeddings"]) # list of float arrays, one per bbox
|
|
975
|
+
```
|
|
976
|
+
|
|
977
|
+
### TrackManager
|
|
978
|
+
|
|
979
|
+
Low-level track lifecycle management. Used internally by ByteTracker and DeepSORTTracker
|
|
980
|
+
but accessible directly for custom tracking logic.
|
|
981
|
+
|
|
982
|
+
```python
|
|
983
|
+
from ai_vision_tool.tracking import TrackManager
|
|
984
|
+
|
|
985
|
+
tm = TrackManager(max_age=30, min_hits=3, iou_threshold=0.3)
|
|
986
|
+
tracks = tm.update(bboxes_list, frame_id=42)
|
|
987
|
+
```
|
|
988
|
+
|
|
989
|
+
### KalmanFilter
|
|
990
|
+
|
|
991
|
+
7-state (cx, cy, s, r, vx, vy, vs) Kalman filter used by both built-in trackers.
|
|
992
|
+
|
|
993
|
+
```python
|
|
994
|
+
from ai_vision_tool.tracking import KalmanFilter
|
|
995
|
+
|
|
996
|
+
kf = KalmanFilter()
|
|
997
|
+
mean, cov = kf.initiate([x1, y1, x2, y2])
|
|
998
|
+
mean, cov = kf.predict(mean, cov)
|
|
999
|
+
mean, cov = kf.update(mean, cov, [x1, y1, x2, y2])
|
|
1000
|
+
```
|
|
1001
|
+
|
|
1002
|
+
---
|
|
1003
|
+
|
|
1004
|
+
## Segmentation
|
|
1005
|
+
|
|
1006
|
+
Segmentation components produce pixel-level masks. All follow the same component interface.
|
|
1007
|
+
|
|
1008
|
+
### SemanticSegmenter
|
|
1009
|
+
|
|
1010
|
+
ONNX, OpenCV DNN, or TorchScript backend. Defaults to VOC-21 class names.
|
|
1011
|
+
|
|
1012
|
+
```python
|
|
1013
|
+
from ai_vision_tool.segmentation import SemanticSegmenter
|
|
1014
|
+
|
|
1015
|
+
segmenter = SemanticSegmenter(
|
|
1016
|
+
model_path="deeplabv3.onnx",
|
|
1017
|
+
backend="onnx", # "onnx" | "opencv_dnn" | "torch"
|
|
1018
|
+
num_classes=21,
|
|
1019
|
+
input_size=(513, 513),
|
|
1020
|
+
)
|
|
1021
|
+
result = segmenter.run({"frame": image})
|
|
1022
|
+
print(result["seg_map"]) # (H, W) class index array
|
|
1023
|
+
print(result["seg_overlay"]) # colorized overlay on original frame
|
|
1024
|
+
print(result["masks"]) # list of per-class binary masks
|
|
1025
|
+
```
|
|
1026
|
+
|
|
1027
|
+
### InstanceSegmenter
|
|
1028
|
+
|
|
1029
|
+
YOLO-seg mask output resized to original frame size.
|
|
1030
|
+
|
|
1031
|
+
```python
|
|
1032
|
+
from ai_vision_tool.segmentation import InstanceSegmenter
|
|
1033
|
+
|
|
1034
|
+
segmenter = InstanceSegmenter(
|
|
1035
|
+
model_path="yolov8n-seg.pt",
|
|
1036
|
+
backend="yolo",
|
|
1037
|
+
conf_threshold=0.25,
|
|
1038
|
+
)
|
|
1039
|
+
result = segmenter.run({"frame": image})
|
|
1040
|
+
print(result["masks"]) # list of binary masks
|
|
1041
|
+
print(result["bboxes"]) # aligned with masks
|
|
1042
|
+
print(result["instance_overlay"])
|
|
1043
|
+
```
|
|
1044
|
+
|
|
1045
|
+
### PanopticSegmenter
|
|
1046
|
+
|
|
1047
|
+
Separates stuff (background) and thing (object) classes.
|
|
1048
|
+
|
|
1049
|
+
```python
|
|
1050
|
+
from ai_vision_tool.segmentation import PanopticSegmenter
|
|
1051
|
+
|
|
1052
|
+
segmenter = PanopticSegmenter(model_path="panoptic.onnx")
|
|
1053
|
+
result = segmenter.run({"frame": image})
|
|
1054
|
+
print(result["panoptic_map"]) # (H, W) instance-class encoded
|
|
1055
|
+
print(result["stuff_mask"])
|
|
1056
|
+
print(result["thing_mask"])
|
|
1057
|
+
```
|
|
1058
|
+
|
|
1059
|
+
### SAMSegmenter
|
|
1060
|
+
|
|
1061
|
+
Segment Anything Model — point, box, and auto-everything prompts.
|
|
1062
|
+
|
|
1063
|
+
```python
|
|
1064
|
+
from ai_vision_tool.segmentation import SAMSegmenter
|
|
1065
|
+
|
|
1066
|
+
# Point prompt
|
|
1067
|
+
segmenter = SAMSegmenter(
|
|
1068
|
+
model_path="sam_vit_b.pth",
|
|
1069
|
+
model_type="vit_b",
|
|
1070
|
+
mode="point",
|
|
1071
|
+
device="auto",
|
|
1072
|
+
)
|
|
1073
|
+
result = segmenter.run({"frame": image, "prompt_points": [(320, 240)], "prompt_labels": [1]})
|
|
1074
|
+
print(result["masks"]) # list of binary masks
|
|
1075
|
+
print(result["iou_scores"])
|
|
1076
|
+
|
|
1077
|
+
# Auto-everything (no prompts)
|
|
1078
|
+
segmenter = SAMSegmenter(model_path="sam_vit_b.pth", mode="auto")
|
|
1079
|
+
result = segmenter.run({"frame": image})
|
|
1080
|
+
print(result["masks"]) # all detected segments
|
|
1081
|
+
```
|
|
1082
|
+
|
|
1083
|
+
### MaskPostProcessor
|
|
1084
|
+
|
|
1085
|
+
Morphological cleanup of segmentation masks.
|
|
1086
|
+
|
|
1087
|
+
```python
|
|
1088
|
+
from ai_vision_tool.segmentation import MaskPostProcessor
|
|
1089
|
+
|
|
1090
|
+
processor = MaskPostProcessor(
|
|
1091
|
+
operations=["erode", "dilate", "fill_holes", "remove_small", "largest_only"],
|
|
1092
|
+
kernel_size=5,
|
|
1093
|
+
)
|
|
1094
|
+
result = processor.run({"frame": image, "masks": [binary_mask]})
|
|
1095
|
+
print(result["masks"]) # cleaned masks
|
|
1096
|
+
print(result["polygons"]) # polygon contours per mask
|
|
1097
|
+
```
|
|
1098
|
+
|
|
1099
|
+
---
|
|
1100
|
+
|
|
1101
|
+
## Enhancement
|
|
1102
|
+
|
|
1103
|
+
Enhancement components restore or improve degraded images. All use the same component
|
|
1104
|
+
interface and fall back to pure NumPy/OpenCV if heavy deps are unavailable.
|
|
1105
|
+
|
|
1106
|
+
### SuperResolution
|
|
1107
|
+
|
|
1108
|
+
2× or 4× upscaling. Uses `cv2.dnn_superres` if available, then ONNX, then bicubic.
|
|
1109
|
+
|
|
1110
|
+
```python
|
|
1111
|
+
from ai_vision_tool.enhancement import SuperResolution
|
|
1112
|
+
|
|
1113
|
+
sr = SuperResolution(
|
|
1114
|
+
scale=2,
|
|
1115
|
+
backend="auto", # "auto" | "opencv" | "onnx" | "bicubic"
|
|
1116
|
+
model_path=None, # optional ONNX or OpenCV SR model
|
|
1117
|
+
)
|
|
1118
|
+
result = sr.run({"frame": image})
|
|
1119
|
+
print(result["frame"].shape) # (H*2, W*2, 3)
|
|
1120
|
+
print(result["sr_scale"]) # 2
|
|
1121
|
+
print(result["sr_backend"]) # "bicubic" / "opencv" / "onnx"
|
|
1122
|
+
```
|
|
1123
|
+
|
|
1124
|
+
### Denoiser
|
|
1125
|
+
|
|
1126
|
+
Non-local means, bilateral filter, Gaussian, median, or DnCNN-ONNX.
|
|
1127
|
+
|
|
1128
|
+
```python
|
|
1129
|
+
from ai_vision_tool.enhancement import Denoiser
|
|
1130
|
+
|
|
1131
|
+
result = Denoiser(method="nlmeans", strength=10.0).run({"frame": image})
|
|
1132
|
+
result = Denoiser(method="bilateral", strength=9.0).run({"frame": image})
|
|
1133
|
+
result = Denoiser(method="gaussian", strength=3.0).run({"frame": image})
|
|
1134
|
+
# DnCNN-ONNX
|
|
1135
|
+
result = Denoiser(method="dncnn", model_path="dncnn.onnx").run({"frame": image})
|
|
1136
|
+
print(result["denoise_method"])
|
|
1137
|
+
```
|
|
1138
|
+
|
|
1139
|
+
### Deblurrer
|
|
1140
|
+
|
|
1141
|
+
Wiener deconvolution (FFT), Richardson-Lucy iterative, unsharp mask, or NAFNet-ONNX.
|
|
1142
|
+
|
|
1143
|
+
```python
|
|
1144
|
+
from ai_vision_tool.enhancement import Deblurrer
|
|
1145
|
+
|
|
1146
|
+
result = Deblurrer(method="wiener", kernel_size=5).run({"frame": image})
|
|
1147
|
+
result = Deblurrer(method="richardson_lucy", kernel_size=5, iterations=10).run({"frame": image})
|
|
1148
|
+
result = Deblurrer(method="unsharp", strength=1.0).run({"frame": image})
|
|
1149
|
+
result = Deblurrer(method="nafnet", model_path="nafnet.onnx").run({"frame": image})
|
|
1150
|
+
```
|
|
1151
|
+
|
|
1152
|
+
### LowLightEnhancer
|
|
1153
|
+
|
|
1154
|
+
CLAHE on LAB L-channel, gamma LUT, histogram stretch, single/multi-scale Retinex,
|
|
1155
|
+
Zero-DCE brightness curve approximation, or ONNX model.
|
|
1156
|
+
|
|
1157
|
+
```python
|
|
1158
|
+
from ai_vision_tool.enhancement import LowLightEnhancer
|
|
1159
|
+
|
|
1160
|
+
result = LowLightEnhancer(method="clahe", clip_limit=3.0).run({"frame": image})
|
|
1161
|
+
result = LowLightEnhancer(method="gamma", gamma=0.5).run({"frame": image})
|
|
1162
|
+
result = LowLightEnhancer(method="msr").run({"frame": image}) # multi-scale Retinex
|
|
1163
|
+
result = LowLightEnhancer(method="zero_dce").run({"frame": image})
|
|
1164
|
+
result = LowLightEnhancer(method="onnx", model_path="llnet.onnx").run({"frame": image})
|
|
1165
|
+
```
|
|
1166
|
+
|
|
1167
|
+
### Colorizer
|
|
1168
|
+
|
|
1169
|
+
Zhang 2016 LAB-AB network colorization, pseudo-color (VIRIDIS), thermal (JET), or ONNX.
|
|
1170
|
+
|
|
1171
|
+
```python
|
|
1172
|
+
from ai_vision_tool.enhancement import Colorizer
|
|
1173
|
+
|
|
1174
|
+
result = Colorizer(method="opencv_dnn", model_path="colorization.caffemodel").run({"frame": gray_image})
|
|
1175
|
+
result = Colorizer(method="pseudo_color").run({"frame": gray_image})
|
|
1176
|
+
result = Colorizer(method="thermal").run({"frame": gray_image})
|
|
1177
|
+
print(result["is_grayscale_input"]) # True if input was single-channel
|
|
1178
|
+
```
|
|
1179
|
+
|
|
1180
|
+
---
|
|
1181
|
+
|
|
1182
|
+
## I/O
|
|
1183
|
+
|
|
1184
|
+
I/O components read images, videos, and cloud blobs, or export annotated datasets.
|
|
1185
|
+
|
|
1186
|
+
### ImageReader / ImageWriter
|
|
1187
|
+
|
|
1188
|
+
```python
|
|
1189
|
+
from ai_vision_tool.io import ImageReader, ImageWriter
|
|
1190
|
+
|
|
1191
|
+
# Read a single image
|
|
1192
|
+
reader = ImageReader(path="image.jpg", color_mode="bgr") # "bgr" | "rgb" | "gray"
|
|
1193
|
+
result = reader.run({})
|
|
1194
|
+
image = result["frame"]
|
|
1195
|
+
|
|
1196
|
+
# Write frames — {index}, {timestamp}, {label} tokens in filename
|
|
1197
|
+
writer = ImageWriter(
|
|
1198
|
+
output_dir="output/frames",
|
|
1199
|
+
filename_pattern="{index:06d}.jpg",
|
|
1200
|
+
quality=95,
|
|
1201
|
+
)
|
|
1202
|
+
writer.run({"frame": image})
|
|
1203
|
+
writer.cleanup()
|
|
1204
|
+
```
|
|
1205
|
+
|
|
1206
|
+
### VideoReader / VideoWriter
|
|
1207
|
+
|
|
1208
|
+
```python
|
|
1209
|
+
from ai_vision_tool.io import VideoReader, VideoWriter
|
|
1210
|
+
|
|
1211
|
+
# Stream frames from a video file
|
|
1212
|
+
reader = VideoReader("video.mp4", start_frame=0, step=1)
|
|
1213
|
+
for payload in reader:
|
|
1214
|
+
if payload.get("eof"):
|
|
1215
|
+
break
|
|
1216
|
+
frame = payload["frame"]
|
|
1217
|
+
|
|
1218
|
+
# Write annotated frames to video
|
|
1219
|
+
writer = VideoWriter(output_path="out.mp4", fps=30.0, codec="mp4v")
|
|
1220
|
+
writer.run({"frame": frame})
|
|
1221
|
+
writer.cleanup()
|
|
1222
|
+
```
|
|
1223
|
+
|
|
1224
|
+
### CameraSource
|
|
1225
|
+
|
|
1226
|
+
Live webcam, RTSP, or HTTP stream reader.
|
|
1227
|
+
|
|
1228
|
+
```python
|
|
1229
|
+
from ai_vision_tool.io import CameraSource
|
|
1230
|
+
|
|
1231
|
+
cam = CameraSource(
|
|
1232
|
+
source=0, # 0 = webcam, "rtsp://..." = RTSP, "http://..." = HTTP
|
|
1233
|
+
width=1280,
|
|
1234
|
+
height=720,
|
|
1235
|
+
fps=30.0,
|
|
1236
|
+
buffer_size=1,
|
|
1237
|
+
)
|
|
1238
|
+
cam.setup({})
|
|
1239
|
+
|
|
1240
|
+
payload = {"frame": None}
|
|
1241
|
+
result = cam.run(payload)
|
|
1242
|
+
frame = result["frame"]
|
|
1243
|
+
print(result["fps_actual"])
|
|
1244
|
+
cam.cleanup()
|
|
1245
|
+
```
|
|
1246
|
+
|
|
1247
|
+
### S3Source / GCSSource
|
|
1248
|
+
|
|
1249
|
+
Stream images from cloud storage as pipeline inputs.
|
|
1250
|
+
|
|
1251
|
+
```python
|
|
1252
|
+
from ai_vision_tool.integrations.cloud import S3Source
|
|
1253
|
+
|
|
1254
|
+
source = S3Source(
|
|
1255
|
+
bucket="my-bucket",
|
|
1256
|
+
prefix="images/train/",
|
|
1257
|
+
extensions=(".jpg", ".png"),
|
|
1258
|
+
aws_region="ap-southeast-1",
|
|
1259
|
+
)
|
|
1260
|
+
source.setup({})
|
|
1261
|
+
result = source.run({}) # reads next image from bucket
|
|
1262
|
+
frame = result["frame"]
|
|
1263
|
+
print(result["s3_key"])
|
|
1264
|
+
```
|
|
1265
|
+
|
|
1266
|
+
```python
|
|
1267
|
+
from ai_vision_tool.integrations.cloud import GCSSource
|
|
1268
|
+
|
|
1269
|
+
source = GCSSource(
|
|
1270
|
+
bucket="my-gcs-bucket",
|
|
1271
|
+
prefix="frames/",
|
|
1272
|
+
credentials_path="/path/to/sa.json", # None = use ADC
|
|
1273
|
+
)
|
|
1274
|
+
result = source.run({})
|
|
1275
|
+
```
|
|
1276
|
+
|
|
1277
|
+
### DatasetExporter
|
|
1278
|
+
|
|
1279
|
+
Export detections as YOLO txt, COCO JSON, or VOC XML.
|
|
1280
|
+
|
|
1281
|
+
```python
|
|
1282
|
+
from ai_vision_tool.io import DatasetExporter
|
|
1283
|
+
|
|
1284
|
+
exporter = DatasetExporter(
|
|
1285
|
+
output_dir="dataset/",
|
|
1286
|
+
format="yolo", # "yolo" | "coco" | "voc"
|
|
1287
|
+
split="train",
|
|
1288
|
+
class_names=["cat", "dog"],
|
|
1289
|
+
)
|
|
1290
|
+
exporter.run({
|
|
1291
|
+
"frame": image,
|
|
1292
|
+
"bboxes": [{"x1": 10, "y1": 20, "x2": 120, "y2": 80, "label": "cat", "conf": 0.9}],
|
|
1293
|
+
})
|
|
1294
|
+
exporter.cleanup() # flushes COCO JSON / VOC XML to disk
|
|
1295
|
+
```
|
|
1296
|
+
|
|
1297
|
+
---
|
|
1298
|
+
|
|
1299
|
+
## Streaming
|
|
1300
|
+
|
|
1301
|
+
Streaming components connect real-time sources and sinks to pipelines.
|
|
1302
|
+
|
|
1303
|
+
### FrameStream / DirectoryStream
|
|
1304
|
+
|
|
1305
|
+
Unified iterator over a webcam index, a video path, a list of paths, or an image directory.
|
|
1306
|
+
|
|
1307
|
+
```python
|
|
1308
|
+
from ai_vision_tool.streaming import FrameStream, DirectoryStream
|
|
1309
|
+
|
|
1310
|
+
# Iterate a video
|
|
1311
|
+
with FrameStream("video.mp4", max_frames=100) as stream:
|
|
1312
|
+
for payload in stream:
|
|
1313
|
+
frame = payload["frame"]
|
|
1314
|
+
|
|
1315
|
+
# Iterate sorted images from a directory
|
|
1316
|
+
for payload in DirectoryStream("data/frames/", extensions=(".jpg", ".png")):
|
|
1317
|
+
frame = payload["frame"]
|
|
1318
|
+
```
|
|
1319
|
+
|
|
1320
|
+
### RTSPClient
|
|
1321
|
+
|
|
1322
|
+
Background-threaded RTSP reader with auto-reconnect.
|
|
1323
|
+
|
|
1324
|
+
```python
|
|
1325
|
+
from ai_vision_tool.streaming import RTSPClient
|
|
1326
|
+
|
|
1327
|
+
client = RTSPClient(
|
|
1328
|
+
url="rtsp://192.168.1.10:554/stream",
|
|
1329
|
+
reconnect=True,
|
|
1330
|
+
reconnect_delay=2.0,
|
|
1331
|
+
max_retries=3,
|
|
1332
|
+
)
|
|
1333
|
+
client.setup({})
|
|
1334
|
+
result = client.run({}) # returns latest buffered frame
|
|
1335
|
+
frame = result["frame"]
|
|
1336
|
+
client.cleanup()
|
|
1337
|
+
```
|
|
1338
|
+
|
|
1339
|
+
### WebSocketSink / WebSocketSource
|
|
1340
|
+
|
|
1341
|
+
Broadcast frames as base64 JPEG over WebSocket. Falls back to MJPEG HTTP when
|
|
1342
|
+
`websockets` is not installed.
|
|
1343
|
+
|
|
1344
|
+
```python
|
|
1345
|
+
from ai_vision_tool.integrations.streaming import WebSocketSink
|
|
1346
|
+
|
|
1347
|
+
sink = WebSocketSink(host="0.0.0.0", port=8765, quality=80)
|
|
1348
|
+
sink.setup({})
|
|
1349
|
+
|
|
1350
|
+
sink.run({"frame": frame}) # broadcast to all connected clients
|
|
1351
|
+
sink.cleanup()
|
|
1352
|
+
```
|
|
1353
|
+
|
|
1354
|
+
```python
|
|
1355
|
+
from ai_vision_tool.integrations.streaming import WebSocketSource
|
|
1356
|
+
|
|
1357
|
+
source = WebSocketSource(url="ws://localhost:8765")
|
|
1358
|
+
source.setup({})
|
|
1359
|
+
result = source.run({})
|
|
1360
|
+
frame = result["frame"]
|
|
1361
|
+
```
|
|
1362
|
+
|
|
1363
|
+
### KafkaSource / KafkaSink
|
|
1364
|
+
|
|
1365
|
+
Stream frames as base64-JPEG JSON messages through Kafka. Requires the `kafka` extra
|
|
1366
|
+
(`pip install "ai-vision-tool[kafka]"`).
|
|
1367
|
+
|
|
1368
|
+
```python
|
|
1369
|
+
from ai_vision_tool.integrations.streaming import KafkaSink, KafkaSource
|
|
1370
|
+
|
|
1371
|
+
sink = KafkaSink(bootstrap_servers="localhost:9092", topic="vision_frames", quality=80)
|
|
1372
|
+
sink.setup({})
|
|
1373
|
+
sink.run({"frame": frame})
|
|
1374
|
+
|
|
1375
|
+
source = KafkaSource(
|
|
1376
|
+
bootstrap_servers="localhost:9092",
|
|
1377
|
+
topic="vision_frames",
|
|
1378
|
+
group_id="ai_vision",
|
|
1379
|
+
)
|
|
1380
|
+
source.setup({})
|
|
1381
|
+
result = source.run({})
|
|
1382
|
+
frame = result["frame"]
|
|
1383
|
+
```
|
|
1384
|
+
|
|
1385
|
+
### BufferedStream / SlidingWindowBuffer
|
|
1386
|
+
|
|
1387
|
+
Decouple producer and consumer speeds with a frame buffer.
|
|
1388
|
+
|
|
1389
|
+
```python
|
|
1390
|
+
from ai_vision_tool.streaming import BufferedStream, SlidingWindowBuffer
|
|
1391
|
+
|
|
1392
|
+
# Buffer with "oldest" drop policy when full
|
|
1393
|
+
buf = BufferedStream(buffer_size=30, drop_policy="oldest", emit_rate=None)
|
|
1394
|
+
buf.run({"frame": frame}) # push frame
|
|
1395
|
+
result = buf.run({}) # pop frame
|
|
1396
|
+
|
|
1397
|
+
# Sliding window — yields batches of `window` frames with optional overlap
|
|
1398
|
+
window = SlidingWindowBuffer(window=16, overlap=8)
|
|
1399
|
+
window.push(frame)
|
|
1400
|
+
if window.ready():
|
|
1401
|
+
batch = window.get() # list of 16 frames
|
|
1402
|
+
```
|
|
1403
|
+
|
|
1404
|
+
---
|
|
1405
|
+
|
|
1406
|
+
## Visualization
|
|
1407
|
+
|
|
1408
|
+
Visualization components render annotations, serve dashboards, and export annotated video.
|
|
1409
|
+
|
|
1410
|
+
### FrameViewer
|
|
1411
|
+
|
|
1412
|
+
Display frames in a cv2 window with a rolling FPS readout. Sets `data["stop"] = True` when `q` is pressed.
|
|
1413
|
+
|
|
1414
|
+
```python
|
|
1415
|
+
from ai_vision_tool.visualization import FrameViewer
|
|
1416
|
+
|
|
1417
|
+
viewer = FrameViewer(window_name="Preview", fps_window=30)
|
|
1418
|
+
viewer.setup({})
|
|
1419
|
+
|
|
1420
|
+
for payload in FrameStream("video.mp4"):
|
|
1421
|
+
result = viewer.run(payload)
|
|
1422
|
+
if result.get("stop"):
|
|
1423
|
+
break
|
|
1424
|
+
viewer.cleanup()
|
|
1425
|
+
```
|
|
1426
|
+
|
|
1427
|
+
### BBoxRenderer
|
|
1428
|
+
|
|
1429
|
+
Render bounding boxes with consistent per-class colors, optional semi-transparent fill,
|
|
1430
|
+
and label/confidence/track-id text.
|
|
1431
|
+
|
|
1432
|
+
```python
|
|
1433
|
+
from ai_vision_tool.visualization import BBoxRenderer
|
|
1434
|
+
|
|
1435
|
+
renderer = BBoxRenderer(
|
|
1436
|
+
thickness=2,
|
|
1437
|
+
font_scale=0.5,
|
|
1438
|
+
show_conf=True,
|
|
1439
|
+
show_label=True,
|
|
1440
|
+
show_track_id=True,
|
|
1441
|
+
alpha=0.25, # semi-transparent fill; 0 = no fill
|
|
1442
|
+
)
|
|
1443
|
+
result = renderer.run({
|
|
1444
|
+
"frame": image,
|
|
1445
|
+
"bboxes": [{"x1": 10, "y1": 20, "x2": 200, "y2": 150, "label": "person", "conf": 0.87}],
|
|
1446
|
+
})
|
|
1447
|
+
output = result["rendered_frame"]
|
|
1448
|
+
```
|
|
1449
|
+
|
|
1450
|
+
### HeatmapRenderer
|
|
1451
|
+
|
|
1452
|
+
Accumulate and overlay spatial heatmaps from detections, anomaly maps, attention, or
|
|
1453
|
+
optical flow.
|
|
1454
|
+
|
|
1455
|
+
```python
|
|
1456
|
+
from ai_vision_tool.visualization import HeatmapRenderer
|
|
1457
|
+
import cv2
|
|
1458
|
+
|
|
1459
|
+
renderer = HeatmapRenderer(
|
|
1460
|
+
source="detections", # "detections" | "anomaly_map" | "attention" | "motion"
|
|
1461
|
+
colormap=cv2.COLORMAP_JET,
|
|
1462
|
+
alpha=0.5,
|
|
1463
|
+
accumulate=True, # keep cumulative density
|
|
1464
|
+
decay=0.95,
|
|
1465
|
+
)
|
|
1466
|
+
result = renderer.run({"frame": image, "bboxes": [...]})
|
|
1467
|
+
print(result["heatmap"]) # raw density float array
|
|
1468
|
+
print(result["heatmap_overlay"]) # blended on original frame
|
|
1469
|
+
```
|
|
1470
|
+
|
|
1471
|
+
### DashboardSink
|
|
1472
|
+
|
|
1473
|
+
Serve a live stream dashboard. Uses Gradio if installed; falls back to MJPEG HTTP.
|
|
1474
|
+
|
|
1475
|
+
```python
|
|
1476
|
+
from ai_vision_tool.visualization import DashboardSink
|
|
1477
|
+
|
|
1478
|
+
sink = DashboardSink(host="0.0.0.0", port=7860, quality=80, title="Vision Dashboard")
|
|
1479
|
+
sink.setup({})
|
|
1480
|
+
# Opens http://0.0.0.0:7860/ — update by pushing frames in your loop
|
|
1481
|
+
sink.run({"frame": frame})
|
|
1482
|
+
```
|
|
1483
|
+
|
|
1484
|
+
### VideoAnnotationExporter
|
|
1485
|
+
|
|
1486
|
+
Write an annotated output video with an optional JSON sidecar containing per-frame bbox data.
|
|
1487
|
+
|
|
1488
|
+
```python
|
|
1489
|
+
from ai_vision_tool.visualization import VideoAnnotationExporter
|
|
1490
|
+
|
|
1491
|
+
exporter = VideoAnnotationExporter(
|
|
1492
|
+
output_path="output/annotated.mp4",
|
|
1493
|
+
fps=30.0,
|
|
1494
|
+
codec="mp4v",
|
|
1495
|
+
burn_annotations=True, # render bboxes/tracks onto frames
|
|
1496
|
+
export_json=True, # write annotated.mp4 + annotated_annotations.json
|
|
1497
|
+
)
|
|
1498
|
+
exporter.setup({})
|
|
1499
|
+
|
|
1500
|
+
for payload in FrameStream("video.mp4"):
|
|
1501
|
+
# payload["bboxes"] or payload["tracks"] added by upstream detector/tracker
|
|
1502
|
+
exporter.run(payload)
|
|
1503
|
+
|
|
1504
|
+
exporter.cleanup() # flushes video + JSON
|
|
1505
|
+
```
|
|
1506
|
+
|
|
1507
|
+
---
|
|
1508
|
+
|
|
1509
|
+
## Capture Components
|
|
1510
|
+
|
|
1511
|
+
Stateful capture and annotation helpers. Import from their domain modules.
|
|
1512
|
+
|
|
1513
|
+
```python
|
|
1514
|
+
import cv2
|
|
1515
|
+
image = cv2.imread("images/github/sample.jpg")
|
|
1516
|
+
```
|
|
1517
|
+
|
|
1518
|
+
### Frame Processors
|
|
1519
|
+
|
|
1520
|
+
**`FrameEnhancer`** — Brightness, contrast, sharpening, denoising in a single pass.
|
|
1521
|
+
|
|
1522
|
+
```python
|
|
1523
|
+
from ai_vision_tool.enhancement import FrameEnhancer
|
|
1524
|
+
|
|
1525
|
+
result = FrameEnhancer().run(
|
|
1526
|
+
{"frame": image},
|
|
1527
|
+
{"brightness": 10, "contrast": 1.15, "sharpen": True, "denoise": False},
|
|
1528
|
+
)
|
|
1529
|
+
```
|
|
1530
|
+
|
|
1531
|
+
**`MotionDetector`** — Detect motion regions using background subtraction.
|
|
1532
|
+
|
|
1533
|
+
```python
|
|
1534
|
+
from ai_vision_tool.capture import MotionDetector
|
|
1535
|
+
|
|
1536
|
+
result = MotionDetector().run({"frame": image}, {"min_area": 800, "draw_motion": True})
|
|
1537
|
+
print(result["motion_boxes"])
|
|
1538
|
+
```
|
|
1539
|
+
|
|
1540
|
+
**`FrameAnnotator`** — Render payload-driven annotations (text, boxes, lines).
|
|
1541
|
+
|
|
1542
|
+
```python
|
|
1543
|
+
from ai_vision_tool.visualization import FrameAnnotator
|
|
1544
|
+
|
|
1545
|
+
result = FrameAnnotator().run(
|
|
1546
|
+
{"frame": image, "annotations": [{"type": "text", "text": "Demo", "pos": (20, 30)}]},
|
|
1547
|
+
{},
|
|
1548
|
+
)
|
|
1549
|
+
```
|
|
1550
|
+
|
|
1551
|
+
### Capture Helpers
|
|
1552
|
+
|
|
1553
|
+
```python
|
|
1554
|
+
from ai_vision_tool.capture import PictureTaker, BurstPictureTaker, VideoTaker, FrameGrabber
|
|
1555
|
+
|
|
1556
|
+
PictureTaker().run(None, {"imgdir": "output/stills", "camera_id": 0})
|
|
1557
|
+
BurstPictureTaker(burst_count=5, interval_seconds=0.2)
|
|
1558
|
+
VideoTaker().run(None, {"viddir": "output/videos", "fps": 30.0})
|
|
1559
|
+
FrameGrabber().run("video.mp4", {"output_folder": "output/frames", "skip_frames": 90})
|
|
1560
|
+
```
|
|
1561
|
+
|
|
1562
|
+
### Dataset and Export
|
|
1563
|
+
|
|
1564
|
+
```python
|
|
1565
|
+
from ai_vision_tool.io import DatasetCollector, ImageExporter
|
|
1566
|
+
from ai_vision_tool.capture import TimeLapseCapture
|
|
1567
|
+
|
|
1568
|
+
DatasetCollector().run(
|
|
1569
|
+
{"frame": image},
|
|
1570
|
+
{"save_sample": True, "output_dir": "output/dataset", "label": "forklift"},
|
|
1571
|
+
)
|
|
1572
|
+
TimeLapseCapture(output_dir="output/timelapse", interval_seconds=5).run({"frame": image}, {})
|
|
1573
|
+
ImageExporter(output_dir="output/exports").run({"frame": image}, {"export_gray": True})
|
|
1574
|
+
```
|
|
1575
|
+
|
|
1576
|
+
### Auto-Labeling
|
|
1577
|
+
|
|
1578
|
+
```python
|
|
1579
|
+
from ai_vision_tool.integrations.labeling import DarknetAutoLabeler, TensorFlowAutoLabeler
|
|
1580
|
+
|
|
1581
|
+
DarknetAutoLabeler().run({"frame": image}, {"output_dir": "output/labels"})
|
|
1582
|
+
TensorFlowAutoLabeler().run({"frame": image}, {"output_dir": "output/labels"})
|
|
1583
|
+
```
|
|
1584
|
+
|
|
1585
|
+
---
|
|
1586
|
+
|
|
1587
|
+
## Utilities
|
|
1588
|
+
|
|
1589
|
+
Utility classes provide shared infrastructure used across components.
|
|
1590
|
+
|
|
1591
|
+
### ColorPalette
|
|
1592
|
+
|
|
1593
|
+
Golden-ratio hue HSV→BGR palette for consistent per-class coloring.
|
|
1594
|
+
|
|
1595
|
+
```python
|
|
1596
|
+
from ai_vision_tool.utils import ColorPalette
|
|
1597
|
+
|
|
1598
|
+
palette = ColorPalette(n_colors=80, seed=42)
|
|
1599
|
+
color = palette.get("person") # (B, G, R) tuple, stable per label string
|
|
1600
|
+
color = palette[0] # by integer class index
|
|
1601
|
+
print(palette.as_dict()) # {label: (B, G, R), ...}
|
|
1602
|
+
```
|
|
1603
|
+
|
|
1604
|
+
### MetricsLogger / MetricsLoggerComponent
|
|
1605
|
+
|
|
1606
|
+
Thread-safe rolling metrics logger.
|
|
1607
|
+
|
|
1608
|
+
```python
|
|
1609
|
+
from ai_vision_tool.utils import MetricsLogger, MetricsLoggerComponent
|
|
1610
|
+
|
|
1611
|
+
# Standalone
|
|
1612
|
+
logger = MetricsLogger(window=30)
|
|
1613
|
+
logger.tick()
|
|
1614
|
+
logger.log_latency(12.5) # ms
|
|
1615
|
+
print(logger.fps())
|
|
1616
|
+
print(logger.report())
|
|
1617
|
+
|
|
1618
|
+
# As a pipeline component — attaches data["metrics"] to payload
|
|
1619
|
+
component = MetricsLoggerComponent(window=30)
|
|
1620
|
+
result = component.run({"frame": image})
|
|
1621
|
+
print(result["metrics"]) # {"fps": ..., "mean_latency_ms": ..., "frame_count": ...}
|
|
1622
|
+
```
|
|
1623
|
+
|
|
1624
|
+
### FrameSampler
|
|
1625
|
+
|
|
1626
|
+
Throttle pipeline throughput by skipping frames.
|
|
1627
|
+
|
|
1628
|
+
```python
|
|
1629
|
+
from ai_vision_tool.utils import FrameSampler
|
|
1630
|
+
|
|
1631
|
+
sampler = FrameSampler(
|
|
1632
|
+
every_n=3, # mode="count": process every 3rd frame
|
|
1633
|
+
mode="count", # "count" | "fps" | "random"
|
|
1634
|
+
target_fps=10.0, # mode="fps": target output rate
|
|
1635
|
+
prob=0.5, # mode="random": pass-through probability
|
|
1636
|
+
)
|
|
1637
|
+
result = sampler.run({"frame": image})
|
|
1638
|
+
print(result.get("skip")) # True → downstream should skip this frame
|
|
1639
|
+
```
|
|
1640
|
+
|
|
1641
|
+
### ImageHash
|
|
1642
|
+
|
|
1643
|
+
Perceptual hashing for duplicate detection.
|
|
1644
|
+
|
|
1645
|
+
```python
|
|
1646
|
+
from ai_vision_tool.utils import ImageHash
|
|
1647
|
+
|
|
1648
|
+
hasher = ImageHash(
|
|
1649
|
+
method="phash", # "phash" | "ahash" | "dhash"
|
|
1650
|
+
hash_size=8,
|
|
1651
|
+
threshold=10, # Hamming distance threshold
|
|
1652
|
+
)
|
|
1653
|
+
result = hasher.run({"frame": image})
|
|
1654
|
+
print(result["hash"]) # hex string
|
|
1655
|
+
print(result["hash_distance"]) # distance to reference (if reference set)
|
|
1656
|
+
print(result["is_duplicate"]) # bool
|
|
1657
|
+
```
|
|
1658
|
+
|
|
1659
|
+
### DrawUtils
|
|
1660
|
+
|
|
1661
|
+
Render bboxes, masks, and keypoints from payload data.
|
|
1662
|
+
|
|
1663
|
+
```python
|
|
1664
|
+
from ai_vision_tool.utils import DrawUtils
|
|
1665
|
+
|
|
1666
|
+
drawer = DrawUtils(font_scale=0.5, thickness=1, alpha=0.4)
|
|
1667
|
+
result = drawer.run({
|
|
1668
|
+
"frame": image,
|
|
1669
|
+
"bboxes": [{"x1": 10, "y1": 10, "x2": 200, "y2": 150, "label": "car", "conf": 0.92}],
|
|
1670
|
+
"masks": [binary_mask],
|
|
1671
|
+
"poses": [{"keypoints": [...]}],
|
|
1672
|
+
})
|
|
1673
|
+
output = result["frame"]
|
|
1674
|
+
```
|
|
1675
|
+
|
|
1676
|
+
---
|
|
1677
|
+
|
|
1678
|
+
## Core
|
|
1679
|
+
|
|
1680
|
+
Core utilities provide device management, typed data structures, batch processing, and
|
|
1681
|
+
rate limiting.
|
|
1682
|
+
|
|
1683
|
+
### Device
|
|
1684
|
+
|
|
1685
|
+
Auto-select CUDA, MPS (Apple Silicon), or CPU.
|
|
1686
|
+
|
|
1687
|
+
```python
|
|
1688
|
+
from ai_vision_tool.core import Device
|
|
1689
|
+
|
|
1690
|
+
dev = Device("auto") # "auto" | "cuda" | "mps" | "cpu"
|
|
1691
|
+
print(dev.name) # "cuda:0" / "mps" / "cpu"
|
|
1692
|
+
tensor = dev.to_torch(numpy_array)
|
|
1693
|
+
backend = dev.to_cv_backend() # cv2 DNN target constant
|
|
1694
|
+
|
|
1695
|
+
# Singleton — shares device across the process
|
|
1696
|
+
default_dev = Device.default()
|
|
1697
|
+
```
|
|
1698
|
+
|
|
1699
|
+
### Data Types
|
|
1700
|
+
|
|
1701
|
+
Typed dataclasses for detections, poses, masks, and tracks.
|
|
1702
|
+
|
|
1703
|
+
```python
|
|
1704
|
+
from ai_vision_tool.core import BBox, Detection, Keypoint, Pose, Mask, Track
|
|
1705
|
+
|
|
1706
|
+
bbox = BBox(x1=10, y1=20, x2=100, y2=80, label="car", conf=0.9)
|
|
1707
|
+
print(bbox.iou(BBox(x1=15, y1=25, x2=110, y2=85)))
|
|
1708
|
+
print(bbox.to_xywh())
|
|
1709
|
+
print(bbox.clip(width=640, height=480).as_dict())
|
|
1710
|
+
|
|
1711
|
+
mask = Mask(data=binary_array, label="person")
|
|
1712
|
+
polygon = mask.to_polygon() # contour points
|
|
1713
|
+
|
|
1714
|
+
track = Track(track_id=7, bbox=bbox, state="active", age=12)
|
|
1715
|
+
```
|
|
1716
|
+
|
|
1717
|
+
### BatchProcessor
|
|
1718
|
+
|
|
1719
|
+
Process image directories or lists in parallel.
|
|
1720
|
+
|
|
1721
|
+
```python
|
|
1722
|
+
from ai_vision_tool.core import BatchProcessor
|
|
1723
|
+
from ai_vision_tool.pipelines import AIVisionPipeline
|
|
1724
|
+
from ai_vision_tool.preprocessing import Resize
|
|
1725
|
+
|
|
1726
|
+
pipeline = AIVisionPipeline().add(Resize(width=640, height=640))
|
|
1727
|
+
|
|
1728
|
+
processor = BatchProcessor(pipeline, batch_size=8, num_workers=4)
|
|
1729
|
+
results = processor.process([image_a, image_b, image_c])
|
|
1730
|
+
results = processor.process_directory("data/images/", extensions=(".jpg", ".png"))
|
|
1731
|
+
```
|
|
1732
|
+
|
|
1733
|
+
### Scheduler / RateLimiter
|
|
1734
|
+
|
|
1735
|
+
Token-bucket rate limiting. `Scheduler` is a pipeline component that skips or blocks
|
|
1736
|
+
frames to enforce a target FPS. `RateLimiter` is a standalone utility.
|
|
1737
|
+
|
|
1738
|
+
```python
|
|
1739
|
+
from ai_vision_tool.core import Scheduler, RateLimiter
|
|
1740
|
+
|
|
1741
|
+
scheduler = Scheduler(target_fps=10.0, drop_policy="skip") # "skip" | "block"
|
|
1742
|
+
result = scheduler.run({"frame": image})
|
|
1743
|
+
if result.get("skip"):
|
|
1744
|
+
continue  # only valid inside your frame-processing loop
|
|
1745
|
+
|
|
1746
|
+
limiter = RateLimiter(calls_per_second=5.0)
|
|
1747
|
+
limiter.acquire() # blocks until token available
|
|
1748
|
+
```
|
|
1749
|
+
|
|
1750
|
+
### MemoryManager / GPUMemoryTracker
|
|
1751
|
+
|
|
1752
|
+
Pre-allocated buffer pool for zero-copy frame passing.
|
|
1753
|
+
|
|
1754
|
+
```python
|
|
1755
|
+
from ai_vision_tool.core import MemoryManager, GPUMemoryTracker
|
|
1756
|
+
|
|
1757
|
+
pool = MemoryManager(pool_size=10, shape=(720, 1280, 3))
|
|
1758
|
+
buf = pool.acquire() # numpy array from pool
|
|
1759
|
+
# ... fill buf ...
|
|
1760
|
+
pool.release(buf)
|
|
1761
|
+
|
|
1762
|
+
with pool.context() as buf: # auto-release on exit
|
|
1763
|
+
buf[:] = frame
|
|
1764
|
+
|
|
1765
|
+
tracker = GPUMemoryTracker()
|
|
1766
|
+
tracker.snapshot()
|
|
1767
|
+
print(tracker.delta_mb())
|
|
1768
|
+
```
|
|
1769
|
+
|
|
1770
|
+
---
|
|
1771
|
+
|
|
1772
|
+
## Configuration
|
|
1773
|
+
|
|
1774
|
+
Configuration utilities manage YAML/JSON configs, component discovery, and environment
|
|
1775
|
+
variable injection.
|
|
1776
|
+
|
|
1777
|
+
### YAMLConfig
|
|
1778
|
+
|
|
1779
|
+
```python
|
|
1780
|
+
from ai_vision_tool.config import YAMLConfig
|
|
1781
|
+
|
|
1782
|
+
cfg = YAMLConfig("config/pipeline.yaml")
|
|
1783
|
+
fps = cfg.get("stream.fps", default=30)
|
|
1784
|
+
cfg.merge({"stream": {"fps": 25}})
|
|
1785
|
+
cfg.validate(schema={"stream": {"fps": int}})
|
|
1786
|
+
cfg.reload() # re-read file on disk
|
|
1787
|
+
```
|
|
1788
|
+
|
|
1789
|
+
### JSONConfig
|
|
1790
|
+
|
|
1791
|
+
```python
|
|
1792
|
+
from ai_vision_tool.config import JSONConfig
|
|
1793
|
+
|
|
1794
|
+
cfg = JSONConfig("config/settings.json")
|
|
1795
|
+
cfg.set("model.threshold", 0.3)
|
|
1796
|
+
cfg.save()
|
|
1797
|
+
|
|
1798
|
+
cfg2 = JSONConfig.from_dict({"model": {"threshold": 0.5}})
|
|
1799
|
+
```
|
|
1800
|
+
|
|
1801
|
+
### ComponentRegistry
|
|
1802
|
+
|
|
1803
|
+
Singleton registry. Supports decorator-style registration and config-driven `build()`.
|
|
1804
|
+
|
|
1805
|
+
```python
|
|
1806
|
+
from ai_vision_tool.config import ComponentRegistry
|
|
1807
|
+
|
|
1808
|
+
registry = ComponentRegistry()
|
|
1809
|
+
|
|
1810
|
+
@registry.register("MyPreprocessor")
|
|
1811
|
+
class MyPreprocessor:
|
|
1812
|
+
...
|
|
1813
|
+
|
|
1814
|
+
# Build by name (auto-registers all ai_vision_tool exports)
|
|
1815
|
+
component = registry.build("Resize", width=640, height=640)
|
|
1816
|
+
|
|
1817
|
+
# Build a pipeline from a list of dicts
|
|
1818
|
+
pipeline = registry.build_from_config([
|
|
1819
|
+
{"name": "Resize", "params": {"width": 640, "height": 640}},
|
|
1820
|
+
{"name": "Flip", "params": {"horizontal": True}},
|
|
1821
|
+
])
|
|
1822
|
+
```
|
|
1823
|
+
|
|
1824
|
+
### ProfileLoader
|
|
1825
|
+
|
|
1826
|
+
Load named profiles from YAML/JSON files in search paths.
|
|
1827
|
+
|
|
1828
|
+
```python
|
|
1829
|
+
from ai_vision_tool.config import ProfileLoader
|
|
1830
|
+
|
|
1831
|
+
loader = ProfileLoader(search_paths=["profiles/", "~/.ai_vision/"])
|
|
1832
|
+
profile = loader.load("augmentation_heavy") # loads augmentation_heavy.yaml
|
|
1833
|
+
pipeline = loader.load_pipeline("detection_rtsp") # builds AIVisionPipeline
|
|
1834
|
+
loader.save_profile({"name": "custom"}, "profiles/custom.yaml")
|
|
1835
|
+
```
|
|
1836
|
+
|
|
1837
|
+
### EnvConfig
|
|
1838
|
+
|
|
1839
|
+
Read configuration from environment variables with type casting.
|
|
1840
|
+
|
|
1841
|
+
```python
|
|
1842
|
+
from ai_vision_tool.config import EnvConfig
|
|
1843
|
+
import os
|
|
1844
|
+
|
|
1845
|
+
os.environ["AI_VISION_DEVICE"] = "cuda"
|
|
1846
|
+
os.environ["AI_VISION_API_PORT"] = "8080"
|
|
1847
|
+
|
|
1848
|
+
env = EnvConfig(prefix="AI_VISION")
|
|
1849
|
+
device = env.get("DEVICE", default="cpu") # → "cuda"
|
|
1850
|
+
port = env.get("API_PORT", cast=int, default=8300) # → 8080
|
|
1851
|
+
env.require("MODEL_PATH") # raises if missing
|
|
1852
|
+
|
|
1853
|
+
print(env.device) # shorthand property
|
|
1854
|
+
print(env.api_port)
|
|
1855
|
+
```
|
|
1856
|
+
|
|
1857
|
+
---
|
|
1858
|
+
|
|
1859
|
+
## Models
|
|
1860
|
+
|
|
1861
|
+
Model runners, registry, downloader, and benchmarking utilities.
|
|
1862
|
+
|
|
1863
|
+
### ModelRegistry
|
|
1864
|
+
|
|
1865
|
+
JSON-cached model registry stored at `~/.cache/ai_vision_tool/model_registry.json`.
|
|
1866
|
+
|
|
1867
|
+
```python
|
|
1868
|
+
from ai_vision_tool.models import ModelRegistry
|
|
1869
|
+
|
|
1870
|
+
registry = ModelRegistry()
|
|
1871
|
+
registry.register("yolov8n", path="/models/yolov8n.pt", format="torch", tags=["detection"])
|
|
1872
|
+
component = registry.load("yolov8n") # returns TorchModel / ONNXModel / TFLiteModel
|
|
1873
|
+
component.setup({})
|
|
1874
|
+
|
|
1875
|
+
component2 = registry.from_huggingface("Salesforce/blip-image-captioning-base")
|
|
1876
|
+
```
|
|
1877
|
+
|
|
1878
|
+
### ONNXModel
|
|
1879
|
+
|
|
1880
|
+
Run any ONNX model as a pipeline component.
|
|
1881
|
+
|
|
1882
|
+
```python
|
|
1883
|
+
from ai_vision_tool.models import ONNXModel
|
|
1884
|
+
|
|
1885
|
+
model = ONNXModel(
|
|
1886
|
+
model_path="model.onnx",
|
|
1887
|
+
input_name=None, # auto-detected
|
|
1888
|
+
input_size=(640, 640),
|
|
1889
|
+
providers=None, # ["CUDAExecutionProvider", "CPUExecutionProvider"]
|
|
1890
|
+
)
|
|
1891
|
+
result = model.run({"frame": image})
|
|
1892
|
+
print(result["model_output"]) # raw ONNX output arrays
|
|
1893
|
+
print(result["model_name"])
|
|
1894
|
+
```
|
|
1895
|
+
|
|
1896
|
+
### TorchModel
|
|
1897
|
+
|
|
1898
|
+
Run a TorchScript model as a pipeline component.
|
|
1899
|
+
|
|
1900
|
+
```python
|
|
1901
|
+
from ai_vision_tool.models import TorchModel
|
|
1902
|
+
|
|
1903
|
+
model = TorchModel(
|
|
1904
|
+
model_path="model.torchscript",
|
|
1905
|
+
device="auto",
|
|
1906
|
+
half_precision=False,
|
|
1907
|
+
)
|
|
1908
|
+
result = model.run({"frame": image})
|
|
1909
|
+
print(result["model_output"])
|
|
1910
|
+
```
|
|
1911
|
+
|
|
1912
|
+
### TFLiteModel
|
|
1913
|
+
|
|
1914
|
+
Run a TFLite model (tflite-runtime or tensorflow fallback).
|
|
1915
|
+
|
|
1916
|
+
```python
|
|
1917
|
+
from ai_vision_tool.models import TFLiteModel
|
|
1918
|
+
|
|
1919
|
+
model = TFLiteModel(model_path="model.tflite", num_threads=4)
|
|
1920
|
+
result = model.run({"frame": image})
|
|
1921
|
+
print(result["model_output"])
|
|
1922
|
+
print(result["inference_time_ms"])
|
|
1923
|
+
```
|
|
1924
|
+
|
|
1925
|
+
### ModelDownloader
|
|
1926
|
+
|
|
1927
|
+
Download models with progress callback and SHA256 verification.
|
|
1928
|
+
|
|
1929
|
+
```python
|
|
1930
|
+
from ai_vision_tool.models import ModelDownloader
|
|
1931
|
+
|
|
1932
|
+
downloader = ModelDownloader(cache_dir="~/.cache/ai_vision_tool/models")
|
|
1933
|
+
path = downloader.download(
|
|
1934
|
+
url="https://example.com/model.onnx",
|
|
1935
|
+
sha256="abc123...",
|
|
1936
|
+
filename="model.onnx",
|
|
1937
|
+
progress=True,
|
|
1938
|
+
)
|
|
1939
|
+
hf_path = downloader.from_huggingface(
|
|
1940
|
+
repo_id="microsoft/resnet-50",
|
|
1941
|
+
filename="pytorch_model.bin",
|
|
1942
|
+
)
|
|
1943
|
+
```
|
|
1944
|
+
|
|
1945
|
+
### ModelBenchmark
|
|
1946
|
+
|
|
1947
|
+
Latency and memory profiling with p50/p95/p99 percentiles.
|
|
1948
|
+
|
|
1949
|
+
```python
|
|
1950
|
+
from ai_vision_tool.models import ModelBenchmark, ONNXModel
|
|
1951
|
+
|
|
1952
|
+
model = ONNXModel(model_path="model.onnx")
|
|
1953
|
+
bench = ModelBenchmark(model, warmup_runs=5, benchmark_runs=100)
|
|
1954
|
+
|
|
1955
|
+
latency_report = bench.run({"frame": image})
|
|
1956
|
+
# {"p50_ms": ..., "p95_ms": ..., "p99_ms": ..., "mean_ms": ..., "fps": ...}
|
|
1957
|
+
|
|
1958
|
+
memory_report = bench.run_memory({"frame": image})
|
|
1959
|
+
# {"peak_mb": ..., "current_mb": ...}
|
|
1960
|
+
|
|
1961
|
+
bench.print_report() # ASCII table to stdout
|
|
1962
|
+
```
|
|
1963
|
+
|
|
1964
|
+
---
|
|
1965
|
+
|
|
1966
|
+
## Prebuilt Pipelines
|
|
1967
|
+
|
|
1968
|
+
`PrebuiltPipelines` provides factory classmethods that instantiate common pipeline
|
|
1969
|
+
configurations. All return an `AIVisionPipeline` ready for `.execute()`.
|
|
1970
|
+
|
|
1971
|
+
```python
|
|
1972
|
+
from ai_vision_tool.pipelines import PrebuiltPipelines
|
|
1973
|
+
import cv2
|
|
1974
|
+
|
|
1975
|
+
image = cv2.imread("images/github/sample.jpg")
|
|
1976
|
+
```
|
|
1977
|
+
|
|
1978
|
+
### Detection Pipeline
|
|
1979
|
+
|
|
1980
|
+
```python
|
|
1981
|
+
pipeline = PrebuiltPipelines.detection_pipeline(
|
|
1982
|
+
model_path="yolov8n.pt",
|
|
1983
|
+
conf_threshold=0.25,
|
|
1984
|
+
render=True,
|
|
1985
|
+
)
|
|
1986
|
+
result = pipeline.execute(initial_data={"frame": image}, global_config={})
|
|
1987
|
+
print(result["bboxes"])
|
|
1988
|
+
print(result["rendered_frame"])
|
|
1989
|
+
```
|
|
1990
|
+
|
|
1991
|
+
### Augmentation Pipeline
|
|
1992
|
+
|
|
1993
|
+
Loads from an augmentation JSON profile.
|
|
1994
|
+
|
|
1995
|
+
```python
|
|
1996
|
+
pipeline = PrebuiltPipelines.augmentation_pipeline(profile="examples/augmentation_profile.json")
|
|
1997
|
+
result = pipeline.execute(initial_data={"frame": image}, global_config={})
|
|
1998
|
+
```
|
|
1999
|
+
|
|
2000
|
+
### Preprocessing Pipeline
|
|
2001
|
+
|
|
2002
|
+
Standard resize + normalize + quality check chain.
|
|
2003
|
+
|
|
2004
|
+
```python
|
|
2005
|
+
pipeline = PrebuiltPipelines.preprocessing_pipeline(width=640, height=640)
|
|
2006
|
+
result = pipeline.execute(initial_data={"frame": image}, global_config={})
|
|
2007
|
+
```
|
|
2008
|
+
|
|
2009
|
+
### Tracking Pipeline
|
|
2010
|
+
|
|
2011
|
+
Detection + ByteTracker + BBoxRenderer.
|
|
2012
|
+
|
|
2013
|
+
```python
|
|
2014
|
+
pipeline = PrebuiltPipelines.tracking_pipeline(
|
|
2015
|
+
model_path="yolov8n.pt",
|
|
2016
|
+
conf_threshold=0.25,
|
|
2017
|
+
)
|
|
2018
|
+
result = pipeline.execute(initial_data={"frame": image}, global_config={})
|
|
2019
|
+
print(result["tracks"])
|
|
2020
|
+
```
|
|
2021
|
+
|
|
2022
|
+
### Enhancement Pipeline
|
|
2023
|
+
|
|
2024
|
+
Low-light enhancement + super-resolution.
|
|
2025
|
+
|
|
2026
|
+
```python
|
|
2027
|
+
pipeline = PrebuiltPipelines.enhancement_pipeline(enhance_method="clahe", sr_scale=2)
|
|
2028
|
+
result = pipeline.execute(initial_data={"frame": image}, global_config={})
|
|
2029
|
+
```
|
|
2030
|
+
|
|
2031
|
+
### PipelineSerializer
|
|
2032
|
+
|
|
2033
|
+
Save and reload a pipeline configuration to/from YAML or JSON.
|
|
2034
|
+
|
|
2035
|
+
```python
|
|
2036
|
+
from ai_vision_tool.pipelines import PipelineSerializer
|
|
2037
|
+
from ai_vision_tool.pipelines import AIVisionPipeline
|
|
2038
|
+
from ai_vision_tool.preprocessing import Resize
|
|
2039
|
+
from ai_vision_tool.augmentation import Flip
|
|
2040
|
+
|
|
2041
|
+
pipeline = AIVisionPipeline().add(Resize(width=640, height=640)).add(Flip(horizontal=True))
|
|
2042
|
+
|
|
2043
|
+
serializer = PipelineSerializer()
|
|
2044
|
+
config_dict = serializer.to_dict(pipeline)
|
|
2045
|
+
serializer.save(pipeline, "pipeline.yaml")
|
|
2046
|
+
|
|
2047
|
+
pipeline2 = serializer.load("pipeline.yaml")
|
|
2048
|
+
result = pipeline2.execute(initial_data={"frame": image}, global_config={})
|
|
2049
|
+
```
|
|
2050
|
+
|
|
2051
|
+
### AsyncPipeline
|
|
2052
|
+
|
|
2053
|
+
Execute pipeline steps concurrently using `asyncio` + `run_in_executor`.
|
|
2054
|
+
|
|
2055
|
+
```python
|
|
2056
|
+
import asyncio
|
|
2057
|
+
from ai_vision_tool.pipelines import AsyncPipeline
|
|
2058
|
+
from ai_vision_tool.preprocessing import Resize
|
|
2059
|
+
from ai_vision_tool.augmentation import Flip
|
|
2060
|
+
|
|
2061
|
+
async def main():
|
|
2062
|
+
apipe = AsyncPipeline(
|
|
2063
|
+
components=[Resize(width=640, height=640), Flip(horizontal=True)],
|
|
2064
|
+
global_config={},
|
|
2065
|
+
)
|
|
2066
|
+
result = await apipe.execute({"frame": image})
|
|
2067
|
+
|
|
2068
|
+
# Process multiple frames concurrently
|
|
2069
|
+
results = await apipe.execute_batch([{"frame": image}] * 8)
|
|
2070
|
+
|
|
2071
|
+
# Async generator for streaming
|
|
2072
|
+
async for result in apipe.stream([{"frame": image}] * 100):
|
|
2073
|
+
print(result["frame"].shape)
|
|
2074
|
+
|
|
2075
|
+
asyncio.run(main())
|
|
2076
|
+
```
|
|
2077
|
+
|
|
2078
|
+
### ParallelPipeline / FanOutPipeline
|
|
2079
|
+
|
|
2080
|
+
Branch into independent sub-pipelines and merge results.
|
|
2081
|
+
|
|
2082
|
+
```python
|
|
2083
|
+
from ai_vision_tool.pipelines import ParallelPipeline, FanOutPipeline
|
|
2084
|
+
from ai_vision_tool.pipelines.parallel_pipeline import merge_bboxes
|
|
2085
|
+
from ai_vision_tool.detection import ObjectDetector, FaceDetector
|
|
2086
|
+
|
|
2087
|
+
# Two independent detector branches merged
|
|
2088
|
+
parallel = ParallelPipeline(
|
|
2089
|
+
branches=[
|
|
2090
|
+
[ObjectDetector(model_path="yolov8n.pt")],
|
|
2091
|
+
[FaceDetector(backend="opencv")],
|
|
2092
|
+
],
|
|
2093
|
+
merge_fn=merge_bboxes, # or "first" | "vote" | custom callable
|
|
2094
|
+
)
|
|
2095
|
+
result = parallel.execute({"frame": image})
|
|
2096
|
+
|
|
2097
|
+
# Shared preprocessing → parallel branches
|
|
2098
|
+
from ai_vision_tool.preprocessing import Resize
|
|
2099
|
+
|
|
2100
|
+
fanout = FanOutPipeline(
|
|
2101
|
+
shared=[Resize(width=640, height=640)],
|
|
2102
|
+
branches=[
|
|
2103
|
+
[ObjectDetector(model_path="yolov8n.pt")],
|
|
2104
|
+
[FaceDetector()],
|
|
2105
|
+
],
|
|
2106
|
+
)
|
|
2107
|
+
result = fanout.execute({"frame": image})
|
|
2108
|
+
```
|
|
2109
|
+
|
|
2110
|
+
---
|
|
2111
|
+
|
|
2112
|
+
## Capture Templates
|
|
2113
|
+
|
|
2114
|
+
Capture templates are standalone helper functions for quick image display or live video
|
|
2115
|
+
loops without building a full pipeline.
|
|
2116
|
+
|
|
2117
|
+
**`image_template`** — Display a still image with optional custom frame logic.
|
|
2118
|
+
|
|
2119
|
+
```python
|
|
2120
|
+
from ai_vision_tool.capture.image_template import image_template
|
|
2121
|
+
|
|
2122
|
+
image_template(
|
|
2123
|
+
image_path="images/github/sample.jpg",
|
|
2124
|
+
custom_logic=lambda frame: frame,
|
|
2125
|
+
window_name="Preview",
|
|
2126
|
+
resolution=(1280, 720),
|
|
2127
|
+
)
|
|
2128
|
+
```
|
|
2129
|
+
|
|
2130
|
+
**`video_capture_template`** — Run a live webcam loop with custom per-frame logic.
|
|
2131
|
+
|
|
2132
|
+
```python
|
|
2133
|
+
from ai_vision_tool.capture.video_template import video_capture_template
|
|
2134
|
+
|
|
2135
|
+
video_capture_template(
|
|
2136
|
+
video_source=0,
|
|
2137
|
+
custom_logic=lambda frame: frame,
|
|
2138
|
+
window_name="Live",
|
|
2139
|
+
resolution=(1280, 720),
|
|
2140
|
+
enable_recording=False,
|
|
2141
|
+
enable_screenshot=True,
|
|
2142
|
+
)
|
|
2143
|
+
```
|
|
2144
|
+
|
|
2145
|
+
**`save_screenshot`** — Save a frame to disk from within a template loop.
|
|
2146
|
+
|
|
2147
|
+
```python
|
|
2148
|
+
from ai_vision_tool.capture.video_template import save_screenshot
|
|
2149
|
+
|
|
2150
|
+
save_screenshot(frame, output_dir="output/screenshots", prefix="capture")
|
|
2151
|
+
```
|
|
2152
|
+
|
|
2153
|
+
---
|
|
2154
|
+
|
|
2155
|
+
## CLI Reference
|
|
2156
|
+
|
|
2157
|
+
### Process a Local Image File
|
|
2158
|
+
|
|
2159
|
+
```bash
|
|
2160
|
+
ai-vision-tool \
|
|
2161
|
+
--process-image-path \
|
|
2162
|
+
--component-category preprocessing \
|
|
2163
|
+
--component-name AutoOrient \
|
|
2164
|
+
--image-path images/github/sample.jpg \
|
|
2165
|
+
--init-args-json '{"rotation": 90}' \
|
|
2166
|
+
--save-output-image output/oriented.png
|
|
2167
|
+
|
|
2168
|
+
ai-vision-tool \
|
|
2169
|
+
--process-image-path \
|
|
2170
|
+
--component-category augmentation \
|
|
2171
|
+
--component-name Flip \
|
|
2172
|
+
--image-path images/github/sample.jpg \
|
|
2173
|
+
--init-args-json '{"horizontal": true}' \
|
|
2174
|
+
--save-output-image output/flipped.png
|
|
2175
|
+
```
|
|
2176
|
+
|
|
2177
|
+
### Browse Built-In Examples
|
|
2178
|
+
|
|
2179
|
+
```bash
|
|
2180
|
+
ai-vision-tool --show-examples
|
|
2181
|
+
ai-vision-tool --show-examples --example-category preprocessing
|
|
2182
|
+
ai-vision-tool --show-examples --example-name GaussianBlur
|
|
2183
|
+
```
|
|
2184
|
+
|
|
2185
|
+
### Webcam Application
|
|
2186
|
+
|
|
2187
|
+
```bash
|
|
2188
|
+
ai-vision-tool
|
|
2189
|
+
ai-vision-tool --enhance --brightness 12 --contrast 1.15 --sharpen
|
|
2190
|
+
ai-vision-tool --flip-horizontal --rotation-angle 12 --blur --blur-kernel-size 7
|
|
2191
|
+
ai-vision-tool --motion --motion-area 1200 --annotate
|
|
2192
|
+
ai-vision-tool --augmentation-config examples/augmentation_profile.json
|
|
2193
|
+
```
|
|
2194
|
+
|
|
2195
|
+
#### Webcam Hotkeys
|
|
2196
|
+
|
|
2197
|
+
| Key | Action |
|
|
2198
|
+
|-----|--------|
|
|
2199
|
+
| `p` | Capture a single processed frame |
|
|
2200
|
+
| `b` | Capture a burst of frames |
|
|
2201
|
+
| `r` | Start or stop video recording |
|
|
2202
|
+
| `d` | Save a dataset sample |
|
|
2203
|
+
| `e` | Export grayscale and edge images |
|
|
2204
|
+
| `o` | Save the configured ROI crop |
|
|
2205
|
+
| `q` | Quit |
|
|
2206
|
+
|
|
2207
|
+
---
|
|
2208
|
+
|
|
2209
|
+
## Component Index
|
|
2210
|
+
|
|
2211
|
+
### Preprocessing
|
|
2212
|
+
|
|
2213
|
+
| Component | Purpose |
|
|
2214
|
+
|-----------|---------|
|
|
2215
|
+
| `AutoOrient` | EXIF or explicit rotation correction |
|
|
2216
|
+
| `AutoAdjustContrast` | Adaptive, histogram, or stretch contrast |
|
|
2217
|
+
| `Resize` | Exact spatial resize |
|
|
2218
|
+
| `LetterboxResize` | Aspect-preserving resize with padding |
|
|
2219
|
+
| `CenterCrop` | Centre crop for model inputs |
|
|
2220
|
+
| `PadToSquare` | Square canvas padding |
|
|
2221
|
+
| `Normalize` | Normalise pixel range |
|
|
2222
|
+
| `Standardize` | z-score standardisation |
|
|
2223
|
+
| `RescalePixels` | Explicit pixel scale and offset |
|
|
2224
|
+
| `ConvertColorSpace` | Color-space conversion |
|
|
2225
|
+
| `BGRToRGB` / `RGBToBGR` | Channel-order swap |
|
|
2226
|
+
| `CLAHE` | Local contrast enhancement |
|
|
2227
|
+
| `HistogramEqualization` | Global histogram equalisation |
|
|
2228
|
+
| `GammaCorrection` | Gamma-based exposure tuning |
|
|
2229
|
+
| `WhiteBalance` | Colour cast correction |
|
|
2230
|
+
| `Denoise` | Sensor or compression noise reduction |
|
|
2231
|
+
| `Sharpen` | Edge sharpening |
|
|
2232
|
+
| `Deblur` | Unsharp-mask deblur |
|
|
2233
|
+
| `RemoveBackground` | Foreground isolation |
|
|
2234
|
+
| `Threshold` / `AdaptiveThreshold` | Binary thresholding |
|
|
2235
|
+
| `EdgeDetection` | Edge extraction |
|
|
2236
|
+
| `ContourExtraction` | Contour metadata generation |
|
|
2237
|
+
| `PerspectiveCorrection` | Document or planar rectification |
|
|
2238
|
+
| `Deskew` | Skew correction |
|
|
2239
|
+
| `AutoCrop` | Trim empty borders |
|
|
2240
|
+
| `FaceAlign` | Face normalisation from eye landmarks |
|
|
2241
|
+
| `ObjectCrop` | Bounding-box crop extraction |
|
|
2242
|
+
| `BoundingBoxClamp` | Clamp boxes to image bounds |
|
|
2243
|
+
| `BoundingBoxNormalize` | Normalise bounding boxes |
|
|
2244
|
+
| `MaskResize` | Payload mask resizing |
|
|
2245
|
+
| `ImageQualityCheck` | Blur and brightness quality flags |
|
|
2246
|
+
| `BlurDetection` | Blur threshold check |
|
|
2247
|
+
| `BrightnessCheck` | Brightness range check |
|
|
2248
|
+
| `DuplicateImageCheck` | Duplicate detection by hash |
|
|
2249
|
+
| `CorruptImageCheck` | Corrupt or empty frame check |
|
|
2250
|
+
| `AspectRatioFilter` | Aspect-ratio validation |
|
|
2251
|
+
| `MinSizeFilter` / `MaxSizeFilter` | Dimension validation |
|
|
2252
|
+
|
|
2253
|
+
### Augmentation
|
|
2254
|
+
|
|
2255
|
+
| Component | Purpose |
|
|
2256
|
+
|-----------|---------|
|
|
2257
|
+
| `Flip` | Mirror augmentation |
|
|
2258
|
+
| `Rotate90` | 90-degree rotation |
|
|
2259
|
+
| `Crop` | Deterministic crop |
|
|
2260
|
+
| `Rotation` | Arbitrary-angle rotation |
|
|
2261
|
+
| `Shear` | Affine shear |
|
|
2262
|
+
| `Translate` | Spatial translation |
|
|
2263
|
+
| `RandomResize` / `RandomScale` | Random size/scale jitter |
|
|
2264
|
+
| `RandomCrop` / `RandomResizedCrop` | Random crop variants |
|
|
2265
|
+
| `RandomPadding` | Random padding |
|
|
2266
|
+
| `AffineTransform` | Combined affine transform |
|
|
2267
|
+
| `PerspectiveTransform` | Perspective warp |
|
|
2268
|
+
| `ElasticTransform` | Elastic distortion |
|
|
2269
|
+
| `GridDistortion` | Grid warp |
|
|
2270
|
+
| `OpticalDistortion` | Lens distortion |
|
|
2271
|
+
| `Greyscale` / `Hue` / `Saturation` / `Brightness` / `Exposure` | Color/tone adjustments |
|
|
2272
|
+
| `ColorJitter` | Compound color jitter |
|
|
2273
|
+
| `RandomGamma` / `RandomBrightnessContrast` | Randomised tone |
|
|
2274
|
+
| `RandomShadow` / `RandomSunFlare` / `RandomFog` / `RandomRain` / `RandomSnow` | Weather effects |
|
|
2275
|
+
| `ChannelShuffle` / `RGBShift` / `HSVShift` | Channel manipulation |
|
|
2276
|
+
| `ToSepia` / `InvertImage` | Color effects |
|
|
2277
|
+
| `Blur` / `GaussianBlur` / `MedianBlur` / `GlassBlur` / `DefocusBlur` / `ZoomBlur` | Blur types |
|
|
2278
|
+
| `MotionBlur` / `CameraGain` | Camera simulation |
|
|
2279
|
+
| `Emboss` / `Posterize` / `Solarize` / `Equalize` | Texture and tone effects |
|
|
2280
|
+
| `CompressionArtifacts` / `JPEGCompression` / `Downscale` / `Superpixel` | Degradation simulation |
|
|
2281
|
+
| `Noise` / `ISONoise` / `MultiplicativeNoise` / `SaltPepperNoise` | Noise types |
|
|
2282
|
+
| `CoarseDropout` / `GridDropout` / `RandomErasing` / `PixelDropout` / `MaskDropout` | Dropout variants |
|
|
2283
|
+
| `Cutout` / `Mosaic` / `Mosaic9` / `MixUp` / `CutMix` | Composition augmentations |
|
|
2284
|
+
| `CopyPaste` / `ObjectPaste` / `RandomOcclusion` / `BoundingBoxJitter` | Object manipulation |
|
|
2285
|
+
|
|
2286
|
+
### Detection
|
|
2287
|
+
|
|
2288
|
+
| Component | Purpose |
|
|
2289
|
+
|-----------|---------|
|
|
2290
|
+
| `ObjectDetector` | YOLO / ONNX object detection with greedy NMS |
|
|
2291
|
+
| `FaceDetector` | OpenCV Haar or MediaPipe face detection |
|
|
2292
|
+
| `KeypointDetector` | MediaPipe (33-keypoint) / YOLO-pose keypoint estimation |
|
|
2293
|
+
| `TextDetector` | EasyOCR / PaddleOCR text detection and recognition |
|
|
2294
|
+
| `AnomalyDetector` | Statistical / PatchCore / PCA anomaly scoring |
|
|
2295
|
+
|
|
2296
|
+
### Tracking
|
|
2297
|
+
|
|
2298
|
+
| Component | Purpose |
|
|
2299
|
+
|-----------|---------|
|
|
2300
|
+
| `ByteTracker` | Two-stage high/low-confidence multi-object tracking |
|
|
2301
|
+
| `DeepSORTTracker` | HOG re-ID embedding + cosine distance tracking |
|
|
2302
|
+
| `ReIDExtractor` | Appearance embedding extraction for gallery search |
|
|
2303
|
+
| `TrackManager` | IoU Hungarian assignment + track lifecycle management |
|
|
2304
|
+
| `KalmanFilter` | 7-state SORT Kalman filter (cx, cy, s, r, vx, vy, vs) |
|
|
2305
|
+
|
|
2306
|
+
### Segmentation
|
|
2307
|
+
|
|
2308
|
+
| Component | Purpose |
|
|
2309
|
+
|-----------|---------|
|
|
2310
|
+
| `SemanticSegmenter` | ONNX / DNN / TorchScript semantic segmentation |
|
|
2311
|
+
| `InstanceSegmenter` | YOLO-seg instance masks |
|
|
2312
|
+
| `PanopticSegmenter` | Stuff + thing panoptic segmentation |
|
|
2313
|
+
| `SAMSegmenter` | Segment Anything Model: point, box, auto-everything |
|
|
2314
|
+
| `MaskPostProcessor` | Erode/dilate/fill/largest-component/remove-small |
|
|
2315
|
+
|
|
2316
|
+
### Enhancement
|
|
2317
|
+
|
|
2318
|
+
| Component | Purpose |
|
|
2319
|
+
|-----------|---------|
|
|
2320
|
+
| `SuperResolution` | 2× / 4× upscaling: OpenCV DNN SR / ONNX / bicubic |
|
|
2321
|
+
| `Denoiser` | NLM / bilateral / DnCNN-ONNX denoising |
|
|
2322
|
+
| `Deblurrer` | Wiener FFT / Richardson-Lucy / NAFNet-ONNX deblurring |
|
|
2323
|
+
| `LowLightEnhancer` | CLAHE / gamma / MSR / Zero-DCE / ONNX enhancement |
|
|
2324
|
+
| `Colorizer` | Zhang 2016 LAB-AB / pseudo-color / thermal colorization |
|
|
2325
|
+
|
|
2326
|
+
### I/O
|
|
2327
|
+
|
|
2328
|
+
| Component | Purpose |
|
|
2329
|
+
|-----------|---------|
|
|
2330
|
+
| `ImageReader` | Read images from disk |
|
|
2331
|
+
| `ImageWriter` | Write frames to disk with pattern filenames |
|
|
2332
|
+
| `VideoReader` | Stream frames from video files with seek support |
|
|
2333
|
+
| `VideoWriter` | Write frames to video file |
|
|
2334
|
+
| `CameraSource` | Live webcam, RTSP, or HTTP camera source |
|
|
2335
|
+
| `S3Source` | Stream images from AWS S3 |
|
|
2336
|
+
| `GCSSource` | Stream images from Google Cloud Storage |
|
|
2337
|
+
| `DatasetExporter` | Export YOLO / COCO / VOC annotated datasets |
|
|
2338
|
+
|
|
2339
|
+
### Streaming
|
|
2340
|
+
|
|
2341
|
+
| Component | Purpose |
|
|
2342
|
+
|-----------|---------|
|
|
2343
|
+
| `FrameStream` | Unified iterator over webcam / video / path list |
|
|
2344
|
+
| `DirectoryStream` | Stream sorted images from a directory |
|
|
2345
|
+
| `RTSPClient` | Background-threaded RTSP reader with reconnect |
|
|
2346
|
+
| `WebSocketSink` | Broadcast frames over WebSocket (MJPEG fallback) |
|
|
2347
|
+
| `WebSocketSource` | Receive frames from WebSocket source |
|
|
2348
|
+
| `KafkaSink` | Publish frames to Kafka topic |
|
|
2349
|
+
| `KafkaSource` | Consume frames from Kafka topic |
|
|
2350
|
+
| `BufferedStream` | Producer-consumer frame buffer with drop policy |
|
|
2351
|
+
| `SlidingWindowBuffer` | Temporal sliding window for batch processing |
|
|
2352
|
+
|
|
2353
|
+
### Visualization
|
|
2354
|
+
|
|
2355
|
+
| Component | Purpose |
|
|
2356
|
+
|-----------|---------|
|
|
2357
|
+
| `FrameViewer` | Display frames with FPS overlay (headless-safe) |
|
|
2358
|
+
| `BBoxRenderer` | Render bboxes with color palette and label text |
|
|
2359
|
+
| `HeatmapRenderer` | Accumulate and overlay spatial heatmaps |
|
|
2360
|
+
| `DashboardSink` | Live web dashboard: Gradio or MJPEG HTTP |
|
|
2361
|
+
| `VideoAnnotationExporter` | Write annotated video + JSON sidecar |
|
|
2362
|
+
|
|
2363
|
+
### Utilities
|
|
2364
|
+
|
|
2365
|
+
| Component | Purpose |
|
|
2366
|
+
|-----------|---------|
|
|
2367
|
+
| `ColorPalette` | Golden-ratio hue palette for consistent class colors |
|
|
2368
|
+
| `MetricsLogger` | Thread-safe rolling FPS and latency logger |
|
|
2369
|
+
| `MetricsLoggerComponent` | Pipeline component wrapper for MetricsLogger |
|
|
2370
|
+
| `FrameSampler` | Frame throttling by count, FPS, or probability |
|
|
2371
|
+
| `ImageHash` | Perceptual hashing (pHash/aHash/dHash) for deduplication |
|
|
2372
|
+
| `DrawUtils` | Render bboxes, masks, keypoints from payload |
|
|
2373
|
+
|
|
2374
|
+
### Core
|
|
2375
|
+
|
|
2376
|
+
| Class | Purpose |
|
|
2377
|
+
|-------|---------|
|
|
2378
|
+
| `Device` | Auto CUDA/MPS/CPU device selector (singleton) |
|
|
2379
|
+
| `BBox` | Bounding box dataclass with IoU, clip, normalize |
|
|
2380
|
+
| `Detection` | Detection result (BBox + label + conf) |
|
|
2381
|
+
| `Keypoint` | Single keypoint (x, y, z, visibility, name) |
|
|
2382
|
+
| `Pose` | Full body pose (list of Keypoints) |
|
|
2383
|
+
| `Mask` | Binary segmentation mask with to_polygon() |
|
|
2384
|
+
| `Track` | Track state (id, bbox, age, state) |
|
|
2385
|
+
| `BatchProcessor` | Parallel directory / list processing |
|
|
2386
|
+
| `Scheduler` | Token-bucket FPS limiter (pipeline component) |
|
|
2387
|
+
| `RateLimiter` | Standalone calls-per-second limiter |
|
|
2388
|
+
| `MemoryManager` | Pre-allocated NumPy buffer pool |
|
|
2389
|
+
| `GPUMemoryTracker` | CUDA memory delta tracker |
|
|
2390
|
+
|
|
2391
|
+
### Configuration
|
|
2392
|
+
|
|
2393
|
+
| Class | Purpose |
|
|
2394
|
+
|-------|---------|
|
|
2395
|
+
| `YAMLConfig` | YAML config with dot-notation access, merge, validate, reload |
|
|
2396
|
+
| `JSONConfig` | JSON config with the same interface as `YAMLConfig`, plus save |
|
|
2397
|
+
| `ComponentRegistry` | Singleton component registry with decorator registration |
|
|
2398
|
+
| `ProfileLoader` | Named pipeline profile loader from search paths |
|
|
2399
|
+
| `EnvConfig` | Prefix-based environment variable config reader |
|
|
2400
|
+
|
|
2401
|
+
### Models
|
|
2402
|
+
|
|
2403
|
+
| Class | Purpose |
|
|
2404
|
+
|-------|---------|
|
|
2405
|
+
| `ModelRegistry` | JSON-cached model registry with HuggingFace support |
|
|
2406
|
+
| `ONNXModel` | ONNX runtime pipeline component |
|
|
2407
|
+
| `TorchModel` | TorchScript pipeline component |
|
|
2408
|
+
| `TFLiteModel` | TFLite runtime pipeline component |
|
|
2409
|
+
| `ModelDownloader` | urllib-based downloader with SHA-256 checksum support and HuggingFace URL builder |
|
|
2410
|
+
| `ModelBenchmark` | p50/p95/p99 latency + tracemalloc memory benchmark |
|
|
2411
|
+
|
|
2412
|
+
### Prebuilt Pipelines
|
|
2413
|
+
|
|
2414
|
+
| Class | Purpose |
|
|
2415
|
+
|-------|---------|
|
|
2416
|
+
| `PrebuiltPipelines` | Factory classmethods for common pipeline configurations |
|
|
2417
|
+
| `PipelineSerializer` | Serialize / deserialize pipelines to YAML/JSON |
|
|
2418
|
+
| `AsyncPipeline` | Async execution with asyncio run_in_executor |
|
|
2419
|
+
| `AsyncComponent` | Mixin for implementing async pipeline stages |
|
|
2420
|
+
| `ParallelPipeline` | Parallel branch execution with merge strategies |
|
|
2421
|
+
| `FanOutPipeline` | Shared sequential preprocessing → parallel branches |
|
|
2422
|
+
|
|
2423
|
+
---
|
|
2424
|
+
|
|
2425
|
+
## Output Structure
|
|
2426
|
+
|
|
2427
|
+
```text
|
|
2428
|
+
output/
|
|
2429
|
+
├── captures/ — still images (p key; b key for bursts)
|
|
2430
|
+
├── dataset/ — labelled training samples (d key)
|
|
2431
|
+
├── exports/ — grayscale and edge exports (e key)
|
|
2432
|
+
├── timelapse/ — periodic time-lapse frames
|
|
2433
|
+
└── videos/ — recorded video files (r key)
|
|
2434
|
+
```
|
|
2435
|
+
|
|
2436
|
+
---
|
|
2437
|
+
|
|
2438
|
+
## Testing
|
|
2439
|
+
|
|
2440
|
+
```bash
|
|
2441
|
+
pytest
|
|
2442
|
+
pytest tests/test_preprocessing_components.py
|
|
2443
|
+
pytest tests/test_basic_augmentations.py
|
|
2444
|
+
pytest tests/test_advanced_augmentations.py
|
|
2445
|
+
pytest tests/test_capture_components.py
|
|
2446
|
+
pytest tests/test_core_components.py
|
|
2447
|
+
pytest tests/test_labeler_components.py
|
|
2448
|
+
pytest tests/test_cli_file_processing.py
|
|
2449
|
+
```
|
|
2450
|
+
|
|
2451
|
+
---
|
|
2452
|
+
|
|
2453
|
+
## Build and Publish
|
|
2454
|
+
|
|
2455
|
+
```bash
|
|
2456
|
+
python -m pip install --upgrade build
|
|
2457
|
+
python -m build
|
|
2458
|
+
```
|
|
2459
|
+
|
|
2460
|
+
The wheel and source distribution are written to `dist/`.
|
|
2461
|
+
|
|
2462
|
+
See `PUBLISHING.md` for the release checklist and PyPI upload commands.
|
|
2463
|
+
|
|
2464
|
+
---
|
|
2465
|
+
|
|
2466
|
+
<p align="center">
|
|
2467
|
+
<strong>Build once. Deploy anywhere.</strong><br>
|
|
2468
|
+
Scale from classical vision pipelines to state-of-the-art AI systems.
|
|
2469
|
+
</p>
|
|
2470
|
+
|