ai-vision-tool 0.4.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (140) hide show
  1. ai_vision_tool-0.4.2/PKG-INFO +2470 -0
  2. ai_vision_tool-0.4.2/README.md +2404 -0
  3. ai_vision_tool-0.4.2/ai_vision_tool/__init__.py +264 -0
  4. ai_vision_tool-0.4.2/ai_vision_tool/__main__.py +5 -0
  5. ai_vision_tool-0.4.2/ai_vision_tool/augmentation/__init__.py +100 -0
  6. ai_vision_tool-0.4.2/ai_vision_tool/augmentation/blur.py +42 -0
  7. ai_vision_tool-0.4.2/ai_vision_tool/augmentation/blur_artifact.py +527 -0
  8. ai_vision_tool-0.4.2/ai_vision_tool/augmentation/brightness.py +39 -0
  9. ai_vision_tool-0.4.2/ai_vision_tool/augmentation/camera_gain.py +44 -0
  10. ai_vision_tool-0.4.2/ai_vision_tool/augmentation/common.py +152 -0
  11. ai_vision_tool-0.4.2/ai_vision_tool/augmentation/composite.py +320 -0
  12. ai_vision_tool-0.4.2/ai_vision_tool/augmentation/crop.py +63 -0
  13. ai_vision_tool-0.4.2/ai_vision_tool/augmentation/cutout.py +62 -0
  14. ai_vision_tool-0.4.2/ai_vision_tool/augmentation/exposure.py +44 -0
  15. ai_vision_tool-0.4.2/ai_vision_tool/augmentation/flip.py +52 -0
  16. ai_vision_tool-0.4.2/ai_vision_tool/augmentation/geometric_random.py +635 -0
  17. ai_vision_tool-0.4.2/ai_vision_tool/augmentation/grayscale.py +41 -0
  18. ai_vision_tool-0.4.2/ai_vision_tool/augmentation/hue.py +41 -0
  19. ai_vision_tool-0.4.2/ai_vision_tool/augmentation/mosaic.py +70 -0
  20. ai_vision_tool-0.4.2/ai_vision_tool/augmentation/motion_blur.py +54 -0
  21. ai_vision_tool-0.4.2/ai_vision_tool/augmentation/noise.py +89 -0
  22. ai_vision_tool-0.4.2/ai_vision_tool/augmentation/noise_dropout.py +336 -0
  23. ai_vision_tool-0.4.2/ai_vision_tool/augmentation/rotate90.py +38 -0
  24. ai_vision_tool-0.4.2/ai_vision_tool/augmentation/rotation.py +84 -0
  25. ai_vision_tool-0.4.2/ai_vision_tool/augmentation/saturation.py +42 -0
  26. ai_vision_tool-0.4.2/ai_vision_tool/augmentation/shear.py +67 -0
  27. ai_vision_tool-0.4.2/ai_vision_tool/augmentation/weather_light.py +476 -0
  28. ai_vision_tool-0.4.2/ai_vision_tool/capture/__init__.py +1 -0
  29. ai_vision_tool-0.4.2/ai_vision_tool/capture/burst_image_capture.py +68 -0
  30. ai_vision_tool-0.4.2/ai_vision_tool/capture/frame_grabber.py +75 -0
  31. ai_vision_tool-0.4.2/ai_vision_tool/capture/image_capture.py +73 -0
  32. ai_vision_tool-0.4.2/ai_vision_tool/capture/image_template.py +65 -0
  33. ai_vision_tool-0.4.2/ai_vision_tool/capture/motion_detector.py +63 -0
  34. ai_vision_tool-0.4.2/ai_vision_tool/capture/roi_capture.py +67 -0
  35. ai_vision_tool-0.4.2/ai_vision_tool/capture/screen_capture.py +30 -0
  36. ai_vision_tool-0.4.2/ai_vision_tool/capture/time_lapse.py +64 -0
  37. ai_vision_tool-0.4.2/ai_vision_tool/capture/time_lapse_capture.py +3 -0
  38. ai_vision_tool-0.4.2/ai_vision_tool/capture/video_capture.py +90 -0
  39. ai_vision_tool-0.4.2/ai_vision_tool/capture/video_recorder.py +130 -0
  40. ai_vision_tool-0.4.2/ai_vision_tool/capture/video_template.py +206 -0
  41. ai_vision_tool-0.4.2/ai_vision_tool/cli/__init__.py +0 -0
  42. ai_vision_tool-0.4.2/ai_vision_tool/cli/main.py +1300 -0
  43. ai_vision_tool-0.4.2/ai_vision_tool/config/__init__.py +1 -0
  44. ai_vision_tool-0.4.2/ai_vision_tool/config/env_config.py +71 -0
  45. ai_vision_tool-0.4.2/ai_vision_tool/config/json_config.py +87 -0
  46. ai_vision_tool-0.4.2/ai_vision_tool/config/profile_loader.py +79 -0
  47. ai_vision_tool-0.4.2/ai_vision_tool/config/registry.py +65 -0
  48. ai_vision_tool-0.4.2/ai_vision_tool/config/yaml_config.py +75 -0
  49. ai_vision_tool-0.4.2/ai_vision_tool/core/__init__.py +1 -0
  50. ai_vision_tool-0.4.2/ai_vision_tool/core/base.py +111 -0
  51. ai_vision_tool-0.4.2/ai_vision_tool/core/batch_processor.py +65 -0
  52. ai_vision_tool-0.4.2/ai_vision_tool/core/data_types.py +110 -0
  53. ai_vision_tool-0.4.2/ai_vision_tool/core/device.py +73 -0
  54. ai_vision_tool-0.4.2/ai_vision_tool/core/memory_manager.py +76 -0
  55. ai_vision_tool-0.4.2/ai_vision_tool/core/scheduler.py +69 -0
  56. ai_vision_tool-0.4.2/ai_vision_tool/detection/__init__.py +1 -0
  57. ai_vision_tool-0.4.2/ai_vision_tool/detection/anomaly_detector.py +127 -0
  58. ai_vision_tool-0.4.2/ai_vision_tool/detection/face_detector.py +76 -0
  59. ai_vision_tool-0.4.2/ai_vision_tool/detection/keypoint_detector.py +90 -0
  60. ai_vision_tool-0.4.2/ai_vision_tool/detection/object_detector.py +133 -0
  61. ai_vision_tool-0.4.2/ai_vision_tool/detection/text_detector.py +90 -0
  62. ai_vision_tool-0.4.2/ai_vision_tool/enhancement/__init__.py +1 -0
  63. ai_vision_tool-0.4.2/ai_vision_tool/enhancement/denoiser.py +80 -0
  64. ai_vision_tool-0.4.2/ai_vision_tool/enhancement/frame_enhancer.py +58 -0
  65. ai_vision_tool-0.4.2/ai_vision_tool/enhancement/low_light.py +119 -0
  66. ai_vision_tool-0.4.2/ai_vision_tool/enhancement/models/__init__.py +0 -0
  67. ai_vision_tool-0.4.2/ai_vision_tool/enhancement/models/colorization.py +96 -0
  68. ai_vision_tool-0.4.2/ai_vision_tool/enhancement/models/deblurring.py +100 -0
  69. ai_vision_tool-0.4.2/ai_vision_tool/enhancement/models/super_resolution.py +77 -0
  70. ai_vision_tool-0.4.2/ai_vision_tool/integrations/__init__.py +0 -0
  71. ai_vision_tool-0.4.2/ai_vision_tool/integrations/cloud/__init__.py +0 -0
  72. ai_vision_tool-0.4.2/ai_vision_tool/integrations/cloud/gcs_source.py +72 -0
  73. ai_vision_tool-0.4.2/ai_vision_tool/integrations/cloud/s3_source.py +72 -0
  74. ai_vision_tool-0.4.2/ai_vision_tool/integrations/labeling/__init__.py +0 -0
  75. ai_vision_tool-0.4.2/ai_vision_tool/integrations/labeling/auto_labeller.py +42 -0
  76. ai_vision_tool-0.4.2/ai_vision_tool/integrations/labeling/darknet_auto_labeler.py +265 -0
  77. ai_vision_tool-0.4.2/ai_vision_tool/integrations/labeling/tensorflow_auto_labeler.py +172 -0
  78. ai_vision_tool-0.4.2/ai_vision_tool/integrations/streaming/__init__.py +0 -0
  79. ai_vision_tool-0.4.2/ai_vision_tool/integrations/streaming/kafka_io.py +157 -0
  80. ai_vision_tool-0.4.2/ai_vision_tool/integrations/streaming/websocket_sink.py +167 -0
  81. ai_vision_tool-0.4.2/ai_vision_tool/io/__init__.py +1 -0
  82. ai_vision_tool-0.4.2/ai_vision_tool/io/camera_source.py +66 -0
  83. ai_vision_tool-0.4.2/ai_vision_tool/io/dataset_collector.py +79 -0
  84. ai_vision_tool-0.4.2/ai_vision_tool/io/dataset_exporter.py +125 -0
  85. ai_vision_tool-0.4.2/ai_vision_tool/io/image_exporter.py +78 -0
  86. ai_vision_tool-0.4.2/ai_vision_tool/io/image_io.py +99 -0
  87. ai_vision_tool-0.4.2/ai_vision_tool/io/video_io.py +129 -0
  88. ai_vision_tool-0.4.2/ai_vision_tool/models/__init__.py +7 -0
  89. ai_vision_tool-0.4.2/ai_vision_tool/models/backends/__init__.py +0 -0
  90. ai_vision_tool-0.4.2/ai_vision_tool/models/backends/onnx_model.py +81 -0
  91. ai_vision_tool-0.4.2/ai_vision_tool/models/backends/tflite_model.py +74 -0
  92. ai_vision_tool-0.4.2/ai_vision_tool/models/backends/torch_model.py +87 -0
  93. ai_vision_tool-0.4.2/ai_vision_tool/models/benchmark.py +83 -0
  94. ai_vision_tool-0.4.2/ai_vision_tool/models/downloader.py +73 -0
  95. ai_vision_tool-0.4.2/ai_vision_tool/models/registry.py +74 -0
  96. ai_vision_tool-0.4.2/ai_vision_tool/pipelines/__init__.py +5 -0
  97. ai_vision_tool-0.4.2/ai_vision_tool/pipelines/async_pipeline.py +54 -0
  98. ai_vision_tool-0.4.2/ai_vision_tool/pipelines/parallel_pipeline.py +110 -0
  99. ai_vision_tool-0.4.2/ai_vision_tool/pipelines/prebuilt.py +90 -0
  100. ai_vision_tool-0.4.2/ai_vision_tool/pipelines/serializer.py +91 -0
  101. ai_vision_tool-0.4.2/ai_vision_tool/pipelines/vision_pipeline.py +51 -0
  102. ai_vision_tool-0.4.2/ai_vision_tool/preprocessing/__init__.py +93 -0
  103. ai_vision_tool-0.4.2/ai_vision_tool/preprocessing/auto_adjust_contrast.py +158 -0
  104. ai_vision_tool-0.4.2/ai_vision_tool/preprocessing/auto_orient.py +125 -0
  105. ai_vision_tool-0.4.2/ai_vision_tool/preprocessing/classical_segmentation.py +87 -0
  106. ai_vision_tool-0.4.2/ai_vision_tool/preprocessing/frame_resizer.py +62 -0
  107. ai_vision_tool-0.4.2/ai_vision_tool/preprocessing/geometry.py +677 -0
  108. ai_vision_tool-0.4.2/ai_vision_tool/preprocessing/intensity.py +851 -0
  109. ai_vision_tool-0.4.2/ai_vision_tool/preprocessing/quality.py +369 -0
  110. ai_vision_tool-0.4.2/ai_vision_tool/segmentation/__init__.py +1 -0
  111. ai_vision_tool-0.4.2/ai_vision_tool/segmentation/instance_segmenter.py +98 -0
  112. ai_vision_tool-0.4.2/ai_vision_tool/segmentation/mask_post_processor.py +95 -0
  113. ai_vision_tool-0.4.2/ai_vision_tool/segmentation/panoptic_segmenter.py +94 -0
  114. ai_vision_tool-0.4.2/ai_vision_tool/segmentation/sam_segmenter.py +120 -0
  115. ai_vision_tool-0.4.2/ai_vision_tool/segmentation/semantic_segmenter.py +116 -0
  116. ai_vision_tool-0.4.2/ai_vision_tool/streaming/__init__.py +1 -0
  117. ai_vision_tool-0.4.2/ai_vision_tool/streaming/buffered_stream.py +99 -0
  118. ai_vision_tool-0.4.2/ai_vision_tool/streaming/frame_stream.py +110 -0
  119. ai_vision_tool-0.4.2/ai_vision_tool/streaming/rtsp_client.py +128 -0
  120. ai_vision_tool-0.4.2/ai_vision_tool/tracking/__init__.py +1 -0
  121. ai_vision_tool-0.4.2/ai_vision_tool/tracking/byte_tracker.py +144 -0
  122. ai_vision_tool-0.4.2/ai_vision_tool/tracking/deepsort_tracker.py +87 -0
  123. ai_vision_tool-0.4.2/ai_vision_tool/tracking/kalman_filter.py +81 -0
  124. ai_vision_tool-0.4.2/ai_vision_tool/tracking/reid_extractor.py +111 -0
  125. ai_vision_tool-0.4.2/ai_vision_tool/tracking/track_manager.py +131 -0
  126. ai_vision_tool-0.4.2/ai_vision_tool/utils/__init__.py +1 -0
  127. ai_vision_tool-0.4.2/ai_vision_tool/utils/color_palette.py +41 -0
  128. ai_vision_tool-0.4.2/ai_vision_tool/utils/draw_utils.py +86 -0
  129. ai_vision_tool-0.4.2/ai_vision_tool/utils/frame_sampler.py +56 -0
  130. ai_vision_tool-0.4.2/ai_vision_tool/utils/image_hash.py +74 -0
  131. ai_vision_tool-0.4.2/ai_vision_tool/utils/image_utils.py +147 -0
  132. ai_vision_tool-0.4.2/ai_vision_tool/utils/metrics_logger.py +70 -0
  133. ai_vision_tool-0.4.2/ai_vision_tool/visualization/__init__.py +1 -0
  134. ai_vision_tool-0.4.2/ai_vision_tool/visualization/bbox_renderer.py +90 -0
  135. ai_vision_tool-0.4.2/ai_vision_tool/visualization/dashboard_view.py +109 -0
  136. ai_vision_tool-0.4.2/ai_vision_tool/visualization/frame_annotator.py +82 -0
  137. ai_vision_tool-0.4.2/ai_vision_tool/visualization/frame_viewer.py +81 -0
  138. ai_vision_tool-0.4.2/ai_vision_tool/visualization/heatmap_renderer.py +94 -0
  139. ai_vision_tool-0.4.2/ai_vision_tool/visualization/video_annotation_exporter.py +110 -0
  140. ai_vision_tool-0.4.2/pyproject.toml +171 -0
@@ -0,0 +1,2470 @@
1
+ Metadata-Version: 2.4
2
+ Name: ai-vision-tool
3
+ Version: 0.4.2
4
+ Summary: Composable computer-vision pipeline components for image enhancement, motion analysis, capture, and dataset collection.
5
+ Keywords: computer-vision,opencv,image-processing,dataset,pipeline
6
+ Author: AI Vision Flow Maintainers
7
+ Requires-Python: >=3.10,<4.0
8
+ Classifier: Development Status :: 3 - Alpha
9
+ Classifier: Intended Audience :: Developers
10
+ Classifier: Intended Audience :: Science/Research
11
+ Classifier: Programming Language :: Python :: 3
12
+ Classifier: Programming Language :: Python :: 3.10
13
+ Classifier: Programming Language :: Python :: 3.11
14
+ Classifier: Programming Language :: Python :: 3.12
15
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
16
+ Classifier: Topic :: Scientific/Engineering :: Image Processing
17
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
18
+ Provides-Extra: all
19
+ Provides-Extra: api
20
+ Provides-Extra: cloud
21
+ Provides-Extra: detection
22
+ Provides-Extra: kafka
23
+ Provides-Extra: onnx
24
+ Provides-Extra: segmentation
25
+ Provides-Extra: streaming
26
+ Provides-Extra: tflite
27
+ Provides-Extra: torch
28
+ Provides-Extra: tracking
29
+ Provides-Extra: websocket
30
+ Requires-Dist: boto3 (>=1.34) ; extra == "all"
31
+ Requires-Dist: boto3 (>=1.34) ; extra == "cloud"
32
+ Requires-Dist: confluent-kafka (>=2.3.0) ; extra == "all"
33
+ Requires-Dist: confluent-kafka (>=2.3.0) ; extra == "kafka"
34
+ Requires-Dist: confluent-kafka (>=2.3.0) ; extra == "streaming"
35
+ Requires-Dist: fastapi (>=0.115) ; extra == "all"
36
+ Requires-Dist: fastapi (>=0.115) ; extra == "api"
37
+ Requires-Dist: google-cloud-storage (>=2.16) ; extra == "all"
38
+ Requires-Dist: google-cloud-storage (>=2.16) ; extra == "cloud"
39
+ Requires-Dist: mediapipe (>=0.10) ; extra == "all"
40
+ Requires-Dist: mediapipe (>=0.10) ; extra == "detection"
41
+ Requires-Dist: numpy (>=1.26)
42
+ Requires-Dist: onnxruntime (>=1.18) ; extra == "all"
43
+ Requires-Dist: onnxruntime (>=1.18) ; extra == "onnx"
44
+ Requires-Dist: onnxruntime (>=1.18) ; extra == "tracking"
45
+ Requires-Dist: opencv-python (>=4.8)
46
+ Requires-Dist: pyyaml (>=6.0)
47
+ Requires-Dist: segment-anything (>=1.0) ; extra == "all"
48
+ Requires-Dist: segment-anything (>=1.0) ; extra == "segmentation"
49
+ Requires-Dist: tflite-runtime (>=2.14) ; extra == "tflite"
50
+ Requires-Dist: torch (>=2.3) ; extra == "all"
51
+ Requires-Dist: torch (>=2.3) ; extra == "segmentation"
52
+ Requires-Dist: torch (>=2.3) ; extra == "torch"
53
+ Requires-Dist: torchvision (>=0.18) ; extra == "all"
54
+ Requires-Dist: torchvision (>=0.18) ; extra == "segmentation"
55
+ Requires-Dist: torchvision (>=0.18) ; extra == "torch"
56
+ Requires-Dist: ultralytics (>=8.0) ; extra == "all"
57
+ Requires-Dist: ultralytics (>=8.0) ; extra == "detection"
58
+ Requires-Dist: ultralytics (>=8.0) ; extra == "segmentation"
59
+ Requires-Dist: uvicorn (>=0.30) ; extra == "all"
60
+ Requires-Dist: uvicorn (>=0.30) ; extra == "api"
61
+ Requires-Dist: websockets (>=12.0) ; extra == "all"
62
+ Requires-Dist: websockets (>=12.0) ; extra == "streaming"
63
+ Requires-Dist: websockets (>=12.0) ; extra == "websocket"
64
+ Description-Content-Type: text/markdown
65
+
66
+ # AI Vision Tool
67
+ ### Build Scalable, Real-Time Computer Vision Systems with OpenCV, AI Models, and Hybrid Pipelines
68
+
69
+ <p align="center">
70
+ <a href="https://pypi.org/project/ai-vision-tool/"><img src="https://img.shields.io/pypi/v/ai-vision-tool?style=flat-square&color=blue&label=PyPI" alt="PyPI version"></a>
71
+ <a href="https://pypi.org/project/ai-vision-tool/"><img src="https://img.shields.io/pypi/pyversions/ai-vision-tool?style=flat-square" alt="Python"></a>
72
+ <a href="https://pypi.org/project/ai-vision-tool/"><img src="https://img.shields.io/pypi/l/ai-vision-tool?style=flat-square&color=green" alt="License"></a>
73
+ <a href="https://pypi.org/project/ai-vision-tool/"><img src="https://img.shields.io/pypi/dm/ai-vision-tool?style=flat-square&color=orange" alt="Downloads"></a>
74
+ </p>
75
+
76
+ <p align="center">
77
+ <img src="images/github/ai-vision-tool.png" alt="AI Vision Tool" width="100%">
78
+ </p>
79
+
80
+ ---
81
+
82
+ **AI Vision Tool** is a modular, extensible, and production-ready computer vision framework designed for modern AI-powered image and video processing workflows.
83
+
84
+ Built with a **lightweight OpenCV-first architecture**, it provides a unified ecosystem for preprocessing, augmentation, enhancement, visualization, streaming, capture pipelines, and AI model integration — enabling developers to rapidly build scalable vision applications ranging from classical computer vision systems to advanced deep learning pipelines.
85
+
86
+ ```python
87
+ from ai_vision_tool.pipelines import AIVisionPipeline, PrebuiltPipelines
88
+ from ai_vision_tool.preprocessing import AutoOrient, LetterboxResize
89
+ from ai_vision_tool.detection import ObjectDetector
90
+ from ai_vision_tool.tracking import ByteTracker
91
+ from ai_vision_tool.visualization import BBoxRenderer
92
+
93
+ pipeline = (
94
+ AIVisionPipeline()
95
+ .add(AutoOrient())
96
+ .add(LetterboxResize(width=640, height=640))
97
+ .add(ObjectDetector(model_path="yolov8n.pt", conf_threshold=0.25))
98
+ .add(ByteTracker(track_thresh=0.5))
99
+ .add(BBoxRenderer(show_track_id=True))
100
+ )
101
+
102
+ result = pipeline.execute(initial_data={"frame": frame}, global_config={})
103
+ ```
104
+
105
+ ---
106
+
107
+ ## Why AI Vision Tool?
108
+
109
+ | Concern | How it's solved |
110
+ |---------|----------------|
111
+ | **Complexity** | One unified `.run(data)` interface across 130+ components |
112
+ | **Dependencies** | Lightweight core (`numpy + opencv + pyyaml`), heavy deps are opt-in extras |
113
+ | **Scalability** | Async, parallel, and fan-out pipelines built-in |
114
+ | **Deployment** | CPU / CUDA / MPS / Edge — auto-detected at runtime |
115
+ | **Extensibility** | Subclass `AIVisionComponent`, plug in anywhere |
116
+
117
+ ### Supported Implementation Strategies
118
+
119
+ ```
120
+ Classical Computer Vision → Pre-trained AI Models → Custom Deep Learning
121
+ ↕ ↕ ↕
122
+ Edge AI Inference ←→ Hybrid CV + AI Architectures ←→ Cloud Streaming
123
+ ```
124
+
125
+ The framework follows a **core + optional extensions** philosophy:
126
+
127
+ - **Lightweight core** — fast install, minimal footprint, no heavy deps
128
+ - **Optional AI runtimes** — ONNX, PyTorch, TensorFlow Lite via extras
129
+ - **Plugin-style integrations** — cloud storage, Kafka, WebSocket, Gradio dashboards
130
+ - **Edge and cloud deployment** — runs on Raspberry Pi through multi-GPU servers
131
+
132
+ > **Build once. Deploy anywhere. Scale from classical vision pipelines to state-of-the-art AI systems.**
133
+
134
+ ---
135
+
136
+ ## Table of Contents
137
+
138
+ - [Features](#features)
139
+ - [Installation](#installation)
140
+ - [Quickstart](#quickstart)
141
+ - [Preprocessing](#preprocessing)
142
+ - [Augmentation](#augmentation)
143
+ - [Pipeline](#pipeline)
144
+ - [Detection](#detection)
145
+ - [Tracking](#tracking)
146
+ - [Segmentation](#segmentation)
147
+ - [Enhancement](#enhancement)
148
+ - [I/O](#io)
149
+ - [Streaming](#streaming)
150
+ - [Visualization](#visualization)
151
+ - [Capture Components](#capture-components)
152
+ - [Utilities](#utilities)
153
+ - [Core](#core)
154
+ - [Configuration](#configuration)
155
+ - [Models](#models)
156
+ - [Prebuilt Pipelines](#prebuilt-pipelines)
157
+ - [Capture Templates](#capture-templates)
158
+ - [CLI Reference](#cli-reference)
159
+ - [Component Index](#component-index)
160
+ - [Output Structure](#output-structure)
161
+ - [Testing](#testing)
162
+ - [Build and Publish](#build-and-publish)
163
+
164
+ ---
165
+
166
+ ## Features
167
+
168
+ <details open>
169
+ <summary><strong>Pipelines & Architecture</strong></summary>
170
+
171
+ - Composable `AIVisionPipeline` — Chain of Responsibility, one interface for all components
172
+ - Async execution via `AsyncPipeline` (`asyncio` + `run_in_executor`)
173
+ - Parallel branches via `ParallelPipeline` and `FanOutPipeline` (`ThreadPoolExecutor`)
174
+ - Pipeline serialization to/from YAML/JSON via `PipelineSerializer`
175
+ - Prebuilt factory pipelines for detection, tracking, enhancement, augmentation
176
+
177
+ </details>
178
+
179
+ <details open>
180
+ <summary><strong>Preprocessing & Augmentation</strong></summary>
181
+
182
+ - **40+ preprocessing transforms** — geometry, intensity, color space, quality gates
183
+ - **70+ augmentation components** — geometric, weather, blur, noise, dropout, multi-image composition
184
+ - Batch processing: `component.run([img_a, img_b, img_c])` → list of results
185
+ - JSON augmentation profiles for CLI-driven training pipelines
186
+
187
+ </details>
188
+
189
+ <details open>
190
+ <summary><strong>Detection, Tracking & Segmentation</strong></summary>
191
+
192
+ - Object detection: YOLO (ultralytics) + ONNX with greedy NMS fallback
193
+ - Face detection: OpenCV Haar cascade or MediaPipe
194
+ - Keypoint/pose detection: MediaPipe 33-landmark or YOLO-pose
195
+ - OCR/text detection: EasyOCR, PaddleOCR
196
+ - Anomaly detection: statistical z-score, PatchCore (HOG + kNN), PCA
197
+ - Multi-object tracking: ByteTracker (two-stage), DeepSORT (HOG + cosine distance)
198
+ - Semantic, instance, and panoptic segmentation: ONNX / YOLO-seg / TorchScript
199
+ - SAM (Segment Anything Model): point, box, and auto-everything prompts
200
+ - Mask post-processing: erode / dilate / fill holes / largest-component / remove-small
201
+
202
+ </details>
203
+
204
+ <details open>
205
+ <summary><strong>Enhancement & Restoration</strong></summary>
206
+
207
+ - Super-resolution: `cv2.dnn_superres`, ONNX, bicubic fallback
208
+ - Denoising: Non-local means, bilateral, Gaussian, DnCNN-ONNX
209
+ - Deblurring: Wiener FFT, Richardson-Lucy, NAFNet-ONNX
210
+ - Low-light enhancement: CLAHE, gamma LUT, multi-scale Retinex, Zero-DCE
211
+ - Colorization: Zhang 2016 LAB-AB, pseudo-color, thermal
212
+
213
+ </details>
214
+
215
+ <details open>
216
+ <summary><strong>I/O, Streaming & Cloud</strong></summary>
217
+
218
+ - Flexible I/O: local images/video, webcam, RTSP, HTTP, AWS S3, GCS
219
+ - Dataset export: YOLO, COCO JSON, VOC XML
220
+ - Real-time streaming: RTSP client, WebSocket sink/source, Kafka producer/consumer
221
+ - Buffered queues with configurable drop policy and sliding window
222
+
223
+ </details>
224
+
225
+ <details open>
226
+ <summary><strong>Visualization & Dashboards</strong></summary>
227
+
228
+ - Live frame viewer with rolling FPS overlay (headless-safe)
229
+ - BBox renderer with consistent per-class colors and semi-transparent fill
230
+ - Heatmap renderer: detection density, anomaly maps, motion, attention
231
+ - Dashboard sink: Gradio or MJPEG HTTP fallback
232
+ - Annotated video export with JSON sidecar
233
+
234
+ </details>
235
+
236
+ <details open>
237
+ <summary><strong>Model Management</strong></summary>
238
+
239
+ - ONNX, TorchScript, TFLite runners as pipeline components
240
+ - Model registry with JSON cache and HuggingFace download support
241
+ - SHA256-verified downloader with progress callbacks
242
+ - Latency benchmarking: p50 / p95 / p99 + tracemalloc memory profiling
243
+
244
+ </details>
245
+
246
+ ---
247
+
248
+ ## Installation
249
+
250
+ ### pip
251
+
252
+ ```bash
253
+ pip install ai-vision-tool
254
+ ```
255
+
256
+ With optional extras:
257
+
258
+ ```bash
259
+ # ONNX inference
260
+ pip install "ai-vision-tool[onnx]"
261
+
262
+ # YOLO detection + MediaPipe face/pose
263
+ pip install "ai-vision-tool[detection]"
264
+
265
+ # Everything
266
+ pip install "ai-vision-tool[all]"
267
+ ```
268
+
269
+ ### uv
270
+
271
+ ```bash
272
+ uv add ai-vision-tool
273
+ uv add "ai-vision-tool[detection]"
274
+ ```
275
+
276
+ ### Poetry
277
+
278
+ ```bash
279
+ poetry add ai-vision-tool
280
+ poetry add "ai-vision-tool[detection]"
281
+ ```
282
+
283
+ ### Optional extras
284
+
285
+ The base install (`numpy + opencv-python + pyyaml`) has no heavy deps.
286
+ Optional extras install only the libraries each feature needs.
287
+
288
+ | Extra | Installs | Enables |
289
+ |-------|----------|---------|
290
+ | `onnx` | `onnxruntime>=1.18` | `ONNXModel`, ONNX-backed detectors and enhancement |
291
+ | `torch` | `torch>=2.3`, `torchvision>=0.18` | `TorchModel`, TorchScript inference |
292
+ | `tflite` | `tflite-runtime>=2.14` | `TFLiteModel` inference |
293
+ | `detection` | `ultralytics>=8.0`, `mediapipe>=0.10` | `ObjectDetector` (YOLO), `FaceDetector`/`KeypointDetector` (MediaPipe) |
294
+ | `segmentation` | `ultralytics>=8.0`, `segment-anything>=1.0`, `torch>=2.3`, `torchvision>=0.18` | `InstanceSegmenter` (YOLO-seg), `SAMSegmenter` |
295
+ | `tracking` | `onnxruntime>=1.18` | ONNX-backed ReID embeddings in `ReIDExtractor` |
296
+ | `websocket` | `websockets>=12.0` | `WebSocketSink`, `WebSocketSource` |
297
+ | `kafka` | `confluent-kafka>=2.3.0` | `KafkaSink`, `KafkaSource` |
298
+ | `streaming` | websocket + kafka | All real-time streaming components |
299
+ | `cloud` | `boto3>=1.34`, `google-cloud-storage>=2.16` | `S3Source`, `GCSSource` |
300
+ | `api` | `fastapi>=0.115`, `uvicorn>=0.30` | FastAPI REST server |
301
+ | `all` | all of the above except `tflite` | Full feature set (install the `tflite` extra separately for `TFLiteModel`) |
302
+
303
+ ### Development Setup
304
+
305
+ ```bash
306
+ git clone https://github.com/your-org/ai-vision-tool.git
307
+ cd ai-vision-tool
308
+
309
+ # Using uv
310
+ uv sync --dev
311
+
312
+ # Using Poetry
313
+ poetry install --with dev
314
+ ```
315
+
316
+ Install pre-commit hooks:
317
+
318
+ ```bash
319
+ pre-commit install
320
+ pre-commit install --hook-type pre-push
321
+ pre-commit install --hook-type commit-msg
322
+ pre-commit run --all-files
323
+ ```
324
+
325
+ ---
326
+
327
+ ## Quickstart
328
+
329
+ ```python
330
+ import cv2
331
+ from ai_vision_tool.pipelines import AIVisionPipeline
332
+ from ai_vision_tool.preprocessing import AutoOrient, AutoAdjustContrast
333
+ from ai_vision_tool.augmentation import Flip, GaussianBlur
334
+
335
+ image = cv2.imread("images/github/sample.jpg")
336
+
337
+ pipeline = AIVisionPipeline()
338
+ pipeline.add(AutoOrient(rotation=90))
339
+ pipeline.add(AutoAdjustContrast(method="adaptive_equalization", clip_limit=2.0))
340
+ pipeline.add(Flip(horizontal=True))
341
+ pipeline.add(GaussianBlur(kernel_size=5, sigma_x=1.0))
342
+
343
+ result = pipeline.execute(initial_data={"frame": image}, global_config={})
344
+ print(result["frame"].shape) # (height, width, 3)
345
+ ```
346
+
347
+ You can also import any component directly from the top-level namespace:
348
+
349
+ ```python
350
+ from ai_vision_tool import AutoOrient, Flip, GaussianBlur, AIVisionPipeline
351
+ ```
352
+
353
+ All imports use lazy loading — only modules you actually use are loaded.
354
+
355
+ ---
356
+
357
+ ## Preprocessing
358
+
359
+ Preprocessing transforms prepare raw images for downstream model inference, quality gating,
360
+ or dataset ingestion. Every component accepts either a NumPy array or a payload dictionary
361
+ `{"frame": ndarray, ...}`.
362
+
363
+ ```python
364
+ import cv2
365
+ image = cv2.imread("images/github/sample.jpg")
366
+ ```
367
+
368
+ ### Import Path
369
+
370
+ ```python
371
+ from ai_vision_tool.preprocessing import (
372
+ AutoOrient,
373
+ AutoAdjustContrast,
374
+ Resize,
375
+ LetterboxResize,
376
+ CenterCrop,
377
+ PadToSquare,
378
+ Normalize,
379
+ Standardize,
380
+ RescalePixels,
381
+ ConvertColorSpace,
382
+ BGRToRGB,
383
+ RGBToBGR,
384
+ CLAHE,
385
+ HistogramEqualization,
386
+ GammaCorrection,
387
+ WhiteBalance,
388
+ Denoise,
389
+ Sharpen,
390
+ Deblur,
391
+ RemoveBackground,
392
+ Threshold,
393
+ AdaptiveThreshold,
394
+ EdgeDetection,
395
+ ContourExtraction,
396
+ PerspectiveCorrection,
397
+ Deskew,
398
+ AutoCrop,
399
+ FaceAlign,
400
+ ObjectCrop,
401
+ BoundingBoxClamp,
402
+ BoundingBoxNormalize,
403
+ MaskResize,
404
+ ImageQualityCheck,
405
+ BlurDetection,
406
+ BrightnessCheck,
407
+ DuplicateImageCheck,
408
+ CorruptImageCheck,
409
+ AspectRatioFilter,
410
+ MinSizeFilter,
411
+ MaxSizeFilter,
412
+ )
413
+ ```
414
+
415
+ ---
416
+
417
+ ### Geometry
418
+
419
+ **`AutoOrient`** — Correct image orientation from EXIF metadata, or apply an explicit rotation and/or flip.
420
+
421
+ ```python
422
+ from ai_vision_tool.preprocessing import AutoOrient
423
+
424
+ result = AutoOrient(rotation=90).run(image)
425
+ result = AutoOrient(flip_horizontal=True).run(image)
426
+ result = AutoOrient(use_exif=True, exif_key="exif_orientation").run(
427
+ {"frame": image, "exif_orientation": 6}
428
+ )
429
+ ```
430
+
431
+ **`Resize`** — Resize to an exact target size.
432
+
433
+ ```python
434
+ from ai_vision_tool.preprocessing import Resize
435
+
436
+ result = Resize(width=640, height=640).run(image)
437
+ ```
438
+
439
+ **`LetterboxResize`** — Resize preserving aspect ratio, padding the shorter axis.
440
+
441
+ ```python
442
+ from ai_vision_tool.preprocessing import LetterboxResize
443
+
444
+ result = LetterboxResize(width=640, height=640, pad_value=(114, 114, 114)).run(image)
445
+ ```
446
+
447
+ **`CenterCrop`** — Crop the centre region.
448
+
449
+ ```python
450
+ from ai_vision_tool.preprocessing import CenterCrop
451
+
452
+ result = CenterCrop(width=224, height=224).run(image)
453
+ ```
454
+
455
+ **`PadToSquare`** — Pad a rectangular image to a square canvas.
456
+
457
+ ```python
458
+ from ai_vision_tool.preprocessing import PadToSquare
459
+
460
+ result = PadToSquare(pad_value=(0, 0, 0)).run(image)
461
+ ```
462
+
463
+ **`PerspectiveCorrection`** — Rectify a quadrilateral document or planar surface.
464
+
465
+ ```python
466
+ import numpy as np
467
+ from ai_vision_tool.preprocessing import PerspectiveCorrection
468
+
469
+ source_points = np.float32([[30, 20], [310, 10], [320, 240], [20, 250]])
470
+ result = PerspectiveCorrection(source_points=source_points, output_size=(300, 200)).run(image)
471
+ ```
472
+
473
+ **`Deskew`** — Rotate a skewed document so that it is level again.
474
+
475
+ ```python
476
+ from ai_vision_tool.preprocessing import Deskew
477
+
478
+ result = Deskew().run(image)
479
+ ```
480
+
481
+ **`AutoCrop`** — Trim empty or near-black borders.
482
+
483
+ ```python
484
+ from ai_vision_tool.preprocessing import AutoCrop
485
+
486
+ result = AutoCrop(threshold=10, padding=4).run(image)
487
+ ```
488
+
489
+ **`FaceAlign`** — Align a face using eye landmark coordinates from a payload dict.
490
+
491
+ ```python
492
+ from ai_vision_tool.preprocessing import FaceAlign
493
+
494
+ payload = {"frame": image, "metadata": {"left_eye": (40, 50), "right_eye": (90, 50)}}
495
+ result = FaceAlign(output_size=(112, 112)).run(payload)
496
+ ```
497
+
498
+ **`ObjectCrop`** — Crop the regions described by the payload's bounding boxes.
499
+
500
+ ```python
501
+ from ai_vision_tool.preprocessing import ObjectCrop
502
+
503
+ payload = {"frame": image, "bboxes": [(10, 20, 120, 80)]}
504
+ result = ObjectCrop().run(payload)
505
+ ```
506
+
507
+ **`BoundingBoxClamp`** — Clamp bounding boxes that extend outside image boundaries.
508
+
509
+ ```python
510
+ from ai_vision_tool.preprocessing import BoundingBoxClamp
511
+
512
+ payload = {"frame": image, "bboxes": [(-5, -5, 80, 90)]}
513
+ result = BoundingBoxClamp().run(payload)
514
+ ```
515
+
516
+ **`BoundingBoxNormalize`** — Normalise absolute pixel bounding boxes to relative coordinates.
517
+
518
+ ```python
519
+ from ai_vision_tool.preprocessing import BoundingBoxNormalize
520
+
521
+ payload = {"frame": image, "bboxes": [(10, 20, 120, 80)]}
522
+ result = BoundingBoxNormalize().run(payload)
523
+ ```
524
+
525
+ **`MaskResize`** — Resize a payload mask to match a target spatial size.
526
+
527
+ ```python
528
+ import numpy as np
529
+ from ai_vision_tool.preprocessing import MaskResize
530
+
531
+ mask = np.zeros((image.shape[0], image.shape[1]), dtype=np.uint8)
532
+ payload = {"frame": image, "mask": mask}
533
+ result = MaskResize(width=640, height=640).run(payload)
534
+ ```
535
+
536
+ ---
537
+
538
+ ### Intensity and Color
539
+
540
+ **`AutoAdjustContrast`** — Adaptive equalization, histogram equalization, or contrast stretching.
541
+
542
+ ```python
543
+ from ai_vision_tool.preprocessing import AutoAdjustContrast
544
+
545
+ result = AutoAdjustContrast(method="adaptive_equalization", clip_limit=2.0).run(image)
546
+ result = AutoAdjustContrast(method="histogram_equalization").run(image)
547
+ result = AutoAdjustContrast(
548
+ method="contrast_stretching", lower_percentile=2.0, upper_percentile=98.0
549
+ ).run(image)
550
+ ```
551
+
552
+ **`Normalize`** — Map pixel values into [0, 1].
553
+
554
+ ```python
555
+ from ai_vision_tool.preprocessing import Normalize
556
+
557
+ result = Normalize().run(image)
558
+ ```
559
+
560
+ **`Standardize`** — Z-score standardisation, optionally per channel.
561
+
562
+ ```python
563
+ from ai_vision_tool.preprocessing import Standardize
564
+
565
+ result = Standardize(per_channel=True).run(image)
566
+ ```
567
+
568
+ **`CLAHE`** — Contrast-Limited Adaptive Histogram Equalisation.
569
+
570
+ ```python
571
+ from ai_vision_tool.preprocessing import CLAHE
572
+
573
+ result = CLAHE(clip_limit=2.0, tile_grid_size=(8, 8)).run(image)
574
+ ```
575
+
576
+ **`GammaCorrection`** — Gamma-based exposure tuning.
577
+
578
+ ```python
579
+ from ai_vision_tool.preprocessing import GammaCorrection
580
+
581
+ result = GammaCorrection(gamma=1.4).run(image) # brighten
582
+ result = GammaCorrection(gamma=0.7).run(image) # darken
583
+ ```
584
+
585
+ **`WhiteBalance`** — Correct per-channel colour casts.
586
+
587
+ ```python
588
+ from ai_vision_tool.preprocessing import WhiteBalance
589
+
590
+ result = WhiteBalance(method="gray_world").run(image)
591
+ ```
592
+
593
+ **`EdgeDetection`** — Extract edges via Canny, Sobel, or Laplacian.
594
+
595
+ ```python
596
+ from ai_vision_tool.preprocessing import EdgeDetection
597
+
598
+ result = EdgeDetection(method="canny", threshold1=100, threshold2=200).run(image)
599
+ ```
600
+
601
+ ---
602
+
603
+ ### Quality Checks
604
+
605
+ **`ImageQualityCheck`** — Compute blur and brightness quality flags.
606
+
607
+ ```python
608
+ from ai_vision_tool.preprocessing import ImageQualityCheck
609
+
610
+ result = ImageQualityCheck().run({"frame": image})
611
+ # result["is_blurry"], result["brightness"]
612
+ ```
613
+
614
+ **`BlurDetection`** — Flag frames below a Laplacian variance threshold.
615
+
616
+ ```python
617
+ from ai_vision_tool.preprocessing import BlurDetection
618
+
619
+ result = BlurDetection().run({"frame": image})
620
+ ```
621
+
622
+ **`MinSizeFilter`** / **`MaxSizeFilter`** — Enforce pixel dimension bounds.
623
+
624
+ ```python
625
+ from ai_vision_tool.preprocessing import MinSizeFilter, MaxSizeFilter
626
+
627
+ result = MinSizeFilter(min_width=320, min_height=320).run({"frame": image})
628
+ result = MaxSizeFilter(max_width=2048, max_height=2048).run({"frame": image})
629
+ ```
630
+
631
+ ---
632
+
633
+ ## Augmentation
634
+
635
+ Augmentation components apply stochastic or deterministic transforms for training-time
636
+ variation. Every component exposes the same `.run(input)` interface.
637
+
638
+ ```python
639
+ import cv2
640
+ image = cv2.imread("images/github/sample.jpg")
641
+ ```
642
+
643
+ ### Import Path
644
+
645
+ ```python
646
+ from ai_vision_tool.augmentation import (
647
+ Flip, Rotate90, Crop, Rotation, Shear, Translate,
648
+ RandomResize, RandomScale, RandomCrop, RandomResizedCrop, RandomPadding,
649
+ AffineTransform, PerspectiveTransform, ElasticTransform,
650
+ GridDistortion, OpticalDistortion,
651
+ Brightness, Exposure, Hue, Saturation, Greyscale,
652
+ ColorJitter, RandomGamma, RandomBrightnessContrast,
653
+ RandomShadow, RandomSunFlare, RandomFog, RandomRain, RandomSnow,
654
+ ChannelShuffle, RGBShift, HSVShift, ToSepia, InvertImage,
655
+ Blur, GaussianBlur, MedianBlur, GlassBlur, DefocusBlur,
656
+ ZoomBlur, MotionBlur, CameraGain,
657
+ Emboss, Posterize, Solarize, Equalize,
658
+ CompressionArtifacts, JPEGCompression, Downscale, Superpixel,
659
+ Noise, ISONoise, MultiplicativeNoise, SaltPepperNoise,
660
+ CoarseDropout, GridDropout, RandomErasing, PixelDropout, MaskDropout,
661
+ Cutout, Mosaic, Mosaic9, MixUp, CutMix,
662
+ CopyPaste, ObjectPaste, RandomOcclusion, BoundingBoxJitter,
663
+ )
664
+ ```
665
+
666
+ ### Geometric and Spatial
667
+
668
+ ```python
669
+ from ai_vision_tool.augmentation import Flip, Rotate90, Rotation, Shear
670
+
671
+ result = Flip(horizontal=True).run(image)
672
+ result = Rotate90(k=1).run(image)
673
+ result = Rotation(angle=12.0, expand=False, border_mode="constant").run(image)
674
+ result = Shear(shear_x=0.15).run(image)
675
+ ```
676
+
677
+ **`RandomResizedCrop`** — Random crop followed by a resize to a fixed output size (equivalent to torchvision's `RandomResizedCrop`).
678
+
679
+ ```python
680
+ from ai_vision_tool.augmentation import RandomResizedCrop
681
+
682
+ result = RandomResizedCrop(
683
+ output_width=224, output_height=224, scale_min=0.08, scale_max=1.0
684
+ ).run(image)
685
+ ```
686
+
687
+ **`AffineTransform`** — Combined rotate/scale/translate/shear in one pass.
688
+
689
+ ```python
690
+ from ai_vision_tool.augmentation import AffineTransform
691
+
692
+ result = AffineTransform(angle=8.0, scale=1.0, translate_x=10.0, shear_x=0.05).run(image)
693
+ ```
694
+
695
+ **`ElasticTransform`** / **`GridDistortion`** / **`OpticalDistortion`** — Spatial warping.
696
+
697
+ ```python
698
+ from ai_vision_tool.augmentation import ElasticTransform, GridDistortion, OpticalDistortion
699
+
700
+ result = ElasticTransform(alpha=3.0, sigma=1.0).run(image)
701
+ result = GridDistortion(num_steps=5, distort_limit=0.2).run(image)
702
+ result = OpticalDistortion(k=0.00001).run(image)
703
+ ```
704
+
705
+ ### Lighting, Color, and Weather
706
+
707
+ ```python
708
+ from ai_vision_tool.augmentation import (
709
+ ColorJitter, RandomShadow, RandomFog, RandomRain
710
+ )
711
+
712
+ result = ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=8).run(image)
713
+ result = RandomShadow(shadow_dimension=0.5, intensity=0.5).run(image)
714
+ result = RandomFog(alpha=0.2).run(image)
715
+ result = RandomRain(drops=40, drop_length=12, intensity=0.25).run(image)
716
+ ```
717
+
718
+ ### Blur, Compression, and Texture
719
+
720
+ ```python
721
+ from ai_vision_tool.augmentation import (
722
+ GaussianBlur, MotionBlur, DefocusBlur, JPEGCompression, Superpixel
723
+ )
724
+
725
+ result = GaussianBlur(kernel_size=5, sigma_x=1.0).run(image)
726
+ result = MotionBlur(kernel_size=11, angle=25.0).run(image)
727
+ result = DefocusBlur(radius=5).run(image)
728
+ result = JPEGCompression(quality=40).run(image)
729
+ result = Superpixel(region_size=10).run(image)
730
+ ```
731
+
732
+ ### Noise and Dropout
733
+
734
+ ```python
735
+ from ai_vision_tool.augmentation import (
736
+ Noise, ISONoise, CoarseDropout, GridDropout
737
+ )
738
+
739
+ result = Noise(mode="gaussian", mean=0.0, stddev=8.0).run(image)
740
+ result = ISONoise(color_shift=0.01, intensity=0.5).run(image)
741
+ result = CoarseDropout(holes=8, max_height=8, max_width=8).run(image)
742
+ result = GridDropout(ratio=0.5, unit_size=8).run(image)
743
+ ```
744
+
745
+ ### Multi-Image and Annotation-Aware
746
+
747
+ ```python
748
+ import cv2
749
+ from ai_vision_tool.augmentation import MixUp, CutMix, Mosaic, BoundingBoxJitter
750
+
751
+ image_b = cv2.imread("images/github/sample.jpg")
752
+
753
+ result = MixUp(alpha=0.5).run({"frame": image, "mix_image": image_b})
754
+ result = CutMix(alpha=0.5).run({"frame": image, "mix_image": image_b})
755
+
756
+ tiles = [image] * 3
757
+ result = Mosaic(output_size=(640, 640), mosaic_images=tiles).run(image)
758
+
759
+ payload = {"frame": image, "bboxes": [(10, 10, 100, 60)]}
760
+ result = BoundingBoxJitter(x_jitter=0.05, y_jitter=0.05, size_jitter=0.1).run(payload)
761
+ ```
762
+
763
+ ### Batch Processing
764
+
765
+ ```python
766
+ from ai_vision_tool.augmentation import Flip
767
+
768
+ results = Flip(horizontal=True).run([image, image, image]) # list → list
769
+ ```
770
+
771
+ ### Augmentation Profile (JSON)
772
+
773
+ ```json
774
+ [
775
+ {"name": "RandomResizedCrop", "params": {"output_width": 256, "output_height": 256}},
776
+ {"name": "ColorJitter", "params": {"brightness": 0.2, "contrast": 0.2}},
777
+ {"name": "GaussianBlur", "params": {"kernel_size": 5, "sigma_x": 1.0}}
778
+ ]
779
+ ```
780
+
781
+ ```bash
782
+ ai-vision-tool --augmentation-config examples/augmentation_profile.json
783
+ ```
784
+
785
+ ---
786
+
787
+ ## Pipeline
788
+
789
+ `AIVisionPipeline` implements the Chain of Responsibility pattern.
790
+
791
+ ```python
792
+ import cv2
793
+ from ai_vision_tool.pipelines import AIVisionPipeline
794
+ from ai_vision_tool.preprocessing import AutoOrient, Resize
795
+ from ai_vision_tool.augmentation import Flip, ColorJitter
796
+ from ai_vision_tool.visualization import FrameAnnotator
797
+ from ai_vision_tool.capture import MotionDetector
798
+
799
+ image = cv2.imread("images/github/sample.jpg")
800
+
801
+ pipeline = (
802
+ AIVisionPipeline()
803
+ .add(AutoOrient(rotation=90))
804
+ .add(Resize(width=640, height=640))
805
+ .add(Flip(horizontal=True))
806
+ .add(ColorJitter(brightness=0.15, contrast=0.15, saturation=0.15, hue=5))
807
+ .add(MotionDetector())
808
+ .add(FrameAnnotator())
809
+ )
810
+
811
+ result = pipeline.execute(
812
+ initial_data={"frame": image, "annotations": []},
813
+ global_config={"min_area": 800},
814
+ )
815
+ output_frame = result["frame"]
816
+ ```
817
+
818
+ ---
819
+
820
+ ## Detection
821
+
822
+ Detection components output `data["bboxes"]` (list of dicts with `x1/y1/x2/y2/label/conf`).
823
+
824
+ ```python
825
+ import cv2
826
+ image = cv2.imread("images/github/sample.jpg")
827
+ ```
828
+
829
+ ### ObjectDetector
830
+
831
+ YOLO (ultralytics) or ONNX backend with greedy NMS fallback.
832
+
833
+ ```python
834
+ from ai_vision_tool.detection import ObjectDetector
835
+
836
+ detector = ObjectDetector(
837
+ model_path="yolov8n.pt", # or "model.onnx"
838
+ conf_threshold=0.25,
839
+ iou_threshold=0.45,
840
+ backend="yolo", # "yolo" | "onnx"
841
+ class_names=None, # auto-loaded from ultralytics
842
+ )
843
+ result = detector.run({"frame": image})
844
+ print(result["bboxes"]) # [{"x1": ..., "y1": ..., "x2": ..., "y2": ..., "label": ..., "conf": ...}]
845
+ print(result["detection_count"])
846
+ ```
847
+
848
+ ### FaceDetector
849
+
850
+ OpenCV Haar cascade (bundled with OpenCV) or MediaPipe.
851
+
852
+ ```python
853
+ from ai_vision_tool.detection import FaceDetector
854
+
855
+ detector = FaceDetector(
856
+ backend="opencv", # "opencv" | "mediapipe"
857
+ conf_threshold=0.5,
858
+ min_face_size=20,
859
+ )
860
+ result = detector.run({"frame": image})
861
+ print(result["faces"]) # same schema as bboxes + "face_id" key
862
+ print(result["bboxes"]) # unified bbox list
863
+ ```
864
+
865
+ ### KeypointDetector
866
+
867
+ MediaPipe 33-landmark pose with pixel coordinates, or YOLO-pose.
868
+
869
+ ```python
870
+ from ai_vision_tool.detection import KeypointDetector
871
+
872
+ detector = KeypointDetector(
873
+ backend="mediapipe", # "mediapipe" | "yolo_pose"
874
+ model_complexity=1,
875
+ )
876
+ result = detector.run({"frame": image})
877
+ print(result["poses"]) # list of {"keypoints": [{x, y, z, visibility, name}, ...]}
878
+ ```
879
+
880
+ ### TextDetector
881
+
882
+ EasyOCR or PaddleOCR backend; the EAST backend is currently a placeholder.
883
+
884
+ ```python
885
+ from ai_vision_tool.detection import TextDetector
886
+
887
+ detector = TextDetector(
888
+ backend="easyocr", # "easyocr" | "paddleocr" | "east"
889
+ conf_threshold=0.5,
890
+ languages=["en"],
891
+ )
892
+ result = detector.run({"frame": image})
893
+ print(result["text_regions"]) # [{"x1", "y1", "x2", "y2", "text", "conf"}]
894
+ ```
895
+
896
+ ### AnomalyDetector
897
+
898
+ Statistical z-score histogram, PatchCore (HOG + NearestNeighbors), or PCA approximation.
899
+
900
+ ```python
901
+ from ai_vision_tool.detection import AnomalyDetector
902
+
903
+ detector = AnomalyDetector(
904
+ method="statistical", # "statistical" | "patchcore" | "pca"
905
+ window=30, # warmup frames for baseline
906
+ threshold=2.0,
907
+ )
908
+ # Feed frames sequentially — detector builds baseline during warmup
909
+ result = detector.run({"frame": image})
910
+ print(result["anomaly_score"])
911
+ print(result["is_anomaly"]) # bool
912
+ print(result["anomaly_map"]) # spatial heatmap (numpy array)
913
+ ```
914
+
915
+ ---
916
+
917
+ ## Tracking
918
+
919
+ Tracking components extend detection output with a persistent `track_id` per object.
920
+ Input: `data["bboxes"]` from a detector. Output: `data["tracks"]`.
921
+
922
+ ### ByteTracker
923
+
924
+ State-of-the-art two-stage association: high-confidence detections first, then
925
+ low-confidence detections vs. unmatched tracks (Zhang et al. 2022).
926
+
927
+ ```python
928
+ from ai_vision_tool.detection import ObjectDetector
929
+ from ai_vision_tool.tracking import ByteTracker
930
+ from ai_vision_tool.pipelines import AIVisionPipeline
931
+
932
+ pipeline = (
933
+ AIVisionPipeline()
934
+ .add(ObjectDetector(model_path="yolov8n.pt", conf_threshold=0.25))
935
+ .add(ByteTracker(
936
+ track_thresh=0.5,
937
+ track_buffer=30, # frames to keep a lost track
938
+ match_thresh=0.8,
939
+ ))
940
+ )
941
+
942
+ result = pipeline.execute(initial_data={"frame": image}, global_config={})
943
+ for track in result["tracks"]:
944
+ print(track["track_id"], track["label"], track["x1"], track["y1"])
945
+ ```
946
+
947
+ ### DeepSORTTracker
948
+
949
+ HOG-based re-identification embedding with cosine distance. Drop-in replacement for
950
+ ByteTracker; use when identity consistency across long occlusions matters.
951
+
952
+ ```python
953
+ from ai_vision_tool.tracking import DeepSORTTracker
954
+
955
+ tracker = DeepSORTTracker(
956
+ max_age=30,
957
+ min_hits=3,
958
+ iou_threshold=0.3,
959
+ embedding_method="hog", # "hog" | "osnet_onnx"
960
+ )
961
+ result = tracker.run({"frame": image, "bboxes": [...]})
962
+ print(result["tracks"])
963
+ ```
964
+
965
+ ### ReIDExtractor
966
+
967
+ Extract appearance embeddings for gallery-matching workflows.
968
+
969
+ ```python
970
+ from ai_vision_tool.tracking import ReIDExtractor
971
+
972
+ extractor = ReIDExtractor(method="hog", embedding_dim=128)
973
+ result = extractor.run({"frame": image, "bboxes": [...]})
974
+ print(result["embeddings"]) # list of float arrays, one per bbox
975
+ ```
976
+
977
+ ### TrackManager
978
+
979
+ Low-level track lifecycle management. Used internally by ByteTracker and DeepSORTTracker
980
+ but accessible directly for custom tracking logic.
981
+
982
+ ```python
983
+ from ai_vision_tool.tracking import TrackManager
984
+
985
+ tm = TrackManager(max_age=30, min_hits=3, iou_threshold=0.3)
986
+ tracks = tm.update(bboxes_list, frame_id=42)
987
+ ```
988
+
989
+ ### KalmanFilter
990
+
991
+ 7-state (cx, cy, s, r, vx, vy, vs) Kalman filter used by both built-in trackers.
992
+
993
+ ```python
994
+ from ai_vision_tool.tracking import KalmanFilter
995
+
996
+ kf = KalmanFilter()
997
+ mean, cov = kf.initiate([x1, y1, x2, y2])
998
+ mean, cov = kf.predict(mean, cov)
999
+ mean, cov = kf.update(mean, cov, [x1, y1, x2, y2])
1000
+ ```
1001
+
1002
+ ---
1003
+
1004
+ ## Segmentation
1005
+
1006
+ Segmentation components produce pixel-level masks. All follow the same component interface.
1007
+
1008
+ ### SemanticSegmenter
1009
+
1010
+ ONNX, OpenCV DNN, or TorchScript backend. Defaults to VOC-21 class names.
1011
+
1012
+ ```python
1013
+ from ai_vision_tool.segmentation import SemanticSegmenter
1014
+
1015
+ segmenter = SemanticSegmenter(
1016
+ model_path="deeplabv3.onnx",
1017
+ backend="onnx", # "onnx" | "opencv_dnn" | "torch"
1018
+ num_classes=21,
1019
+ input_size=(513, 513),
1020
+ )
1021
+ result = segmenter.run({"frame": image})
1022
+ print(result["seg_map"]) # (H, W) class index array
1023
+ print(result["seg_overlay"]) # colorized overlay on original frame
1024
+ print(result["masks"]) # list of per-class binary masks
1025
+ ```
1026
+
1027
+ ### InstanceSegmenter
1028
+
1029
+ YOLO-seg mask output resized to original frame size.
1030
+
1031
+ ```python
1032
+ from ai_vision_tool.segmentation import InstanceSegmenter
1033
+
1034
+ segmenter = InstanceSegmenter(
1035
+ model_path="yolov8n-seg.pt",
1036
+ backend="yolo",
1037
+ conf_threshold=0.25,
1038
+ )
1039
+ result = segmenter.run({"frame": image})
1040
+ print(result["masks"]) # list of binary masks
1041
+ print(result["bboxes"]) # aligned with masks
1042
+ print(result["instance_overlay"])
1043
+ ```
1044
+
1045
+ ### PanopticSegmenter
1046
+
1047
+ Separates stuff (background) and thing (object) classes.
1048
+
1049
+ ```python
1050
+ from ai_vision_tool.segmentation import PanopticSegmenter
1051
+
1052
+ segmenter = PanopticSegmenter(model_path="panoptic.onnx")
1053
+ result = segmenter.run({"frame": image})
1054
+ print(result["panoptic_map"]) # (H, W) instance-class encoded
1055
+ print(result["stuff_mask"])
1056
+ print(result["thing_mask"])
1057
+ ```
1058
+
1059
+ ### SAMSegmenter
1060
+
1061
+ Segment Anything Model — point, box, and auto-everything prompts.
1062
+
1063
+ ```python
1064
+ from ai_vision_tool.segmentation import SAMSegmenter
1065
+
1066
+ # Point prompt
1067
+ segmenter = SAMSegmenter(
1068
+ model_path="sam_vit_b.pth",
1069
+ model_type="vit_b",
1070
+ mode="point",
1071
+ device="auto",
1072
+ )
1073
+ result = segmenter.run({"frame": image, "prompt_points": [(320, 240)], "prompt_labels": [1]})
1074
+ print(result["masks"]) # list of binary masks
1075
+ print(result["iou_scores"])
1076
+
1077
+ # Auto-everything (no prompts)
1078
+ segmenter = SAMSegmenter(model_path="sam_vit_b.pth", mode="auto")
1079
+ result = segmenter.run({"frame": image})
1080
+ print(result["masks"]) # all detected segments
1081
+ ```
1082
+
1083
+ ### MaskPostProcessor
1084
+
1085
+ Morphological cleanup of segmentation masks.
1086
+
1087
+ ```python
1088
+ from ai_vision_tool.segmentation import MaskPostProcessor
1089
+
1090
+ processor = MaskPostProcessor(
1091
+ operations=["erode", "dilate", "fill_holes", "remove_small", "largest_only"],
1092
+ kernel_size=5,
1093
+ )
1094
+ result = processor.run({"frame": image, "masks": [binary_mask]})
1095
+ print(result["masks"]) # cleaned masks
1096
+ print(result["polygons"]) # polygon contours per mask
1097
+ ```
1098
+
1099
+ ---
1100
+
1101
+ ## Enhancement
1102
+
1103
+ Enhancement components restore or improve degraded images. All use the same component
1104
+ interface and fall back to pure NumPy/OpenCV if heavy deps are unavailable.
1105
+
1106
+ ### SuperResolution
1107
+
1108
+ 2× or 4× upscaling. Uses `cv2.dnn_superres` if available, then ONNX, then bicubic.
1109
+
1110
+ ```python
1111
+ from ai_vision_tool.enhancement import SuperResolution
1112
+
1113
+ sr = SuperResolution(
1114
+ scale=2,
1115
+ backend="auto", # "auto" | "opencv" | "onnx" | "bicubic"
1116
+ model_path=None, # optional ONNX or OpenCV SR model
1117
+ )
1118
+ result = sr.run({"frame": image})
1119
+ print(result["frame"].shape) # (H*2, W*2, 3)
1120
+ print(result["sr_scale"]) # 2
1121
+ print(result["sr_backend"]) # "bicubic" / "opencv" / "onnx"
1122
+ ```
1123
+
1124
+ ### Denoiser
1125
+
1126
+ Non-local means, bilateral filter, Gaussian, median, or DnCNN-ONNX.
1127
+
1128
+ ```python
1129
+ from ai_vision_tool.enhancement import Denoiser
1130
+
1131
+ result = Denoiser(method="nlmeans", strength=10.0).run({"frame": image})
1132
+ result = Denoiser(method="bilateral", strength=9.0).run({"frame": image})
1133
+ result = Denoiser(method="gaussian", strength=3.0).run({"frame": image})
1134
+ # DnCNN-ONNX
1135
+ result = Denoiser(method="dncnn", model_path="dncnn.onnx").run({"frame": image})
1136
+ print(result["denoise_method"])
1137
+ ```
1138
+
1139
+ ### Deblurrer
1140
+
1141
+ Wiener deconvolution (FFT), Richardson-Lucy iterative, unsharp mask, or NAFNet-ONNX.
1142
+
1143
+ ```python
1144
+ from ai_vision_tool.enhancement import Deblurrer
1145
+
1146
+ result = Deblurrer(method="wiener", kernel_size=5).run({"frame": image})
1147
+ result = Deblurrer(method="richardson_lucy", kernel_size=5, iterations=10).run({"frame": image})
1148
+ result = Deblurrer(method="unsharp", strength=1.0).run({"frame": image})
1149
+ result = Deblurrer(method="nafnet", model_path="nafnet.onnx").run({"frame": image})
1150
+ ```
1151
+
1152
+ ### LowLightEnhancer
1153
+
1154
+ CLAHE on LAB L-channel, gamma LUT, histogram stretch, single/multi-scale Retinex,
1155
+ Zero-DCE brightness curve approximation, or ONNX model.
1156
+
1157
+ ```python
1158
+ from ai_vision_tool.enhancement import LowLightEnhancer
1159
+
1160
+ result = LowLightEnhancer(method="clahe", clip_limit=3.0).run({"frame": image})
1161
+ result = LowLightEnhancer(method="gamma", gamma=0.5).run({"frame": image})
1162
+ result = LowLightEnhancer(method="msr").run({"frame": image}) # multi-scale Retinex
1163
+ result = LowLightEnhancer(method="zero_dce").run({"frame": image})
1164
+ result = LowLightEnhancer(method="onnx", model_path="llnet.onnx").run({"frame": image})
1165
+ ```
1166
+
1167
+ ### Colorizer
1168
+
1169
+ Zhang 2016 LAB-AB network colorization, pseudo-color (VIRIDIS), thermal (JET), or ONNX.
1170
+
1171
+ ```python
1172
+ from ai_vision_tool.enhancement import Colorizer
1173
+
1174
+ result = Colorizer(method="opencv_dnn", model_path="colorization.caffemodel").run({"frame": gray_image})
1175
+ result = Colorizer(method="pseudo_color").run({"frame": gray_image})
1176
+ result = Colorizer(method="thermal").run({"frame": gray_image})
1177
+ print(result["is_grayscale_input"]) # True if input was single-channel
1178
+ ```
1179
+
1180
+ ---
1181
+
1182
+ ## I/O
1183
+
1184
+ I/O components read images, videos, and cloud blobs, or export annotated datasets.
1185
+
1186
+ ### ImageReader / ImageWriter
1187
+
1188
+ ```python
1189
+ from ai_vision_tool.io import ImageReader, ImageWriter
1190
+
1191
+ # Read a single image
1192
+ reader = ImageReader(path="image.jpg", color_mode="bgr") # "bgr" | "rgb" | "gray"
1193
+ result = reader.run({})
1194
+ image = result["frame"]
1195
+
1196
+ # Write frames — {index}, {timestamp}, {label} tokens in filename
1197
+ writer = ImageWriter(
1198
+ output_dir="output/frames",
1199
+ filename_pattern="{index:06d}.jpg",
1200
+ quality=95,
1201
+ )
1202
+ writer.run({"frame": image})
1203
+ writer.cleanup()
1204
+ ```
1205
+
1206
+ ### VideoReader / VideoWriter
1207
+
1208
+ ```python
1209
+ from ai_vision_tool.io import VideoReader, VideoWriter
1210
+
1211
+ # Stream frames from a video file
1212
+ reader = VideoReader("video.mp4", start_frame=0, step=1)
1213
+ for payload in reader:
1214
+ if payload.get("eof"):
1215
+ break
1216
+ frame = payload["frame"]
1217
+
1218
+ # Write annotated frames to video
1219
+ writer = VideoWriter(output_path="out.mp4", fps=30.0, codec="mp4v")
1220
+ writer.run({"frame": frame})
1221
+ writer.cleanup()
1222
+ ```
1223
+
1224
+ ### CameraSource
1225
+
1226
+ Live webcam, RTSP, or HTTP stream reader.
1227
+
1228
+ ```python
1229
+ from ai_vision_tool.io import CameraSource
1230
+
1231
+ cam = CameraSource(
1232
+ source=0, # 0 = webcam, "rtsp://..." = RTSP, "http://..." = HTTP
1233
+ width=1280,
1234
+ height=720,
1235
+ fps=30.0,
1236
+ buffer_size=1,
1237
+ )
1238
+ cam.setup({})
1239
+
1240
+ payload = {"frame": None}
1241
+ result = cam.run(payload)
1242
+ frame = result["frame"]
1243
+ print(result["fps_actual"])
1244
+ cam.cleanup()
1245
+ ```
1246
+
1247
+ ### S3Source / GCSSource
1248
+
1249
+ Stream images from cloud storage as pipeline inputs.
1250
+
1251
+ ```python
1252
+ from ai_vision_tool.integrations.cloud import S3Source
1253
+
1254
+ source = S3Source(
1255
+ bucket="my-bucket",
1256
+ prefix="images/train/",
1257
+ extensions=(".jpg", ".png"),
1258
+ aws_region="ap-southeast-1",
1259
+ )
1260
+ source.setup({})
1261
+ result = source.run({}) # reads next image from bucket
1262
+ frame = result["frame"]
1263
+ print(result["s3_key"])
1264
+ ```
1265
+
1266
+ ```python
1267
+ from ai_vision_tool.integrations.cloud import GCSSource
1268
+
1269
+ source = GCSSource(
1270
+ bucket="my-gcs-bucket",
1271
+ prefix="frames/",
1272
+ credentials_path="/path/to/sa.json", # None = use ADC
1273
+ )
1274
+ result = source.run({})
1275
+ ```
1276
+
1277
+ ### DatasetExporter
1278
+
1279
+ Export detections as YOLO txt, COCO JSON, or VOC XML.
1280
+
1281
+ ```python
1282
+ from ai_vision_tool.io import DatasetExporter
1283
+
1284
+ exporter = DatasetExporter(
1285
+ output_dir="dataset/",
1286
+ format="yolo", # "yolo" | "coco" | "voc"
1287
+ split="train",
1288
+ class_names=["cat", "dog"],
1289
+ )
1290
+ exporter.run({
1291
+ "frame": image,
1292
+ "bboxes": [{"x1": 10, "y1": 20, "x2": 120, "y2": 80, "label": "cat", "conf": 0.9}],
1293
+ })
1294
+ exporter.cleanup() # flushes COCO JSON / VOC XML to disk
1295
+ ```
1296
+
1297
+ ---
1298
+
1299
+ ## Streaming
1300
+
1301
+ Streaming components connect real-time sources and sinks to pipelines.
1302
+
1303
+ ### FrameStream / DirectoryStream
1304
+
1305
+ Unified iterator over a webcam index, a video path, a list of paths, or an image directory.
1306
+
1307
+ ```python
1308
+ from ai_vision_tool.streaming import FrameStream, DirectoryStream
1309
+
1310
+ # Iterate a video
1311
+ with FrameStream("video.mp4", max_frames=100) as stream:
1312
+ for payload in stream:
1313
+ frame = payload["frame"]
1314
+
1315
+ # Iterate sorted images from a directory
1316
+ for payload in DirectoryStream("data/frames/", extensions=(".jpg", ".png")):
1317
+ frame = payload["frame"]
1318
+ ```
1319
+
1320
+ ### RTSPClient
1321
+
1322
+ Background-threaded RTSP reader with auto-reconnect.
1323
+
1324
+ ```python
1325
+ from ai_vision_tool.streaming import RTSPClient
1326
+
1327
+ client = RTSPClient(
1328
+ url="rtsp://192.168.1.10:554/stream",
1329
+ reconnect=True,
1330
+ reconnect_delay=2.0,
1331
+ max_retries=3,
1332
+ )
1333
+ client.setup({})
1334
+ result = client.run({}) # returns latest buffered frame
1335
+ frame = result["frame"]
1336
+ client.cleanup()
1337
+ ```
1338
+
1339
+ ### WebSocketSink / WebSocketSource
1340
+
1341
+ Broadcast frames as base64 JPEG over WebSocket. Falls back to MJPEG HTTP when
1342
+ `websockets` is not installed.
1343
+
1344
+ ```python
1345
+ from ai_vision_tool.integrations.streaming import WebSocketSink
1346
+
1347
+ sink = WebSocketSink(host="0.0.0.0", port=8765, quality=80)
1348
+ sink.setup({})
1349
+
1350
+ sink.run({"frame": frame}) # broadcast to all connected clients
1351
+ sink.cleanup()
1352
+ ```
1353
+
1354
+ ```python
1355
+ from ai_vision_tool.integrations.streaming import WebSocketSource
1356
+
1357
+ source = WebSocketSource(url="ws://localhost:8765")
1358
+ source.setup({})
1359
+ result = source.run({})
1360
+ frame = result["frame"]
1361
+ ```
1362
+
1363
+ ### KafkaSource / KafkaSink
1364
+
1365
+ Stream frames as base64-JPEG JSON messages through Kafka. Requires the `kafka` extra
1366
+ (`pip install "ai-vision-tool[kafka]"`).
1367
+
1368
+ ```python
1369
+ from ai_vision_tool.integrations.streaming import KafkaSink, KafkaSource
1370
+
1371
+ sink = KafkaSink(bootstrap_servers="localhost:9092", topic="vision_frames", quality=80)
1372
+ sink.setup({})
1373
+ sink.run({"frame": frame})
1374
+
1375
+ source = KafkaSource(
1376
+ bootstrap_servers="localhost:9092",
1377
+ topic="vision_frames",
1378
+ group_id="ai_vision",
1379
+ )
1380
+ source.setup({})
1381
+ result = source.run({})
1382
+ frame = result["frame"]
1383
+ ```
1384
+
1385
+ ### BufferedStream / SlidingWindowBuffer
1386
+
1387
+ Decouple producer and consumer speeds with a frame buffer.
1388
+
1389
+ ```python
1390
+ from ai_vision_tool.streaming import BufferedStream, SlidingWindowBuffer
1391
+
1392
+ # Buffer with "oldest" drop policy when full
1393
+ buf = BufferedStream(buffer_size=30, drop_policy="oldest", emit_rate=None)
1394
+ buf.run({"frame": frame}) # push frame
1395
+ result = buf.run({}) # pop frame
1396
+
1397
+ # Sliding window — yields batches of `window` frames with optional overlap
1398
+ window = SlidingWindowBuffer(window=16, overlap=8)
1399
+ window.push(frame)
1400
+ if window.ready():
1401
+ batch = window.get() # list of 16 frames
1402
+ ```
1403
+
1404
+ ---
1405
+
1406
+ ## Visualization
1407
+
1408
+ Visualization components render annotations, serve dashboards, and export annotated video.
1409
+
1410
+ ### FrameViewer
1411
+
1412
+ Display frames in a cv2 window with a rolling FPS counter. Sets `data["stop"] = True` when the `q` key is pressed.
1413
+
1414
+ ```python
1415
+ from ai_vision_tool.visualization import FrameViewer
1416
+
1417
+ viewer = FrameViewer(window_name="Preview", fps_window=30)
1418
+ viewer.setup({})
1419
+
1420
+ for payload in FrameStream("video.mp4"):
1421
+ result = viewer.run(payload)
1422
+ if result.get("stop"):
1423
+ break
1424
+ viewer.cleanup()
1425
+ ```
1426
+
1427
+ ### BBoxRenderer
1428
+
1429
+ Render bounding boxes with consistent per-class colors, optional semi-transparent fill,
1430
+ and label/confidence/track-id text.
1431
+
1432
+ ```python
1433
+ from ai_vision_tool.visualization import BBoxRenderer
1434
+
1435
+ renderer = BBoxRenderer(
1436
+ thickness=2,
1437
+ font_scale=0.5,
1438
+ show_conf=True,
1439
+ show_label=True,
1440
+ show_track_id=True,
1441
+ alpha=0.25, # semi-transparent fill; 0 = no fill
1442
+ )
1443
+ result = renderer.run({
1444
+ "frame": image,
1445
+ "bboxes": [{"x1": 10, "y1": 20, "x2": 200, "y2": 150, "label": "person", "conf": 0.87}],
1446
+ })
1447
+ output = result["rendered_frame"]
1448
+ ```
1449
+
1450
+ ### HeatmapRenderer
1451
+
1452
+ Accumulate and overlay spatial heatmaps from detections, anomaly maps, attention, or
1453
+ optical flow.
1454
+
1455
+ ```python
1456
+ from ai_vision_tool.visualization import HeatmapRenderer
1457
+ import cv2
1458
+
1459
+ renderer = HeatmapRenderer(
1460
+ source="detections", # "detections" | "anomaly_map" | "attention" | "motion"
1461
+ colormap=cv2.COLORMAP_JET,
1462
+ alpha=0.5,
1463
+ accumulate=True, # keep cumulative density
1464
+ decay=0.95,
1465
+ )
1466
+ result = renderer.run({"frame": image, "bboxes": [...]})
1467
+ print(result["heatmap"]) # raw density float array
1468
+ print(result["heatmap_overlay"]) # blended on original frame
1469
+ ```
1470
+
1471
+ ### DashboardSink
1472
+
1473
+ Serve a live stream dashboard. Uses Gradio if installed; falls back to MJPEG HTTP.
1474
+
1475
+ ```python
1476
+ from ai_vision_tool.visualization import DashboardSink
1477
+
1478
+ sink = DashboardSink(host="0.0.0.0", port=7860, quality=80, title="Vision Dashboard")
1479
+ sink.setup({})
1480
+ # Opens http://0.0.0.0:7860/ — update by pushing frames in your loop
1481
+ sink.run({"frame": frame})
1482
+ ```
1483
+
1484
+ ### VideoAnnotationExporter
1485
+
1486
+ Write an annotated output video with optional JSON sidecar containing per-frame bbox data.
1487
+
1488
+ ```python
1489
+ from ai_vision_tool.visualization import VideoAnnotationExporter
1490
+
1491
+ exporter = VideoAnnotationExporter(
1492
+ output_path="output/annotated.mp4",
1493
+ fps=30.0,
1494
+ codec="mp4v",
1495
+ burn_annotations=True, # render bboxes/tracks onto frames
1496
+ export_json=True, # write annotated.mp4 + annotated_annotations.json
1497
+ )
1498
+ exporter.setup({})
1499
+
1500
+ for payload in FrameStream("video.mp4"):
1501
+ # payload["bboxes"] or payload["tracks"] added by upstream detector/tracker
1502
+ exporter.run(payload)
1503
+
1504
+ exporter.cleanup() # flushes video + JSON
1505
+ ```
1506
+
1507
+ ---
1508
+
1509
+ ## Capture Components
1510
+
1511
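+ Stateful capture and annotation helpers. Import each one from its own domain module (`ai_vision_tool.capture`, `ai_vision_tool.enhancement`, `ai_vision_tool.visualization`, `ai_vision_tool.io`, or `ai_vision_tool.integrations.labeling`), as shown in the snippets below.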
1512
+
1513
+ ```python
1514
+ import cv2
1515
+ image = cv2.imread("images/github/sample.jpg")
1516
+ ```
1517
+
1518
+ ### Frame Processors
1519
+
1520
+ **`FrameEnhancer`** — Brightness, contrast, sharpening, denoising in a single pass.
1521
+
1522
+ ```python
1523
+ from ai_vision_tool.enhancement import FrameEnhancer
1524
+
1525
+ result = FrameEnhancer().run(
1526
+ {"frame": image},
1527
+ {"brightness": 10, "contrast": 1.15, "sharpen": True, "denoise": False},
1528
+ )
1529
+ ```
1530
+
1531
+ **`MotionDetector`** — Detect motion regions using background subtraction.
1532
+
1533
+ ```python
1534
+ from ai_vision_tool.capture import MotionDetector
1535
+
1536
+ result = MotionDetector().run({"frame": image}, {"min_area": 800, "draw_motion": True})
1537
+ print(result["motion_boxes"])
1538
+ ```
1539
+
1540
+ **`FrameAnnotator`** — Render payload-driven annotations (text, boxes, lines).
1541
+
1542
+ ```python
1543
+ from ai_vision_tool.visualization import FrameAnnotator
1544
+
1545
+ result = FrameAnnotator().run(
1546
+ {"frame": image, "annotations": [{"type": "text", "text": "Demo", "pos": (20, 30)}]},
1547
+ {},
1548
+ )
1549
+ ```
1550
+
1551
+ ### Capture Helpers
1552
+
1553
+ ```python
1554
+ from ai_vision_tool.capture import PictureTaker, BurstPictureTaker, VideoTaker, FrameGrabber
1555
+
1556
+ PictureTaker().run(None, {"imgdir": "output/stills", "camera_id": 0})
1557
+ BurstPictureTaker(burst_count=5, interval_seconds=0.2)
1558
+ VideoTaker().run(None, {"viddir": "output/videos", "fps": 30.0})
1559
+ FrameGrabber().run("video.mp4", {"output_folder": "output/frames", "skip_frames": 90})
1560
+ ```
1561
+
1562
+ ### Dataset and Export
1563
+
1564
+ ```python
1565
+ from ai_vision_tool.io import DatasetCollector, ImageExporter
1566
+ from ai_vision_tool.capture import TimeLapseCapture
1567
+
1568
+ DatasetCollector().run(
1569
+ {"frame": image},
1570
+ {"save_sample": True, "output_dir": "output/dataset", "label": "forklift"},
1571
+ )
1572
+ TimeLapseCapture(output_dir="output/timelapse", interval_seconds=5).run({"frame": image}, {})
1573
+ ImageExporter(output_dir="output/exports").run({"frame": image}, {"export_gray": True})
1574
+ ```
1575
+
1576
+ ### Auto-Labeling
1577
+
1578
+ ```python
1579
+ from ai_vision_tool.integrations.labeling import DarknetAutoLabeler, TensorFlowAutoLabeler
1580
+
1581
+ DarknetAutoLabeler().run({"frame": image}, {"output_dir": "output/labels"})
1582
+ TensorFlowAutoLabeler().run({"frame": image}, {"output_dir": "output/labels"})
1583
+ ```
1584
+
1585
+ ---
1586
+
1587
+ ## Utilities
1588
+
1589
+ Utility classes provide shared infrastructure used across components.
1590
+
1591
+ ### ColorPalette
1592
+
1593
+ Golden-ratio hue HSV→BGR palette for consistent per-class coloring.
1594
+
1595
+ ```python
1596
+ from ai_vision_tool.utils import ColorPalette
1597
+
1598
+ palette = ColorPalette(n_colors=80, seed=42)
1599
+ color = palette.get("person") # (B, G, R) tuple, stable per label string
1600
+ color = palette[0] # by integer class index
1601
+ print(palette.as_dict()) # {label: (B, G, R), ...}
1602
+ ```
1603
+
1604
+ ### MetricsLogger / MetricsLoggerComponent
1605
+
1606
+ Thread-safe rolling metrics logger.
1607
+
1608
+ ```python
1609
+ from ai_vision_tool.utils import MetricsLogger, MetricsLoggerComponent
1610
+
1611
+ # Standalone
1612
+ logger = MetricsLogger(window=30)
1613
+ logger.tick()
1614
+ logger.log_latency(12.5) # ms
1615
+ print(logger.fps())
1616
+ print(logger.report())
1617
+
1618
+ # As a pipeline component — attaches data["metrics"] to payload
1619
+ component = MetricsLoggerComponent(window=30)
1620
+ result = component.run({"frame": image})
1621
+ print(result["metrics"]) # {"fps": ..., "mean_latency_ms": ..., "frame_count": ...}
1622
+ ```
1623
+
1624
+ ### FrameSampler
1625
+
1626
+ Throttle pipeline throughput by skipping frames.
1627
+
1628
+ ```python
1629
+ from ai_vision_tool.utils import FrameSampler
1630
+
1631
+ sampler = FrameSampler(
1632
+ every_n=3, # mode="count": process every 3rd frame
1633
+ mode="count", # "count" | "fps" | "random"
1634
+ target_fps=10.0, # mode="fps": target output rate
1635
+ prob=0.5, # mode="random": pass-through probability
1636
+ )
1637
+ result = sampler.run({"frame": image})
1638
+ print(result.get("skip")) # True → downstream should skip this frame
1639
+ ```
1640
+
1641
+ ### ImageHash
1642
+
1643
+ Perceptual hashing for duplicate detection.
1644
+
1645
+ ```python
1646
+ from ai_vision_tool.utils import ImageHash
1647
+
1648
+ hasher = ImageHash(
1649
+ method="phash", # "phash" | "ahash" | "dhash"
1650
+ hash_size=8,
1651
+ threshold=10, # Hamming distance threshold
1652
+ )
1653
+ result = hasher.run({"frame": image})
1654
+ print(result["hash"]) # hex string
1655
+ print(result["hash_distance"]) # distance to reference (if reference set)
1656
+ print(result["is_duplicate"]) # bool
1657
+ ```
1658
+
1659
+ ### DrawUtils
1660
+
1661
+ Render bboxes, masks, and keypoints from payload data.
1662
+
1663
+ ```python
1664
+ from ai_vision_tool.utils import DrawUtils
1665
+
1666
+ drawer = DrawUtils(font_scale=0.5, thickness=1, alpha=0.4)
1667
+ result = drawer.run({
1668
+ "frame": image,
1669
+ "bboxes": [{"x1": 10, "y1": 10, "x2": 200, "y2": 150, "label": "car", "conf": 0.92}],
1670
+ "masks": [binary_mask],
1671
+ "poses": [{"keypoints": [...]}],
1672
+ })
1673
+ output = result["frame"]
1674
+ ```
1675
+
1676
+ ---
1677
+
1678
+ ## Core
1679
+
1680
+ Core utilities provide device management, typed data structures, batch processing, and
1681
+ rate limiting.
1682
+
1683
+ ### Device
1684
+
1685
+ Auto-select CUDA, MPS (Apple Silicon), or CPU.
1686
+
1687
+ ```python
1688
+ from ai_vision_tool.core import Device
1689
+
1690
+ dev = Device("auto") # "auto" | "cuda" | "mps" | "cpu"
1691
+ print(dev.name) # "cuda:0" / "mps" / "cpu"
1692
+ tensor = dev.to_torch(numpy_array)
1693
+ backend = dev.to_cv_backend() # cv2 DNN target constant
1694
+
1695
+ # Singleton — shares device across the process
1696
+ default_dev = Device.default()
1697
+ ```
1698
+
1699
+ ### Data Types
1700
+
1701
+ Typed dataclasses for detections, poses, masks, and tracks.
1702
+
1703
+ ```python
1704
+ from ai_vision_tool.core import BBox, Detection, Keypoint, Pose, Mask, Track
1705
+
1706
+ bbox = BBox(x1=10, y1=20, x2=100, y2=80, label="car", conf=0.9)
1707
+ print(bbox.iou(BBox(x1=15, y1=25, x2=110, y2=85)))
1708
+ print(bbox.to_xywh())
1709
+ print(bbox.clip(width=640, height=480).as_dict())
1710
+
1711
+ mask = Mask(data=binary_array, label="person")
1712
+ polygon = mask.to_polygon() # contour points
1713
+
1714
+ track = Track(track_id=7, bbox=bbox, state="active", age=12)
1715
+ ```
1716
+
1717
+ ### BatchProcessor
1718
+
1719
+ Process image directories or lists in parallel.
1720
+
1721
+ ```python
1722
+ from ai_vision_tool.core import BatchProcessor
1723
+ from ai_vision_tool.pipelines import AIVisionPipeline
1724
+ from ai_vision_tool.preprocessing import Resize
1725
+
1726
+ pipeline = AIVisionPipeline().add(Resize(width=640, height=640))
1727
+
1728
+ processor = BatchProcessor(pipeline, batch_size=8, num_workers=4)
1729
+ results = processor.process([image_a, image_b, image_c])
1730
+ results = processor.process_directory("data/images/", extensions=(".jpg", ".png"))
1731
+ ```
1732
+
1733
+ ### Scheduler / RateLimiter
1734
+
1735
+ Token-bucket rate limiting. `Scheduler` is a pipeline component that skips or blocks
1736
+ frames to enforce a target FPS. `RateLimiter` is a standalone utility.
1737
+
1738
+ ```python
1739
+ from ai_vision_tool.core import Scheduler, RateLimiter
1740
+
1741
+ scheduler = Scheduler(target_fps=10.0, drop_policy="skip") # "skip" | "block"
1742
+ result = scheduler.run({"frame": image})
1743
+ if result.get("skip"):
1744
+     continue  # valid only inside your frame-processing loop
1745
+
1746
+ limiter = RateLimiter(calls_per_second=5.0)
1747
+ limiter.acquire() # blocks until token available
1748
+ ```
1749
+
1750
+ ### MemoryManager / GPUMemoryTracker
1751
+
1752
+ Pre-allocated buffer pool for zero-copy frame passing, plus a CUDA memory delta tracker.
1753
+
1754
+ ```python
1755
+ from ai_vision_tool.core import MemoryManager, GPUMemoryTracker
1756
+
1757
+ pool = MemoryManager(pool_size=10, shape=(720, 1280, 3))
1758
+ buf = pool.acquire() # numpy array from pool
1759
+ # ... fill buf ...
1760
+ pool.release(buf)
1761
+
1762
+ with pool.context() as buf: # auto-release on exit
1763
+ buf[:] = frame
1764
+
1765
+ tracker = GPUMemoryTracker()
1766
+ tracker.snapshot()
1767
+ print(tracker.delta_mb())
1768
+ ```
1769
+
1770
+ ---
1771
+
1772
+ ## Configuration
1773
+
1774
+ Configuration utilities manage YAML/JSON configs, component discovery, and environment
1775
+ variable injection.
1776
+
1777
+ ### YAMLConfig
1778
+
1779
+ ```python
1780
+ from ai_vision_tool.config import YAMLConfig
1781
+
1782
+ cfg = YAMLConfig("config/pipeline.yaml")
1783
+ fps = cfg.get("stream.fps", default=30)
1784
+ cfg.merge({"stream": {"fps": 25}})
1785
+ cfg.validate(schema={"stream": {"fps": int}})
1786
+ cfg.reload() # re-read file on disk
1787
+ ```
1788
+
1789
+ ### JSONConfig
1790
+
1791
+ ```python
1792
+ from ai_vision_tool.config import JSONConfig
1793
+
1794
+ cfg = JSONConfig("config/settings.json")
1795
+ cfg.set("model.threshold", 0.3)
1796
+ cfg.save()
1797
+
1798
+ cfg2 = JSONConfig.from_dict({"model": {"threshold": 0.5}})
1799
+ ```
1800
+
1801
+ ### ComponentRegistry
1802
+
1803
+ Singleton registry. Supports decorator-style registration and config-driven `build()`.
1804
+
1805
+ ```python
1806
+ from ai_vision_tool.config import ComponentRegistry
1807
+
1808
+ registry = ComponentRegistry()
1809
+
1810
+ @registry.register("MyPreprocessor")
1811
+ class MyPreprocessor:
1812
+ ...
1813
+
1814
+ # Build by name (auto-registers all ai_vision_tool exports)
1815
+ component = registry.build("Resize", width=640, height=640)
1816
+
1817
+ # Build a pipeline from a list of dicts
1818
+ pipeline = registry.build_from_config([
1819
+ {"name": "Resize", "params": {"width": 640, "height": 640}},
1820
+ {"name": "Flip", "params": {"horizontal": True}},
1821
+ ])
1822
+ ```
1823
+
1824
+ ### ProfileLoader
1825
+
1826
+ Load named profiles from YAML/JSON files in search paths.
1827
+
1828
+ ```python
1829
+ from ai_vision_tool.config import ProfileLoader
1830
+
1831
+ loader = ProfileLoader(search_paths=["profiles/", "~/.ai_vision/"])
1832
+ profile = loader.load("augmentation_heavy") # loads augmentation_heavy.yaml
1833
+ pipeline = loader.load_pipeline("detection_rtsp") # builds AIVisionPipeline
1834
+ loader.save_profile({"name": "custom"}, "profiles/custom.yaml")
1835
+ ```
1836
+
1837
+ ### EnvConfig
1838
+
1839
+ Read configuration from environment variables with type casting.
1840
+
1841
+ ```python
1842
+ from ai_vision_tool.config import EnvConfig
1843
+ import os
1844
+
1845
+ os.environ["AI_VISION_DEVICE"] = "cuda"
1846
+ os.environ["AI_VISION_API_PORT"] = "8080"
1847
+
1848
+ env = EnvConfig(prefix="AI_VISION")
1849
+ device = env.get("DEVICE", default="cpu") # → "cuda"
1850
+ port = env.get("API_PORT", cast=int, default=8300) # → 8080
1851
+ env.require("MODEL_PATH") # raises if missing
1852
+
1853
+ print(env.device) # shorthand property
1854
+ print(env.api_port)
1855
+ ```
1856
+
1857
+ ---
1858
+
1859
+ ## Models
1860
+
1861
+ Model runners, registry, downloader, and benchmarking utilities.
1862
+
1863
+ ### ModelRegistry
1864
+
1865
+ JSON-cached model registry stored at `~/.cache/ai_vision_tool/model_registry.json`.
1866
+
1867
+ ```python
1868
+ from ai_vision_tool.models import ModelRegistry
1869
+
1870
+ registry = ModelRegistry()
1871
+ registry.register("yolov8n", path="/models/yolov8n.pt", format="torch", tags=["detection"])
1872
+ component = registry.load("yolov8n") # returns TorchModel / ONNXModel / TFLiteModel
1873
+ component.setup({})
1874
+
1875
+ component2 = registry.from_huggingface("Salesforce/blip-image-captioning-base")
1876
+ ```
1877
+
1878
+ ### ONNXModel
1879
+
1880
+ Run any ONNX model as a pipeline component.
1881
+
1882
+ ```python
1883
+ from ai_vision_tool.models import ONNXModel
1884
+
1885
+ model = ONNXModel(
1886
+ model_path="model.onnx",
1887
+ input_name=None, # auto-detected
1888
+ input_size=(640, 640),
1889
+ providers=None, # ["CUDAExecutionProvider", "CPUExecutionProvider"]
1890
+ )
1891
+ result = model.run({"frame": image})
1892
+ print(result["model_output"]) # raw ONNX output arrays
1893
+ print(result["model_name"])
1894
+ ```
1895
+
1896
+ ### TorchModel
1897
+
1898
+ Run a TorchScript model as a pipeline component.
1899
+
1900
+ ```python
1901
+ from ai_vision_tool.models import TorchModel
1902
+
1903
+ model = TorchModel(
1904
+ model_path="model.torchscript",
1905
+ device="auto",
1906
+ half_precision=False,
1907
+ )
1908
+ result = model.run({"frame": image})
1909
+ print(result["model_output"])
1910
+ ```
1911
+
1912
+ ### TFLiteModel
1913
+
1914
+ Run a TFLite model (uses tflite-runtime, falling back to tensorflow).
1915
+
1916
+ ```python
1917
+ from ai_vision_tool.models import TFLiteModel
1918
+
1919
+ model = TFLiteModel(model_path="model.tflite", num_threads=4)
1920
+ result = model.run({"frame": image})
1921
+ print(result["model_output"])
1922
+ print(result["inference_time_ms"])
1923
+ ```
1924
+
1925
+ ### ModelDownloader
1926
+
1927
+ Download models with progress callback and SHA256 verification.
1928
+
1929
+ ```python
1930
+ from ai_vision_tool.models import ModelDownloader
1931
+
1932
+ downloader = ModelDownloader(cache_dir="~/.cache/ai_vision_tool/models")
1933
+ path = downloader.download(
1934
+ url="https://example.com/model.onnx",
1935
+ sha256="abc123...",
1936
+ filename="model.onnx",
1937
+ progress=True,
1938
+ )
1939
+ hf_path = downloader.from_huggingface(
1940
+ repo_id="microsoft/resnet-50",
1941
+ filename="pytorch_model.bin",
1942
+ )
1943
+ ```
1944
+
1945
+ ### ModelBenchmark
1946
+
1947
+ Latency and memory profiling with p50/p95/p99 percentiles.
1948
+
1949
+ ```python
1950
+ from ai_vision_tool.models import ModelBenchmark, ONNXModel
1951
+
1952
+ model = ONNXModel(model_path="model.onnx")
1953
+ bench = ModelBenchmark(model, warmup_runs=5, benchmark_runs=100)
1954
+
1955
+ latency_report = bench.run({"frame": image})
1956
+ # {"p50_ms": ..., "p95_ms": ..., "p99_ms": ..., "mean_ms": ..., "fps": ...}
1957
+
1958
+ memory_report = bench.run_memory({"frame": image})
1959
+ # {"peak_mb": ..., "current_mb": ...}
1960
+
1961
+ bench.print_report() # ASCII table to stdout
1962
+ ```
1963
+
1964
+ ---
1965
+
1966
+ ## Prebuilt Pipelines
1967
+
1968
+ `PrebuiltPipelines` provides factory classmethods that instantiate common pipeline
1969
+ configurations. All return an `AIVisionPipeline` ready for `.execute()`.
1970
+
1971
+ ```python
1972
+ from ai_vision_tool.pipelines import PrebuiltPipelines
1973
+ import cv2
1974
+
1975
+ image = cv2.imread("images/github/sample.jpg")
1976
+ ```
1977
+
1978
+ ### Detection Pipeline
1979
+
1980
+ ```python
1981
+ pipeline = PrebuiltPipelines.detection_pipeline(
1982
+ model_path="yolov8n.pt",
1983
+ conf_threshold=0.25,
1984
+ render=True,
1985
+ )
1986
+ result = pipeline.execute(initial_data={"frame": image}, global_config={})
1987
+ print(result["bboxes"])
1988
+ print(result["rendered_frame"])
1989
+ ```
1990
+
1991
+ ### Augmentation Pipeline
1992
+
1993
+ Loads from an augmentation JSON profile.
1994
+
1995
+ ```python
1996
+ pipeline = PrebuiltPipelines.augmentation_pipeline(profile="examples/augmentation_profile.json")
1997
+ result = pipeline.execute(initial_data={"frame": image}, global_config={})
1998
+ ```
1999
+
2000
+ ### Preprocessing Pipeline
2001
+
2002
+ Standard resize + normalize + quality check chain.
2003
+
2004
+ ```python
2005
+ pipeline = PrebuiltPipelines.preprocessing_pipeline(width=640, height=640)
2006
+ result = pipeline.execute(initial_data={"frame": image}, global_config={})
2007
+ ```
2008
+
2009
+ ### Tracking Pipeline
2010
+
2011
+ Detection + ByteTracker + BBoxRenderer.
2012
+
2013
+ ```python
2014
+ pipeline = PrebuiltPipelines.tracking_pipeline(
2015
+ model_path="yolov8n.pt",
2016
+ conf_threshold=0.25,
2017
+ )
2018
+ result = pipeline.execute(initial_data={"frame": image}, global_config={})
2019
+ print(result["tracks"])
2020
+ ```
2021
+
2022
+ ### Enhancement Pipeline
2023
+
2024
+ Low-light enhancement + super-resolution.
2025
+
2026
+ ```python
2027
+ pipeline = PrebuiltPipelines.enhancement_pipeline(enhance_method="clahe", sr_scale=2)
2028
+ result = pipeline.execute(initial_data={"frame": image}, global_config={})
2029
+ ```
2030
+
2031
+ ### PipelineSerializer
2032
+
2033
+ Save and reload a pipeline configuration to/from YAML or JSON.
2034
+
2035
+ ```python
2036
+ from ai_vision_tool.pipelines import PipelineSerializer
2037
+ from ai_vision_tool.pipelines import AIVisionPipeline
2038
+ from ai_vision_tool.preprocessing import Resize
2039
+ from ai_vision_tool.augmentation import Flip
2040
+
2041
+ pipeline = AIVisionPipeline().add(Resize(width=640, height=640)).add(Flip(horizontal=True))
2042
+
2043
+ serializer = PipelineSerializer()
2044
+ config_dict = serializer.to_dict(pipeline)
2045
+ serializer.save(pipeline, "pipeline.yaml")
2046
+
2047
+ pipeline2 = serializer.load("pipeline.yaml")
2048
+ result = pipeline2.execute(initial_data={"frame": image}, global_config={})
2049
+ ```
2050
+
2051
+ ### AsyncPipeline
2052
+
2053
+ Execute pipeline steps concurrently using `asyncio` + `run_in_executor`.
2054
+
2055
+ ```python
2056
+ import asyncio
2057
+ from ai_vision_tool.pipelines import AsyncPipeline
2058
+ from ai_vision_tool.preprocessing import Resize
2059
+ from ai_vision_tool.augmentation import Flip
2060
+
2061
+ async def main():
2062
+ apipe = AsyncPipeline(
2063
+ components=[Resize(width=640, height=640), Flip(horizontal=True)],
2064
+ global_config={},
2065
+ )
2066
+ result = await apipe.execute({"frame": image})
2067
+
2068
+ # Process multiple frames concurrently
2069
+ results = await apipe.execute_batch([{"frame": image}] * 8)
2070
+
2071
+ # Async generator for streaming
2072
+ async for result in apipe.stream([{"frame": image}] * 100):
2073
+ print(result["frame"].shape)
2074
+
2075
+ asyncio.run(main())
2076
+ ```
2077
+
2078
+ ### ParallelPipeline / FanOutPipeline
2079
+
2080
+ Branch into independent sub-pipelines and merge results.
2081
+
2082
+ ```python
2083
+ from ai_vision_tool.pipelines import ParallelPipeline, FanOutPipeline
2084
+ from ai_vision_tool.pipelines.parallel_pipeline import merge_bboxes
2085
+ from ai_vision_tool.detection import ObjectDetector, FaceDetector
2086
+
2087
+ # Two independent detector branches merged
2088
+ parallel = ParallelPipeline(
2089
+ branches=[
2090
+ [ObjectDetector(model_path="yolov8n.pt")],
2091
+ [FaceDetector(backend="opencv")],
2092
+ ],
2093
+ merge_fn=merge_bboxes, # or "first" | "vote" | custom callable
2094
+ )
2095
+ result = parallel.execute({"frame": image})
2096
+
2097
+ # Shared preprocessing → parallel branches
2098
+ from ai_vision_tool.preprocessing import Resize
2099
+
2100
+ fanout = FanOutPipeline(
2101
+ shared=[Resize(width=640, height=640)],
2102
+ branches=[
2103
+ [ObjectDetector(model_path="yolov8n.pt")],
2104
+ [FaceDetector()],
2105
+ ],
2106
+ )
2107
+ result = fanout.execute({"frame": image})
2108
+ ```
2109
+
2110
+ ---
2111
+
2112
+ ## Capture Templates
2113
+
2114
+ Capture templates are standalone helper functions for quick image display or live video
2115
+ loops without building a full pipeline.
2116
+
2117
+ **`image_template`** — Display a still image with optional custom frame logic.
2118
+
2119
+ ```python
2120
+ from ai_vision_tool.capture.image_template import image_template
2121
+
2122
+ image_template(
2123
+ image_path="images/github/sample.jpg",
2124
+ custom_logic=lambda frame: frame,
2125
+ window_name="Preview",
2126
+ resolution=(1280, 720),
2127
+ )
2128
+ ```
2129
+
2130
+ **`video_capture_template`** — Run a live webcam loop with custom per-frame logic.
2131
+
2132
+ ```python
2133
+ from ai_vision_tool.capture.video_template import video_capture_template
2134
+
2135
+ video_capture_template(
2136
+ video_source=0,
2137
+ custom_logic=lambda frame: frame,
2138
+ window_name="Live",
2139
+ resolution=(1280, 720),
2140
+ enable_recording=False,
2141
+ enable_screenshot=True,
2142
+ )
2143
+ ```
2144
+
2145
+ **`save_screenshot`** — Save a frame to disk from within a template loop.
2146
+
2147
+ ```python
2148
+ from ai_vision_tool.capture.video_template import save_screenshot
2149
+
2150
+ save_screenshot(frame, output_dir="output/screenshots", prefix="capture")
2151
+ ```
2152
+
2153
+ ---
2154
+
2155
+ ## CLI Reference
2156
+
2157
+ ### Process a Local Image File
2158
+
2159
+ ```bash
2160
+ ai-vision-tool \
2161
+ --process-image-path \
2162
+ --component-category preprocessing \
2163
+ --component-name AutoOrient \
2164
+ --image-path images/github/sample.jpg \
2165
+ --init-args-json '{"rotation": 90}' \
2166
+ --save-output-image output/oriented.png
2167
+
2168
+ ai-vision-tool \
2169
+ --process-image-path \
2170
+ --component-category augmentation \
2171
+ --component-name Flip \
2172
+ --image-path images/github/sample.jpg \
2173
+ --init-args-json '{"horizontal": true}' \
2174
+ --save-output-image output/flipped.png
2175
+ ```
2176
+
2177
+ ### Browse Built-In Examples
2178
+
2179
+ ```bash
2180
+ ai-vision-tool --show-examples
2181
+ ai-vision-tool --show-examples --example-category preprocessing
2182
+ ai-vision-tool --show-examples --example-name GaussianBlur
2183
+ ```
2184
+
2185
+ ### Webcam Application
2186
+
2187
+ ```bash
2188
+ ai-vision-tool
2189
+ ai-vision-tool --enhance --brightness 12 --contrast 1.15 --sharpen
2190
+ ai-vision-tool --flip-horizontal --rotation-angle 12 --blur --blur-kernel-size 7
2191
+ ai-vision-tool --motion --motion-area 1200 --annotate
2192
+ ai-vision-tool --augmentation-config examples/augmentation_profile.json
2193
+ ```
2194
+
2195
+ #### Webcam Hotkeys
2196
+
2197
+ | Key | Action |
2198
+ |-----|--------|
2199
+ | `p` | Capture a single processed frame |
2200
+ | `b` | Capture a burst of frames |
2201
+ | `r` | Start or stop video recording |
2202
+ | `d` | Save a dataset sample |
2203
+ | `e` | Export grayscale and edge images |
2204
+ | `o` | Save the configured ROI crop |
2205
+ | `q` | Quit |
2206
+
2207
+ ---
2208
+
2209
+ ## Component Index
2210
+
2211
+ ### Preprocessing
2212
+
2213
+ | Component | Purpose |
2214
+ |-----------|---------|
2215
+ | `AutoOrient` | EXIF or explicit rotation correction |
2216
+ | `AutoAdjustContrast` | Adaptive, histogram, or stretch contrast |
2217
+ | `Resize` | Exact spatial resize |
2218
+ | `LetterboxResize` | Aspect-preserving resize with padding |
2219
+ | `CenterCrop` | Centre crop for model inputs |
2220
+ | `PadToSquare` | Square canvas padding |
2221
+ | `Normalize` | Normalise pixel range |
2222
+ | `Standardize` | z-score standardisation |
2223
+ | `RescalePixels` | Explicit pixel scale and offset |
2224
+ | `ConvertColorSpace` | Color-space conversion |
2225
+ | `BGRToRGB` / `RGBToBGR` | Channel-order swap |
2226
+ | `CLAHE` | Local contrast enhancement |
2227
+ | `HistogramEqualization` | Global histogram equalisation |
2228
+ | `GammaCorrection` | Gamma-based exposure tuning |
2229
+ | `WhiteBalance` | Colour cast correction |
2230
+ | `Denoise` | Sensor or compression noise reduction |
2231
+ | `Sharpen` | Edge sharpening |
2232
+ | `Deblur` | Unsharp-mask deblur |
2233
+ | `RemoveBackground` | Foreground isolation |
2234
+ | `Threshold` / `AdaptiveThreshold` | Binary thresholding |
2235
+ | `EdgeDetection` | Edge extraction |
2236
+ | `ContourExtraction` | Contour metadata generation |
2237
+ | `PerspectiveCorrection` | Document or planar rectification |
2238
+ | `Deskew` | Skew correction |
2239
+ | `AutoCrop` | Trim empty borders |
2240
+ | `FaceAlign` | Face normalisation from eye landmarks |
2241
+ | `ObjectCrop` | Bounding-box crop extraction |
2242
+ | `BoundingBoxClamp` | Clamp boxes to image bounds |
2243
+ | `BoundingBoxNormalize` | Normalise bounding boxes |
2244
+ | `MaskResize` | Payload mask resizing |
2245
+ | `ImageQualityCheck` | Blur and brightness quality flags |
2246
+ | `BlurDetection` | Blur threshold check |
2247
+ | `BrightnessCheck` | Brightness range check |
2248
+ | `DuplicateImageCheck` | Duplicate detection by hash |
2249
+ | `CorruptImageCheck` | Corrupt or empty frame check |
2250
+ | `AspectRatioFilter` | Aspect-ratio validation |
2251
+ | `MinSizeFilter` / `MaxSizeFilter` | Dimension validation |
2252
+
2253
+ ### Augmentation
2254
+
2255
+ | Component | Purpose |
2256
+ |-----------|---------|
2257
+ | `Flip` | Mirror augmentation |
2258
+ | `Rotate90` | 90-degree rotation |
2259
+ | `Crop` | Deterministic crop |
2260
+ | `Rotation` | Arbitrary-angle rotation |
2261
+ | `Shear` | Affine shear |
2262
+ | `Translate` | Spatial translation |
2263
+ | `RandomResize` / `RandomScale` | Random size/scale jitter |
2264
+ | `RandomCrop` / `RandomResizedCrop` | Random crop variants |
2265
+ | `RandomPadding` | Random padding |
2266
+ | `AffineTransform` | Combined affine transform |
2267
+ | `PerspectiveTransform` | Perspective warp |
2268
+ | `ElasticTransform` | Elastic distortion |
2269
+ | `GridDistortion` | Grid warp |
2270
+ | `OpticalDistortion` | Lens distortion |
2271
+ | `Greyscale` / `Hue` / `Saturation` / `Brightness` / `Exposure` | Color/tone adjustments |
2272
+ | `ColorJitter` | Compound color jitter |
2273
+ | `RandomGamma` / `RandomBrightnessContrast` | Randomised tone |
2274
+ | `RandomShadow` / `RandomSunFlare` / `RandomFog` / `RandomRain` / `RandomSnow` | Weather effects |
2275
+ | `ChannelShuffle` / `RGBShift` / `HSVShift` | Channel manipulation |
2276
+ | `ToSepia` / `InvertImage` | Color effects |
2277
+ | `Blur` / `GaussianBlur` / `MedianBlur` / `GlassBlur` / `DefocusBlur` / `ZoomBlur` | Blur types |
2278
+ | `MotionBlur` / `CameraGain` | Camera simulation |
2279
+ | `Emboss` / `Posterize` / `Solarize` / `Equalize` | Texture and tone effects |
2280
+ | `CompressionArtifacts` / `JPEGCompression` / `Downscale` / `Superpixel` | Degradation simulation |
2281
+ | `Noise` / `ISONoise` / `MultiplicativeNoise` / `SaltPepperNoise` | Noise types |
2282
+ | `CoarseDropout` / `GridDropout` / `RandomErasing` / `PixelDropout` / `MaskDropout` | Dropout variants |
2283
+ | `Cutout` / `Mosaic` / `Mosaic9` / `MixUp` / `CutMix` | Composition augmentations |
2284
+ | `CopyPaste` / `ObjectPaste` / `RandomOcclusion` / `BoundingBoxJitter` | Object manipulation |
2285
+
2286
+ ### Detection
2287
+
2288
+ | Component | Purpose |
2289
+ |-----------|---------|
2290
+ | `ObjectDetector` | YOLO / ONNX object detection with greedy NMS |
2291
+ | `FaceDetector` | OpenCV Haar or MediaPipe face detection |
2292
+ | `KeypointDetector` | MediaPipe / YOLO-pose 33-keypoint estimation |
2293
+ | `TextDetector` | EasyOCR / PaddleOCR text detection and recognition |
2294
+ | `AnomalyDetector` | Statistical / PatchCore / PCA anomaly scoring |
2295
+
2296
+ ### Tracking
2297
+
2298
+ | Component | Purpose |
2299
+ |-----------|---------|
2300
+ | `ByteTracker` | Two-stage high/low-confidence multi-object tracking |
2301
+ | `DeepSORTTracker` | HOG re-ID embedding + cosine distance tracking |
2302
+ | `ReIDExtractor` | Appearance embedding extraction for gallery search |
2303
+ | `TrackManager` | IoU Hungarian assignment + track lifecycle management |
2304
+ | `KalmanFilter` | 7-state SORT Kalman filter (cx, cy, s, r, vx, vy, vs) |
2305
+
2306
+ ### Segmentation
2307
+
2308
+ | Component | Purpose |
2309
+ |-----------|---------|
2310
+ | `SemanticSegmenter` | ONNX / DNN / TorchScript semantic segmentation |
2311
+ | `InstanceSegmenter` | YOLO-seg instance masks |
2312
+ | `PanopticSegmenter` | Stuff + thing panoptic segmentation |
2313
+ | `SAMSegmenter` | Segment Anything Model: point, box, auto-everything |
2314
+ | `MaskPostProcessor` | Erode/dilate/fill/largest-component/remove-small |
2315
+
2316
+ ### Enhancement
2317
+
2318
+ | Component | Purpose |
2319
+ |-----------|---------|
2320
+ | `SuperResolution` | 2× / 4× upscaling: OpenCV DNN SR / ONNX / bicubic |
2321
+ | `Denoiser` | NLM / bilateral / DnCNN-ONNX denoising |
2322
+ | `Deblurrer` | Wiener FFT / Richardson-Lucy / NAFNet-ONNX deblurring |
2323
+ | `LowLightEnhancer` | CLAHE / gamma / MSR / Zero-DCE / ONNX enhancement |
2324
+ | `Colorizer` | Zhang 2016 LAB-AB / pseudo-color / thermal colorization |
2325
+
2326
+ ### I/O
2327
+
2328
+ | Component | Purpose |
2329
+ |-----------|---------|
2330
+ | `ImageReader` | Read images from disk |
2331
+ | `ImageWriter` | Write frames to disk with pattern filenames |
2332
+ | `VideoReader` | Stream frames from video files with seek support |
2333
+ | `VideoWriter` | Write frames to video file |
2334
+ | `CameraSource` | Live webcam, RTSP, or HTTP camera source |
2335
+ | `S3Source` | Stream images from AWS S3 |
2336
+ | `GCSSource` | Stream images from Google Cloud Storage |
2337
+ | `DatasetExporter` | Export YOLO / COCO / VOC annotated datasets |
2338
+
2339
+ ### Streaming
2340
+
2341
+ | Component | Purpose |
2342
+ |-----------|---------|
2343
+ | `FrameStream` | Unified iterator over webcam / video / path list |
2344
+ | `DirectoryStream` | Stream sorted images from a directory |
2345
+ | `RTSPClient` | Background-threaded RTSP reader with reconnect |
2346
+ | `WebSocketSink` | Broadcast frames over WebSocket (MJPEG fallback) |
2347
+ | `WebSocketSource` | Receive frames from WebSocket source |
2348
+ | `KafkaSink` | Publish frames to Kafka topic |
2349
+ | `KafkaSource` | Consume frames from Kafka topic |
2350
+ | `BufferedStream` | Producer-consumer frame buffer with drop policy |
2351
+ | `SlidingWindowBuffer` | Temporal sliding window for batch processing |
2352
+
2353
+ ### Visualization
2354
+
2355
+ | Component | Purpose |
2356
+ |-----------|---------|
2357
+ | `FrameViewer` | Display frames with FPS overlay (headless-safe) |
2358
+ | `BBoxRenderer` | Render bboxes with color palette and label text |
2359
+ | `HeatmapRenderer` | Accumulate and overlay spatial heatmaps |
2360
+ | `DashboardSink` | Live web dashboard: Gradio or MJPEG HTTP |
2361
+ | `VideoAnnotationExporter` | Write annotated video + JSON sidecar |
2362
+
2363
+ ### Utilities
2364
+
2365
+ | Component | Purpose |
2366
+ |-----------|---------|
2367
+ | `ColorPalette` | Golden-ratio hue palette for consistent class colors |
2368
+ | `MetricsLogger` | Thread-safe rolling FPS and latency logger |
2369
+ | `MetricsLoggerComponent` | Pipeline component wrapper for MetricsLogger |
2370
+ | `FrameSampler` | Frame throttling by count, FPS, or probability |
2371
+ | `ImageHash` | Perceptual hashing (pHash/aHash/dHash) for deduplication |
2372
+ | `DrawUtils` | Render bboxes, masks, keypoints from payload |
2373
+
2374
+ ### Core
2375
+
2376
+ | Class | Purpose |
2377
+ |-------|---------|
2378
+ | `Device` | Auto CUDA/MPS/CPU device selector (singleton) |
2379
+ | `BBox` | Bounding box dataclass with IoU, clip, normalize |
2380
+ | `Detection` | Detection result (BBox + label + conf) |
2381
+ | `Keypoint` | Single keypoint (x, y, z, visibility, name) |
2382
+ | `Pose` | Full body pose (list of Keypoints) |
2383
+ | `Mask` | Binary segmentation mask with to_polygon() |
2384
+ | `Track` | Track state (id, bbox, age, state) |
2385
+ | `BatchProcessor` | Parallel directory / list processing |
2386
+ | `Scheduler` | Token-bucket FPS limiter (pipeline component) |
2387
+ | `RateLimiter` | Standalone calls-per-second limiter |
2388
+ | `MemoryManager` | Pre-allocated numpy buffer pool |
2389
+ | `GPUMemoryTracker` | CUDA memory delta tracker |
2390
+
2391
+ ### Configuration
2392
+
2393
+ | Class | Purpose |
2394
+ |-------|---------|
2395
+ | `YAMLConfig` | YAML config with dot-notation access, merge, validate, reload |
2396
+ | `JSONConfig` | JSON config with the same interface as `YAMLConfig`, plus save |
2397
+ | `ComponentRegistry` | Singleton component registry with decorator registration |
2398
+ | `ProfileLoader` | Named pipeline profile loader from search paths |
2399
+ | `EnvConfig` | Prefix-based environment variable config reader |
2400
+
2401
+ ### Models
2402
+
2403
+ | Class | Purpose |
2404
+ |-------|---------|
2405
+ | `ModelRegistry` | JSON-cached model registry with HuggingFace support |
2406
+ | `ONNXModel` | ONNX runtime pipeline component |
2407
+ | `TorchModel` | TorchScript pipeline component |
2408
+ | `TFLiteModel` | TFLite runtime pipeline component |
2409
+ | `ModelDownloader` | urllib downloader with SHA256 verification and HF URL builder |
2410
+ | `ModelBenchmark` | p50/p95/p99 latency + tracemalloc memory benchmark |
2411
+
2412
+ ### Prebuilt Pipelines
2413
+
2414
+ | Class | Purpose |
2415
+ |-------|---------|
2416
+ | `PrebuiltPipelines` | Factory classmethods for common pipeline configurations |
2417
+ | `PipelineSerializer` | Serialize / deserialize pipelines to YAML/JSON |
2418
+ | `AsyncPipeline` | Async execution with asyncio run_in_executor |
2419
+ | `AsyncComponent` | Mixin for implementing async pipeline stages |
2420
+ | `ParallelPipeline` | Parallel branch execution with merge strategies |
2421
+ | `FanOutPipeline` | Shared sequential preprocessing → parallel branches |
2422
+
2423
+ ---
2424
+
2425
+ ## Output Structure
2426
+
2427
+ ```text
2428
+ output/
2429
+ ├── captures/   — still images (p key, b key burst)
2430
+ ├── dataset/ — labelled training samples (d key)
2431
+ ├── exports/ — grayscale and edge exports (e key)
2432
+ ├── timelapse/ — periodic time-lapse frames
2433
+ └── videos/ — recorded video files (r key)
2434
+ ```
2435
+
2436
+ ---
2437
+
2438
+ ## Testing
2439
+
2440
+ ```bash
2441
+ pytest
2442
+ pytest tests/test_preprocessing_components.py
2443
+ pytest tests/test_basic_augmentations.py
2444
+ pytest tests/test_advanced_augmentations.py
2445
+ pytest tests/test_capture_components.py
2446
+ pytest tests/test_core_components.py
2447
+ pytest tests/test_labeler_components.py
2448
+ pytest tests/test_cli_file_processing.py
2449
+ ```
2450
+
2451
+ ---
2452
+
2453
+ ## Build and Publish
2454
+
2455
+ ```bash
2456
+ python -m pip install --upgrade build
2457
+ python -m build
2458
+ ```
2459
+
2460
+ The wheel and source distribution are written to `dist/`.
2461
+
2462
+ See `PUBLISHING.md` for the release checklist and PyPI upload commands.
2463
+
2464
+ ---
2465
+
2466
+ <p align="center">
2467
+ <strong>Build once. Deploy anywhere.</strong><br>
2468
+ Scale from classical vision pipelines to state-of-the-art AI systems.
2469
+ </p>
2470
+