pyrobovision 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pyrobovision/__init__.py
ADDED
|
File without changes
|
|
@@ -0,0 +1,229 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: pyrobovision
|
|
3
|
+
Version: 0.5.0
|
|
4
|
+
Summary: Advanced autonomous driving perception and vision-language foundation models for robotics
|
|
5
|
+
Author-email: Georgi Mammen Mullassery <mullassery@gmail.com>
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/Mullassery/PyRoboVision
|
|
8
|
+
Project-URL: Documentation, https://github.com/Mullassery/PyRoboVision/blob/main/README.md
|
|
9
|
+
Project-URL: Repository, https://github.com/Mullassery/PyRoboVision
|
|
10
|
+
Project-URL: Bug Tracker, https://github.com/Mullassery/PyRoboVision/issues
|
|
11
|
+
Keywords: robotics,autonomous-driving,perception,computer-vision,foundation-models,sam3,clip,grounding-dino
|
|
12
|
+
Classifier: Development Status :: 4 - Beta
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: Intended Audience :: Science/Research
|
|
15
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
16
|
+
Classifier: Operating System :: OS Independent
|
|
17
|
+
Classifier: Programming Language :: Python :: 3
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
22
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
23
|
+
Classifier: Topic :: Scientific/Engineering :: Image Recognition
|
|
24
|
+
Requires-Python: >=3.10
|
|
25
|
+
Description-Content-Type: text/markdown
|
|
26
|
+
Requires-Dist: pyroboframes>=1.0.0
|
|
27
|
+
Requires-Dist: numpy>=1.24.0
|
|
28
|
+
Requires-Dist: torch>=2.0.0
|
|
29
|
+
Requires-Dist: torchvision>=0.15.0
|
|
30
|
+
Requires-Dist: transformers>=4.30.0
|
|
31
|
+
Requires-Dist: scipy>=1.10.0
|
|
32
|
+
Requires-Dist: open3d>=0.17.0
|
|
33
|
+
Provides-Extra: dev
|
|
34
|
+
Requires-Dist: pytest>=7.4.0; extra == "dev"
|
|
35
|
+
Requires-Dist: pytest-cov>=4.1.0; extra == "dev"
|
|
36
|
+
Requires-Dist: black>=23.0.0; extra == "dev"
|
|
37
|
+
Requires-Dist: isort>=5.12.0; extra == "dev"
|
|
38
|
+
Requires-Dist: mypy>=1.4.0; extra == "dev"
|
|
39
|
+
Requires-Dist: ruff>=0.0.285; extra == "dev"
|
|
40
|
+
Provides-Extra: cuda
|
|
41
|
+
Requires-Dist: cupy>=12.0.0; extra == "cuda"
|
|
42
|
+
Requires-Dist: torch[cuda11x]>=2.0.0; extra == "cuda"
|
|
43
|
+
Provides-Extra: mlx
|
|
44
|
+
Requires-Dist: mlx>=0.0.13; extra == "mlx"
|
|
45
|
+
|
|
46
|
+
# PyRoboVision
|
|
47
|
+
|
|
48
|
+
Advanced autonomous driving perception and vision-language foundation models for robotics. Built on top of [PyRoboFrames](https://github.com/Mullassery/PyRoboFrames).
|
|
49
|
+
|
|
50
|
+
**Focus:** Advanced perception pipelines and multimodal understanding that consume data loaded by PyRoboFrames.
|
|
51
|
+
|
|
52
|
+
---
|
|
53
|
+
|
|
54
|
+
## What's Inside
|
|
55
|
+
|
|
56
|
+
### Autonomous Driving (v0.5)
|
|
57
|
+
- **Cylindrical panoramic stitching** — 360° multi-camera fusion (Waymo, nuScenes)
|
|
58
|
+
- **Advanced blending** — Laplacian pyramid + graph-cut seams
|
|
59
|
+
- **Bird's-eye-view (BEV)** — 3D projection for autonomous perception
|
|
60
|
+
- **GPU acceleration** — CuPy (NVIDIA), MLX (Apple Silicon), NumPy (CPU)
|
|
61
|
+
- **Sensor fusion** — Lidar/Radar + occupancy grid mapping
|
|
62
|
+
- **Dataset loaders** — Waymo TFRecord, nuScenes JSON, KITTI stereo
|
|
63
|
+
|
|
64
|
+
### Foundation Models (Phase 7)
|
|
65
|
+
- **SAM3 segmentation** — Instance segmentation + temporal tracking
|
|
66
|
+
- **CLIP embeddings** — Scene understanding, text-image similarity
|
|
67
|
+
- **Grounding DINO** — Open-vocabulary object detection
|
|
68
|
+
- **Multi-modal fusion** — Unified detection + segmentation + classification
|
|
69
|
+
|
|
70
|
+
---
|
|
71
|
+
|
|
72
|
+
## Installation
|
|
73
|
+
|
|
74
|
+
```bash
|
|
75
|
+
# Requires PyRoboFrames v1.0+
|
|
76
|
+
pip install pyroboframes pyrobovision
|
|
77
|
+
|
|
78
|
+
# From source
|
|
79
|
+
git clone https://github.com/Mullassery/PyRoboVision.git
|
|
80
|
+
cd PyRoboVision
|
|
81
|
+
pip install -e .
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
---
|
|
85
|
+
|
|
86
|
+
## Quick Start
|
|
87
|
+
|
|
88
|
+
### Autonomous Driving: 360° Panoramic Perception
|
|
89
|
+
|
|
90
|
+
```python
|
|
91
|
+
from pyrobovision.automotive import (
|
|
92
|
+
CylindricalStitcher,
|
|
93
|
+
get_waymo_layout,
|
|
94
|
+
)
|
|
95
|
+
|
|
96
|
+
# Stitch 5 cameras into 360° panorama
|
|
97
|
+
layout = get_waymo_layout()
|
|
98
|
+
stitcher = CylindricalStitcher(layout, blend_method="laplacian")
|
|
99
|
+
|
|
100
|
+
frames = {
|
|
101
|
+
"FRONT": ...,
|
|
102
|
+
"FRONT_LEFT": ...,
|
|
103
|
+
# ... other cameras
|
|
104
|
+
}
|
|
105
|
+
|
|
106
|
+
panorama = stitcher.stitch(frames) # [1, H, W, 3] seamless 360°
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
### Foundation Models: Multi-Modal Scene Understanding
|
|
110
|
+
|
|
111
|
+
```python
|
|
112
|
+
from pyrobovision.foundation_models import MultiModalFusion
|
|
113
|
+
|
|
114
|
+
fusion = MultiModalFusion(
|
|
115
|
+
detection_prompt="car . pedestrian . cyclist",
|
|
116
|
+
device="mlx", # or "cuda"
|
|
117
|
+
)
|
|
118
|
+
|
|
119
|
+
scene = fusion.understand(frame)
|
|
120
|
+
for obj in scene.objects:
|
|
121
|
+
    print(f"{obj.object_class}: {obj.semantic_label}")
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
---
|
|
125
|
+
|
|
126
|
+
## Architecture
|
|
127
|
+
|
|
128
|
+
### Dependency Graph
|
|
129
|
+
|
|
130
|
+
```
|
|
131
|
+
PyRoboVision/
|
|
132
|
+
├── automotive/ # v0.5 AV perception
|
|
133
|
+
│ ├── stitching.py
|
|
134
|
+
│ ├── blending.py
|
|
135
|
+
│ ├── bev.py
|
|
136
|
+
│ ├── perception_3d.py
|
|
137
|
+
│ ├── tfrecord_utils.py
|
|
138
|
+
│ ├── nuscenes_utils.py
|
|
139
|
+
│ └── datasets.py
|
|
140
|
+
│
|
|
141
|
+
└── foundation_models/ # Phase 7
|
|
142
|
+
├── sam3_segmentation.py
|
|
143
|
+
├── clip_embeddings.py
|
|
144
|
+
├── grounding_dino.py
|
|
145
|
+
└── multimodal_fusion.py
|
|
146
|
+
|
|
147
|
+
↓ Depends on PyRoboFrames v1.0+ (dataloader)
|
|
148
|
+
PyRoboFrames/
|
|
149
|
+
├── RoboFrameDataset # Load LeRobot
|
|
150
|
+
├── ProprioceptiveLoader # Load state/action
|
|
151
|
+
├── DataLoader # Device selection
|
|
152
|
+
└── [video decode, sensor fusion, etc.]
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
**Key design:** PyRoboVision is a consumer library, not a foundation. It uses PyRoboFrames to load data, then applies perception algorithms.
|
|
156
|
+
|
|
157
|
+
---
|
|
158
|
+
|
|
159
|
+
## Features
|
|
160
|
+
|
|
161
|
+
| Phase | Feature | Status | Tests |
|
|
162
|
+
|-------|---------|--------|-------|
|
|
163
|
+
| **1** | Cylindrical panoramic projection | ✅ | 10 |
|
|
164
|
+
| **2** | Laplacian pyramid blending | ✅ | 5 |
|
|
165
|
+
| **3** | Bird's-eye-view (BEV) projection | ✅ | 5 |
|
|
166
|
+
| **4a** | GPU acceleration (CuPy/MLX/NumPy) | ✅ | 6 |
|
|
167
|
+
| **4b** | Optical flow seam tracking | ✅ | 10 |
|
|
168
|
+
| **5** | Waymo/nuScenes/KITTI loaders | ✅ | 9 |
|
|
169
|
+
| **6** | Lidar/Radar fusion + Occupancy grids | ✅ | 18 |
|
|
170
|
+
| **7a** | SAM3 temporal segmentation | ✅ | 18 |
|
|
171
|
+
| **7b** | CLIP scene embeddings | ✅ | 25 |
|
|
172
|
+
| **7c** | Grounding DINO detection | ✅ | 26 |
|
|
173
|
+
| **7d** | Multi-modal fusion | ✅ | 17 |
|
|
174
|
+
|
|
175
|
+
**Total: 149 tests, all passing**
|
|
176
|
+
|
|
177
|
+
---
|
|
178
|
+
|
|
179
|
+
## Use Cases
|
|
180
|
+
|
|
181
|
+
### Autonomous Driving
|
|
182
|
+
- Waymo perception pipeline (panoramic stitching + 3D fusion)
|
|
183
|
+
- nuScenes multi-camera understanding
|
|
184
|
+
- Real-time BEV mapping
|
|
185
|
+
|
|
186
|
+
### Mobile Manipulation
|
|
187
|
+
- Egocentric robot perception (360° view from mobile base)
|
|
188
|
+
- Scene understanding for pick-and-place
|
|
189
|
+
|
|
190
|
+
### Robot Dog Navigation
|
|
191
|
+
- Panoramic localization (where am I in the scene?)
|
|
192
|
+
- Terrain classification from multi-camera fusion
|
|
193
|
+
|
|
194
|
+
---
|
|
195
|
+
|
|
196
|
+
## Related Projects
|
|
197
|
+
|
|
198
|
+
- **[PyRoboFrames](https://github.com/Mullassery/PyRoboFrames)** — Fast ML dataloader for robot learning (core dependency)
|
|
199
|
+
- **[LeRobot](https://github.com/huggingface/lerobot)** — HuggingFace robotics datasets
|
|
200
|
+
- **[Segment Anything 3 (SAM3)](https://github.com/facebookresearch/sam3)** — Instance segmentation
|
|
201
|
+
- **[CLIP](https://github.com/openai/CLIP)** — Vision-language models
|
|
202
|
+
- **[Grounding DINO](https://github.com/IDEA-Research/GroundingDINO)** — Open-vocabulary detection
|
|
203
|
+
|
|
204
|
+
---
|
|
205
|
+
|
|
206
|
+
## License
|
|
207
|
+
|
|
208
|
+
MIT (same as PyRoboFrames)
|
|
209
|
+
|
|
210
|
+
---
|
|
211
|
+
|
|
212
|
+
## Contributing
|
|
213
|
+
|
|
214
|
+
Contributions welcome. Please open issues and PRs on GitHub.
|
|
215
|
+
|
|
216
|
+
For architectural decisions, see [ARCHITECTURE.md](https://github.com/Mullassery/PyRoboVision/blob/main/ARCHITECTURE.md).
|
|
217
|
+
|
|
218
|
+
---
|
|
219
|
+
|
|
220
|
+
## Citation
|
|
221
|
+
|
|
222
|
+
```bibtex
|
|
223
|
+
@software{mullassery2025pyrobovision,
|
|
224
|
+
title={PyRoboVision: Advanced perception and vision-language models for robotics},
|
|
225
|
+
author={Mullassery, Georgi},
|
|
226
|
+
url={https://github.com/Mullassery/PyRoboVision},
|
|
227
|
+
year={2025}
|
|
228
|
+
}
|
|
229
|
+
```
|
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
pyrobovision/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
+
pyrobovision-0.5.0.dist-info/METADATA,sha256=1AzyEDw1T7Vvlu_HI9CXyUZaB5Dv8BNxMn4y4Dc35tA,7149
|
|
3
|
+
pyrobovision-0.5.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
|
|
4
|
+
pyrobovision-0.5.0.dist-info/top_level.txt,sha256=Yz6xlF7fChNwMOxWth-7NMmRcWWzEHaqRF30R5I1w3s,13
|
|
5
|
+
pyrobovision-0.5.0.dist-info/RECORD,,
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
pyrobovision
|