neuromeka-vfm 0.1.4__tar.gz → 0.1.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- neuromeka_vfm-0.1.6/PKG-INFO +301 -0
- neuromeka_vfm-0.1.6/README.md +254 -0
- {neuromeka_vfm-0.1.4 → neuromeka_vfm-0.1.6}/pyproject.toml +4 -2
- neuromeka_vfm-0.1.6/src/neuromeka_vfm/generate_mesh.py +122 -0
- neuromeka_vfm-0.1.6/src/neuromeka_vfm/point_cloud_utils.py +377 -0
- {neuromeka_vfm-0.1.4 → neuromeka_vfm-0.1.6}/src/neuromeka_vfm/segmentation.py +54 -0
- neuromeka_vfm-0.1.6/src/neuromeka_vfm.egg-info/PKG-INFO +301 -0
- {neuromeka_vfm-0.1.4 → neuromeka_vfm-0.1.6}/src/neuromeka_vfm.egg-info/SOURCES.txt +2 -0
- {neuromeka_vfm-0.1.4 → neuromeka_vfm-0.1.6}/src/neuromeka_vfm.egg-info/requires.txt +2 -0
- neuromeka_vfm-0.1.4/PKG-INFO +0 -186
- neuromeka_vfm-0.1.4/README.md +0 -141
- neuromeka_vfm-0.1.4/src/neuromeka_vfm.egg-info/PKG-INFO +0 -186
- {neuromeka_vfm-0.1.4 → neuromeka_vfm-0.1.6}/LICENSE +0 -0
- {neuromeka_vfm-0.1.4 → neuromeka_vfm-0.1.6}/setup.cfg +0 -0
- {neuromeka_vfm-0.1.4 → neuromeka_vfm-0.1.6}/src/neuromeka_vfm/__init__.py +0 -0
- {neuromeka_vfm-0.1.4 → neuromeka_vfm-0.1.6}/src/neuromeka_vfm/compression.py +0 -0
- {neuromeka_vfm-0.1.4 → neuromeka_vfm-0.1.6}/src/neuromeka_vfm/examples/__init__.py +0 -0
- {neuromeka_vfm-0.1.4 → neuromeka_vfm-0.1.6}/src/neuromeka_vfm/examples/pose_demo.py +0 -0
- {neuromeka_vfm-0.1.4 → neuromeka_vfm-0.1.6}/src/neuromeka_vfm/pickle_client.py +0 -0
- {neuromeka_vfm-0.1.4 → neuromeka_vfm-0.1.6}/src/neuromeka_vfm/pose_estimation.py +0 -0
- {neuromeka_vfm-0.1.4 → neuromeka_vfm-0.1.6}/src/neuromeka_vfm/upload_mesh.py +0 -0
- {neuromeka_vfm-0.1.4 → neuromeka_vfm-0.1.6}/src/neuromeka_vfm.egg-info/dependency_links.txt +0 -0
- {neuromeka_vfm-0.1.4 → neuromeka_vfm-0.1.6}/src/neuromeka_vfm.egg-info/entry_points.txt +0 -0
- {neuromeka_vfm-0.1.4 → neuromeka_vfm-0.1.6}/src/neuromeka_vfm.egg-info/top_level.txt +0 -0
neuromeka_vfm-0.1.6/PKG-INFO
@@ -0,0 +1,301 @@
Metadata-Version: 2.4
Name: neuromeka_vfm
Version: 0.1.6
Summary: Client utilities for Neuromeka VFM FoundationPose RPC (upload meshes, call server)
Author: Neuromeka
License: MIT License

Copyright (c) 2025 Neuromeka Co., Ltd.

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

Classifier: Development Status :: 3 - Alpha
Classifier: Intended Audience :: Developers
Classifier: License :: OSI Approved :: MIT License
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
Requires-Python: >=3.8
Description-Content-Type: text/markdown
License-File: LICENSE
Requires-Dist: numpy
Requires-Dist: pyzmq
Requires-Dist: paramiko
Requires-Dist: av
Requires-Dist: trimesh
Requires-Dist: tqdm
Dynamic: license-file

# neuromeka_vfm

A lightweight client SDK for communicating with Segmentation (SAM2, Grounding DINO) and Pose Estimation (NVIDIA FoundationPose) servers over RPC/ZeroMQ. It also provides SSH/SFTP utilities to upload mesh files to the host.

- Website: http://www.neuromeka.com
- PyPI package: https://pypi.org/project/neuromeka_vfm/
- Documents: https://docs.neuromeka.com

## Installation

```bash
pip install neuromeka_vfm
```

## Python API (usage by example)

- Client PC: the machine running your application with this package installed.
- Host PC: the machine running Segmentation and Pose Estimation Docker servers. If you run Docker locally, use `localhost`.

### Segmentation

```python
from neuromeka_vfm import Segmentation

seg = Segmentation(
    hostname="192.168.10.63",
    port=5432,
    compression_strategy="png",  # none | png | jpeg | h264
)

# Register using an image prompt
seg.add_image_prompt("drug_box", ref_rgb)
seg.register_first_frame(
    frame=first_rgb,
    prompt="drug_box",  # ID string
    use_image_prompt=True,
)

# Register using a text prompt
seg.register_first_frame(
    frame=first_rgb,
    prompt="box .",  # Text prompt (must end with " .")
    use_image_prompt=False,
)

# SAM2 tracking on the registered mask(s)
resp = seg.get_next(next_rgb)
if isinstance(resp, dict) and resp.get("result") == "ERROR":
    print(f"Tracking error: {resp.get('message')}")
    seg.reset()
else:
    masks = resp

# Segmentation settings / model selection (nrmk_realtime_segmentation v0.2+)
caps = seg.get_capabilities()["data"]
current = seg.get_config()["data"]
seg.set_config(
    {
        "grounding_dino": {
            "backbone": "Swin-B",  # Swin-T | Swin-B
            "box_threshold": 0.35,
            "text_threshold": 0.25,
        },
        "dino_detection": {
            "threshold": 0.5,
            "target_multiplier": 25,
            "img_multiplier": 50,
            "background_threshold": -1.0,
            "final_erosion_count": 10,
            "segment_min_size": 20,
        },
        "sam2": {
            "model": "facebook/sam2.1-hiera-large",
            "use_legacy": False,
            "compile": False,
            "offload_state_to_cpu": False,
            "offload_video_to_cpu": False,
        },
    }
)

# Remove an object (v0.2+, only when use_legacy=False)
seg.remove_object("cup_0")

seg.close()
```

Additional Segmentation APIs and behaviors (a usage sketch follows this list):

- `benchmark=True` in the constructor enables timing counters (`call_time`, `call_count`) for `add_image_prompt`, `register_first_frame`, and `get_next`.
- `switch_compression_strategy()` lets you change the compression strategy at runtime.
- `register_first_frame()` returns `True`/`False` and raises `ValueError` if image prompts are missing when `use_image_prompt=True`.
- `register_first_frame()` accepts a list of prompt IDs when `use_image_prompt=True`.
- `get_next()` returns `None` if called before registration; it can also return the server error dict when available.
- `reset()` performs a server-side reset, while `finish()` clears only local state.
- Exposed state: `tracking_object_ids`, `current_frame_masks`, `invisible_object_ids`.
- Backward-compat alias: `NrmkRealtimeSegmentation`.
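
A minimal tracking-loop sketch combining these behaviors. The frame source is a placeholder, and the argument accepted by `switch_compression_strategy()` as well as the exact types of `call_time`/`call_count` are assumptions; a reachable server is required for this to run.

```python
import numpy as np
from neuromeka_vfm import Segmentation

seg = Segmentation(
    hostname="192.168.10.63",
    port=5432,
    compression_strategy="png",
    benchmark=True,  # enables call_time / call_count counters
)

def get_frame():
    # Placeholder: return the next RGB frame from your camera (HxWx3 uint8).
    return np.zeros((480, 640, 3), dtype=np.uint8)

seg.add_image_prompt("drug_box", get_frame())
if not seg.register_first_frame(frame=get_frame(), prompt=["drug_box"], use_image_prompt=True):
    raise RuntimeError("first-frame registration failed")

seg.switch_compression_strategy("jpeg")  # assumed to take the strategy name

for _ in range(100):
    resp = seg.get_next(get_frame())
    if resp is None or (isinstance(resp, dict) and resp.get("result") == "ERROR"):
        seg.reset()  # server-side reset; re-register before tracking again
        break
    print(seg.tracking_object_ids, seg.invisible_object_ids)

print(seg.call_time, seg.call_count)  # timing counters from benchmark=True
seg.finish()  # clears only local state
seg.close()
```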

#### Segmentation v0.2 config summary (defaults/choices)

The output of `seg.get_capabilities()` can differ depending on the server configuration. The following reflects the v0.2 defaults.

```yaml
grounding_dino:
  backbone:
    choices:
      - Swin-B
      - Swin-T
    default: Swin-T
  box_threshold:
    default: 0.35
    min: 0.0
    max: 1.0
  text_threshold:
    default: 0.25
    min: 0.0
    max: 1.0

dino_detection:
  threshold:
    default: 0.5
  target_multiplier:
    default: 25
  img_multiplier:
    default: 50
  background_threshold:
    default: -1.0
  final_erosion_count:
    default: 10
  segment_min_size:
    default: 20

sam2:
  model:
    choices:
      - facebook/sam2-hiera-base-plus
      - facebook/sam2-hiera-large
      - facebook/sam2-hiera-small
      - facebook/sam2-hiera-tiny
      - facebook/sam2.1-hiera-base-plus
      - facebook/sam2.1-hiera-large
      - facebook/sam2.1-hiera-small
      - facebook/sam2.1-hiera-tiny
    default: facebook/sam2.1-hiera-large
  use_legacy:
    default: false
  compile:
    default: false
  offload_state_to_cpu:
    default: false
  offload_video_to_cpu:
    default: false
```

#### Segmentation v0.2 notes and changes

- If SAM2 VRAM estimation fails, `seg.get_next()` may return `{"result": "ERROR"}`. Handle the error and call `reset` before re-registering.
- `compile=True` can slow down first-frame registration and `reset`.
- CPU offloading is most effective when both `offload_state_to_cpu=True` and `offload_video_to_cpu=True` are set (legacy mode does not support `offload_video_to_cpu`); see the snippet after this list.
- `remove_object` is supported only when `use_legacy=False`.
- GroundingDINO added the Swin-B backbone and fixed prompt-token merge issues.
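
For example, to enable CPU offloading on a memory-constrained GPU (a minimal sketch, reusing the `seg` client from the example above and only the config keys listed in the summary):

```python
# Offloading works best with both flags enabled; it requires use_legacy=False.
seg.set_config(
    {
        "sam2": {
            "use_legacy": False,
            "offload_state_to_cpu": True,
            "offload_video_to_cpu": True,
        },
    }
)
```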

### Pose Estimation

**Mesh upload**: Upload the mesh file (STL) to `/opt/meshes/` on the host PC. You can also use SSH directly.

```python
from neuromeka_vfm import upload_mesh

upload_mesh(
    host="192.168.10.63",
    user="user",
    password="pass",
    local="mesh/my_mesh.stl",          # local mesh path
    remote="/opt/meshes/my_mesh.stl",  # host mesh path (Docker volume)
)
```
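
If you prefer to copy the file over SSH yourself, a plain SFTP transfer does the same job. A minimal sketch using `paramiko`, which is already a dependency of this package; the host, credentials, and paths below are placeholders:

```python
import paramiko

# Manual SFTP upload to the host's /opt/meshes Docker volume.
client = paramiko.SSHClient()
client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
client.connect("192.168.10.63", username="user", password="pass")
sftp = client.open_sftp()
sftp.put("mesh/my_mesh.stl", "/opt/meshes/my_mesh.stl")
sftp.close()
client.close()
```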

Initialization

```python
from neuromeka_vfm import PoseEstimation

pose = PoseEstimation(host="192.168.10.72", port=5557)

pose.init(
    mesh_path="/app/modules/foundation_pose/mesh/my_mesh.stl",
    apply_scale=1.0,
    track_refine_iter=3,
    min_n_views=40,
    inplane_step=60,
)
```

- `mesh_path`: path to the mesh file (STL/OBJ). Initialization fails if missing.
- `apply_scale`: scalar applied after loading the mesh.
  - STL in meters: 1.0 (no scaling)
  - STL in centimeters: 0.01 (1 cm -> 0.01 m)
  - STL in millimeters: 0.001 (1 mm -> 0.001 m)
- `force_apply_color`: if True, forces a solid color when the mesh lacks color data.
- `apply_color`: RGB tuple (0-255) used when `force_apply_color=True`.
- `est_refine_iter`: number of refinement iterations during registration (higher = more accurate, slower).
- `track_refine_iter`: number of refinement iterations per frame during tracking.
- `min_n_views`: minimum number of sampled camera views (affects rotation candidates).
- `inplane_step`: in-plane rotation step in degrees (smaller = more candidates). A combined example follows this list.
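
For instance, a millimeter-unit STL without vertex colors might be initialized like this (a sketch; the specific values are illustrative, not recommended defaults):

```python
pose.init(
    mesh_path="/app/modules/foundation_pose/mesh/my_mesh.stl",
    apply_scale=0.001,            # mesh authored in mm -> meters
    force_apply_color=True,       # mesh has no color data
    apply_color=(200, 200, 200),  # RGB, 0-255
    est_refine_iter=5,            # registration refinement iterations
    track_refine_iter=2,          # per-frame tracking refinement iterations
    min_n_views=40,
    inplane_step=60,
)
```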

Registration and tracking

```python
# Registration (server defaults when iteration is omitted, check_vram=True pre-checks VRAM)
register_resp = pose.register(rgb=rgb0, depth=depth0, mask=mask0, K=cam_K, check_vram=True)

# Tracking (optionally limit search area with bbox_xywh)
track_resp = pose.track(rgb=rgb1, depth=depth1, K=cam_K, bbox_xywh=bbox_xywh)

pose.close()
```

- `cam_K`: camera intrinsics (3x3 matrix).
- Large RGB resolution, large `min_n_views`, or small `inplane_step` can cause GPU VRAM errors.
- `check_vram=True` in `register` performs a pre-check to prevent server shutdown due to OOM.
- `iteration` in `register`/`track` can override the server default if provided.
- `reset()` resets the server state; `reset_object()` reuses the cached mesh to rebuild the rotation grid.
- Default host/port can come from the `FPOSE_HOST` and `FPOSE_PORT` environment variables (used in the sketch after this list).
- Backward-compat alias: `FoundationPoseClient`.
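
A minimal end-to-end sketch of the register-then-track flow. The placeholder frames, intrinsics, and mask are assumptions (in practice the mask comes from the Segmentation client and the frames from your RGB-D camera), and the structure of `register_resp`/`track_resp` depends on the server:

```python
import os
import numpy as np
from neuromeka_vfm import PoseEstimation

# Host/port may also come from the FPOSE_HOST / FPOSE_PORT environment variables.
pose = PoseEstimation(
    host=os.environ.get("FPOSE_HOST", "192.168.10.72"),
    port=int(os.environ.get("FPOSE_PORT", "5557")),
)
pose.init(mesh_path="/app/modules/foundation_pose/mesh/my_mesh.stl", apply_scale=1.0)

# Placeholder camera data; replace with real RGB-D frames and calibrated intrinsics.
cam_K = np.array([[600.0, 0.0, 320.0], [0.0, 600.0, 240.0], [0.0, 0.0, 1.0]])
rgb0 = np.zeros((480, 640, 3), dtype=np.uint8)
depth0 = np.zeros((480, 640), dtype=np.float32)
mask0 = np.zeros((480, 640), dtype=bool)  # e.g. a mask returned by Segmentation

register_resp = pose.register(rgb=rgb0, depth=depth0, mask=mask0, K=cam_K, check_vram=True)

frames = [(rgb0, depth0)]  # placeholder frame stream
for rgb, depth in frames:
    # iteration= and bbox_xywh= are optional overrides (see the bullets above).
    track_resp = pose.track(rgb=rgb, depth=depth, K=cam_K)

pose.reset_object()  # rebuild the rotation grid from the cached mesh if needed
pose.close()
```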

<!--
## Benchmark

Measured on local servers. Empty cells are not yet measured.

**RTX 5060**

| Task | Prompt | None (s) | JPEG (s) | PNG (s) | h264 (s) |
| --- | --- | --- | --- | --- | --- |
| Grounding DINO | text (human . cup .) | 0.86 | 0.35 | 0.50 | 0.52 |
| DINOv2 | image prompt | 0.85 | 0.49 | 0.65 | 0.63 |
| SAM2 | - | | | | |
| FoundationPose registration | - | | | | |
| FoundationPose track | - | | | | |

**RTX 5090**

| Task | Prompt | None (s) | JPEG (s) | PNG (s) | h264 (s) |
| --- | --- | --- | --- | --- | --- |
| Grounding DINO | text (human . cup .) | | | | |
| DINOv2 | image prompt | | | | |
| SAM2 | - | | | | |
| FoundationPose registration | - | 0.4 | - | | |
| FoundationPose track | - | 0.03 | | | |
-->

## Release notes

- 0.1.2: Improved success detection for Segmentation responses (`result`/`success`/`status`), fixed image prompt registration/usage, added `check_vram` to PoseEstimation `register`.
- 0.1.1: Improved resource cleanup in PoseEstimation/Segmentation, use server defaults when iteration is omitted, added pose demo example.
- 0.1.0: Initial public release. Includes FoundationPose RPC client, real-time segmentation client, SSH-based mesh upload CLI/API.

neuromeka_vfm-0.1.6/README.md
@@ -0,0 +1,254 @@
[Added verbatim; the content is identical to the README portion of the PKG-INFO above.]

{neuromeka_vfm-0.1.4 → neuromeka_vfm-0.1.6}/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "neuromeka_vfm"
-version = "0.1.4"
+version = "0.1.6"
 description = "Client utilities for Neuromeka VFM FoundationPose RPC (upload meshes, call server)"
 readme = "README.md"
 requires-python = ">=3.8"
@@ -13,7 +13,9 @@ dependencies = [
     "numpy",
     "pyzmq",
     "paramiko",
-    "av",
+    "av",
+    "trimesh",
+    "tqdm",
 ]
 authors = [{name = "Neuromeka"}]
 classifiers = [

neuromeka_vfm-0.1.6/src/neuromeka_vfm/generate_mesh.py
@@ -0,0 +1,122 @@
"""
Utility to generate simple parametric meshes (currently a rectangular box) as binary STL.

Design goals
- Units: meters
- Origin: object center at (0, 0, 0)
- Axes: faces aligned to +/-X, +/-Y, +/-Z
- Output: binary STL saved to /opt/meshes (docker volume mount)

Usage (programmatic):
    from backend.generate_mesh import write_box_stl
    path = write_box_stl("custom_box.stl", width=0.054, depth=0.097, height=0.054)

CLI (optional):
    python -m backend.generate_mesh box custom_box.stl 0.054 0.097 0.054
"""

from __future__ import annotations

import struct
import sys
from pathlib import Path
from typing import Iterable, Tuple

MESH_DIR = Path("/opt/meshes")


def _ensure_dir(path: Path) -> None:
    path.parent.mkdir(parents=True, exist_ok=True)


def _pack_triangle(normal: Iterable[float], v1: Iterable[float], v2: Iterable[float], v3: Iterable[float]) -> bytes:
    """Pack one triangle (normal + 3 vertices) into a binary STL record."""
    return struct.pack(
        "<12fH",
        *normal,
        *v1,
        *v2,
        *v3,
        0,  # attribute byte count
    )


def _box_triangles(width: float, depth: float, height: float):
    """Generate normals and vertices for a box centered at the origin."""
    hx, hy, hz = width / 2.0, depth / 2.0, height / 2.0
    # 8 vertices
    p = [
        (-hx, -hy, -hz),
        (hx, -hy, -hz),
        (hx, hy, -hz),
        (-hx, hy, -hz),
        (-hx, -hy, hz),
        (hx, -hy, hz),
        (hx, hy, hz),
        (-hx, hy, hz),
    ]
    # Each face: two triangles (ccw when looking from outside)
    faces = [
        ((-1, 0, 0), (0, 4, 7, 3)),  # -X
        ((1, 0, 0), (1, 2, 6, 5)),   # +X
        ((0, -1, 0), (0, 1, 5, 4)),  # -Y
        ((0, 1, 0), (2, 3, 7, 6)),   # +Y
        ((0, 0, -1), (0, 3, 2, 1)),  # -Z
        ((0, 0, 1), (4, 5, 6, 7)),   # +Z
    ]
    for normal, idx in faces:
        if len(idx) == 4:
            a, b, c, d = idx
            # two triangles: (a, b, c) and (a, c, d)
            yield normal, p[a], p[b], p[c]
            yield normal, p[a], p[c], p[d]
        else:
            raise ValueError("Face index must have 4 vertices.")


def write_box_stl(filename: str, width: float, depth: float, height: float) -> Path:
    """
    Create a rectangular box STL.

    Args:
        filename: output file name (placed under /opt/meshes). If only a name is
            given, it is resolved relative to MESH_DIR.
        width, depth, height: box dimensions in meters (must be > 0).

    Returns:
        Path to the written STL file.
    """
    if width <= 0 or depth <= 0 or height <= 0:
        raise ValueError("width, depth, height must be positive.")

    out_path = Path(filename)
    if not out_path.is_absolute():
        out_path = MESH_DIR / out_path
    _ensure_dir(out_path)

    triangles = list(_box_triangles(width, depth, height))
    header = b"rect_box_stl" + b"\0" * (80 - len("rect_box_stl"))
    with out_path.open("wb") as f:
        f.write(header)
        f.write(struct.pack("<I", len(triangles)))
        for tri in triangles:
            f.write(_pack_triangle(*tri))
    return out_path


def _cli(args: list[str]) -> int:
    if len(args) != 5 or args[0].lower() != "box":
        print("Usage: python -m backend.generate_mesh box <filename> <width> <depth> <height>")
        return 1
    _, fname, w, d, h = args
    try:
        path = write_box_stl(fname, float(w), float(d), float(h))
    except Exception as exc:  # noqa: BLE001
        print(f"Error: {exc}")
        return 1
    print(f"STL written to: {path}")
    return 0


if __name__ == "__main__":
    sys.exit(_cli(sys.argv[1:]))