neuromeka-vfm 0.1.4__tar.gz → 0.1.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (24) hide show
  1. neuromeka_vfm-0.1.6/PKG-INFO +301 -0
  2. neuromeka_vfm-0.1.6/README.md +254 -0
  3. {neuromeka_vfm-0.1.4 → neuromeka_vfm-0.1.6}/pyproject.toml +4 -2
  4. neuromeka_vfm-0.1.6/src/neuromeka_vfm/generate_mesh.py +122 -0
  5. neuromeka_vfm-0.1.6/src/neuromeka_vfm/point_cloud_utils.py +377 -0
  6. {neuromeka_vfm-0.1.4 → neuromeka_vfm-0.1.6}/src/neuromeka_vfm/segmentation.py +54 -0
  7. neuromeka_vfm-0.1.6/src/neuromeka_vfm.egg-info/PKG-INFO +301 -0
  8. {neuromeka_vfm-0.1.4 → neuromeka_vfm-0.1.6}/src/neuromeka_vfm.egg-info/SOURCES.txt +2 -0
  9. {neuromeka_vfm-0.1.4 → neuromeka_vfm-0.1.6}/src/neuromeka_vfm.egg-info/requires.txt +2 -0
  10. neuromeka_vfm-0.1.4/PKG-INFO +0 -186
  11. neuromeka_vfm-0.1.4/README.md +0 -141
  12. neuromeka_vfm-0.1.4/src/neuromeka_vfm.egg-info/PKG-INFO +0 -186
  13. {neuromeka_vfm-0.1.4 → neuromeka_vfm-0.1.6}/LICENSE +0 -0
  14. {neuromeka_vfm-0.1.4 → neuromeka_vfm-0.1.6}/setup.cfg +0 -0
  15. {neuromeka_vfm-0.1.4 → neuromeka_vfm-0.1.6}/src/neuromeka_vfm/__init__.py +0 -0
  16. {neuromeka_vfm-0.1.4 → neuromeka_vfm-0.1.6}/src/neuromeka_vfm/compression.py +0 -0
  17. {neuromeka_vfm-0.1.4 → neuromeka_vfm-0.1.6}/src/neuromeka_vfm/examples/__init__.py +0 -0
  18. {neuromeka_vfm-0.1.4 → neuromeka_vfm-0.1.6}/src/neuromeka_vfm/examples/pose_demo.py +0 -0
  19. {neuromeka_vfm-0.1.4 → neuromeka_vfm-0.1.6}/src/neuromeka_vfm/pickle_client.py +0 -0
  20. {neuromeka_vfm-0.1.4 → neuromeka_vfm-0.1.6}/src/neuromeka_vfm/pose_estimation.py +0 -0
  21. {neuromeka_vfm-0.1.4 → neuromeka_vfm-0.1.6}/src/neuromeka_vfm/upload_mesh.py +0 -0
  22. {neuromeka_vfm-0.1.4 → neuromeka_vfm-0.1.6}/src/neuromeka_vfm.egg-info/dependency_links.txt +0 -0
  23. {neuromeka_vfm-0.1.4 → neuromeka_vfm-0.1.6}/src/neuromeka_vfm.egg-info/entry_points.txt +0 -0
  24. {neuromeka_vfm-0.1.4 → neuromeka_vfm-0.1.6}/src/neuromeka_vfm.egg-info/top_level.txt +0 -0
@@ -0,0 +1,301 @@
1
+ Metadata-Version: 2.4
2
+ Name: neuromeka_vfm
3
+ Version: 0.1.6
4
+ Summary: Client utilities for Neuromeka VFM FoundationPose RPC (upload meshes, call server)
5
+ Author: Neuromeka
6
+ License: MIT License
7
+
8
+ Copyright (c) 2025 Neuromeka Co., Ltd.
9
+
10
+ Permission is hereby granted, free of charge, to any person obtaining a copy
11
+ of this software and associated documentation files (the "Software"), to deal
12
+ in the Software without restriction, including without limitation the rights
13
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14
+ copies of the Software, and to permit persons to whom the Software is
15
+ furnished to do so, subject to the following conditions:
16
+
17
+ The above copyright notice and this permission notice shall be included in all
18
+ copies or substantial portions of the Software.
19
+
20
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26
+ SOFTWARE.
27
+
28
+ Classifier: Development Status :: 3 - Alpha
29
+ Classifier: Intended Audience :: Developers
30
+ Classifier: License :: OSI Approved :: MIT License
31
+ Classifier: Programming Language :: Python :: 3
32
+ Classifier: Programming Language :: Python :: 3.8
33
+ Classifier: Programming Language :: Python :: 3.9
34
+ Classifier: Programming Language :: Python :: 3.10
35
+ Classifier: Programming Language :: Python :: 3.11
36
+ Classifier: Programming Language :: Python :: 3.12
37
+ Requires-Python: >=3.8
38
+ Description-Content-Type: text/markdown
39
+ License-File: LICENSE
40
+ Requires-Dist: numpy
41
+ Requires-Dist: pyzmq
42
+ Requires-Dist: paramiko
43
+ Requires-Dist: av
44
+ Requires-Dist: trimesh
45
+ Requires-Dist: tqdm
46
+ Dynamic: license-file
47
+
48
+ # neuromeka_vfm
49
+
50
+ A lightweight client SDK for communicating with Segmentation (SAM2, Grounding DINO) and Pose Estimation (NVIDIA FoundationPose) servers over RPC/ZeroMQ. It also provides SSH/SFTP utilities to upload mesh files to the host.
51
+
52
+ - Website: http://www.neuromeka.com
53
+ - PyPI package: https://pypi.org/project/neuromeka_vfm/
54
+ - Documents: https://docs.neuromeka.com
55
+
56
+ ## Installation
57
+
58
+ ```bash
59
+ pip install neuromeka_vfm
60
+ ```
61
+
62
+ ## Python API (usage by example)
63
+
64
+ - Client PC: the machine running your application with this package installed.
65
+ - Host PC: the machine running Segmentation and Pose Estimation Docker servers. If you run Docker locally, use `localhost`.
66
+
67
+ ### Segmentation
68
+
69
+ ```python
70
+ from neuromeka_vfm import Segmentation
71
+
72
+ seg = Segmentation(
73
+ hostname="192.168.10.63",
74
+ port=5432,
75
+ compression_strategy="png", # none | png | jpeg | h264
76
+ )
77
+
78
+ # Register using an image prompt
79
+ seg.add_image_prompt("drug_box", ref_rgb)
80
+ seg.register_first_frame(
81
+ frame=first_rgb,
82
+ prompt="drug_box", # ID string
83
+ use_image_prompt=True,
84
+ )
85
+
86
+ # Register using a text prompt
87
+ seg.register_first_frame(
88
+ frame=first_rgb,
89
+ prompt="box .", # Text prompt (must end with " .")
90
+ use_image_prompt=False,
91
+ )
92
+
93
+ # SAM2 tracking on the registered mask(s)
94
+ resp = seg.get_next(next_rgb)
95
+ if isinstance(resp, dict) and resp.get("result") == "ERROR":
96
+ print(f"Tracking error: {resp.get('message')}")
97
+ seg.reset()
98
+ else:
99
+ masks = resp
100
+
101
+ # Segmentation settings / model selection (nrmk_realtime_segmentation v0.2+)
102
+ caps = seg.get_capabilities()["data"]
103
+ current = seg.get_config()["data"]
104
+ seg.set_config(
105
+ {
106
+ "grounding_dino": {
107
+ "backbone": "Swin-B", # Swin-T | Swin-B
108
+ "box_threshold": 0.35,
109
+ "text_threshold": 0.25,
110
+ },
111
+ "dino_detection": {
112
+ "threshold": 0.5,
113
+ "target_multiplier": 25,
114
+ "img_multiplier": 50,
115
+ "background_threshold": -1.0,
116
+ "final_erosion_count": 10,
117
+ "segment_min_size": 20,
118
+ },
119
+ "sam2": {
120
+ "model": "facebook/sam2.1-hiera-large",
121
+ "use_legacy": False,
122
+ "compile": False,
123
+ "offload_state_to_cpu": False,
124
+ "offload_video_to_cpu": False,
125
+ },
126
+ }
127
+ )
128
+
129
+ # Remove an object (v0.2+, only when use_legacy=False)
130
+ seg.remove_object("cup_0")
131
+
132
+ seg.close()
133
+ ```
134
+
135
+ Additional Segmentation APIs and behaviors
136
+
137
+ - `benchmark=True` in the constructor enables timing counters (`call_time`, `call_count`) for `add_image_prompt`, `register_first_frame`, and `get_next`.
138
+ - `switch_compression_strategy()` lets you change the compression strategy at runtime.
139
+ - `register_first_frame()` returns `True`/`False` and raises `ValueError` if image prompts are missing when `use_image_prompt=True`.
140
+ - `register_first_frame()` accepts a list of prompt IDs when `use_image_prompt=True`.
141
+ - `get_next()` returns `None` if called before registration; it can also return the server error dict when available.
142
+ - `reset()` performs a server-side reset, while `finish()` clears only local state.
143
+ - Exposed state: `tracking_object_ids`, `current_frame_masks`, `invisible_object_ids`.
144
+ - Backward-compat alias: `NrmkRealtimeSegmentation`.
145
+
146
+ #### Segmentation v0.2 config summary (defaults/choices)
147
+ `seg.get_capabilities()` can differ depending on server configuration. The following reflects v0.2 defaults.
148
+
149
+ ```yaml
150
+ grounding_dino:
151
+ backbone:
152
+ choices:
153
+ - Swin-B
154
+ - Swin-T
155
+ default: Swin-T
156
+ box_threshold:
157
+ default: 0.35
158
+ min: 0.0
159
+ max: 1.0
160
+ text_threshold:
161
+ default: 0.25
162
+ min: 0.0
163
+ max: 1.0
164
+
165
+ dino_detection:
166
+ threshold:
167
+ default: 0.5
168
+ target_multiplier:
169
+ default: 25
170
+ img_multiplier:
171
+ default: 50
172
+ background_threshold:
173
+ default: -1.0
174
+ final_erosion_count:
175
+ default: 10
176
+ segment_min_size:
177
+ default: 20
178
+
179
+ sam2:
180
+ model:
181
+ choices:
182
+ - facebook/sam2-hiera-base-plus
183
+ - facebook/sam2-hiera-large
184
+ - facebook/sam2-hiera-small
185
+ - facebook/sam2-hiera-tiny
186
+ - facebook/sam2.1-hiera-base-plus
187
+ - facebook/sam2.1-hiera-large
188
+ - facebook/sam2.1-hiera-small
189
+ - facebook/sam2.1-hiera-tiny
190
+ default: facebook/sam2.1-hiera-large
191
+ use_legacy:
192
+ default: false
193
+ compile:
194
+ default: false
195
+ offload_state_to_cpu:
196
+ default: false
197
+ offload_video_to_cpu:
198
+ default: false
199
+ ```
200
+
201
+ #### Segmentation v0.2 notes and changes
202
+
203
+ - If SAM2 VRAM estimation fails, `seg.get_next()` may return `{"result":"ERROR"}`. Handle the error and call `reset` before re-registering.
204
+ - `compile=True` can slow down first-frame registration and `reset`.
205
+ - CPU offloading is most effective when both `offload_state_to_cpu=True` and `offload_video_to_cpu=True` are set (legacy mode does not support `offload_video_to_cpu`).
206
+ - `remove_object` is supported only when `use_legacy=False`.
207
+ - GroundingDINO added the Swin-B backbone and fixed prompt-token merge issues.
208
+
209
+ ### Pose Estimation
210
+
211
+ **Mesh upload**: Upload the mesh file (STL) to `/opt/meshes/` on the host PC. You can also use SSH directly.
212
+
213
+ ```python
214
+ from neuromeka_vfm import upload_mesh
215
+
216
+ upload_mesh(
217
+ host="192.168.10.63",
218
+ user="user",
219
+ password="pass",
220
+ local="mesh/my_mesh.stl", # local mesh path
221
+ remote="/opt/meshes/my_mesh.stl", # host mesh path (Docker volume)
222
+ )
223
+ ```
224
+
225
+ Initialization
226
+
227
+ ```python
228
+ from neuromeka_vfm import PoseEstimation
229
+
230
+ pose = PoseEstimation(host="192.168.10.72", port=5557)
231
+
232
+ pose.init(
233
+ mesh_path="/app/modules/foundation_pose/mesh/my_mesh.stl",
234
+ apply_scale=1.0,
235
+ track_refine_iter=3,
236
+ min_n_views=40,
237
+ inplane_step=60,
238
+ )
239
+ ```
240
+
241
+ - mesh_path: path to the mesh file (STL/OBJ). Initialization fails if missing.
242
+ - apply_scale: scalar applied after loading the mesh.
243
+ - STL in meters: 1.0 (no scaling)
244
+ - STL in centimeters: 0.01 (1 cm -> 0.01 m)
245
+ - STL in millimeters: 0.001 (1 mm -> 0.001 m)
246
+ - force_apply_color: if True, forces a solid color when the mesh lacks color data.
247
+ - apply_color: RGB tuple (0-255) used when `force_apply_color=True`.
248
+ - est_refine_iter: number of refinement iterations during registration (higher = more accurate, slower).
249
+ - track_refine_iter: number of refinement iterations per frame during tracking.
250
+ - min_n_views: minimum number of sampled camera views (affects rotation candidates).
251
+ - inplane_step: in-plane rotation step in degrees (smaller = more candidates).
252
+
253
+ Registration and tracking
254
+
255
+ ```python
256
+ # Registration (server defaults when iteration is omitted, check_vram=True pre-checks VRAM)
257
+ register_resp = pose.register(rgb=rgb0, depth=depth0, mask=mask0, K=cam_K, check_vram=True)
258
+
259
+ # Tracking (optionally limit search area with bbox_xywh)
260
+ track_resp = pose.track(rgb=rgb1, depth=depth1, K=cam_K, bbox_xywh=bbox_xywh)
261
+
262
+ pose.close()
263
+ ```
264
+
265
+ - cam_K: camera intrinsics.
266
+ - Large RGB resolution, large `min_n_views`, or small `inplane_step` can cause GPU VRAM errors.
267
+ - `check_vram=True` in `register` performs a pre-check to prevent server shutdown due to OOM.
268
+ - `iteration` in `register`/`track` can override the server default if provided.
269
+ - `reset()` resets the server state; `reset_object()` reuses the cached mesh to rebuild the rotation grid.
270
+ - Default host/port can come from `FPOSE_HOST` and `FPOSE_PORT` environment variables.
271
+ - Backward-compat alias: `FoundationPoseClient`.
272
+
273
+ <!--
274
+ ## Benchmark
275
+
276
+ Measured on local servers. Empty cells are not yet measured.
277
+
278
+ **RTX 5060**
279
+ | Task | Prompt | None (s) | JPEG (s) | PNG (s) | h264 (s) |
280
+ | --- | --- | --- | --- | --- | --- |
281
+ | Grounding DINO | text (human . cup .) | 0.86 | 0.35 | 0.50 | 0.52 |
282
+ | DINOv2 | image prompt | 0.85 | 0.49 | 0.65 | 0.63 |
283
+ | SAM2 | - | | | | |
284
+ | FoundationPose registration | - | | | | |
285
+ | FoundationPose track | - | | | | |
286
+
287
+ **RTX 5090**
288
+ | Task | Prompt | None (s) | JPEG (s) | PNG (s) | h264 (s) |
289
+ | --- | --- | --- | --- | --- | --- |
290
+ | Grounding DINO | text (human . cup .) | | | | |
291
+ | DINOv2 | image prompt | | | | |
292
+ | SAM2 | - | | | | |
293
+ | FoundationPose registration | - | 0.4 | - | | |
294
+ | FoundationPose track | - | 0.03 | | | |
295
+ -->
296
+
297
+ ## Release notes
298
+
299
+ - 0.1.2: Improved success detection for Segmentation responses (`result`/`success`/`status`), fixed image prompt registration/usage, added `check_vram` to PoseEstimation `register`.
300
+ - 0.1.1: Improved resource cleanup in PoseEstimation/Segmentation, use server defaults when iteration is omitted, added pose demo example.
301
+ - 0.1.0: Initial public release. Includes FoundationPose RPC client, real-time segmentation client, SSH-based mesh upload CLI/API.
@@ -0,0 +1,254 @@
1
+ # neuromeka_vfm
2
+
3
+ A lightweight client SDK for communicating with Segmentation (SAM2, Grounding DINO) and Pose Estimation (NVIDIA FoundationPose) servers over RPC/ZeroMQ. It also provides SSH/SFTP utilities to upload mesh files to the host.
4
+
5
+ - Website: http://www.neuromeka.com
6
+ - PyPI package: https://pypi.org/project/neuromeka_vfm/
7
+ - Documents: https://docs.neuromeka.com
8
+
9
+ ## Installation
10
+
11
+ ```bash
12
+ pip install neuromeka_vfm
13
+ ```
14
+
15
+ ## Python API (usage by example)
16
+
17
+ - Client PC: the machine running your application with this package installed.
18
+ - Host PC: the machine running Segmentation and Pose Estimation Docker servers. If you run Docker locally, use `localhost`.
19
+
20
+ ### Segmentation
21
+
22
+ ```python
23
+ from neuromeka_vfm import Segmentation
24
+
25
+ seg = Segmentation(
26
+ hostname="192.168.10.63",
27
+ port=5432,
28
+ compression_strategy="png", # none | png | jpeg | h264
29
+ )
30
+
31
+ # Register using an image prompt
32
+ seg.add_image_prompt("drug_box", ref_rgb)
33
+ seg.register_first_frame(
34
+ frame=first_rgb,
35
+ prompt="drug_box", # ID string
36
+ use_image_prompt=True,
37
+ )
38
+
39
+ # Register using a text prompt
40
+ seg.register_first_frame(
41
+ frame=first_rgb,
42
+ prompt="box .", # Text prompt (must end with " .")
43
+ use_image_prompt=False,
44
+ )
45
+
46
+ # SAM2 tracking on the registered mask(s)
47
+ resp = seg.get_next(next_rgb)
48
+ if isinstance(resp, dict) and resp.get("result") == "ERROR":
49
+ print(f"Tracking error: {resp.get('message')}")
50
+ seg.reset()
51
+ else:
52
+ masks = resp
53
+
54
+ # Segmentation settings / model selection (nrmk_realtime_segmentation v0.2+)
55
+ caps = seg.get_capabilities()["data"]
56
+ current = seg.get_config()["data"]
57
+ seg.set_config(
58
+ {
59
+ "grounding_dino": {
60
+ "backbone": "Swin-B", # Swin-T | Swin-B
61
+ "box_threshold": 0.35,
62
+ "text_threshold": 0.25,
63
+ },
64
+ "dino_detection": {
65
+ "threshold": 0.5,
66
+ "target_multiplier": 25,
67
+ "img_multiplier": 50,
68
+ "background_threshold": -1.0,
69
+ "final_erosion_count": 10,
70
+ "segment_min_size": 20,
71
+ },
72
+ "sam2": {
73
+ "model": "facebook/sam2.1-hiera-large",
74
+ "use_legacy": False,
75
+ "compile": False,
76
+ "offload_state_to_cpu": False,
77
+ "offload_video_to_cpu": False,
78
+ },
79
+ }
80
+ )
81
+
82
+ # Remove an object (v0.2+, only when use_legacy=False)
83
+ seg.remove_object("cup_0")
84
+
85
+ seg.close()
86
+ ```
87
+
88
+ Additional Segmentation APIs and behaviors
89
+
90
+ - `benchmark=True` in the constructor enables timing counters (`call_time`, `call_count`) for `add_image_prompt`, `register_first_frame`, and `get_next`.
91
+ - `switch_compression_strategy()` lets you change the compression strategy at runtime.
92
+ - `register_first_frame()` returns `True`/`False` and raises `ValueError` if image prompts are missing when `use_image_prompt=True`.
93
+ - `register_first_frame()` accepts a list of prompt IDs when `use_image_prompt=True`.
94
+ - `get_next()` returns `None` if called before registration; it can also return the server error dict when available.
95
+ - `reset()` performs a server-side reset, while `finish()` clears only local state.
96
+ - Exposed state: `tracking_object_ids`, `current_frame_masks`, `invisible_object_ids`.
97
+ - Backward-compat alias: `NrmkRealtimeSegmentation`.
98
+
99
+ #### Segmentation v0.2 config summary (defaults/choices)
100
+ `seg.get_capabilities()` can differ depending on server configuration. The following reflects v0.2 defaults.
101
+
102
+ ```yaml
103
+ grounding_dino:
104
+ backbone:
105
+ choices:
106
+ - Swin-B
107
+ - Swin-T
108
+ default: Swin-T
109
+ box_threshold:
110
+ default: 0.35
111
+ min: 0.0
112
+ max: 1.0
113
+ text_threshold:
114
+ default: 0.25
115
+ min: 0.0
116
+ max: 1.0
117
+
118
+ dino_detection:
119
+ threshold:
120
+ default: 0.5
121
+ target_multiplier:
122
+ default: 25
123
+ img_multiplier:
124
+ default: 50
125
+ background_threshold:
126
+ default: -1.0
127
+ final_erosion_count:
128
+ default: 10
129
+ segment_min_size:
130
+ default: 20
131
+
132
+ sam2:
133
+ model:
134
+ choices:
135
+ - facebook/sam2-hiera-base-plus
136
+ - facebook/sam2-hiera-large
137
+ - facebook/sam2-hiera-small
138
+ - facebook/sam2-hiera-tiny
139
+ - facebook/sam2.1-hiera-base-plus
140
+ - facebook/sam2.1-hiera-large
141
+ - facebook/sam2.1-hiera-small
142
+ - facebook/sam2.1-hiera-tiny
143
+ default: facebook/sam2.1-hiera-large
144
+ use_legacy:
145
+ default: false
146
+ compile:
147
+ default: false
148
+ offload_state_to_cpu:
149
+ default: false
150
+ offload_video_to_cpu:
151
+ default: false
152
+ ```
153
+
154
+ #### Segmentation v0.2 notes and changes
155
+
156
+ - If SAM2 VRAM estimation fails, `seg.get_next()` may return `{"result":"ERROR"}`. Handle the error and call `reset` before re-registering.
157
+ - `compile=True` can slow down first-frame registration and `reset`.
158
+ - CPU offloading is most effective when both `offload_state_to_cpu=True` and `offload_video_to_cpu=True` are set (legacy mode does not support `offload_video_to_cpu`).
159
+ - `remove_object` is supported only when `use_legacy=False`.
160
+ - GroundingDINO added the Swin-B backbone and fixed prompt-token merge issues.
161
+
162
+ ### Pose Estimation
163
+
164
+ **Mesh upload**: Upload the mesh file (STL) to `/opt/meshes/` on the host PC. You can also use SSH directly.
165
+
166
+ ```python
167
+ from neuromeka_vfm import upload_mesh
168
+
169
+ upload_mesh(
170
+ host="192.168.10.63",
171
+ user="user",
172
+ password="pass",
173
+ local="mesh/my_mesh.stl", # local mesh path
174
+ remote="/opt/meshes/my_mesh.stl", # host mesh path (Docker volume)
175
+ )
176
+ ```
177
+
178
+ Initialization
179
+
180
+ ```python
181
+ from neuromeka_vfm import PoseEstimation
182
+
183
+ pose = PoseEstimation(host="192.168.10.72", port=5557)
184
+
185
+ pose.init(
186
+ mesh_path="/app/modules/foundation_pose/mesh/my_mesh.stl",
187
+ apply_scale=1.0,
188
+ track_refine_iter=3,
189
+ min_n_views=40,
190
+ inplane_step=60,
191
+ )
192
+ ```
193
+
194
+ - mesh_path: path to the mesh file (STL/OBJ). Initialization fails if missing.
195
+ - apply_scale: scalar applied after loading the mesh.
196
+ - STL in meters: 1.0 (no scaling)
197
+ - STL in centimeters: 0.01 (1 cm -> 0.01 m)
198
+ - STL in millimeters: 0.001 (1 mm -> 0.001 m)
199
+ - force_apply_color: if True, forces a solid color when the mesh lacks color data.
200
+ - apply_color: RGB tuple (0-255) used when `force_apply_color=True`.
201
+ - est_refine_iter: number of refinement iterations during registration (higher = more accurate, slower).
202
+ - track_refine_iter: number of refinement iterations per frame during tracking.
203
+ - min_n_views: minimum number of sampled camera views (affects rotation candidates).
204
+ - inplane_step: in-plane rotation step in degrees (smaller = more candidates).
205
+
206
+ Registration and tracking
207
+
208
+ ```python
209
+ # Registration (server defaults when iteration is omitted, check_vram=True pre-checks VRAM)
210
+ register_resp = pose.register(rgb=rgb0, depth=depth0, mask=mask0, K=cam_K, check_vram=True)
211
+
212
+ # Tracking (optionally limit search area with bbox_xywh)
213
+ track_resp = pose.track(rgb=rgb1, depth=depth1, K=cam_K, bbox_xywh=bbox_xywh)
214
+
215
+ pose.close()
216
+ ```
217
+
218
+ - cam_K: camera intrinsics.
219
+ - Large RGB resolution, large `min_n_views`, or small `inplane_step` can cause GPU VRAM errors.
220
+ - `check_vram=True` in `register` performs a pre-check to prevent server shutdown due to OOM.
221
+ - `iteration` in `register`/`track` can override the server default if provided.
222
+ - `reset()` resets the server state; `reset_object()` reuses the cached mesh to rebuild the rotation grid.
223
+ - Default host/port can come from `FPOSE_HOST` and `FPOSE_PORT` environment variables.
224
+ - Backward-compat alias: `FoundationPoseClient`.
225
+
226
+ <!--
227
+ ## Benchmark
228
+
229
+ Measured on local servers. Empty cells are not yet measured.
230
+
231
+ **RTX 5060**
232
+ | Task | Prompt | None (s) | JPEG (s) | PNG (s) | h264 (s) |
233
+ | --- | --- | --- | --- | --- | --- |
234
+ | Grounding DINO | text (human . cup .) | 0.86 | 0.35 | 0.50 | 0.52 |
235
+ | DINOv2 | image prompt | 0.85 | 0.49 | 0.65 | 0.63 |
236
+ | SAM2 | - | | | | |
237
+ | FoundationPose registration | - | | | | |
238
+ | FoundationPose track | - | | | | |
239
+
240
+ **RTX 5090**
241
+ | Task | Prompt | None (s) | JPEG (s) | PNG (s) | h264 (s) |
242
+ | --- | --- | --- | --- | --- | --- |
243
+ | Grounding DINO | text (human . cup .) | | | | |
244
+ | DINOv2 | image prompt | | | | |
245
+ | SAM2 | - | | | | |
246
+ | FoundationPose registration | - | 0.4 | - | | |
247
+ | FoundationPose track | - | 0.03 | | | |
248
+ -->
249
+
250
+ ## Release notes
251
+
252
+ - 0.1.2: Improved success detection for Segmentation responses (`result`/`success`/`status`), fixed image prompt registration/usage, added `check_vram` to PoseEstimation `register`.
253
+ - 0.1.1: Improved resource cleanup in PoseEstimation/Segmentation, use server defaults when iteration is omitted, added pose demo example.
254
+ - 0.1.0: Initial public release. Includes FoundationPose RPC client, real-time segmentation client, SSH-based mesh upload CLI/API.
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "neuromeka_vfm"
7
- version = "0.1.4"
7
+ version = "0.1.6"
8
8
  description = "Client utilities for Neuromeka VFM FoundationPose RPC (upload meshes, call server)"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.8"
@@ -13,7 +13,9 @@ dependencies = [
13
13
  "numpy",
14
14
  "pyzmq",
15
15
  "paramiko",
16
- "av",
16
+ "av",
17
+ "trimesh",
18
+ "tqdm",
17
19
  ]
18
20
  authors = [{name = "Neuromeka"}]
19
21
  classifiers = [
@@ -0,0 +1,122 @@
1
+ """
2
+ Utility to generate simple parametric meshes (currently rectangular box) as binary STL.
3
+
4
+ Design goals
5
+ - Units: meters
6
+ - Origin: object center at (0, 0, 0)
7
+ - Axes: faces aligned to +/-X, +/-Y, +/-Z
8
+ - Output: binary STL saved to /opt/meshes (docker volume mount)
9
+
10
+ Usage (programmatic):
11
+ from backend.generate_mesh import write_box_stl
12
+ path = write_box_stl("custom_box.stl", width=0.054, depth=0.097, height=0.054)
13
+
14
+ CLI (optional):
15
+ python -m backend.generate_mesh box custom_box.stl 0.054 0.097 0.054
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ import struct
21
+ import sys
22
+ from pathlib import Path
23
+ from typing import Iterable, Tuple
24
+
25
+ MESH_DIR = Path("/opt/meshes")
26
+
27
+
28
+ def _ensure_dir(path: Path) -> None:
29
+ path.parent.mkdir(parents=True, exist_ok=True)
30
+
31
+
32
+ def _pack_triangle(normal: Iterable[float], v1: Iterable[float], v2: Iterable[float], v3: Iterable[float]) -> bytes:
33
+ """Pack one triangle (normal + 3 vertices) into binary STL record."""
34
+ return struct.pack(
35
+ "<12fH",
36
+ *normal,
37
+ *v1,
38
+ *v2,
39
+ *v3,
40
+ 0, # attribute byte count
41
+ )
42
+
43
+
44
+ def _box_triangles(width: float, depth: float, height: float):
45
+ """Generate normals and vertices for a box centered at origin."""
46
+ hx, hy, hz = width / 2.0, depth / 2.0, height / 2.0
47
+ # 8 vertices
48
+ p = [
49
+ (-hx, -hy, -hz),
50
+ (hx, -hy, -hz),
51
+ (hx, hy, -hz),
52
+ (-hx, hy, -hz),
53
+ (-hx, -hy, hz),
54
+ (hx, -hy, hz),
55
+ (hx, hy, hz),
56
+ (-hx, hy, hz),
57
+ ]
58
+ # Each face: two triangles (ccw when looking from outside)
59
+ faces = [
60
+ ((-1, 0, 0), (0, 1, 3, 7, 4)), # -X
61
+ ((1, 0, 0), (1, 2, 6, 5)), # +X
62
+ ((0, -1, 0), (0, 1, 5, 4)), # -Y
63
+ ((0, 1, 0), (3, 2, 6, 7)), # +Y
64
+ ((0, 0, -1), (0, 1, 2, 3)), # -Z
65
+ ((0, 0, 1), (4, 5, 6, 7)), # +Z
66
+ ]
67
+ for normal, idx in faces:
68
+ if len(idx) == 4:
69
+ a, b, c, d = idx
70
+ # two triangles: (a,b,c) and (a,c,d)
71
+ yield normal, p[a], p[b], p[c]
72
+ yield normal, p[a], p[c], p[d]
73
+ else:
74
+ raise ValueError("Face index must have 4 vertices.")
75
+
76
+
77
def write_box_stl(filename: str, width: float, depth: float, height: float) -> Path:
    """
    Write a rectangular box as a binary STL file.

    Args:
        filename: output file name. A relative name is resolved under
            MESH_DIR; an absolute path is used as-is.
        width, depth, height: box dimensions in meters (must be > 0).

    Returns:
        Path to the written STL file.

    Raises:
        ValueError: if any dimension is not strictly positive.
    """
    if min(width, depth, height) <= 0:
        raise ValueError("width, depth, height must be positive.")

    target = Path(filename)
    if not target.is_absolute():
        target = MESH_DIR / target
    _ensure_dir(target)

    tris = list(_box_triangles(width, depth, height))
    # Binary STL layout: 80-byte header, uint32 triangle count, then facets.
    header = b"rect_box_stl".ljust(80, b"\0")
    with target.open("wb") as fh:
        fh.write(header)
        fh.write(struct.pack("<I", len(tris)))
        fh.writelines(_pack_triangle(*tri) for tri in tris)
    return target
105
+
106
+
107
+ def _cli(args: list[str]) -> int:
108
+ if len(args) != 5 or args[0].lower() != "box":
109
+ print("Usage: python -m backend.generate_mesh box <filename> <width> <depth> <height>")
110
+ return 1
111
+ _, fname, w, d, h = args
112
+ try:
113
+ path = write_box_stl(fname, float(w), float(d), float(h))
114
+ except Exception as exc: # noqa: BLE001
115
+ print(f"Error: {exc}")
116
+ return 1
117
+ print(f"STL written to: {path}")
118
+ return 0
119
+
120
+
121
if __name__ == "__main__":
    # Script entry point; process exit status mirrors _cli's return code.
    raise SystemExit(_cli(sys.argv[1:]))