neuromeka-vfm 0.1.5__tar.gz → 0.1.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (23)
  1. {neuromeka_vfm-0.1.5/src/neuromeka_vfm.egg-info → neuromeka_vfm-0.1.6}/PKG-INFO +89 -73
  2. neuromeka_vfm-0.1.6/README.md +254 -0
  3. {neuromeka_vfm-0.1.5 → neuromeka_vfm-0.1.6}/pyproject.toml +1 -1
  4. {neuromeka_vfm-0.1.5 → neuromeka_vfm-0.1.6}/src/neuromeka_vfm/__init__.py +0 -2
  5. neuromeka_vfm-0.1.6/src/neuromeka_vfm/generate_mesh.py +122 -0
  6. {neuromeka_vfm-0.1.5 → neuromeka_vfm-0.1.6/src/neuromeka_vfm.egg-info}/PKG-INFO +89 -73
  7. {neuromeka_vfm-0.1.5 → neuromeka_vfm-0.1.6}/src/neuromeka_vfm.egg-info/SOURCES.txt +1 -1
  8. neuromeka_vfm-0.1.5/README.md +0 -238
  9. neuromeka_vfm-0.1.5/src/neuromeka_vfm/grasp_gen.py +0 -79
  10. {neuromeka_vfm-0.1.5 → neuromeka_vfm-0.1.6}/LICENSE +0 -0
  11. {neuromeka_vfm-0.1.5 → neuromeka_vfm-0.1.6}/setup.cfg +0 -0
  12. {neuromeka_vfm-0.1.5 → neuromeka_vfm-0.1.6}/src/neuromeka_vfm/compression.py +0 -0
  13. {neuromeka_vfm-0.1.5 → neuromeka_vfm-0.1.6}/src/neuromeka_vfm/examples/__init__.py +0 -0
  14. {neuromeka_vfm-0.1.5 → neuromeka_vfm-0.1.6}/src/neuromeka_vfm/examples/pose_demo.py +0 -0
  15. {neuromeka_vfm-0.1.5 → neuromeka_vfm-0.1.6}/src/neuromeka_vfm/pickle_client.py +0 -0
  16. {neuromeka_vfm-0.1.5 → neuromeka_vfm-0.1.6}/src/neuromeka_vfm/point_cloud_utils.py +0 -0
  17. {neuromeka_vfm-0.1.5 → neuromeka_vfm-0.1.6}/src/neuromeka_vfm/pose_estimation.py +0 -0
  18. {neuromeka_vfm-0.1.5 → neuromeka_vfm-0.1.6}/src/neuromeka_vfm/segmentation.py +0 -0
  19. {neuromeka_vfm-0.1.5 → neuromeka_vfm-0.1.6}/src/neuromeka_vfm/upload_mesh.py +0 -0
  20. {neuromeka_vfm-0.1.5 → neuromeka_vfm-0.1.6}/src/neuromeka_vfm.egg-info/dependency_links.txt +0 -0
  21. {neuromeka_vfm-0.1.5 → neuromeka_vfm-0.1.6}/src/neuromeka_vfm.egg-info/entry_points.txt +0 -0
  22. {neuromeka_vfm-0.1.5 → neuromeka_vfm-0.1.6}/src/neuromeka_vfm.egg-info/requires.txt +0 -0
  23. {neuromeka_vfm-0.1.5 → neuromeka_vfm-0.1.6}/src/neuromeka_vfm.egg-info/top_level.txt +0 -0
1. {neuromeka_vfm-0.1.5/src/neuromeka_vfm.egg-info → neuromeka_vfm-0.1.6}/PKG-INFO

@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: neuromeka_vfm
- Version: 0.1.5
+ Version: 0.1.6
  Summary: Client utilities for Neuromeka VFM FoundationPose RPC (upload meshes, call server)
  Author: Neuromeka
  License: MIT License
@@ -47,50 +47,50 @@ Dynamic: license-file

  # neuromeka_vfm

- A simple utility package for communicating with the Segmentation (SAM2, Grounding DINO) and Pose Estimation (NVIDIA FoundationPose) servers (RPC, ZeroMQ) from the client PC, and for uploading meshes to the host over SSH/SFTP.
+ A lightweight client SDK for communicating with Segmentation (SAM2, Grounding DINO) and Pose Estimation (NVIDIA FoundationPose) servers over RPC/ZeroMQ. It also provides SSH/SFTP utilities to upload mesh files to the host.

  - Website: http://www.neuromeka.com
- - Source code: https://github.com/neuromeka-robotics/neuromeka_vfm
  - PyPI package: https://pypi.org/project/neuromeka_vfm/
  - Documents: https://docs.neuromeka.com

- ## Also usable through the Web UI (VFM Tester)
-
- - VFM Tester (Web UI): https://gitlab.com/neuromeka-group/nrmkq/nrmk_vfm_tester
-
-
  ## Installation
+
  ```bash
  pip install neuromeka_vfm
  ```

- ## Python API (usage shown by examples)
+ ## Python API (usage by example)

- * PC: the PC that implements the application and has this package (neuromeka_vfm) installed
- * Server PC (Host): the PC with the Segmentation and Pose Estimation Docker servers installed. Use localhost when Docker runs on the same PC.
+ - Client PC: the machine running your application with this package installed.
+ - Host PC: the machine running Segmentation and Pose Estimation Docker servers. If you run Docker locally, use `localhost`.

  ### Segmentation
+
  ```python
  from neuromeka_vfm import Segmentation

  seg = Segmentation(
-     hostname="192.168.10.63",
+     hostname="192.168.10.63",
      port=5432,
      compression_strategy="png", # none | png | jpeg | h264
  )

- # Registration using an image prompt
+ # Register using an image prompt
  seg.add_image_prompt("drug_box", ref_rgb)
- seg.register_first_frame(frame=first_rgb,
-                          prompt="drug_box", # ID str
-                          use_image_prompt=True)
+ seg.register_first_frame(
+     frame=first_rgb,
+     prompt="drug_box", # ID string
+     use_image_prompt=True,
+ )

- # Registration using a text prompt
- seg.register_first_frame(frame=first_rgb,
-                          prompt="box .", # Text prompt (a trailing space and "." are required)
-                          use_image_prompt=False)
+ # Register using a text prompt
+ seg.register_first_frame(
+     frame=first_rgb,
+     prompt="box .", # Text prompt (must end with " .")
+     use_image_prompt=False,
+ )

- # SAM2 tracking of the registered masks
+ # SAM2 tracking on the registered mask(s)
  resp = seg.get_next(next_rgb)
  if isinstance(resp, dict) and resp.get("result") == "ERROR":
      print(f"Tracking error: {resp.get('message')}")
@@ -98,7 +98,7 @@ if isinstance(resp, dict) and resp.get("result") == "ERROR":
  else:
      masks = resp

- # Segmentation configuration / model selection (nrmk_realtime_segmentation v0.2+)
+ # Segmentation settings / model selection (nrmk_realtime_segmentation v0.2+)
  caps = seg.get_capabilities()["data"]
  current = seg.get_config()["data"]
  seg.set_config(
@@ -126,15 +126,26 @@ seg.set_config(
      }
  )

- # Removing a SAM2 object (v0.2+, supported only with use_legacy=False)
+ # Remove an object (v0.2+, only when use_legacy=False)
  seg.remove_object("cup_0")

-
  seg.close()
  ```

- #### Segmentation v0.2 configuration summary (defaults/choices)
- The result of `seg.get_capabilities()` can vary with server configuration. Below are the v0.2 defaults.
+ Additional Segmentation APIs and behaviors
+
+ - `benchmark=True` in the constructor enables timing counters (`call_time`, `call_count`) for `add_image_prompt`, `register_first_frame`, and `get_next`.
+ - `switch_compression_strategy()` lets you change the compression strategy at runtime.
+ - `register_first_frame()` returns `True`/`False` and raises `ValueError` if image prompts are missing when `use_image_prompt=True`.
+ - `register_first_frame()` accepts a list of prompt IDs when `use_image_prompt=True`.
+ - `get_next()` returns `None` if called before registration; it can also return the server error dict when available.
+ - `reset()` performs a server-side reset, while `finish()` clears only local state.
+ - Exposed state: `tracking_object_ids`, `current_frame_masks`, `invisible_object_ids`.
+ - Backward-compat alias: `NrmkRealtimeSegmentation`.
+
+ #### Segmentation v0.2 config summary (defaults/choices)
+ `seg.get_capabilities()` can differ depending on server configuration. The following reflects v0.2 defaults.
+
  ```yaml
  grounding_dino:
    backbone:
@@ -187,70 +198,82 @@ sam2:
      default: false
  ```

- #### Segmentation v0.2 caveats/changes
- - If SAM2 VRAM estimation fails, `seg.get_next()` may return `{"result":"ERROR"}`; handling the error and then doing a `reset`/re-registration is recommended.
- - SAM2 `compile=True` can make first-frame registration and `reset` slower.
- - SAM2 CPU offloading works best when `offload_state_to_cpu=True` and `offload_video_to_cpu=True` are set together (legacy mode does not support `offload_video_to_cpu`).
- - SAM2 `remove_object` is supported only with `use_legacy=False`.
- - GroundingDINO gained the Swin-B backbone, and a prompt-token merging issue was fixed.
+ #### Segmentation v0.2 notes and changes
+
+ - If SAM2 VRAM estimation fails, `seg.get_next()` may return `{"result":"ERROR"}`. Handle the error and call `reset` before re-registering.
+ - `compile=True` can slow down first-frame registration and `reset`.
+ - CPU offloading is most effective when both `offload_state_to_cpu=True` and `offload_video_to_cpu=True` are set (legacy mode does not support `offload_video_to_cpu`).
+ - `remove_object` is supported only when `use_legacy=False`.
+ - GroundingDINO added the Swin-B backbone and fixed prompt-token merge issues.

  ### Pose Estimation

- **Mesh file upload**: upload the mesh file (stl) to register/recognize to the '/opt/meshes/' path on the host PC (you can also copy the file over SSH yourself)
+ **Mesh upload**: Upload the mesh file (STL) to `/opt/meshes/` on the host PC. You can also use SSH directly.
+
  ```python
  from neuromeka_vfm import upload_mesh
+
  upload_mesh(
      host="192.168.10.63",
      user="user",
-     password="pass",
-     local="mesh/my_mesh.stl", # mesh path on the client PC
-     remote="/opt/meshes/my_mesh.stl", # mesh path on the host PC (Docker volume mount)
+     password="pass",
+     local="mesh/my_mesh.stl", # local mesh path
+     remote="/opt/meshes/my_mesh.stl", # host mesh path (Docker volume)
  )
  ```

- Initialization
+ Initialization
+
  ```python
  from neuromeka_vfm import PoseEstimation

- pose = PoseEstimation(host="192.168.10.72", port=5557)
+ pose = PoseEstimation(host="192.168.10.72", port=5557)

  pose.init(
      mesh_path="/app/modules/foundation_pose/mesh/my_mesh.stl",
-     apply_scale=1.0,
+     apply_scale=1.0,
      track_refine_iter=3,
      min_n_views=40,
-     inplane_step=60
+     inplane_step=60,
  )
  ```
- - mesh_path: path to the object mesh file (STL/OBJ, etc.) to use. Initialization fails if it is missing.
- - apply_scale: scale factor applied to the whole mesh after loading. A unitless multiplier.
-   - 1.0 if the STL model is in meters (no scaling)
-   - 0.01 if the STL model is in centimeters (1 cm → 0.01 m)
-   - 0.001 if the STL model is in millimeters (1 mm → 0.001 m)
- - force_apply_color: when True, forces a solid-color texture onto the mesh. Used for stable visualization when the mesh carries no color.
- - apply_color: RGB color tuple (0-255) applied when force_apply_color is True.
- - est_refine_iter: number of pose-refinement iterations in the initial registration (register) step. Larger values: higher accuracy, longer compute time.
- - track_refine_iter: pose-refinement iterations per frame in the tracking (track) step.
- - min_n_views: minimum number of camera views generated during initial sampling (affects the number of rotation candidates).
- - inplane_step: in-plane rotation sampling step (in degrees). Smaller values generate more rotation candidates.
-
-
- Recognition and tracking
+
+ - mesh_path: path to the mesh file (STL/OBJ). Initialization fails if missing.
+ - apply_scale: scalar applied after loading the mesh.
+   - STL in meters: 1.0 (no scaling)
+   - STL in centimeters: 0.01 (1 cm -> 0.01 m)
+   - STL in millimeters: 0.001 (1 mm -> 0.001 m)
+ - force_apply_color: if True, forces a solid color when the mesh lacks color data.
+ - apply_color: RGB tuple (0-255) used when `force_apply_color=True`.
+ - est_refine_iter: number of refinement iterations during registration (higher = more accurate, slower).
+ - track_refine_iter: number of refinement iterations per frame during tracking.
+ - min_n_views: minimum number of sampled camera views (affects rotation candidates).
+ - inplane_step: in-plane rotation step in degrees (smaller = more candidates).
+
+ Registration and tracking
+
  ```python
- # Initial registration (server default when iteration is omitted; check_vram=True pre-checks VRAM)
+ # Registration (server defaults when iteration is omitted, check_vram=True pre-checks VRAM)
  register_resp = pose.register(rgb=rgb0, depth=depth0, mask=mask0, K=cam_K, check_vram=True)

- # Tracking (the search area can be limited with bbox_xywh)
+ # Tracking (optionally limit search area with bbox_xywh)
  track_resp = pose.track(rgb=rgb1, depth=depth1, K=cam_K, bbox_xywh=bbox_xywh)
+
  pose.close()
  ```
- - cam_K: camera intrinsic
- - A large RGB resolution, a large min_n_views value, or a small inplane_step can cause GPU VRAM overflow errors.
- - With register check_vram=True, a VRAM pre-check prevents a server shutdown from overflow.

+ - cam_K: camera intrinsics.
+ - Large RGB resolution, large `min_n_views`, or small `inplane_step` can cause GPU VRAM errors.
+ - `check_vram=True` in `register` performs a pre-check to prevent server shutdown due to OOM.
+ - `iteration` in `register`/`track` can override the server default if provided.
+ - `reset()` resets the server state; `reset_object()` reuses the cached mesh to rebuild the rotation grid.
+ - Default host/port can come from `FPOSE_HOST` and `FPOSE_PORT` environment variables.
+ - Backward-compat alias: `FoundationPoseClient`.
+
+ <!--
+ ## Benchmark

- ## VFM (Vision Foundation Model) latency benchmark
- Measured with the servers running locally. Empty cells are not yet measured.
+ Measured on local servers. Empty cells are not yet measured.

  **RTX 5060**
  | Task | Prompt | None (s) | JPEG (s) | PNG (s) | h264 (s) |
@@ -269,17 +292,10 @@ pose.close()
  | SAM2 | - | | | | |
  | FoundationPose registration | - | 0.4 | - | | |
  | FoundationPose track | - | 0.03 | | | |
+ -->

- **Jetson Orin**
- | Task | Prompt | None (s) | JPEG (s) | PNG (s) | h264 (s) |
- | --- | --- | --- | --- | --- | --- |
- | Grounding DINO | text (human . cup .) | | | | |
- | DINOv2 | image prompt | | | | |
- | SAM2 | - | | | | |
- | FoundationPose registration | - | 0.4 | - | | |
- | FoundationPose track | - | 0.03 | | | |
+ ## Release notes

- ## Release notes
- - 0.1.2: Improved success detection for Segmentation responses (all of `result`/`success`/`status` supported), fixed image-prompt registration/usage errors, added the `check_vram` option to PoseEstimation `register`.
- - 0.1.1: Improved resource cleanup in PoseEstimation/Segmentation, use the server default when iteration is not passed, added a pose demo example.
- - 0.1.0: Initial public release. Includes the FoundationPose RPC client, the real-time segmentation client, and the SSH-based mesh upload CLI/API.
+ - 0.1.2: Improved success detection for Segmentation responses (`result`/`success`/`status`), fixed image prompt registration/usage, added `check_vram` to PoseEstimation `register`.
+ - 0.1.1: Improved resource cleanup in PoseEstimation/Segmentation, use server defaults when iteration is omitted, added pose demo example.
+ - 0.1.0: Initial public release. Includes FoundationPose RPC client, real-time segmentation client, SSH-based mesh upload CLI/API.
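
The "Additional Segmentation APIs and behaviors" list introduced above ships without a snippet. The following is a minimal sketch of how those pieces could fit together, assuming the constructor and method signatures shown in the diff; the placeholder frames are synthetic, and the argument to `switch_compression_strategy()` is assumed to be the strategy name.

```python
import numpy as np
from neuromeka_vfm import Segmentation

ref_a = np.zeros((480, 640, 3), np.uint8)      # placeholder reference image
ref_b = np.zeros((480, 640, 3), np.uint8)      # placeholder reference image
first_rgb = np.zeros((480, 640, 3), np.uint8)  # placeholder first frame
next_rgb = np.zeros((480, 640, 3), np.uint8)   # placeholder next frame

# benchmark=True enables the call_time / call_count counters listed above.
seg = Segmentation(
    hostname="localhost",
    port=5432,
    compression_strategy="png",
    benchmark=True,
)

# Register several image prompts, then pass their IDs as a list.
seg.add_image_prompt("cup", ref_a)
seg.add_image_prompt("box", ref_b)
ok = seg.register_first_frame(frame=first_rgb, prompt=["cup", "box"], use_image_prompt=True)

if ok:  # register_first_frame returns True/False
    masks = seg.get_next(next_rgb)
    print(seg.call_time, seg.call_count)  # accumulated only when benchmark=True

# Assumption: the new strategy name is the sole argument.
seg.switch_compression_strategy("jpeg")

seg.finish()  # local cleanup only; seg.reset() would also reset the server
seg.close()
```
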
2. neuromeka_vfm-0.1.6/README.md

@@ -0,0 +1,254 @@
+ # neuromeka_vfm
+
+ A lightweight client SDK for communicating with Segmentation (SAM2, Grounding DINO) and Pose Estimation (NVIDIA FoundationPose) servers over RPC/ZeroMQ. It also provides SSH/SFTP utilities to upload mesh files to the host.
+
+ - Website: http://www.neuromeka.com
+ - PyPI package: https://pypi.org/project/neuromeka_vfm/
+ - Documents: https://docs.neuromeka.com
+
+ ## Installation
+
+ ```bash
+ pip install neuromeka_vfm
+ ```
+
+ ## Python API (usage by example)
+
+ - Client PC: the machine running your application with this package installed.
+ - Host PC: the machine running Segmentation and Pose Estimation Docker servers. If you run Docker locally, use `localhost`.
+
+ ### Segmentation
+
+ ```python
+ from neuromeka_vfm import Segmentation
+
+ seg = Segmentation(
+     hostname="192.168.10.63",
+     port=5432,
+     compression_strategy="png", # none | png | jpeg | h264
+ )
+
+ # Register using an image prompt
+ seg.add_image_prompt("drug_box", ref_rgb)
+ seg.register_first_frame(
+     frame=first_rgb,
+     prompt="drug_box", # ID string
+     use_image_prompt=True,
+ )
+
+ # Register using a text prompt
+ seg.register_first_frame(
+     frame=first_rgb,
+     prompt="box .", # Text prompt (must end with " .")
+     use_image_prompt=False,
+ )
+
+ # SAM2 tracking on the registered mask(s)
+ resp = seg.get_next(next_rgb)
+ if isinstance(resp, dict) and resp.get("result") == "ERROR":
+     print(f"Tracking error: {resp.get('message')}")
+     seg.reset()
+ else:
+     masks = resp
+
+ # Segmentation settings / model selection (nrmk_realtime_segmentation v0.2+)
+ caps = seg.get_capabilities()["data"]
+ current = seg.get_config()["data"]
+ seg.set_config(
+     {
+         "grounding_dino": {
+             "backbone": "Swin-B", # Swin-T | Swin-B
+             "box_threshold": 0.35,
+             "text_threshold": 0.25,
+         },
+         "dino_detection": {
+             "threshold": 0.5,
+             "target_multiplier": 25,
+             "img_multiplier": 50,
+             "background_threshold": -1.0,
+             "final_erosion_count": 10,
+             "segment_min_size": 20,
+         },
+         "sam2": {
+             "model": "facebook/sam2.1-hiera-large",
+             "use_legacy": False,
+             "compile": False,
+             "offload_state_to_cpu": False,
+             "offload_video_to_cpu": False,
+         },
+     }
+ )
+
+ # Remove an object (v0.2+, only when use_legacy=False)
+ seg.remove_object("cup_0")
+
+ seg.close()
+ ```
+
+ Additional Segmentation APIs and behaviors
+
+ - `benchmark=True` in the constructor enables timing counters (`call_time`, `call_count`) for `add_image_prompt`, `register_first_frame`, and `get_next`.
+ - `switch_compression_strategy()` lets you change the compression strategy at runtime.
+ - `register_first_frame()` returns `True`/`False` and raises `ValueError` if image prompts are missing when `use_image_prompt=True`.
+ - `register_first_frame()` accepts a list of prompt IDs when `use_image_prompt=True`.
+ - `get_next()` returns `None` if called before registration; it can also return the server error dict when available.
+ - `reset()` performs a server-side reset, while `finish()` clears only local state.
+ - Exposed state: `tracking_object_ids`, `current_frame_masks`, `invisible_object_ids`.
+ - Backward-compat alias: `NrmkRealtimeSegmentation`.
+
+ #### Segmentation v0.2 config summary (defaults/choices)
+ `seg.get_capabilities()` can differ depending on server configuration. The following reflects v0.2 defaults.
+
+ ```yaml
+ grounding_dino:
+   backbone:
+     choices:
+     - Swin-B
+     - Swin-T
+     default: Swin-T
+   box_threshold:
+     default: 0.35
+     min: 0.0
+     max: 1.0
+   text_threshold:
+     default: 0.25
+     min: 0.0
+     max: 1.0
+
+ dino_detection:
+   threshold:
+     default: 0.5
+   target_multiplier:
+     default: 25
+   img_multiplier:
+     default: 50
+   background_threshold:
+     default: -1.0
+   final_erosion_count:
+     default: 10
+   segment_min_size:
+     default: 20
+
+ sam2:
+   model:
+     choices:
+     - facebook/sam2-hiera-base-plus
+     - facebook/sam2-hiera-large
+     - facebook/sam2-hiera-small
+     - facebook/sam2-hiera-tiny
+     - facebook/sam2.1-hiera-base-plus
+     - facebook/sam2.1-hiera-large
+     - facebook/sam2.1-hiera-small
+     - facebook/sam2.1-hiera-tiny
+     default: facebook/sam2.1-hiera-large
+   use_legacy:
+     default: false
+   compile:
+     default: false
+   offload_state_to_cpu:
+     default: false
+   offload_video_to_cpu:
+     default: false
+ ```
+
+ #### Segmentation v0.2 notes and changes
+
+ - If SAM2 VRAM estimation fails, `seg.get_next()` may return `{"result":"ERROR"}`. Handle the error and call `reset` before re-registering.
+ - `compile=True` can slow down first-frame registration and `reset`.
+ - CPU offloading is most effective when both `offload_state_to_cpu=True` and `offload_video_to_cpu=True` are set (legacy mode does not support `offload_video_to_cpu`).
+ - `remove_object` is supported only when `use_legacy=False`.
+ - GroundingDINO added the Swin-B backbone and fixed prompt-token merge issues.
+
+ ### Pose Estimation
+
+ **Mesh upload**: Upload the mesh file (STL) to `/opt/meshes/` on the host PC. You can also use SSH directly.
+
+ ```python
+ from neuromeka_vfm import upload_mesh
+
+ upload_mesh(
+     host="192.168.10.63",
+     user="user",
+     password="pass",
+     local="mesh/my_mesh.stl", # local mesh path
+     remote="/opt/meshes/my_mesh.stl", # host mesh path (Docker volume)
+ )
+ ```
+
+ Initialization
+
+ ```python
+ from neuromeka_vfm import PoseEstimation
+
+ pose = PoseEstimation(host="192.168.10.72", port=5557)
+
+ pose.init(
+     mesh_path="/app/modules/foundation_pose/mesh/my_mesh.stl",
+     apply_scale=1.0,
+     track_refine_iter=3,
+     min_n_views=40,
+     inplane_step=60,
+ )
+ ```
+
+ - mesh_path: path to the mesh file (STL/OBJ). Initialization fails if missing.
+ - apply_scale: scalar applied after loading the mesh.
+   - STL in meters: 1.0 (no scaling)
+   - STL in centimeters: 0.01 (1 cm -> 0.01 m)
+   - STL in millimeters: 0.001 (1 mm -> 0.001 m)
+ - force_apply_color: if True, forces a solid color when the mesh lacks color data.
+ - apply_color: RGB tuple (0-255) used when `force_apply_color=True`.
+ - est_refine_iter: number of refinement iterations during registration (higher = more accurate, slower).
+ - track_refine_iter: number of refinement iterations per frame during tracking.
+ - min_n_views: minimum number of sampled camera views (affects rotation candidates).
+ - inplane_step: in-plane rotation step in degrees (smaller = more candidates).
+
+ Registration and tracking
+
+ ```python
+ # Registration (server defaults when iteration is omitted, check_vram=True pre-checks VRAM)
+ register_resp = pose.register(rgb=rgb0, depth=depth0, mask=mask0, K=cam_K, check_vram=True)
+
+ # Tracking (optionally limit search area with bbox_xywh)
+ track_resp = pose.track(rgb=rgb1, depth=depth1, K=cam_K, bbox_xywh=bbox_xywh)
+
+ pose.close()
+ ```
+
+ - cam_K: camera intrinsics.
+ - Large RGB resolution, large `min_n_views`, or small `inplane_step` can cause GPU VRAM errors.
+ - `check_vram=True` in `register` performs a pre-check to prevent server shutdown due to OOM.
+ - `iteration` in `register`/`track` can override the server default if provided.
+ - `reset()` resets the server state; `reset_object()` reuses the cached mesh to rebuild the rotation grid.
+ - Default host/port can come from `FPOSE_HOST` and `FPOSE_PORT` environment variables.
+ - Backward-compat alias: `FoundationPoseClient`.
+
+ <!--
+ ## Benchmark
+
+ Measured on local servers. Empty cells are not yet measured.
+
+ **RTX 5060**
+ | Task | Prompt | None (s) | JPEG (s) | PNG (s) | h264 (s) |
+ | --- | --- | --- | --- | --- | --- |
+ | Grounding DINO | text (human . cup .) | 0.86 | 0.35 | 0.50 | 0.52 |
+ | DINOv2 | image prompt | 0.85 | 0.49 | 0.65 | 0.63 |
+ | SAM2 | - | | | | |
+ | FoundationPose registration | - | | | | |
+ | FoundationPose track | - | | | | |
+
+ **RTX 5090**
+ | Task | Prompt | None (s) | JPEG (s) | PNG (s) | h264 (s) |
+ | --- | --- | --- | --- | --- | --- |
+ | Grounding DINO | text (human . cup .) | | | | |
+ | DINOv2 | image prompt | | | | |
+ | SAM2 | - | | | | |
+ | FoundationPose registration | - | 0.4 | - | | |
+ | FoundationPose track | - | 0.03 | | | |
+ -->
+
+ ## Release notes
+
+ - 0.1.2: Improved success detection for Segmentation responses (`result`/`success`/`status`), fixed image prompt registration/usage, added `check_vram` to PoseEstimation `register`.
+ - 0.1.1: Improved resource cleanup in PoseEstimation/Segmentation, use server defaults when iteration is omitted, added pose demo example.
+ - 0.1.0: Initial public release. Includes FoundationPose RPC client, real-time segmentation client, SSH-based mesh upload CLI/API.
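
The PoseEstimation notes in the README above mention environment-variable defaults, a per-call `iteration` override, and `reset_object()`, none of which appear in its code blocks. A hedged sketch under those assumptions: the camera inputs are placeholders, and constructing `PoseEstimation()` without arguments is assumed to pick up the environment variables.

```python
import os
import numpy as np
from neuromeka_vfm import PoseEstimation

# Per the README, FPOSE_HOST / FPOSE_PORT can supply the defaults.
os.environ["FPOSE_HOST"] = "192.168.10.72"
os.environ["FPOSE_PORT"] = "5557"
pose = PoseEstimation()  # assumption: no-arg construction reads the env vars

pose.init(
    mesh_path="/app/modules/foundation_pose/mesh/my_mesh.stl",
    apply_scale=0.001,  # e.g. an STL modeled in millimeters
)

# Placeholder inputs; in practice these come from an RGB-D camera.
rgb0 = np.zeros((480, 640, 3), np.uint8)
depth0 = np.zeros((480, 640), np.float32)
mask0 = np.ones((480, 640), np.uint8)
cam_K = np.array([[600.0, 0.0, 320.0], [0.0, 600.0, 240.0], [0.0, 0.0, 1.0]])

# iteration overrides the server default when provided.
pose.register(rgb=rgb0, depth=depth0, mask=mask0, K=cam_K, iteration=5, check_vram=True)
pose.track(rgb=rgb0, depth=depth0, K=cam_K, iteration=2)

# Rebuild the rotation grid from the cached mesh without re-sending it.
pose.reset_object()
pose.close()
```
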
3. {neuromeka_vfm-0.1.5 → neuromeka_vfm-0.1.6}/pyproject.toml

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

  [project]
  name = "neuromeka_vfm"
- version = "0.1.5"
+ version = "0.1.6"
  description = "Client utilities for Neuromeka VFM FoundationPose RPC (upload meshes, call server)"
  readme = "README.md"
  requires-python = ">=3.8"
4. {neuromeka_vfm-0.1.5 → neuromeka_vfm-0.1.6}/src/neuromeka_vfm/__init__.py

@@ -2,7 +2,6 @@ from .pose_estimation import PoseEstimation, FoundationPoseClient
  from .upload_mesh import upload_mesh
  from .segmentation import Segmentation, NrmkRealtimeSegmentation
  from .compression import STRATEGIES as SEGMENTATION_COMPRESSION_STRATEGIES
- from .grasp_gen import GraspPoseGeneration

  __all__ = [
      "PoseEstimation",
@@ -11,5 +10,4 @@ __all__ = [
      "Segmentation",
      "NrmkRealtimeSegmentation",
      "SEGMENTATION_COMPRESSION_STRATEGIES",
-     "GraspPoseGeneration",
  ]
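
The only behavioral change here is the loss of the `GraspPoseGeneration` export. An illustrative sanity check of the remaining public surface (not part of the diff; the expected strategy names come from the README):

```python
from neuromeka_vfm import (
    PoseEstimation,
    FoundationPoseClient,        # backward-compat alias for PoseEstimation
    Segmentation,
    NrmkRealtimeSegmentation,    # backward-compat alias for Segmentation
    upload_mesh,
    SEGMENTATION_COMPRESSION_STRATEGIES,
)

# Per the README, the strategies should cover none / png / jpeg / h264.
print(SEGMENTATION_COMPRESSION_STRATEGIES)
```
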
5. neuromeka_vfm-0.1.6/src/neuromeka_vfm/generate_mesh.py

@@ -0,0 +1,122 @@
+ """
+ Utility to generate simple parametric meshes (currently a rectangular box) as binary STL.
+
+ Design goals
+ - Units: meters
+ - Origin: object center at (0, 0, 0)
+ - Axes: faces aligned to +/-X, +/-Y, +/-Z
+ - Output: binary STL saved to /opt/meshes (Docker volume mount)
+
+ Usage (programmatic):
+     from neuromeka_vfm.generate_mesh import write_box_stl
+     path = write_box_stl("custom_box.stl", width=0.054, depth=0.097, height=0.054)
+
+ CLI (optional):
+     python -m neuromeka_vfm.generate_mesh box custom_box.stl 0.054 0.097 0.054
+ """
+
+ from __future__ import annotations
+
+ import struct
+ import sys
+ from pathlib import Path
+ from typing import Iterable
+
+ MESH_DIR = Path("/opt/meshes")
+
+
+ def _ensure_dir(path: Path) -> None:
+     path.parent.mkdir(parents=True, exist_ok=True)
+
+
+ def _pack_triangle(normal: Iterable[float], v1: Iterable[float], v2: Iterable[float], v3: Iterable[float]) -> bytes:
+     """Pack one triangle (normal + 3 vertices) into a binary STL record."""
+     return struct.pack(
+         "<12fH",
+         *normal,
+         *v1,
+         *v2,
+         *v3,
+         0,  # attribute byte count
+     )
+
+
+ def _box_triangles(width: float, depth: float, height: float):
+     """Generate normals and vertices for a box centered at the origin."""
+     hx, hy, hz = width / 2.0, depth / 2.0, height / 2.0
+     # 8 vertices
+     p = [
+         (-hx, -hy, -hz),
+         (hx, -hy, -hz),
+         (hx, hy, -hz),
+         (-hx, hy, -hz),
+         (-hx, -hy, hz),
+         (hx, -hy, hz),
+         (hx, hy, hz),
+         (-hx, hy, hz),
+     ]
+     # Each face: two triangles (ccw when looking from outside)
+     faces = [
+         ((-1, 0, 0), (0, 3, 7, 4)),  # -X
+         ((1, 0, 0), (1, 2, 6, 5)),   # +X
+         ((0, -1, 0), (0, 1, 5, 4)),  # -Y
+         ((0, 1, 0), (3, 2, 6, 7)),   # +Y
+         ((0, 0, -1), (0, 1, 2, 3)),  # -Z
+         ((0, 0, 1), (4, 5, 6, 7)),   # +Z
+     ]
+     for normal, idx in faces:
+         if len(idx) == 4:
+             a, b, c, d = idx
+             # two triangles: (a,b,c) and (a,c,d)
+             yield normal, p[a], p[b], p[c]
+             yield normal, p[a], p[c], p[d]
+         else:
+             raise ValueError("Face index must have 4 vertices.")
+
+
+ def write_box_stl(filename: str, width: float, depth: float, height: float) -> Path:
+     """
+     Create a rectangular box STL.
+
+     Args:
+         filename: output file name (placed under /opt/meshes). If only a name is
+             given, it is resolved relative to MESH_DIR.
+         width, depth, height: box dimensions in meters (must be > 0).
+
+     Returns:
+         Path to the written STL file.
+     """
+     if width <= 0 or depth <= 0 or height <= 0:
+         raise ValueError("width, depth, height must be positive.")
+
+     out_path = Path(filename)
+     if not out_path.is_absolute():
+         out_path = MESH_DIR / out_path
+     _ensure_dir(out_path)
+
+     triangles = list(_box_triangles(width, depth, height))
+     header = b"rect_box_stl" + b"\0" * (80 - len("rect_box_stl"))
+     with out_path.open("wb") as f:
+         f.write(header)
+         f.write(struct.pack("<I", len(triangles)))
+         for tri in triangles:
+             f.write(_pack_triangle(*tri))
+     return out_path
+
+
+ def _cli(args: list[str]) -> int:
+     if len(args) != 5 or args[0].lower() != "box":
+         print("Usage: python -m neuromeka_vfm.generate_mesh box <filename> <width> <depth> <height>")
+         return 1
+     _, fname, w, d, h = args
+     try:
+         path = write_box_stl(fname, float(w), float(d), float(h))
+     except Exception as exc:  # noqa: BLE001
+         print(f"Error: {exc}")
+         return 1
+     print(f"STL written to: {path}")
+     return 0
+
+
+ if __name__ == "__main__":
+     sys.exit(_cli(sys.argv[1:]))
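
Since `write_box_stl` resolves a bare filename under `/opt/meshes` on whatever machine it runs on, a natural pairing with the rest of the package is to generate the STL locally at an absolute path and push it to the host with `upload_mesh`. A sketch using only the APIs shown in this diff; the host address and credentials are placeholders:

```python
from neuromeka_vfm import upload_mesh
from neuromeka_vfm.generate_mesh import write_box_stl

# Generate a 54 x 97 x 54 mm box (dimensions in meters). An absolute
# output path bypasses MESH_DIR, so /opt/meshes is not required locally.
path = write_box_stl("/tmp/custom_box.stl", width=0.054, depth=0.097, height=0.054)

# Push it to the host directory mounted into the FoundationPose container.
upload_mesh(
    host="192.168.10.63",  # placeholder host
    user="user",
    password="pass",
    local=str(path),
    remote="/opt/meshes/custom_box.stl",
)
```
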
6. {neuromeka_vfm-0.1.5 → neuromeka_vfm-0.1.6/src/neuromeka_vfm.egg-info}/PKG-INFO: identical diff to file 1 above.
7. {neuromeka_vfm-0.1.5 → neuromeka_vfm-0.1.6}/src/neuromeka_vfm.egg-info/SOURCES.txt

@@ -3,7 +3,7 @@ README.md
  pyproject.toml
  src/neuromeka_vfm/__init__.py
  src/neuromeka_vfm/compression.py
- src/neuromeka_vfm/grasp_gen.py
+ src/neuromeka_vfm/generate_mesh.py
  src/neuromeka_vfm/pickle_client.py
  src/neuromeka_vfm/point_cloud_utils.py
  src/neuromeka_vfm/pose_estimation.py
8. neuromeka_vfm-0.1.5/README.md

@@ -1,238 +0,0 @@
- # neuromeka_vfm
-
- A simple utility package for communicating with the Segmentation (SAM2, Grounding DINO) and Pose Estimation (NVIDIA FoundationPose) servers (RPC, ZeroMQ) from the client PC, and for uploading meshes to the host over SSH/SFTP.
-
- - Website: http://www.neuromeka.com
- - Source code: https://github.com/neuromeka-robotics/neuromeka_vfm
- - PyPI package: https://pypi.org/project/neuromeka_vfm/
- - Documents: https://docs.neuromeka.com
-
- ## Also usable through the Web UI (VFM Tester)
-
- - VFM Tester (Web UI): https://gitlab.com/neuromeka-group/nrmkq/nrmk_vfm_tester
-
-
- ## Installation
- ```bash
- pip install neuromeka_vfm
- ```
-
- ## Python API (usage shown by examples)
-
- * My PC: the PC that implements the application and has this package (neuromeka_vfm) installed
- * Server PC (Host): the PC with the Segmentation and Pose Estimation Docker servers installed. Use localhost when Docker is installed on my PC.
-
- ### Segmentation
- ```python
- from neuromeka_vfm import Segmentation
-
- seg = Segmentation(
-     hostname="192.168.10.63",
-     port=5432,
-     compression_strategy="png", # none | png | jpeg | h264
- )
-
- # Registration using an image prompt
- seg.add_image_prompt("drug_box", ref_rgb)
- seg.register_first_frame(frame=first_rgb,
-                          prompt="drug_box", # ID str
-                          use_image_prompt=True)
-
- # Registration using a text prompt
- seg.register_first_frame(frame=first_rgb,
-                          prompt="box .", # Text prompt (a trailing space and "." are required)
-                          use_image_prompt=False)
-
- # SAM2 tracking of the registered masks
- resp = seg.get_next(next_rgb)
- if isinstance(resp, dict) and resp.get("result") == "ERROR":
-     print(f"Tracking error: {resp.get('message')}")
-     seg.reset()
- else:
-     masks = resp
-
- # Segmentation configuration / model selection (nrmk_realtime_segmentation v0.2+)
- caps = seg.get_capabilities()["data"]
- current = seg.get_config()["data"]
- seg.set_config(
-     {
-         "grounding_dino": {
-             "backbone": "Swin-B", # Swin-T | Swin-B
-             "box_threshold": 0.35,
-             "text_threshold": 0.25,
-         },
-         "dino_detection": {
-             "threshold": 0.5,
-             "target_multiplier": 25,
-             "img_multiplier": 50,
-             "background_threshold": -1.0,
-             "final_erosion_count": 10,
-             "segment_min_size": 20,
-         },
-         "sam2": {
-             "model": "facebook/sam2.1-hiera-large",
-             "use_legacy": False,
-             "compile": False,
-             "offload_state_to_cpu": False,
-             "offload_video_to_cpu": False,
-         },
-     }
- )
-
- # Removing a SAM2 object (v0.2+, supported only with use_legacy=False)
- seg.remove_object("cup_0")
-
-
- seg.close()
- ```
-
- #### Segmentation v0.2 configuration summary (defaults/choices)
- The result of `seg.get_capabilities()` can vary with server configuration. Below are the v0.2 defaults.
- ```yaml
- grounding_dino:
-   backbone:
-     choices:
-     - Swin-B
-     - Swin-T
-     default: Swin-T
-   box_threshold:
-     default: 0.35
-     min: 0.0
-     max: 1.0
-   text_threshold:
-     default: 0.25
-     min: 0.0
-     max: 1.0
-
- dino_detection:
-   threshold:
-     default: 0.5
-   target_multiplier:
-     default: 25
-   img_multiplier:
-     default: 50
-   background_threshold:
-     default: -1.0
-   final_erosion_count:
-     default: 10
-   segment_min_size:
-     default: 20
-
- sam2:
-   model:
-     choices:
-     - facebook/sam2-hiera-base-plus
-     - facebook/sam2-hiera-large
-     - facebook/sam2-hiera-small
-     - facebook/sam2-hiera-tiny
-     - facebook/sam2.1-hiera-base-plus
-     - facebook/sam2.1-hiera-large
-     - facebook/sam2.1-hiera-small
-     - facebook/sam2.1-hiera-tiny
-     default: facebook/sam2.1-hiera-large
-   use_legacy:
-     default: false
-   compile:
-     default: false
-   offload_state_to_cpu:
-     default: false
-   offload_video_to_cpu:
-     default: false
- ```
-
- #### Segmentation v0.2 caveats/changes
- - If SAM2 VRAM estimation fails, `seg.get_next()` may return `{"result":"ERROR"}`; handling the error and then doing a `reset`/re-registration is recommended.
- - SAM2 `compile=True` can make first-frame registration and `reset` slower.
- - SAM2 CPU offloading works best when `offload_state_to_cpu=True` and `offload_video_to_cpu=True` are set together (legacy mode does not support `offload_video_to_cpu`).
- - SAM2 `remove_object` is supported only with `use_legacy=False`.
- - GroundingDINO gained the Swin-B backbone, and a prompt-token merging issue was fixed.
-
- ### Pose Estimation
-
- **Mesh file upload**: upload the mesh file (stl) to register/recognize to the '/opt/meshes/' path on the host PC (you can also copy the file over SSH yourself)
- ```python
- from neuromeka_vfm import upload_mesh
- upload_mesh(
-     host="192.168.10.63",
-     user="user",
-     password="pass",
-     local="mesh/my_mesh.stl", # mesh path on my PC
-     remote="/opt/meshes/my_mesh.stl", # mesh path on the host PC (Docker volume mount)
- )
- ```
-
- Initialization
- ```python
- from neuromeka_vfm import PoseEstimation
-
- pose = PoseEstimation(host="192.168.10.72", port=5557)
-
- pose.init(
-     mesh_path="/app/modules/foundation_pose/mesh/my_mesh.stl",
-     apply_scale=1.0,
-     track_refine_iter=3,
-     min_n_views=40,
-     inplane_step=60
- )
- ```
- - mesh_path: path to the object mesh file (STL/OBJ, etc.) to use. Initialization fails if it is missing.
- - apply_scale: scale factor applied to the whole mesh after loading. A unitless multiplier.
-   - 1.0 if the STL model is in meters (no scaling)
-   - 0.01 if the STL model is in centimeters (1 cm → 0.01 m)
-   - 0.001 if the STL model is in millimeters (1 mm → 0.001 m)
- - force_apply_color: when True, forces a solid-color texture onto the mesh. Used for stable visualization when the mesh carries no color.
- - apply_color: RGB color tuple (0-255) applied when force_apply_color is True.
- - est_refine_iter: number of pose-refinement iterations in the initial registration (register) step. Larger values: higher accuracy, longer compute time.
- - track_refine_iter: pose-refinement iterations per frame in the tracking (track) step.
- - min_n_views: minimum number of camera views generated during initial sampling (affects the number of rotation candidates).
- - inplane_step: in-plane rotation sampling step (in degrees). Smaller values generate more rotation candidates.
-
-
- Recognition and tracking
- ```python
- # Initial registration (server default when iteration is omitted; check_vram=True pre-checks VRAM)
- register_resp = pose.register(rgb=rgb0, depth=depth0, mask=mask0, K=cam_K, check_vram=True)
-
- # Tracking (the search area can be limited with bbox_xywh)
- track_resp = pose.track(rgb=rgb1, depth=depth1, K=cam_K, bbox_xywh=bbox_xywh)
- pose.close()
- ```
- - cam_K: camera intrinsic
- - A large RGB resolution, a large min_n_views value, or a small inplane_step can cause GPU VRAM overflow errors.
- - With register check_vram=True, a VRAM pre-check prevents a server shutdown from overflow.
-
-
- ## VFM (Vision Foundation Model) latency benchmark
- Measured with the servers running locally. Empty cells are not yet measured.
-
- **RTX 5060**
- | Task | Prompt | None (s) | JPEG (s) | PNG (s) | h264 (s) |
- | --- | --- | --- | --- | --- | --- |
- | Grounding DINO | text (human . cup .) | 0.86 | 0.35 | 0.50 | 0.52 |
- | DINOv2 | image prompt | 0.85 | 0.49 | 0.65 | 0.63 |
- | SAM2 | - | | | | |
- | FoundationPose registration | - | | | | |
- | FoundationPose track | - | | | | |
-
- **RTX 5090**
- | Task | Prompt | None (s) | JPEG (s) | PNG (s) | h264 (s) |
- | --- | --- | --- | --- | --- | --- |
- | Grounding DINO | text (human . cup .) | | | | |
- | DINOv2 | image prompt | | | | |
- | SAM2 | - | | | | |
- | FoundationPose registration | - | 0.4 | - | | |
- | FoundationPose track | - | 0.03 | | | |
-
- **Jetson Orin**
- | Task | Prompt | None (s) | JPEG (s) | PNG (s) | h264 (s) |
- | --- | --- | --- | --- | --- | --- |
- | Grounding DINO | text (human . cup .) | | | | |
- | DINOv2 | image prompt | | | | |
- | SAM2 | - | | | | |
- | FoundationPose registration | - | 0.4 | - | | |
- | FoundationPose track | - | 0.03 | | | |
-
- ## Release notes
- - 0.1.2: Improved success detection for Segmentation responses (all of `result`/`success`/`status` supported), fixed image-prompt registration/usage errors, added the `check_vram` option to PoseEstimation `register`.
- - 0.1.1: Improved resource cleanup in PoseEstimation/Segmentation, use the server default when iteration is not passed, added a pose demo example.
- - 0.1.0: Initial public release. Includes the FoundationPose RPC client, the real-time segmentation client, and the SSH-based mesh upload CLI/API.
9. neuromeka_vfm-0.1.5/src/neuromeka_vfm/grasp_gen.py

@@ -1,79 +0,0 @@
- from typing import Tuple
-
- import numpy as np
- import trimesh
-
- from . import point_cloud_utils
-
-
- class GraspPoseGeneration:
-     """
-     Wrapper class for point cloud utilities used in grasp pose workflows.
-     """
-
-     def knn_points(self, X: np.ndarray, K: int, norm: int):
-         return point_cloud_utils.knn_points(X=X, K=K, norm=norm)
-
-     def point_cloud_outlier_removal(
-         self, obj_pc: np.ndarray, threshold: float = 0.014, K: int = 20
-     ) -> Tuple[np.ndarray, np.ndarray]:
-         return point_cloud_utils.point_cloud_outlier_removal(
-             obj_pc=obj_pc, threshold=threshold, K=K
-         )
-
-     def point_cloud_outlier_removal_with_color(
-         self,
-         obj_pc: np.ndarray,
-         obj_pc_color: np.ndarray,
-         threshold: float = 0.014,
-         K: int = 20,
-     ) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
-         return point_cloud_utils.point_cloud_outlier_removal_with_color(
-             obj_pc=obj_pc,
-             obj_pc_color=obj_pc_color,
-             threshold=threshold,
-             K=K,
-         )
-
-     def depth_and_segmentation_to_point_clouds(
-         self,
-         depth_image: np.ndarray,
-         segmentation_mask: np.ndarray,
-         fx: float,
-         fy: float,
-         cx: float,
-         cy: float,
-         rgb_image: np.ndarray = None,
-         target_object_id: int = 1,
-         remove_object_from_scene: bool = False,
-     ) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
-         return point_cloud_utils.depth_and_segmentation_to_point_clouds(
-             depth_image=depth_image,
-             segmentation_mask=segmentation_mask,
-             fx=fx,
-             fy=fy,
-             cx=cx,
-             cy=cy,
-             rgb_image=rgb_image,
-             target_object_id=target_object_id,
-             remove_object_from_scene=remove_object_from_scene,
-         )
-
-     def filter_colliding_grasps(
-         self,
-         scene_pc: np.ndarray,
-         grasp_poses: np.ndarray,
-         gripper_collision_mesh: trimesh.Trimesh,
-         collision_threshold: float = 0.002,
-         num_collision_samples: int = 2000,
-     ) -> np.ndarray:
-         return point_cloud_utils.filter_colliding_grasps(
-             scene_pc=scene_pc,
-             grasp_poses=grasp_poses,
-             gripper_collision_mesh=gripper_collision_mesh,
-             collision_threshold=collision_threshold,
-             num_collision_samples=num_collision_samples,
-         )
-
-
- __all__ = ["GraspPoseGeneration"]
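
The removed class was a thin wrapper that delegated every call to `point_cloud_utils`, and that module ships unchanged in 0.1.6 (file 16 in the list above). Assuming its functions keep the signatures the wrapper passed through, the same operations remain reachable directly; the input array is synthetic here, and the meaning of the two returned arrays is inferred from the wrapper's type annotation rather than confirmed:

```python
import numpy as np
from neuromeka_vfm import point_cloud_utils

obj_pc = np.random.rand(500, 3).astype(np.float32)  # placeholder object point cloud

# Formerly: GraspPoseGeneration().point_cloud_outlier_removal(obj_pc, ...)
# Returns two arrays per the wrapper's Tuple[np.ndarray, np.ndarray] annotation.
kept, removed = point_cloud_utils.point_cloud_outlier_removal(
    obj_pc=obj_pc, threshold=0.014, K=20
)
```
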