neuromeka-vfm 0.1.4__tar.gz → 0.1.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {neuromeka_vfm-0.1.4/src/neuromeka_vfm.egg-info → neuromeka_vfm-0.1.5}/PKG-INFO +101 -2
- {neuromeka_vfm-0.1.4 → neuromeka_vfm-0.1.5}/README.md +98 -1
- {neuromeka_vfm-0.1.4 → neuromeka_vfm-0.1.5}/pyproject.toml +4 -2
- {neuromeka_vfm-0.1.4 → neuromeka_vfm-0.1.5}/src/neuromeka_vfm/__init__.py +2 -0
- neuromeka_vfm-0.1.5/src/neuromeka_vfm/grasp_gen.py +79 -0
- neuromeka_vfm-0.1.5/src/neuromeka_vfm/point_cloud_utils.py +377 -0
- {neuromeka_vfm-0.1.4 → neuromeka_vfm-0.1.5}/src/neuromeka_vfm/segmentation.py +54 -0
- {neuromeka_vfm-0.1.4 → neuromeka_vfm-0.1.5/src/neuromeka_vfm.egg-info}/PKG-INFO +101 -2
- {neuromeka_vfm-0.1.4 → neuromeka_vfm-0.1.5}/src/neuromeka_vfm.egg-info/SOURCES.txt +2 -0
- {neuromeka_vfm-0.1.4 → neuromeka_vfm-0.1.5}/src/neuromeka_vfm.egg-info/requires.txt +2 -0
- {neuromeka_vfm-0.1.4 → neuromeka_vfm-0.1.5}/LICENSE +0 -0
- {neuromeka_vfm-0.1.4 → neuromeka_vfm-0.1.5}/setup.cfg +0 -0
- {neuromeka_vfm-0.1.4 → neuromeka_vfm-0.1.5}/src/neuromeka_vfm/compression.py +0 -0
- {neuromeka_vfm-0.1.4 → neuromeka_vfm-0.1.5}/src/neuromeka_vfm/examples/__init__.py +0 -0
- {neuromeka_vfm-0.1.4 → neuromeka_vfm-0.1.5}/src/neuromeka_vfm/examples/pose_demo.py +0 -0
- {neuromeka_vfm-0.1.4 → neuromeka_vfm-0.1.5}/src/neuromeka_vfm/pickle_client.py +0 -0
- {neuromeka_vfm-0.1.4 → neuromeka_vfm-0.1.5}/src/neuromeka_vfm/pose_estimation.py +0 -0
- {neuromeka_vfm-0.1.4 → neuromeka_vfm-0.1.5}/src/neuromeka_vfm/upload_mesh.py +0 -0
- {neuromeka_vfm-0.1.4 → neuromeka_vfm-0.1.5}/src/neuromeka_vfm.egg-info/dependency_links.txt +0 -0
- {neuromeka_vfm-0.1.4 → neuromeka_vfm-0.1.5}/src/neuromeka_vfm.egg-info/entry_points.txt +0 -0
- {neuromeka_vfm-0.1.4 → neuromeka_vfm-0.1.5}/src/neuromeka_vfm.egg-info/top_level.txt +0 -0
PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: neuromeka_vfm
-Version: 0.1.4
+Version: 0.1.5
 Summary: Client utilities for Neuromeka VFM FoundationPose RPC (upload meshes, call server)
 Author: Neuromeka
 License: MIT License
@@ -41,6 +41,8 @@ Requires-Dist: numpy
 Requires-Dist: pyzmq
 Requires-Dist: paramiko
 Requires-Dist: av
+Requires-Dist: trimesh
+Requires-Dist: tqdm
 Dynamic: license-file

 # neuromeka_vfm
@@ -89,12 +91,109 @@ seg.register_first_frame(frame=first_rgb,
                          use_image_prompt=False)

 # SAM2 tracking of the registered masks
-
+resp = seg.get_next(next_rgb)
+if isinstance(resp, dict) and resp.get("result") == "ERROR":
+    print(f"Tracking error: {resp.get('message')}")
+    seg.reset()
+else:
+    masks = resp
+
+# Segmentation configuration / model selection (nrmk_realtime_segmentation v0.2+)
+caps = seg.get_capabilities()["data"]
+current = seg.get_config()["data"]
+seg.set_config(
+    {
+        "grounding_dino": {
+            "backbone": "Swin-B",  # Swin-T | Swin-B
+            "box_threshold": 0.35,
+            "text_threshold": 0.25,
+        },
+        "dino_detection": {
+            "threshold": 0.5,
+            "target_multiplier": 25,
+            "img_multiplier": 50,
+            "background_threshold": -1.0,
+            "final_erosion_count": 10,
+            "segment_min_size": 20,
+        },
+        "sam2": {
+            "model": "facebook/sam2.1-hiera-large",
+            "use_legacy": False,
+            "compile": False,
+            "offload_state_to_cpu": False,
+            "offload_video_to_cpu": False,
+        },
+    }
+)
+
+# Remove a SAM2 object (v0.2+, only supported with use_legacy=False)
+seg.remove_object("cup_0")


 seg.close()
 ```

+#### Segmentation v0.2 configuration summary (defaults/choices)
+The result of `seg.get_capabilities()` can differ depending on the server configuration; the v0.2 defaults are shown below.
+```yaml
+grounding_dino:
+  backbone:
+    choices:
+      - Swin-B
+      - Swin-T
+    default: Swin-T
+  box_threshold:
+    default: 0.35
+    min: 0.0
+    max: 1.0
+  text_threshold:
+    default: 0.25
+    min: 0.0
+    max: 1.0
+
+dino_detection:
+  threshold:
+    default: 0.5
+  target_multiplier:
+    default: 25
+  img_multiplier:
+    default: 50
+  background_threshold:
+    default: -1.0
+  final_erosion_count:
+    default: 10
+  segment_min_size:
+    default: 20
+
+sam2:
+  model:
+    choices:
+      - facebook/sam2-hiera-base-plus
+      - facebook/sam2-hiera-large
+      - facebook/sam2-hiera-small
+      - facebook/sam2-hiera-tiny
+      - facebook/sam2.1-hiera-base-plus
+      - facebook/sam2.1-hiera-large
+      - facebook/sam2.1-hiera-small
+      - facebook/sam2.1-hiera-tiny
+    default: facebook/sam2.1-hiera-large
+  use_legacy:
+    default: false
+  compile:
+    default: false
+  offload_state_to_cpu:
+    default: false
+  offload_video_to_cpu:
+    default: false
+```
+
+#### Segmentation v0.2 notes/changes
+- If SAM2 VRAM estimation fails, `seg.get_next()` may return `{"result":"ERROR"}`; handle the error, then `reset` and re-register.
+- With SAM2 `compile=True`, first-frame registration and `reset` can become slow.
+- SAM2 CPU offloading is most effective when `offload_state_to_cpu=True` and `offload_video_to_cpu=True` are set together (legacy mode does not support `offload_video_to_cpu`).
+- SAM2 `remove_object` is only supported with `use_legacy=False`.
+- GroundingDINO: a Swin-B backbone option was added and a prompt-token merging issue was fixed.
+
 ### Pose Estimation

 **Mesh file upload**: Upload the mesh file (stl) to be registered/recognized to '/opt/meshes/' on the host PC (the file can also be copied over directly via SSH)
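The README addition above introduces the v0.2 configuration RPCs (`get_capabilities`, `get_config`, `set_config`). Below is a minimal, hedged sketch of how a client might query the server before switching backbones; the hostname/port are placeholders, and the assumption that keys omitted from `set_config` keep their current values is not confirmed by this diff.

```python
# Minimal sketch (assumptions: reachable nrmk_realtime_segmentation v0.2+ server,
# placeholder hostname/port, partial set_config leaves other keys unchanged).
from neuromeka_vfm import Segmentation

seg = Segmentation("192.168.0.10", 20000)   # hostname/port are placeholders

caps = seg.get_capabilities()["data"]       # per-module choices/defaults (server-dependent)
current = seg.get_config()["data"]          # currently active configuration

# Switch GroundingDINO to Swin-B only if the server actually offers it.
backbone_choices = caps.get("grounding_dino", {}).get("backbone", {}).get("choices", [])
if "Swin-B" in backbone_choices:
    seg.set_config({"grounding_dino": {"backbone": "Swin-B"}})

print(seg.get_config()["data"])
seg.close()
```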
README.md

@@ -44,12 +44,109 @@ seg.register_first_frame(frame=first_rgb,
(The same block of changes as in the README section of the PKG-INFO diff above: the tracking/error-handling example, the v0.2 configuration calls and `remove_object`, the configuration summary, and the v0.2 notes, applied at README lines 44-152.)
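Since `get_next()` can now hand back a server status dict (for example `{"result": "ERROR", ...}` after a SAM2 VRAM estimation failure) instead of masks, a tracking loop should branch on the response before using it. A hedged sketch follows; the camera read, the `text_prompt` keyword, and the hostname/port are placeholders rather than documented API.

```python
# Hedged sketch of the recommended error-handling loop. get_frame(), text_prompt,
# and the hostname/port are placeholders, not part of the documented API.
import numpy as np
from neuromeka_vfm import Segmentation

def get_frame() -> np.ndarray:
    """Stand-in for a real camera read (H x W x 3 RGB)."""
    return np.zeros((480, 640, 3), dtype=np.uint8)

seg = Segmentation("192.168.0.10", 20000)
seg.register_first_frame(frame=get_frame(), text_prompt="cup", use_image_prompt=False)

for _ in range(100):
    resp = seg.get_next(get_frame())
    if isinstance(resp, dict) and resp.get("result") == "ERROR":
        # e.g. SAM2 VRAM estimation failure: reset, then re-register as recommended.
        print("tracking error:", resp.get("message"))
        seg.reset()
        seg.register_first_frame(frame=get_frame(), text_prompt="cup", use_image_prompt=False)
        continue
    masks = resp  # {object_id: mask} on success, or None if the server returned nothing

seg.close()
```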
pyproject.toml

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

 [project]
 name = "neuromeka_vfm"
-version = "0.1.4"
+version = "0.1.5"
 description = "Client utilities for Neuromeka VFM FoundationPose RPC (upload meshes, call server)"
 readme = "README.md"
 requires-python = ">=3.8"
@@ -13,7 +13,9 @@ dependencies = [
     "numpy",
     "pyzmq",
     "paramiko",
-    "av",
+    "av",
+    "trimesh",
+    "tqdm",
 ]
 authors = [{name = "Neuromeka"}]
 classifiers = [
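The dependency list grows by `trimesh` and `tqdm`, which the new grasp utilities import at module load. A quick, hedged way to confirm an installed environment picked them up:

```python
# Sanity check for an installed neuromeka_vfm 0.1.5 environment (sketch; the
# distribution-name lookup assumes standard pip metadata normalization).
from importlib.metadata import requires, version

print(version("neuromeka_vfm"))    # expected: 0.1.5
print(requires("neuromeka_vfm"))   # should now list trimesh and tqdm alongside numpy, pyzmq, paramiko, av

import trimesh  # used for gripper meshes and surface sampling in point_cloud_utils
import tqdm     # progress bar for the per-grasp collision loop
```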
src/neuromeka_vfm/__init__.py

@@ -2,6 +2,7 @@ from .pose_estimation import PoseEstimation, FoundationPoseClient
 from .upload_mesh import upload_mesh
 from .segmentation import Segmentation, NrmkRealtimeSegmentation
 from .compression import STRATEGIES as SEGMENTATION_COMPRESSION_STRATEGIES
+from .grasp_gen import GraspPoseGeneration

 __all__ = [
     "PoseEstimation",
@@ -10,4 +11,5 @@ __all__ = [
     "Segmentation",
     "NrmkRealtimeSegmentation",
     "SEGMENTATION_COMPRESSION_STRATEGIES",
+    "GraspPoseGeneration",
 ]
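With the `__init__.py` change, `GraspPoseGeneration` is exported from the package root next to the existing clients; it takes no constructor arguments and has no server dependency, so it can be instantiated directly:

```python
# The new top-level export; purely local (no RPC involved).
from neuromeka_vfm import (
    GraspPoseGeneration,
    SEGMENTATION_COMPRESSION_STRATEGIES,
    Segmentation,
)

grasp = GraspPoseGeneration()
print(type(grasp).__name__)                         # GraspPoseGeneration
print(sorted(SEGMENTATION_COMPRESSION_STRATEGIES))  # available compression strategies
```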
src/neuromeka_vfm/grasp_gen.py (new file)

@@ -0,0 +1,79 @@
+from typing import Tuple
+
+import numpy as np
+import trimesh
+
+from . import point_cloud_utils
+
+
+class GraspPoseGeneration:
+    """
+    Wrapper class for point cloud utilities used in grasp pose workflows.
+    """
+
+    def knn_points(self, X: np.ndarray, K: int, norm: int):
+        return point_cloud_utils.knn_points(X=X, K=K, norm=norm)
+
+    def point_cloud_outlier_removal(
+        self, obj_pc: np.ndarray, threshold: float = 0.014, K: int = 20
+    ) -> Tuple[np.ndarray, np.ndarray]:
+        return point_cloud_utils.point_cloud_outlier_removal(
+            obj_pc=obj_pc, threshold=threshold, K=K
+        )
+
+    def point_cloud_outlier_removal_with_color(
+        self,
+        obj_pc: np.ndarray,
+        obj_pc_color: np.ndarray,
+        threshold: float = 0.014,
+        K: int = 20,
+    ) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
+        return point_cloud_utils.point_cloud_outlier_removal_with_color(
+            obj_pc=obj_pc,
+            obj_pc_color=obj_pc_color,
+            threshold=threshold,
+            K=K,
+        )
+
+    def depth_and_segmentation_to_point_clouds(
+        self,
+        depth_image: np.ndarray,
+        segmentation_mask: np.ndarray,
+        fx: float,
+        fy: float,
+        cx: float,
+        cy: float,
+        rgb_image: np.ndarray = None,
+        target_object_id: int = 1,
+        remove_object_from_scene: bool = False,
+    ) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
+        return point_cloud_utils.depth_and_segmentation_to_point_clouds(
+            depth_image=depth_image,
+            segmentation_mask=segmentation_mask,
+            fx=fx,
+            fy=fy,
+            cx=cx,
+            cy=cy,
+            rgb_image=rgb_image,
+            target_object_id=target_object_id,
+            remove_object_from_scene=remove_object_from_scene,
+        )
+
+    def filter_colliding_grasps(
+        self,
+        scene_pc: np.ndarray,
+        grasp_poses: np.ndarray,
+        gripper_collision_mesh: trimesh.Trimesh,
+        collision_threshold: float = 0.002,
+        num_collision_samples: int = 2000,
+    ) -> np.ndarray:
+        return point_cloud_utils.filter_colliding_grasps(
+            scene_pc=scene_pc,
+            grasp_poses=grasp_poses,
+            gripper_collision_mesh=gripper_collision_mesh,
+            collision_threshold=collision_threshold,
+            num_collision_samples=num_collision_samples,
+        )
+
+
+__all__ = ["GraspPoseGeneration"]
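`GraspPoseGeneration` is a thin pass-through to `point_cloud_utils`, so it can be exercised offline. The sketch below runs the whole chain on synthetic data; the intrinsics, the object id, the 4 cm box standing in for a gripper collision mesh, and the two candidate poses are all made up for illustration.

```python
# Hedged end-to-end sketch on synthetic data (no camera, robot, or server required).
import numpy as np
import trimesh
from neuromeka_vfm import GraspPoseGeneration

grasp = GraspPoseGeneration()

# Synthetic depth (meters) and a single-object mask (object id 1, background 0).
depth = np.zeros((480, 640), dtype=np.float32)
seg_mask = np.zeros((480, 640), dtype=np.int32)
depth[200:260, 300:360] = 0.6      # a small surface patch 0.6 m from the camera
seg_mask[220:250, 315:345] = 1     # the "object" inside that patch
fx = fy = 600.0
cx, cy = 320.0, 240.0

scene_pc, object_pc, _, _ = grasp.depth_and_segmentation_to_point_clouds(
    depth_image=depth, segmentation_mask=seg_mask,
    fx=fx, fy=fy, cx=cx, cy=cy,
    target_object_id=1, remove_object_from_scene=True,
)

# Statistical outlier removal (mean L1 distance to the K nearest neighbours).
object_pc, removed_pc = grasp.point_cloud_outlier_removal(object_pc, threshold=0.014, K=20)
print("object:", object_pc.shape, "removed:", removed_pc.shape)

# Two candidate grasp poses (4x4 camera-frame transforms): one placed inside the
# scene points, one well clear of them. A 4 cm box stands in for the gripper mesh.
gripper_mesh = trimesh.creation.box(extents=(0.04, 0.04, 0.04))
colliding_pose = np.eye(4)
colliding_pose[:3, 3] = object_pc.mean(axis=0)
free_pose = np.eye(4)
free_pose[:3, 3] = object_pc.mean(axis=0) + np.array([0.0, 0.0, -0.3])

collision_free = grasp.filter_colliding_grasps(
    scene_pc=scene_pc,
    grasp_poses=np.stack([colliding_pose, free_pose]),
    gripper_collision_mesh=gripper_mesh,
    collision_threshold=0.002,
    num_collision_samples=2000,
)
print(collision_free)  # expected (roughly): [False  True]
```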
src/neuromeka_vfm/point_cloud_utils.py (new file)

@@ -0,0 +1,377 @@
+# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
+#
+# NVIDIA CORPORATION and its licensors retain all intellectual property
+# and proprietary rights in and to this software, related documentation
+# and any modifications thereto. Any use, reproduction, disclosure or
+# distribution of this software and related documentation without an express
+# license agreement from NVIDIA CORPORATION is strictly prohibited.
+
+import logging
+from typing import Tuple, Dict
+
+import numpy as np
+import trimesh
+import trimesh.transformations as tra
+from tqdm import tqdm
+
+logger = logging.getLogger(__name__)
+
+
+def _pairwise_distances(X: np.ndarray, Y: np.ndarray, norm: int) -> np.ndarray:
+    if norm == 1:
+        return np.sum(np.abs(X[:, None, :] - Y[None, :, :]), axis=2)
+    if norm == 2:
+        diff = X[:, None, :] - Y[None, :, :]
+        return np.sqrt(np.sum(diff * diff, axis=2))
+    diff = X[:, None, :] - Y[None, :, :]
+    return np.linalg.norm(diff, ord=norm, axis=2)
+
+
+def knn_points(X: np.ndarray, K: int, norm: int):
+    """
+    Computes the K-nearest neighbors for each point in the point cloud X.
+
+    Args:
+        X: (N, 3) array representing the point cloud.
+        K: Number of nearest neighbors.
+
+    Returns:
+        dists: (N, K) array containing distances to the K nearest neighbors.
+        idxs: (N, K) array containing indices of the K nearest neighbors.
+    """
+    X = np.asarray(X, dtype=np.float32)
+    if X.ndim != 2 or X.shape[1] != 3:
+        raise ValueError("X must be a (N, 3) array")
+    if K <= 0:
+        raise ValueError("K must be positive")
+    N, _ = X.shape
+    if K >= N:
+        raise ValueError("K must be smaller than number of points")
+
+    dists_out = np.empty((N, K), dtype=np.float32)
+    idxs_out = np.empty((N, K), dtype=np.int64)
+
+    max_bytes = 64 * 1024 * 1024
+    bytes_per_row = N * X.dtype.itemsize
+    chunk_size = max(1, min(N, max_bytes // max(bytes_per_row, 1)))
+
+    for start in range(0, N, chunk_size):
+        end = min(start + chunk_size, N)
+        chunk = X[start:end]
+        dist_matrix = _pairwise_distances(chunk, X, norm=norm)
+
+        row_idx = np.arange(end - start)
+        col_idx = row_idx + start
+        dist_matrix[row_idx, col_idx] = np.inf
+
+        idx_part = np.argpartition(dist_matrix, K, axis=1)[:, :K]
+        dist_part = np.take_along_axis(dist_matrix, idx_part, axis=1)
+        order = np.argsort(dist_part, axis=1)
+        idxs = np.take_along_axis(idx_part, order, axis=1)
+        dists = np.take_along_axis(dist_part, order, axis=1)
+
+        dists_out[start:end] = dists
+        idxs_out[start:end] = idxs
+
+    return dists_out, idxs_out
+
+
+def point_cloud_outlier_removal(
+    obj_pc: np.ndarray, threshold: float = 0.014, K: int = 20
+) -> Tuple[np.ndarray, np.ndarray]:
+    """
+    Remove outliers from a point cloud. K-nearest neighbors is used to compute
+    the distance to the nearest neighbor for each point. If the distance is
+    greater than a threshold, the point is considered an outlier and removed.
+
+    Args:
+        obj_pc (np.ndarray): (N, 3) array representing the point cloud.
+        threshold (float): Distance threshold for outlier detection. Points with mean distance to
+            K nearest neighbors greater than this threshold are removed.
+        K (int): Number of nearest neighbors to consider for outlier detection.
+
+    Returns:
+        Tuple[np.ndarray, np.ndarray]: Tuple containing filtered and removed point clouds.
+    """
+    obj_pc = np.asarray(obj_pc, dtype=np.float32)
+    if obj_pc.ndim != 2 or obj_pc.shape[1] != 3:
+        raise ValueError("obj_pc must be a (N, 3) array")
+
+    nn_dists, _ = knn_points(obj_pc, K=K, norm=1)
+
+    mask = nn_dists.mean(axis=1) < threshold
+    filtered_pc = obj_pc[mask]
+    removed_pc = obj_pc[~mask]
+
+    logger.info(
+        "Removed %s points from point cloud",
+        obj_pc.shape[0] - filtered_pc.shape[0],
+    )
+    return filtered_pc, removed_pc
+
+
+def point_cloud_outlier_removal_with_color(
+    obj_pc: np.ndarray,
+    obj_pc_color: np.ndarray,
+    threshold: float = 0.014,
+    K: int = 20,
+) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
+    """
+    Remove outliers from a point cloud with colors.
+
+    Args:
+        obj_pc (np.ndarray): (N, 3) array representing the point cloud.
+        obj_pc_color (np.ndarray): (N, 3) array representing the point cloud color.
+        threshold (float): Distance threshold for outlier detection. Points with mean distance to
+            K nearest neighbors greater than this threshold are removed.
+        K (int): Number of nearest neighbors to consider for outlier detection.
+
+    Returns:
+        Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: Tuple containing filtered and
+            removed point clouds and colors.
+    """
+    obj_pc = np.asarray(obj_pc, dtype=np.float32)
+    obj_pc_color = np.asarray(obj_pc_color, dtype=np.float32)
+    if obj_pc.ndim != 2 or obj_pc.shape[1] != 3:
+        raise ValueError("obj_pc must be a (N, 3) array")
+    if obj_pc_color.shape != obj_pc.shape:
+        raise ValueError("obj_pc_color must match obj_pc shape")
+
+    nn_dists, _ = knn_points(obj_pc, K=K, norm=1)
+
+    mask = nn_dists.mean(axis=1) < threshold
+    filtered_pc = obj_pc[mask]
+    removed_pc = obj_pc[~mask]
+
+    filtered_pc_color = obj_pc_color[mask]
+    removed_pc_color = obj_pc_color[~mask]
+
+    logger.info(
+        "Removed %s points from point cloud",
+        obj_pc.shape[0] - filtered_pc.shape[0],
+    )
+    return filtered_pc, removed_pc, filtered_pc_color, removed_pc_color
+
+
+def depth2points(
+    depth: np.array,
+    fx: int,
+    fy: int,
+    cx: int,
+    cy: int,
+    xmap: np.array = None,
+    ymap: np.array = None,
+    rgb: np.array = None,
+    seg: np.array = None,
+    mask: np.arange = None,
+) -> Dict:
+    """Compute point cloud from a depth image."""
+    if rgb is not None:
+        assert rgb.shape[0] == depth.shape[0] and rgb.shape[1] == depth.shape[1]
+    if xmap is not None:
+        assert xmap.shape[0] == depth.shape[0] and xmap.shape[1] == depth.shape[1]
+    if ymap is not None:
+        assert ymap.shape[0] == depth.shape[0] and ymap.shape[1] == depth.shape[1]
+
+    im_height, im_width = depth.shape[0], depth.shape[1]
+
+    if xmap is None or ymap is None:
+        ww = np.linspace(0, im_width - 1, im_width)
+        hh = np.linspace(0, im_height - 1, im_height)
+        xmap, ymap = np.meshgrid(ww, hh)
+
+    pt2 = depth
+    pt0 = (xmap - cx) * pt2 / fx
+    pt1 = (ymap - cy) * pt2 / fy
+
+    mask_depth = np.ma.getmaskarray(np.ma.masked_greater(pt2, 0))
+    if mask is None:
+        mask = mask_depth
+    else:
+        mask_semantic = np.ma.getmaskarray(np.ma.masked_equal(mask, 1))
+        mask = mask_depth * mask_semantic
+
+    index = mask.flatten().nonzero()[0]
+
+    pt2_valid = pt2.flatten()[:, np.newaxis].astype(np.float32)
+    pt0_valid = pt0.flatten()[:, np.newaxis].astype(np.float32)
+    pt1_valid = pt1.flatten()[:, np.newaxis].astype(np.float32)
+    pc_xyz = np.concatenate((pt0_valid, pt1_valid, pt2_valid), axis=1)
+    if rgb is not None:
+        r = rgb[:, :, 0].flatten()[:, np.newaxis]
+        g = rgb[:, :, 1].flatten()[:, np.newaxis]
+        b = rgb[:, :, 2].flatten()[:, np.newaxis]
+        pc_rgb = np.concatenate((r, g, b), axis=1)
+    else:
+        pc_rgb = None
+
+    if seg is not None:
+        pc_seg = seg.flatten()[:, np.newaxis]
+    else:
+        pc_seg = None
+
+    return {"xyz": pc_xyz, "rgb": pc_rgb, "seg": pc_seg, "index": index}
+
+
+def depth_and_segmentation_to_point_clouds(
+    depth_image: np.ndarray,
+    segmentation_mask: np.ndarray,
+    fx: float,
+    fy: float,
+    cx: float,
+    cy: float,
+    rgb_image: np.ndarray = None,
+    target_object_id: int = 1,
+    remove_object_from_scene: bool = False,
+) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
+    """
+    Convert depth image and instance segmentation mask to scene and object point clouds.
+
+    Args:
+        depth_image: HxW depth image in meters
+        segmentation_mask: HxW instance segmentation mask with integer labels
+        fx, fy, cx, cy: Camera intrinsic parameters
+        rgb_image: HxWx3 RGB image (optional, for colored point clouds)
+        target_object_id: ID of the target object in the segmentation mask
+        remove_object_from_scene: If True, removes object points from scene point cloud
+
+    Returns:
+        scene_pc: Nx3 point cloud of the entire scene (excluding object if remove_object_from_scene=True)
+        object_pc: Mx3 point cloud of the target object only
+        scene_colors: Nx3 RGB colors for scene points (or None)
+        object_colors: Mx3 RGB colors for object points (or None)
+
+    Raises:
+        ValueError: If no target object found or multiple objects detected
+    """
+    unique_ids = np.unique(segmentation_mask)
+    if target_object_id not in unique_ids:
+        raise ValueError(
+            f"Target object ID {target_object_id} not found in segmentation mask. Available IDs: {unique_ids}"
+        )
+
+    non_background_ids = unique_ids[unique_ids != 0]
+    if len(non_background_ids) > 1:
+        raise ValueError(
+            "Multiple objects detected in segmentation mask: "
+            f"{non_background_ids}. Please ensure only one object is present."
+        )
+
+    pts_data = depth2points(
+        depth=depth_image,
+        fx=int(fx),
+        fy=int(fy),
+        cx=int(cx),
+        cy=int(cy),
+        rgb=rgb_image,
+        seg=segmentation_mask,
+    )
+
+    xyz = pts_data["xyz"]
+    rgb = pts_data["rgb"]
+    seg = pts_data["seg"]
+    index = pts_data["index"]
+
+    xyz_valid = xyz[index]
+    seg_valid = seg[index] if seg is not None else None
+    rgb_valid = rgb[index] if rgb is not None else None
+
+    scene_pc = xyz_valid
+    scene_colors = rgb_valid
+
+    if seg_valid is not None:
+        object_mask = seg_valid.flatten() == target_object_id
+        object_pc = xyz_valid[object_mask]
+        object_colors = rgb_valid[object_mask] if rgb_valid is not None else None
+
+        if remove_object_from_scene:
+            scene_mask = ~object_mask
+            scene_pc = xyz_valid[scene_mask]
+            scene_colors = rgb_valid[scene_mask] if rgb_valid is not None else None
+            logger.info(
+                "Removed %s object points from scene point cloud",
+                np.sum(object_mask),
+            )
+    else:
+        raise ValueError("Segmentation data not available from depth2points")
+
+    if len(object_pc) == 0:
+        raise ValueError(f"No points found for target object ID {target_object_id}")
+
+    logger.info("Scene point cloud: %s points", len(scene_pc))
+    logger.info("Object point cloud: %s points", len(object_pc))
+
+    return scene_pc, object_pc, scene_colors, object_colors
+
+
+def filter_colliding_grasps(
+    scene_pc: np.ndarray,
+    grasp_poses: np.ndarray,
+    gripper_collision_mesh: trimesh.Trimesh,
+    collision_threshold: float = 0.002,
+    num_collision_samples: int = 2000,
+) -> np.ndarray:
+    """
+    Filter grasps based on collision detection with scene point cloud.
+
+    Args:
+        scene_pc: Nx3 scene point cloud
+        grasp_poses: Kx4x4 array of grasp poses
+        gripper_collision_mesh: Trimesh of gripper collision geometry
+        collision_threshold: Distance threshold for collision detection (meters)
+        num_collision_samples: Number of points to sample from gripper mesh surface
+
+    Returns:
+        collision_mask: K-length boolean array, True if grasp is collision-free
+    """
+    gripper_surface_points, _ = trimesh.sample.sample_surface(
+        gripper_collision_mesh, num_collision_samples
+    )
+    gripper_surface_points = np.array(gripper_surface_points)
+
+    scene_pc = np.asarray(scene_pc, dtype=np.float32)
+    collision_free_mask = []
+
+    logger.info(
+        "Checking collision for %s grasps against %s scene points...",
+        len(grasp_poses),
+        len(scene_pc),
+    )
+
+    for _, grasp_pose in tqdm(
+        enumerate(grasp_poses), total=len(grasp_poses), desc="Collision checking"
+    ):
+        gripper_points_transformed = tra.transform_points(
+            gripper_surface_points, grasp_pose
+        ).astype(np.float32, copy=False)
+
+        min_distances_sq = []
+        batch_size = 100
+        for j in range(0, len(gripper_points_transformed), batch_size):
+            batch_gripper_points = gripper_points_transformed[j : j + batch_size]
+            diff = batch_gripper_points[:, None, :] - scene_pc[None, :, :]
+            dist_sq = np.einsum("ijk,ijk->ij", diff, diff)
+            batch_min_dist_sq = np.min(dist_sq, axis=1)
+            min_distances_sq.append(batch_min_dist_sq)
+
+        all_min_distances_sq = np.concatenate(min_distances_sq, axis=0)
+        collision_detected = np.any(
+            all_min_distances_sq < collision_threshold * collision_threshold
+        )
+        collision_free_mask.append(not bool(collision_detected))
+
+    collision_free_mask = np.array(collision_free_mask)
+    num_collision_free = np.sum(collision_free_mask)
+    logger.info("Found %s/%s collision-free grasps", num_collision_free, len(grasp_poses))
+
+    return collision_free_mask
+
+
+__all__ = [
+    "knn_points",
+    "point_cloud_outlier_removal",
+    "point_cloud_outlier_removal_with_color",
+    "depth2points",
+    "depth_and_segmentation_to_point_clouds",
+    "filter_colliding_grasps",
+]
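`depth2points` implements the standard pinhole back-projection x = (u - cx)·z/fx, y = (v - cy)·z/fy, z = depth, returning every pixel plus an `index` of the valid (depth > 0) ones. A small, hedged round-trip check on a synthetic depth plane (arbitrary intrinsics):

```python
# Re-projecting the returned points should recover the pixel grid (sketch).
import numpy as np
from neuromeka_vfm import point_cloud_utils as pcu

fx, fy, cx, cy = 600.0, 600.0, 320.0, 240.0
depth = np.full((480, 640), 0.5, dtype=np.float32)   # flat plane 0.5 m away

pts = pcu.depth2points(depth=depth, fx=fx, fy=fy, cx=cx, cy=cy)
xyz = pts["xyz"][pts["index"]]                       # keep only valid (depth > 0) pixels

# Forward projection: u = fx * x / z + cx, v = fy * y / z + cy
u = fx * xyz[:, 0] / xyz[:, 2] + cx
v = fy * xyz[:, 1] / xyz[:, 2] + cy
vv, uu = np.meshgrid(np.arange(480), np.arange(640), indexing="ij")
assert np.allclose(u, uu.flatten()[pts["index"]], atol=1e-3)
assert np.allclose(v, vv.flatten()[pts["index"]], atol=1e-3)
print("pinhole round trip OK:", xyz.shape)
```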
src/neuromeka_vfm/segmentation.py

@@ -16,6 +16,7 @@ class Segmentation:
         self.client = PickleClient(hostname, port)
         self.tracking_object_ids = []
         self.current_frame_masks = {}
+        self.invisible_object_ids = []
         self.image_prompt_names = set()
         if compression_strategy in STRATEGIES:
             self.compression_strategy_name = compression_strategy
@@ -51,14 +52,28 @@ class Segmentation:
         else:
             raise ValueError(f"Only valid compression strategies are {list(STRATEGIES.keys())}")

+    def set_config(self, config):
+        data = {"operation": "set_config", "config": config}
+        return self.client.send_data(data)
+
+    def get_capabilities(self):
+        data = {"operation": "get_capabilities"}
+        return self.client.send_data(data)
+
+    def get_config(self):
+        data = {"operation": "get_config"}
+        return self.client.send_data(data)
+
     def reset(self):
         self.first_frame_registered = False
         self.tracking_object_ids = []
         self.current_frame_masks = {}
+        self.invisible_object_ids = []
         self.encoder = None
         if self.benchmark:
             self.call_time = {"add_image_prompt": 0, "register_first_frame": 0, "get_next": 0}
             self.call_count = {"add_image_prompt": 0, "register_first_frame": 0, "get_next": 0}
+        self.client.send_data({"operation": "reset"})

     def add_image_prompt(self, object_name, object_image):
         if self.benchmark:
@@ -100,6 +115,9 @@ class Segmentation:
             if np.any(mask):
                 masks[obj_id] = mask
         self.current_frame_masks = masks
+        self.invisible_object_ids = [
+            obj_id for obj_id in self.tracking_object_ids if obj_id not in masks
+        ]
         if self.benchmark:
             self.call_time["register_first_frame"] += time.time() - start
             self.call_count["register_first_frame"] += 1
@@ -124,21 +142,57 @@ class Segmentation:
                 if np.any(mask):
                     masks[obj_id] = mask
             self.current_frame_masks = masks
+            self.invisible_object_ids = [
+                obj_id for obj_id in self.tracking_object_ids if obj_id not in masks
+            ]
             if self.benchmark:
                 self.call_time["get_next"] += time.time() - start
                 self.call_count["get_next"] += 1
             return masks
+        if isinstance(response, dict) and any(
+            key in response for key in ("result", "status", "success", "message")
+        ):
+            if self.benchmark:
+                self.call_time["get_next"] += time.time() - start
+                self.call_count["get_next"] += 1
+            return response
         if self.benchmark:
             self.call_time["get_next"] += time.time() - start
             self.call_count["get_next"] += 1
         return None

+    def remove_object(self, obj_id, strict=False, need_output=False):
+        if not self.first_frame_registered:
+            print("Segmentation: register_first_frame must be called first")
+            return None
+        data = {
+            "operation": "remove_object",
+            "obj_id": obj_id,
+            "strict": strict,
+            "need_output": need_output,
+        }
+        response = self.client.send_data(data)
+        if self._is_success(response):
+            obj_ids = response.get("data", {}).get("obj_ids")
+            if obj_ids is not None:
+                self.tracking_object_ids = obj_ids
+                self.current_frame_masks = {
+                    obj_id: mask
+                    for obj_id, mask in self.current_frame_masks.items()
+                    if obj_id in obj_ids
+                }
+                self.invisible_object_ids = [
+                    obj_id for obj_id in obj_ids if obj_id not in self.current_frame_masks
+                ]
+        return response
+
     def finish(self):
         if not self.first_frame_registered:
             print("Warning: Segmentation: register_first_frame must be called first")
         self.first_frame_registered = False
         self.tracking_object_ids = []
         self.current_frame_masks = {}
+        self.invisible_object_ids = []

     def close(self):
         """Close underlying ZeroMQ socket/context."""
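The client now keeps `invisible_object_ids` (registered objects that produced no mask in the latest frame) and exposes `remove_object`, which forwards the removal to the server and prunes local state from the returned `obj_ids`. A hedged usage sketch; the hostname/port, the `text_prompt` keyword, and the object id `"cup_0"` are placeholders:

```python
# Sketch of the new bookkeeping; requires a running segmentation server.
import numpy as np
from neuromeka_vfm import Segmentation

seg = Segmentation("192.168.0.10", 20000)
frame = np.zeros((480, 640, 3), dtype=np.uint8)   # stand-in for a camera frame
seg.register_first_frame(frame=frame, text_prompt="cup", use_image_prompt=False)

masks = seg.get_next(frame)
if isinstance(masks, dict) and masks.get("result") != "ERROR":
    # Objects that are registered but produced no mask in this frame:
    print("invisible:", seg.invisible_object_ids)

# Drop one object (v0.2+, use_legacy=False only). The response's data.obj_ids is
# used to prune tracking_object_ids / current_frame_masks / invisible_object_ids.
resp = seg.remove_object("cup_0", strict=False, need_output=False)
print(resp)
print("still tracking:", seg.tracking_object_ids)

seg.close()
```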
src/neuromeka_vfm.egg-info/PKG-INFO

(Identical to the PKG-INFO diff at the top: version bump to 0.1.5, the added trimesh/tqdm Requires-Dist lines, and the same README segmentation v0.2 additions.)
src/neuromeka_vfm.egg-info/SOURCES.txt

@@ -3,7 +3,9 @@ README.md
 pyproject.toml
 src/neuromeka_vfm/__init__.py
 src/neuromeka_vfm/compression.py
+src/neuromeka_vfm/grasp_gen.py
 src/neuromeka_vfm/pickle_client.py
+src/neuromeka_vfm/point_cloud_utils.py
 src/neuromeka_vfm/pose_estimation.py
 src/neuromeka_vfm/segmentation.py
 src/neuromeka_vfm/upload_mesh.py