copick-utils 0.6.1__py3-none-any.whl → 1.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- copick_utils/__init__.py +1 -1
- copick_utils/cli/__init__.py +33 -0
- copick_utils/cli/clipmesh.py +161 -0
- copick_utils/cli/clippicks.py +154 -0
- copick_utils/cli/clipseg.py +163 -0
- copick_utils/cli/conversion_commands.py +32 -0
- copick_utils/cli/enclosed.py +191 -0
- copick_utils/cli/filter_components.py +166 -0
- copick_utils/cli/fit_spline.py +191 -0
- copick_utils/cli/hull.py +138 -0
- copick_utils/cli/input_output_selection.py +76 -0
- copick_utils/cli/logical_commands.py +29 -0
- copick_utils/cli/mesh2picks.py +170 -0
- copick_utils/cli/mesh2seg.py +167 -0
- copick_utils/cli/meshop.py +262 -0
- copick_utils/cli/picks2ellipsoid.py +171 -0
- copick_utils/cli/picks2mesh.py +181 -0
- copick_utils/cli/picks2plane.py +156 -0
- copick_utils/cli/picks2seg.py +134 -0
- copick_utils/cli/picks2sphere.py +170 -0
- copick_utils/cli/picks2surface.py +164 -0
- copick_utils/cli/picksin.py +146 -0
- copick_utils/cli/picksout.py +148 -0
- copick_utils/cli/processing_commands.py +18 -0
- copick_utils/cli/seg2mesh.py +135 -0
- copick_utils/cli/seg2picks.py +128 -0
- copick_utils/cli/segop.py +248 -0
- copick_utils/cli/separate_components.py +155 -0
- copick_utils/cli/skeletonize.py +164 -0
- copick_utils/cli/util.py +580 -0
- copick_utils/cli/validbox.py +155 -0
- copick_utils/converters/__init__.py +35 -0
- copick_utils/converters/converter_common.py +543 -0
- copick_utils/converters/ellipsoid_from_picks.py +335 -0
- copick_utils/converters/lazy_converter.py +576 -0
- copick_utils/converters/mesh_from_picks.py +209 -0
- copick_utils/converters/mesh_from_segmentation.py +119 -0
- copick_utils/converters/picks_from_mesh.py +542 -0
- copick_utils/converters/picks_from_segmentation.py +168 -0
- copick_utils/converters/plane_from_picks.py +251 -0
- copick_utils/converters/segmentation_from_mesh.py +291 -0
- copick_utils/{segmentation → converters}/segmentation_from_picks.py +123 -13
- copick_utils/converters/sphere_from_picks.py +306 -0
- copick_utils/converters/surface_from_picks.py +337 -0
- copick_utils/logical/__init__.py +43 -0
- copick_utils/logical/distance_operations.py +604 -0
- copick_utils/logical/enclosed_operations.py +222 -0
- copick_utils/logical/mesh_operations.py +443 -0
- copick_utils/logical/point_operations.py +303 -0
- copick_utils/logical/segmentation_operations.py +399 -0
- copick_utils/process/__init__.py +47 -0
- copick_utils/process/connected_components.py +360 -0
- copick_utils/process/filter_components.py +306 -0
- copick_utils/process/hull.py +106 -0
- copick_utils/process/skeletonize.py +326 -0
- copick_utils/process/spline_fitting.py +648 -0
- copick_utils/process/validbox.py +333 -0
- copick_utils/util/__init__.py +6 -0
- copick_utils/util/config_models.py +614 -0
- {copick_utils-0.6.1.dist-info → copick_utils-1.0.1.dist-info}/METADATA +15 -2
- copick_utils-1.0.1.dist-info/RECORD +71 -0
- {copick_utils-0.6.1.dist-info → copick_utils-1.0.1.dist-info}/WHEEL +1 -1
- copick_utils-1.0.1.dist-info/entry_points.txt +29 -0
- copick_utils/segmentation/picks_from_segmentation.py +0 -81
- copick_utils-0.6.1.dist-info/RECORD +0 -14
- /copick_utils/{segmentation → io}/__init__.py +0 -0
- {copick_utils-0.6.1.dist-info → copick_utils-1.0.1.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,576 @@
|
|
|
1
|
+
"""Lazy task discovery architecture for parallel object discovery and processing."""
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional
|
|
5
|
+
|
|
6
|
+
from copick.util.log import get_logger
|
|
7
|
+
|
|
8
|
+
from copick_utils.util.config_models import ReferenceConfig, SelectorConfig, TaskConfig
|
|
9
|
+
|
|
10
|
+
if TYPE_CHECKING:
|
|
11
|
+
from copick.models import CopickRoot, CopickRun
|
|
12
|
+
|
|
13
|
+
logger = get_logger(__name__)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def create_selector_config(
    input_type: str,
    output_type: str,
    input_object_name: str,
    input_user_id: str,
    input_session_id: str,
    output_object_name: Optional[str] = None,
    output_user_id: str = "converter",
    output_session_id: str = "0",
    individual_outputs: bool = False,
    segmentation_name: Optional[str] = None,
    voxel_spacing: Optional[float] = None,
) -> SelectorConfig:
    """
    Create selector configuration using Pydantic model with validation.

    Args:
        input_type: Type of input ('picks', 'mesh', 'segmentation')
        output_type: Type of output ('picks', 'mesh', 'segmentation')
        input_object_name: Name of the input object
        input_user_id: User ID of the input
        input_session_id: Session ID or regex pattern of the input
        output_object_name: Name of the output object (defaults to input_object_name)
        output_user_id: User ID for created output
        output_session_id: Session ID or template for created output
        individual_outputs: Whether to create individual output files
        segmentation_name: Name for segmentation (when input or output is segmentation)
        voxel_spacing: Voxel spacing for segmentation

    Returns:
        Validated SelectorConfig model
    """
    # The output object name falls back to the input name when not provided
    # (falsy values such as None or "" trigger the fallback).
    resolved_output_name = output_object_name or input_object_name

    fields = {
        "input_type": input_type,
        "output_type": output_type,
        "input_object_name": input_object_name,
        "input_user_id": input_user_id,
        "input_session_id": input_session_id,
        "output_object_name": resolved_output_name,
        "output_user_id": output_user_id,
        "output_session_id": output_session_id,
        "individual_outputs": individual_outputs,
        "segmentation_name": segmentation_name,
        "voxel_spacing": voxel_spacing,
    }
    # Pydantic performs the actual validation on construction.
    return SelectorConfig(**fields)
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def create_reference_config(
    reference_type: str,  # "mesh" or "segmentation"
    object_name: Optional[str] = None,
    user_id: Optional[str] = None,
    session_id: Optional[str] = None,
    voxel_spacing: Optional[float] = None,
    **additional_params,
) -> ReferenceConfig:
    """
    Create reference discovery configuration using Pydantic model with validation.

    Args:
        reference_type: Type of reference ("mesh" or "segmentation")
        object_name: Name of reference object
        user_id: User ID of reference
        session_id: Session ID of reference
        voxel_spacing: Voxel spacing for segmentation references
        **additional_params: Additional parameters (max_distance, etc.)

    Returns:
        Validated ReferenceConfig model
    """
    # All leftover keyword arguments are bundled into additional_params so the
    # model can forward them to distance/reference operations unchanged.
    kwargs = dict(
        reference_type=reference_type,
        object_name=object_name,
        user_id=user_id,
        session_id=session_id,
        voxel_spacing=voxel_spacing,
        additional_params=additional_params,
    )
    return ReferenceConfig(**kwargs)
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def _is_regex_pattern(pattern: str) -> bool:
|
|
96
|
+
"""Check if string is a regex pattern."""
|
|
97
|
+
regex_chars = r"[.*+?^${}()|[\]\\"
|
|
98
|
+
has_regex_chars = any(char in pattern for char in regex_chars)
|
|
99
|
+
|
|
100
|
+
if not has_regex_chars:
|
|
101
|
+
return False
|
|
102
|
+
|
|
103
|
+
try:
|
|
104
|
+
re.compile(pattern)
|
|
105
|
+
return True
|
|
106
|
+
except re.error:
|
|
107
|
+
return False
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def discover_tasks_for_run(run: "CopickRun", selector_config: SelectorConfig) -> List[Dict[str, Any]]:
    """
    Discover conversion tasks for a single run using selector configuration.

    Args:
        run: CopickRun to discover tasks for
        selector_config: Pydantic SelectorConfig model

    Returns:
        List of task dictionaries for this run
    """
    # Delegate pattern matching to copick's official URI resolution helpers.
    from copick.util.uri import get_copick_objects_by_type

    # Session IDs that look like (valid) regexes are matched as regex,
    # everything else is matched as a glob.
    pattern_type = "regex" if _is_regex_pattern(selector_config.input_session_id) else "glob"

    # Assemble the filter arguments; segmentations use "name" instead of
    # "object_name" and additionally filter on voxel spacing.
    filters: Dict[str, Any] = {"pattern_type": pattern_type}
    if selector_config.input_type in ("picks", "mesh"):
        filters["object_name"] = selector_config.input_object_name
        filters["user_id"] = selector_config.input_user_id
        filters["session_id"] = selector_config.input_session_id
    elif selector_config.input_type == "segmentation":
        filters["name"] = selector_config.input_object_name
        filters["user_id"] = selector_config.input_user_id
        filters["session_id"] = selector_config.input_session_id
        filters["voxel_spacing"] = selector_config.voxel_spacing

    # Find matching input objects using copick's official resolution.
    matching_inputs = get_copick_objects_by_type(
        root=run.root,
        object_type=selector_config.input_type,
        run_name=run.name,
        **filters,
    )
    if not matching_inputs:
        return []

    # Each task carries its input under a type-specific key so converter
    # functions can accept it as a named parameter.
    type_to_param = {"mesh": "mesh", "segmentation": "segmentation", "picks": "picks"}
    input_param_name = type_to_param.get(selector_config.input_type, "input_object")

    tasks: List[Dict[str, Any]] = []
    for input_object in matching_inputs:
        # Expand the "{input_session_id}" placeholder in the output template.
        resolved_session_id = selector_config.output_session_id.replace(
            "{input_session_id}",
            input_object.session_id,
        )

        task = {
            input_param_name: input_object,
            "output_object_name": selector_config.output_object_name,
            "output_user_id": selector_config.output_user_id,
            "output_session_id": resolved_session_id,
            "individual_outputs": selector_config.individual_outputs,
            "input_type": selector_config.input_type,
            "output_type": selector_config.output_type,
            "segmentation_name": selector_config.segmentation_name,
            "voxel_spacing": selector_config.voxel_spacing,
        }

        # Individual outputs need the resolved template for per-object naming.
        if selector_config.individual_outputs:
            task["session_id_template"] = resolved_session_id

        tasks.append(task)

    return tasks
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
def add_references_to_tasks(
    run: "CopickRun",
    tasks: List[Dict[str, Any]],
    reference_config: ReferenceConfig,
) -> List[Dict[str, Any]]:
    """
    Add reference object information to tasks for distance operations.

    Note: the task dicts are mutated in place; the returned list holds the
    same dict objects.

    Args:
        run: CopickRun to search for references
        tasks: List of tasks to augment
        reference_config: Pydantic ReferenceConfig model

    Returns:
        List of tasks with reference information added
    """
    reference_type = reference_config.reference_type

    # Look up reference objects; the unused reference kind is set to None so
    # downstream code can distinguish which one applies.
    if reference_type == "mesh":
        ref_objects = run.get_meshes(
            object_name=reference_config.object_name,
            user_id=reference_config.user_id,
            session_id=reference_config.session_id,
        )
        ref_key, alt_key = "reference_mesh", "reference_segmentation"
    else:  # segmentation
        ref_objects = run.get_segmentations(
            name=reference_config.object_name,
            user_id=reference_config.user_id,
            session_id=reference_config.session_id,
            voxel_size=reference_config.voxel_spacing,
        )
        ref_key, alt_key = "reference_segmentation", "reference_mesh"

    if not ref_objects:
        logger.warning(f"No reference {reference_type} found in run {run.name}")
        return []

    # Attach the first matching reference plus any extra parameters
    # (e.g. max_distance) to every task.
    augmented_tasks = []
    for task in tasks:
        task[ref_key] = ref_objects[0]
        task[alt_key] = None
        task.update(reference_config.additional_params)
        augmented_tasks.append(task)

    return augmented_tasks
|
|
244
|
+
|
|
245
|
+
|
|
246
|
+
def pair_tasks_within_run(
    tasks1: List[Dict[str, Any]],
    tasks2: List[Dict[str, Any]],
    input_type: str = "segmentation",
) -> List[Dict[str, Any]]:
    """
    Pair tasks from two selectors within a single run for boolean operations.

    Tasks are paired positionally; surplus tasks in the longer list are
    silently dropped (pairing stops at the shorter list).

    Args:
        tasks1: Tasks from first selector
        tasks2: Tasks from second selector
        input_type: Type of input objects to determine parameter names

    Returns:
        List of paired tasks for boolean operations
    """
    # A single derivation replaces the two parallel if/elif chains of the
    # previous implementation: the input key matches discover_tasks_for_run's
    # task key, and the numbered output names are just "<key>1"/"<key>2".
    if input_type in ("mesh", "segmentation", "picks"):
        input_key = input_type
    else:
        input_key = "input_object"  # fallback for unknown types
    param1 = f"{input_key}1"
    param2 = f"{input_key}2"

    paired_tasks = []
    # zip pairs in order and stops at the shorter list, replacing the manual
    # index/bounds-check loop.
    for task1, task2 in zip(tasks1, tasks2):
        paired_tasks.append(
            {
                param1: task1[input_key],
                param2: task2[input_key],
                "object_name": task1["output_object_name"],
                "user_id": task1["output_user_id"],
                "session_id": task1["output_session_id"],
                # Copy other parameters from task1
                "voxel_spacing": task1.get("voxel_spacing"),
                "is_multilabel": False,  # Boolean ops work on binary
            },
        )

    return paired_tasks
|
|
304
|
+
|
|
305
|
+
|
|
306
|
+
def pair_multi_tasks_within_run(
    tasks_list: List[List[Dict[str, Any]]],
    input_type: str = "segmentation",
) -> List[Dict[str, Any]]:
    """
    Pair tasks from N selectors within a single run for N-way operations.

    Args:
        tasks_list: List of task lists from N selectors
        input_type: Type of input objects

    Returns:
        List of N-way paired tasks with inputs as a list
    """
    # Plural parameter name carries the list of N inputs.
    plural_names = {"mesh": "meshes", "segmentation": "segmentations"}
    param_name = plural_names.get(input_type, "inputs")

    paired_tasks = []

    # zip(*tasks_list) walks the selectors in lockstep and stops at the
    # shortest list — identical to iterating up to the minimum length.
    for column in zip(*tasks_list):
        # Extract each selector's input object, whichever key it was stored under.
        input_objects = [
            t.get("segmentation") or t.get("mesh") or t.get("picks") or t.get("input_object")
            for t in column
        ]

        # Output naming is taken from the first selector's task at this index.
        first_task = column[0]
        paired_tasks.append(
            {
                param_name: input_objects,  # List of N input objects
                "object_name": first_task["output_object_name"],
                "user_id": first_task["output_user_id"],
                "session_id": first_task["output_session_id"],
                "voxel_spacing": first_task.get("voxel_spacing"),
                "is_multilabel": False,
            },
        )

    return paired_tasks
|
|
356
|
+
|
|
357
|
+
|
|
358
|
+
def lazy_conversion_worker(
    run: "CopickRun",
    config: TaskConfig,
    converter_func: Callable,
    **converter_kwargs,
) -> Dict[str, Any]:
    """
    Universal lazy worker that discovers and processes tasks for a single run.

    Dispatches on ``config.type`` to build the task list (single selector,
    selector + reference, dual/multi selector pairing, or pattern-expanded
    union), then calls ``converter_func`` once per task.

    Args:
        run: CopickRun to process
        config: Pydantic TaskConfig model with validated configuration
        converter_func: Converter function to call. Expected to return a
            ``(output_obj, stats)`` tuple (stats is a dict of numeric counters)
            or a falsy value on failure.
        **converter_kwargs: Additional arguments for converter

    Returns:
        Processing results dictionary with "processed" count, "errors" list,
        and the summed converter stats merged in.
    """
    try:
        if config.type == "single_selector":
            # Simple conversion command
            tasks = discover_tasks_for_run(run, config.selector)

        elif config.type == "single_selector_with_reference":
            # Distance-based command: augment each task with a reference object
            tasks = discover_tasks_for_run(run, config.selector)
            tasks = add_references_to_tasks(run, tasks, config.reference)

        elif config.type == "dual_selector":
            # Boolean operation command: pair tasks from two selectors in order
            tasks1 = discover_tasks_for_run(run, config.selectors[0])
            tasks2 = discover_tasks_for_run(run, config.selectors[1])
            # Use input type from first selector to determine parameter names
            input_type = config.selectors[0].input_type
            tasks = pair_tasks_within_run(tasks1, tasks2, input_type)

            # Add additional parameters to all tasks
            # NOTE(review): only the dual/multi branches apply additional_params;
            # single-selector branches do not — presumably intentional, confirm.
            if config.additional_params:
                for task in tasks:
                    task.update(config.additional_params)

        elif config.type == "multi_selector":
            # N-way operation (N≥2)
            # Discover tasks for all selectors
            tasks_list = []
            for selector in config.selectors:
                selector_tasks = discover_tasks_for_run(run, selector)
                tasks_list.append(selector_tasks)

            # Use input type from first selector
            input_type = config.selectors[0].input_type
            tasks = pair_multi_tasks_within_run(tasks_list, input_type)

            # Add additional parameters
            if config.additional_params:
                for task in tasks:
                    task.update(config.additional_params)

        elif config.type == "single_selector_multi_union":
            # Single input pattern that expands to N-way union
            discovered_tasks = discover_tasks_for_run(run, config.selector)

            if len(discovered_tasks) < 2:
                # Not enough matches for union operation
                return {
                    "processed": 0,
                    "errors": [
                        f"Pattern matched {len(discovered_tasks)} segmentation(s) in {run.name}, but union requires at least 2",
                    ],
                }

            # Extract all input objects from discovered tasks; the key mirrors
            # the type-specific naming used by discover_tasks_for_run.
            input_type = config.selector.input_type
            if input_type == "segmentation":
                param_name = "segmentations"
                input_key = "segmentation"
            elif input_type == "mesh":
                param_name = "meshes"
                input_key = "mesh"
            else:
                param_name = "inputs"
                input_key = "input_object"

            input_objects = [task[input_key] for task in discovered_tasks]

            # Create single N-way task from all matched objects; output naming
            # is taken from the first discovered task.
            first_task = discovered_tasks[0]
            tasks = [
                {
                    param_name: input_objects,
                    "object_name": first_task["output_object_name"],
                    "user_id": first_task["output_user_id"],
                    "session_id": first_task["output_session_id"],
                    "voxel_spacing": first_task.get("voxel_spacing"),
                    "is_multilabel": False,
                },
            ]

        else:
            raise ValueError(f"Unknown config type: {config.type}")

        if not tasks:
            return {"processed": 0, "errors": [f"No tasks found for {run.name}"]}

        # Process all discovered tasks for this run
        total_processed = 0
        all_errors = []
        accumulated_stats = {}

        for task in tasks:
            try:
                # Call converter function with task parameters; converter_kwargs
                # override task keys on collision.
                task_params = dict(task)
                task_params["run"] = run
                task_params.update(converter_kwargs)

                result = converter_func(**task_params)

                if result:
                    output_obj, stats = result
                    total_processed += 1

                    # Accumulate stats (summed across tasks, missing keys start at 0)
                    for key, value in stats.items():
                        if key not in accumulated_stats:
                            accumulated_stats[key] = 0
                        accumulated_stats[key] += value
                else:
                    # Try to find the input object using different possible
                    # parameter names so the error message can report its session.
                    input_obj = (
                        task.get("input_object")
                        or task.get("segmentation")
                        or task.get("mesh")
                        or task.get("picks")
                        or task.get("segmentation1")
                        or task.get("mesh1")
                        or task.get("picks1")
                    )
                    session_id = getattr(input_obj, "session_id", "unknown")
                    all_errors.append(f"No output generated for {session_id} in {run.name}")

            except Exception as e:
                # One failing task must not abort the rest of the run.
                logger.exception(f"Error processing task in {run.name}: {e}")
                all_errors.append(f"Error processing task in {run.name}: {e}")

        return {
            "processed": total_processed,
            "errors": all_errors,
            **accumulated_stats,
        }

    except Exception as e:
        # Catch-all boundary: workers report errors instead of raising so that
        # parallel execution over runs can continue.
        logger.exception(f"Error in lazy worker for {run.name}: {e}")
        return {"processed": 0, "errors": [f"Worker error in {run.name}: {e}"]}
|
|
512
|
+
|
|
513
|
+
|
|
514
|
+
def create_lazy_batch_converter(
    converter_func: Callable,
    task_description: str,
) -> Callable:
    """
    Create a lazy batch converter that does parallel task discovery and processing.

    Args:
        converter_func: The converter function to call for each task
        task_description: Description for progress bar

    Returns:
        Lazy batch converter function
    """

    def lazy_batch_converter(
        root: "CopickRoot",
        config: TaskConfig,
        run_names: Optional[List[str]] = None,
        workers: int = 8,
        **converter_kwargs,
    ) -> Dict[str, Any]:
        """
        Lazy batch converter with parallel task discovery.

        Args:
            root: The copick root containing runs to process
            config: Validated TaskConfig Pydantic model
            run_names: List of run names to process. If None, processes all runs.
            workers: Number of worker processes
            **converter_kwargs: Additional arguments passed to converter function

        Returns:
            Dictionary with processing results and statistics
        """
        from copick.ops.run import map_runs

        # Default to every run in the project when no explicit list is given.
        if run_names is None:
            runs_to_process = [run.name for run in root.runs]
        else:
            runs_to_process = run_names

        if not runs_to_process:
            return {}

        def run_worker(run: "CopickRun", **kwargs) -> Dict[str, Any]:
            # Bind this converter and validated config; task discovery happens
            # inside the worker, per run, so nothing is enumerated up front.
            return lazy_conversion_worker(
                run=run,
                config=config,
                converter_func=converter_func,
                **converter_kwargs,
            )

        # Execute in parallel — no sequential discovery step.
        return map_runs(
            callback=run_worker,
            root=root,
            runs=runs_to_process,
            workers=workers,
            task_desc=task_description,
        )

    return lazy_batch_converter
|