cellmap-analyze 0.2.0__tar.gz → 0.2.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60)
  1. {cellmap_analyze-0.2.0/src/cellmap_analyze.egg-info → cellmap_analyze-0.2.2}/PKG-INFO +1 -1
  2. {cellmap_analyze-0.2.0 → cellmap_analyze-0.2.2}/pyproject.toml +1 -1
  3. cellmap_analyze-0.2.2/src/cellmap_analyze/analyze/assign_to_organelles.py +446 -0
  4. {cellmap_analyze-0.2.0 → cellmap_analyze-0.2.2}/src/cellmap_analyze/cythonizing/centers.cpp +152 -152
  5. {cellmap_analyze-0.2.0 → cellmap_analyze-0.2.2}/src/cellmap_analyze/cythonizing/touching.c +152 -152
  6. {cellmap_analyze-0.2.0 → cellmap_analyze-0.2.2}/src/cellmap_analyze/util/image_data_interface.py +28 -8
  7. {cellmap_analyze-0.2.0 → cellmap_analyze-0.2.2}/src/cellmap_analyze/util/io_util.py +1 -1
  8. {cellmap_analyze-0.2.0 → cellmap_analyze-0.2.2}/src/cellmap_analyze/util/zarr_io.py +59 -5
  9. {cellmap_analyze-0.2.0 → cellmap_analyze-0.2.2/src/cellmap_analyze.egg-info}/PKG-INFO +1 -1
  10. {cellmap_analyze-0.2.0 → cellmap_analyze-0.2.2}/tests/test_zarr_v3.py +48 -0
  11. cellmap_analyze-0.2.0/src/cellmap_analyze/analyze/assign_to_organelles.py +0 -237
  12. {cellmap_analyze-0.2.0 → cellmap_analyze-0.2.2}/LICENSE +0 -0
  13. {cellmap_analyze-0.2.0 → cellmap_analyze-0.2.2}/MANIFEST.in +0 -0
  14. {cellmap_analyze-0.2.0 → cellmap_analyze-0.2.2}/README.md +0 -0
  15. {cellmap_analyze-0.2.0 → cellmap_analyze-0.2.2}/setup.cfg +0 -0
  16. {cellmap_analyze-0.2.0 → cellmap_analyze-0.2.2}/setup.py +0 -0
  17. {cellmap_analyze-0.2.0 → cellmap_analyze-0.2.2}/src/cellmap_analyze/__init__.py +0 -0
  18. {cellmap_analyze-0.2.0 → cellmap_analyze-0.2.2}/src/cellmap_analyze/analyze/__init__.py +0 -0
  19. {cellmap_analyze-0.2.0 → cellmap_analyze-0.2.2}/src/cellmap_analyze/analyze/fit_lines_to_segmentations.py +0 -0
  20. {cellmap_analyze-0.2.0 → cellmap_analyze-0.2.2}/src/cellmap_analyze/analyze/measure.py +0 -0
  21. {cellmap_analyze-0.2.0 → cellmap_analyze-0.2.2}/src/cellmap_analyze/cythonizing/__init__.py +0 -0
  22. {cellmap_analyze-0.2.0 → cellmap_analyze-0.2.2}/src/cellmap_analyze/cythonizing/bresenham3D.c +0 -0
  23. {cellmap_analyze-0.2.0 → cellmap_analyze-0.2.2}/src/cellmap_analyze/cythonizing/bresenham3D.pyx +0 -0
  24. {cellmap_analyze-0.2.0 → cellmap_analyze-0.2.2}/src/cellmap_analyze/cythonizing/centers.pyx +0 -0
  25. {cellmap_analyze-0.2.0 → cellmap_analyze-0.2.2}/src/cellmap_analyze/cythonizing/impl/centers.hpp +0 -0
  26. {cellmap_analyze-0.2.0 → cellmap_analyze-0.2.2}/src/cellmap_analyze/cythonizing/process_arrays.c +0 -0
  27. {cellmap_analyze-0.2.0 → cellmap_analyze-0.2.2}/src/cellmap_analyze/cythonizing/process_arrays.pyx +0 -0
  28. {cellmap_analyze-0.2.0 → cellmap_analyze-0.2.2}/src/cellmap_analyze/cythonizing/touching.pyx +0 -0
  29. {cellmap_analyze-0.2.0 → cellmap_analyze-0.2.2}/src/cellmap_analyze/process/__init__.py +0 -0
  30. {cellmap_analyze-0.2.0 → cellmap_analyze-0.2.2}/src/cellmap_analyze/process/clean_connected_components.py +0 -0
  31. {cellmap_analyze-0.2.0 → cellmap_analyze-0.2.2}/src/cellmap_analyze/process/connected_components.py +0 -0
  32. {cellmap_analyze-0.2.0 → cellmap_analyze-0.2.2}/src/cellmap_analyze/process/contact_sites.py +0 -0
  33. {cellmap_analyze-0.2.0 → cellmap_analyze-0.2.2}/src/cellmap_analyze/process/fill_holes.py +0 -0
  34. {cellmap_analyze-0.2.0 → cellmap_analyze-0.2.2}/src/cellmap_analyze/process/filter_ids.py +0 -0
  35. {cellmap_analyze-0.2.0 → cellmap_analyze-0.2.2}/src/cellmap_analyze/process/label_with_mask.py +0 -0
  36. {cellmap_analyze-0.2.0 → cellmap_analyze-0.2.2}/src/cellmap_analyze/process/morphological_operations.py +0 -0
  37. {cellmap_analyze-0.2.0 → cellmap_analyze-0.2.2}/src/cellmap_analyze/process/mutex_watershed.py +0 -0
  38. {cellmap_analyze-0.2.0 → cellmap_analyze-0.2.2}/src/cellmap_analyze/process/skeletonize.py +0 -0
  39. {cellmap_analyze-0.2.0 → cellmap_analyze-0.2.2}/src/cellmap_analyze/util/__init__.py +0 -0
  40. {cellmap_analyze-0.2.0 → cellmap_analyze-0.2.2}/src/cellmap_analyze/util/block_util.py +0 -0
  41. {cellmap_analyze-0.2.0 → cellmap_analyze-0.2.2}/src/cellmap_analyze/util/cellmap_array.py +0 -0
  42. {cellmap_analyze-0.2.0 → cellmap_analyze-0.2.2}/src/cellmap_analyze/util/dask_util.py +0 -0
  43. {cellmap_analyze-0.2.0 → cellmap_analyze-0.2.2}/src/cellmap_analyze/util/information_holders.py +0 -0
  44. {cellmap_analyze-0.2.0 → cellmap_analyze-0.2.2}/src/cellmap_analyze/util/mask_util.py +0 -0
  45. {cellmap_analyze-0.2.0 → cellmap_analyze-0.2.2}/src/cellmap_analyze/util/measure_util.py +0 -0
  46. {cellmap_analyze-0.2.0 → cellmap_analyze-0.2.2}/src/cellmap_analyze/util/mixins.py +0 -0
  47. {cellmap_analyze-0.2.0 → cellmap_analyze-0.2.2}/src/cellmap_analyze/util/neuroglancer_util.py +0 -0
  48. {cellmap_analyze-0.2.0 → cellmap_analyze-0.2.2}/src/cellmap_analyze/util/skeleton_util.py +0 -0
  49. {cellmap_analyze-0.2.0 → cellmap_analyze-0.2.2}/src/cellmap_analyze/util/voxel_size_utils.py +0 -0
  50. {cellmap_analyze-0.2.0 → cellmap_analyze-0.2.2}/src/cellmap_analyze/util/zarr_util.py +0 -0
  51. {cellmap_analyze-0.2.0 → cellmap_analyze-0.2.2}/src/cellmap_analyze.egg-info/SOURCES.txt +0 -0
  52. {cellmap_analyze-0.2.0 → cellmap_analyze-0.2.2}/src/cellmap_analyze.egg-info/dependency_links.txt +0 -0
  53. {cellmap_analyze-0.2.0 → cellmap_analyze-0.2.2}/src/cellmap_analyze.egg-info/entry_points.txt +0 -0
  54. {cellmap_analyze-0.2.0 → cellmap_analyze-0.2.2}/src/cellmap_analyze.egg-info/requires.txt +0 -0
  55. {cellmap_analyze-0.2.0 → cellmap_analyze-0.2.2}/src/cellmap_analyze.egg-info/top_level.txt +0 -0
  56. {cellmap_analyze-0.2.0 → cellmap_analyze-0.2.2}/src/cli/__init__.py +0 -0
  57. {cellmap_analyze-0.2.0 → cellmap_analyze-0.2.2}/src/cli/cli.py +0 -0
  58. {cellmap_analyze-0.2.0 → cellmap_analyze-0.2.2}/tests/test_non_integer_voxel_size_integration.py +0 -0
  59. {cellmap_analyze-0.2.0 → cellmap_analyze-0.2.2}/tests/test_utils.py +0 -0
  60. {cellmap_analyze-0.2.0 → cellmap_analyze-0.2.2}/tests/test_voxel_size_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cellmap-analyze
3
- Version: 0.2.0
3
+ Version: 0.2.2
4
4
  Summary: Code to perform analysis on segmentations like those produced by CellMap
5
5
  Author-email: David Ackerman <ackermand@janelia.hhmi.org>
6
6
  Maintainer-email: David Ackerman <ackermand@janelia.hhmi.org>
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "cellmap-analyze"
3
- version = "0.2.0"
3
+ version = "0.2.2"
4
4
  description = "Code to perform analysis on segmentations like those produced by CellMap"
5
5
  readme = "README.md"
6
6
  license-files = ["LICENSE"]
@@ -0,0 +1,446 @@
1
+ # %%
2
+ from typing import Union, List
3
+ from cellmap_analyze.util import io_util
4
+ from cellmap_analyze.util.io_util import (
5
+ get_leaf_name_from_path,
6
+ get_output_path_from_input_path,
7
+ )
8
+ from cellmap_analyze.util.image_data_interface import (
9
+ ImageDataInterface,
10
+ )
11
+ from cellmap_analyze.util.mixins import ComputeConfigMixin
12
+ from cellmap_analyze.util import dask_util
13
+ from cellmap_analyze.util.dask_util import create_block_from_index
14
+ from funlib.geometry import Coordinate
15
+ import logging
16
+ import pandas as pd
17
+ import numpy as np
18
+ import os
19
+ from scipy import spatial
20
+ import fastremap
21
+ import fastmorph
22
+
23
# Configure the root logger when this module is imported: INFO level, with a
# timestamp and a padded level name in front of every message.
logging.basicConfig(
    format="%(asctime)s %(levelname)-8s %(message)s",
    level=logging.INFO,
    datefmt="%Y-%m-%d %H:%M:%S",
)
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
29
+
30
+
31
+ class AssignToOrganelles(ComputeConfigMixin):
32
def __init__(
    self,
    organelle_csvs: Union[str, List[str]],
    target_organelle_ds_path: str,
    output_path: str,
    assignment_type: int = 0,
    iteration_distance_nm=10_000,
    num_workers: int = 1,
):
    """Load the input tables and open the target organelle dataset.

    Args:
        organelle_csvs: One CSV path or a list of CSV paths. Each file is
            read with pandas and kept in ``self.organelle_info_dict``,
            keyed by its path.
        target_organelle_ds_path: Path handed to ``ImageDataInterface``;
            its leaf name, capitalized, becomes ``self.organelle_name``.
        output_path: Output location; stored as a string with trailing
            "/" characters removed.
        assignment_type: Stored as-is. Elsewhere in this class 0 selects
            the containing-organelle assignment and any other value is
            passed on as the number of nearest organelles.
        iteration_distance_nm: Stored on the instance.
        num_workers: Forwarded to the ``ComputeConfigMixin`` initializer.
    """
    super().__init__(num_workers)

    csv_paths = (
        [organelle_csvs] if isinstance(organelle_csvs, str) else organelle_csvs
    )

    self.organelle_info_dict = {}
    for csv_path in csv_paths:
        table = pd.read_csv(csv_path)
        # Tables carrying a "Total Objects" column lose their last two
        # columns (dropped by position, not by name).
        has_total_objects = "Total Objects" in table.columns
        self.organelle_info_dict[csv_path] = (
            table.iloc[:, :-2] if has_total_objects else table
        )

    self.organelle_idi = ImageDataInterface(target_organelle_ds_path)
    leaf_name = get_leaf_name_from_path(target_organelle_ds_path)
    self.organelle_name = leaf_name.capitalize()
    self.assignment_type = assignment_type
    self.output_path = str(output_path).rstrip("/")
    self.iteration_distance_nm = iteration_distance_nm
61
+
62
+ @staticmethod
63
+ def _group_coms_by_block(coms_nm, organelle_idi, com_row_indices=None):
64
+ """Group COM positions by block index.
65
+
66
+ Args:
67
+ coms_nm: Array of COM positions in nm, shape (N, 3).
68
+ organelle_idi: ImageDataInterface for the organelle dataset.
69
+ com_row_indices: Optional list of original DataFrame row indices.
70
+ If None, uses range(len(coms_nm)).
71
+
72
+ Returns:
73
+ Tuple of (block_to_com_rows dict, nchunks array).
74
+ block_to_com_rows maps block_linear_index -> list of df row indices.
75
+ """
76
+ if com_row_indices is None:
77
+ com_row_indices = list(range(len(coms_nm)))
78
+
79
+ voxel_size = np.array(organelle_idi.voxel_size)
80
+ chunk_shape = np.array(organelle_idi.chunk_shape)
81
+ if len(chunk_shape) == 4:
82
+ chunk_shape = chunk_shape[1:]
83
+ block_size = chunk_shape * voxel_size
84
+ roi_start = np.array(organelle_idi.roi.get_begin())
85
+ roi_end = np.array(organelle_idi.roi.get_end())
86
+ nchunks = np.ceil((roi_end - roi_start) / block_size).astype(int)
87
+
88
+ # Compute block coordinates for each COM, clamp to valid range
89
+ block_coords = ((coms_nm - roi_start) / block_size).astype(int)
90
+ block_coords = np.clip(block_coords, 0, nchunks - 1)
91
+
92
+ # Convert to linear indices
93
+ block_indices = np.ravel_multi_index(block_coords.T, nchunks)
94
+
95
+ # Group by block
96
+ block_to_com_rows = {}
97
+ for i, block_idx in enumerate(block_indices):
98
+ block_to_com_rows.setdefault(int(block_idx), []).append(
99
+ com_row_indices[i]
100
+ )
101
+
102
+ return block_to_com_rows, nchunks
103
+
104
@staticmethod
def _process_containing_block(
    partition_index,
    organelle_idi,
    coms_path,
    block_indices,
    block_to_com_rows,
):
    """Look up the segmentation ID under every COM grouped into one block.

    Args:
        partition_index: Position in ``block_indices`` of the block to
            process.
        organelle_idi: Dataset interface used to build the block and read
            its segmentation.
        coms_path: Path of the ``.npy`` file holding all COM positions; it
            is memory-mapped read-only.
        block_indices: Sequence of linear block indices.
        block_to_com_rows: Mapping from linear block index to the rows of
            the COM array that belong to that block.

    Returns:
        Dict mapping each row index to the ID found at its COM, or 0 when
        the COM lies outside the block that was read.
    """
    block_index = block_indices[partition_index]
    com_rows = block_to_com_rows[block_index]

    coms_nm = np.load(coms_path, mmap_mode="r")

    block = create_block_from_index(organelle_idi, block_index)
    seg = organelle_idi.to_ndarray_ts(block.read_roi)

    voxel_size = np.array(organelle_idi.voxel_size)
    roi_begin = np.array(block.read_roi.get_begin())

    results = {}
    for row_idx in com_rows:
        offset_in_voxels = (coms_nm[row_idx] - roi_begin) / voxel_size
        # Floor rather than truncate: truncation maps offsets in (-1, 0)
        # to index 0, so a COM just below the block start would wrongly
        # pick up the ID of the first voxel instead of being out of bounds.
        local_idx = np.floor(offset_in_voxels).astype(int)

        inside = np.all(local_idx >= 0) and np.all(local_idx < seg.shape)
        if inside:
            results[row_idx] = int(
                seg[local_idx[0], local_idx[1], local_idx[2]]
            )
        else:
            results[row_idx] = 0

    return results
136
+
137
@staticmethod
def _process_n_nearest_block(
    partition_index,
    organelle_idi,
    coms_path,
    block_indices,
    block_to_com_rows,
    n,
    initial_padding,
):
    """Find the ``n`` nearest organelles for every COM grouped into one block.

    The block is read with a padding that doubles on every pass. A COM is
    accepted once its n-th nearest distance is smaller than its distance to
    every face of the read region that is interior to the dataset, or once
    the read region covers the whole dataset. Distances are measured to
    organelle boundary voxel centres; an organelle containing the COM gets
    distance 0.

    Args:
        partition_index: Position in ``block_indices`` of the block to
            process.
        organelle_idi: Dataset interface used to build blocks and read the
            segmentation.
        coms_path: Path of the ``.npy`` file holding all COM positions; it
            is memory-mapped read-only.
        block_indices: Sequence of linear block indices.
        block_to_com_rows: Mapping from linear block index to the rows of
            the COM array that belong to that block.
        n: Number of nearest organelles to report per COM (expected >= 1).
        initial_padding: Padding used for the first read; doubled on every
            retry.

    Returns:
        Dict mapping each row index to ``{"ids": array, "distances": array}``,
        both of length ``n`` and sorted by distance. Unused slots hold ID 0
        and distance ``inf``.
    """
    block_index = block_indices[partition_index]
    com_rows = block_to_com_rows[block_index]

    coms_nm = np.load(coms_path, mmap_mode="r")

    voxel_size = np.array(organelle_idi.voxel_size)
    padding = initial_padding
    dataset_roi = organelle_idi.roi
    # Loop-invariant dataset bounds.
    ds_begin = np.array(dataset_roi.get_begin())
    ds_end = np.array(dataset_roi.get_end())
    results = {}

    while True:
        block = create_block_from_index(
            organelle_idi,
            block_index,
            padding=padding,
            read_beyond_roi=False,
        )
        # Once the read region covers the dataset there is nothing left to
        # expand into, so every remaining COM is accepted on this pass.
        covers_full_dataset = block.read_roi.contains(dataset_roi)
        seg = organelle_idi.to_ndarray_ts(block.read_roi)

        roi_begin = np.array(block.read_roi.get_begin())
        roi_end = np.array(block.read_roi.get_end())

        # Boundary voxels: label voxels removed by one erosion step.
        boundaries = seg - fastmorph.erode(seg, erode_border=False)
        boundary_voxels = np.argwhere(boundaries > 0)
        has_boundaries = len(boundary_voxels) > 0

        if not has_boundaries and not covers_full_dataset:
            padding = Coordinate(p * 2 for p in padding)
            continue
        # If there are no boundaries even though the whole dataset was
        # read, fall through with empty boundary arrays. Retrying here
        # would loop forever, because the read region cannot grow further.

        boundary_ids = boundaries[
            boundary_voxels[:, 0],
            boundary_voxels[:, 1],
            boundary_voxels[:, 2],
        ]
        boundary_coords_nm = roi_begin + (boundary_voxels + 0.5) * voxel_size
        unique_boundary_ids = (
            fastremap.unique(boundary_ids) if has_boundaries else boundary_ids
        )

        # Only COMs that have not been accepted yet.
        pending_rows = [r for r in com_rows if r not in results]
        if not pending_rows:
            break

        # Force float so that the np.inf masking below cannot fail on an
        # integer-typed COM array.
        pending_coms = np.array(
            [coms_nm[r] for r in pending_rows], dtype=float
        )

        # Margin: smallest distance from each COM to a face of the read
        # region that is interior to the dataset. Faces lying on the
        # dataset edge are ignored (set to inf) since nothing lies beyond.
        face_dists_neg = pending_coms - roi_begin  # (C, 3)
        face_dists_pos = roi_end - pending_coms  # (C, 3)
        at_ds_begin = np.isclose(roi_begin, ds_begin)
        at_ds_end = np.isclose(roi_end, ds_end)
        face_dists_neg[:, at_ds_begin] = np.inf
        face_dists_pos[:, at_ds_end] = np.inf
        margins = np.minimum(
            np.min(face_dists_neg, axis=1),
            np.min(face_dists_pos, axis=1),
        )

        # ID of the voxel under each COM (0 when outside the read region).
        # Floor rather than truncate so offsets in (-1, 0) are treated as
        # out of bounds instead of being mapped to voxel 0.
        local_inds = np.floor(
            (pending_coms - roi_begin) / voxel_size
        ).astype(int)
        in_bounds = np.all(local_inds >= 0, axis=1) & np.all(
            local_inds < seg.shape, axis=1
        )
        containing_ids = np.zeros(len(pending_rows), dtype=int)
        if np.any(in_bounds):
            valid = local_inds[in_bounds]
            containing_ids[in_bounds] = seg[
                valid[:, 0], valid[:, 1], valid[:, 2]
            ]

        # One KD-tree per organelle: nearest boundary point for every COM.
        num_unique = len(unique_boundary_ids)
        min_dist_per_org = np.full((len(pending_rows), num_unique), np.inf)
        for j, uid in enumerate(unique_boundary_ids):
            mask = boundary_ids == uid
            tree = spatial.KDTree(boundary_coords_nm[mask])
            dists, _ = tree.query(pending_coms)
            min_dist_per_org[:, j] = dists

            # A COM inside this organelle is at distance 0 from it.
            is_containing = containing_ids == uid
            if np.any(is_containing):
                min_dist_per_org[is_containing, j] = 0.0

        # Per COM: organelles ordered by distance.
        sort_order = np.argsort(min_dist_per_org, axis=1)
        sorted_dists = np.take_along_axis(min_dist_per_org, sort_order, axis=1)
        sorted_ids = unique_boundary_ids[sort_order]

        num_to_take = min(n, num_unique)
        unverified_rows = []
        for i, row_idx in enumerate(pending_rows):
            top_n_ids = np.zeros(n, dtype=int)
            top_n_dists = np.full(n, np.inf)
            top_n_ids[:num_to_take] = sorted_ids[i, :num_to_take]
            top_n_dists[:num_to_take] = sorted_dists[i, :num_to_take]

            # Containing organelle whose boundary is not in the read
            # region: insert it at distance 0. Use a free slot when one
            # exists so that no real neighbour is overwritten; otherwise
            # replace the farthest entry.
            cid = containing_ids[i]
            if cid > 0 and cid not in top_n_ids[:num_to_take]:
                slot = num_to_take if num_to_take < n else n - 1
                top_n_ids[slot] = cid
                top_n_dists[slot] = 0.0
                order = np.argsort(top_n_dists, kind="stable")
                top_n_ids = top_n_ids[order]
                top_n_dists = top_n_dists[order]

            d_n = top_n_dists[n - 1] if num_unique >= n else np.inf
            if covers_full_dataset or d_n < margins[i]:
                results[row_idx] = {
                    "ids": top_n_ids,
                    "distances": top_n_dists,
                }
            else:
                unverified_rows.append(row_idx)

        if not unverified_rows:
            break

        # Double the padding and retry for the unverified COMs only.
        padding = Coordinate(p * 2 for p in padding)
        com_rows = unverified_rows

    return results
285
+
286
+ @staticmethod
287
+ def _merge_dicts(list_of_dicts):
288
+ merged = {}
289
+ for d in list_of_dicts:
290
+ if d is not None:
291
+ merged.update(d)
292
+ return merged
293
+
294
+ def _save_coms_to_tmp(self, coms):
295
+ """Save COM array to a temp file for workers to read."""
296
+ tmp_dir = os.path.join(self.output_path, ".tmp_assign")
297
+ os.makedirs(tmp_dir, exist_ok=True)
298
+ coms_path = os.path.join(tmp_dir, "coms.npy")
299
+ np.save(coms_path, coms)
300
+ return coms_path
301
+
302
def assign_to_containing_organelle(self, df, organelle_name):
    """Fill ``"<organelle_name> ID"`` with the organelle under each COM.

    COM coordinates are read from the "COM Z/Y/X (nm)" columns, multiplied
    by the dataset's ``voxel_size_scale_factor``, grouped by block and
    looked up blockwise. ``df`` is modified in place.

    Args:
        df: DataFrame holding the COM columns; receives the ID column.
        organelle_name: Name used to build the ID column label.

    Returns:
        None. Returns early, without touching ``df`` or the file system,
        when there is nothing to assign.
    """
    coms = df[["COM Z (nm)", "COM Y (nm)", "COM X (nm)"]].to_numpy()
    sf = self.organelle_idi.voxel_size_scale_factor
    coms_scaled = coms * sf

    block_to_com_rows, _ = self._group_coms_by_block(
        coms_scaled, self.organelle_idi
    )
    block_indices = sorted(block_to_com_rows.keys())

    # Check before writing the temp file so that an empty input leaves no
    # stray file behind.
    if not block_indices:
        return

    output_dir = os.path.join(self.output_path, ".tmp_assign_containing")
    coms_path = self._save_coms_to_tmp(coms_scaled)
    try:
        results = dask_util.compute_blockwise_partitions(
            len(block_indices),
            self.num_workers,
            self.compute_args,
            logger,
            f"assigning containing {organelle_name}",
            AssignToOrganelles._process_containing_block,
            self.organelle_idi,
            coms_path,
            block_indices,
            block_to_com_rows,
            merge_info=(AssignToOrganelles._merge_dicts, output_dir),
        )
    finally:
        # Remove the temp file even when the blockwise computation fails.
        if os.path.exists(coms_path):
            os.remove(coms_path)

    id_col = f"{organelle_name} ID"
    # Result keys are row positions (0..N-1); translate them to index
    # labels so that a DataFrame with a non-default index is handled too.
    row_labels = df.index
    for row_idx, org_id in results.items():
        df.at[row_labels[row_idx], id_col] = org_id
    df[id_col] = df[id_col].astype(int)
337
+
338
def assign_to_n_nearest_organelles(self, df, n, organelle_name):
    """Fill the ID and distance columns with the ``n`` nearest organelles.

    COM coordinates are read from the "COM Z/Y/X (nm)" columns, multiplied
    by the dataset's ``voxel_size_scale_factor``, grouped by block and
    processed blockwise. Reported distances are divided by the same factor
    before being stored. ``df`` is modified in place.

    Args:
        df: DataFrame holding the COM columns; receives
            ``"<organelle_name> ID"`` and ``"<organelle_name> Distance (nm)"``.
        n: Number of nearest organelles. For ``n > 1`` both columns hold
            lists; otherwise they hold a single int / float.
        organelle_name: Name used to build the column labels.

    Returns:
        None. Returns early, without touching ``df`` or the file system,
        when there is nothing to assign.
    """
    coms = df[["COM Z (nm)", "COM Y (nm)", "COM X (nm)"]].to_numpy()
    sf = self.organelle_idi.voxel_size_scale_factor
    coms_scaled = coms * sf

    block_to_com_rows, _ = self._group_coms_by_block(
        coms_scaled, self.organelle_idi
    )
    block_indices = sorted(block_to_com_rows.keys())

    # Check before writing the temp file so that an empty input leaves no
    # stray file behind.
    if not block_indices:
        return

    voxel_size = np.array(self.organelle_idi.voxel_size)
    chunk_shape = np.array(self.organelle_idi.chunk_shape)
    if len(chunk_shape) == 4:
        chunk_shape = chunk_shape[1:]
    block_size_nm = chunk_shape * voxel_size

    # Initial padding: half of the largest block edge, rounded up to a
    # whole number of voxels along each axis.
    half_block = max(block_size_nm) / 2
    padding = Coordinate(
        int(np.ceil(half_block / vs)) * vs for vs in voxel_size
    )

    output_dir = os.path.join(self.output_path, ".tmp_assign_nearest")
    coms_path = self._save_coms_to_tmp(coms_scaled)
    try:
        # Each block handles its own padding expansion internally.
        results = dask_util.compute_blockwise_partitions(
            len(block_indices),
            self.num_workers,
            self.compute_args,
            logger,
            f"assigning {n} nearest {organelle_name}",
            AssignToOrganelles._process_n_nearest_block,
            self.organelle_idi,
            coms_path,
            block_indices,
            block_to_com_rows,
            n,
            padding,
            merge_info=(AssignToOrganelles._merge_dicts, output_dir),
        )
    finally:
        # Remove the temp file even when the blockwise computation fails.
        if os.path.exists(coms_path):
            os.remove(coms_path)

    id_col = f"{organelle_name} ID"
    dist_col = f"{organelle_name} Distance (nm)"
    # Result keys are row positions (0..N-1); translate them to index
    # labels so that a DataFrame with a non-default index is handled too.
    row_labels = df.index

    if n > 1:
        # Object columns holding one list per row.
        df[id_col] = [[] for _ in range(len(df))]
        df[dist_col] = [[] for _ in range(len(df))]
        for row_idx, result in results.items():
            label = row_labels[row_idx]
            df.at[label, id_col] = result["ids"].tolist()
            df.at[label, dist_col] = (result["distances"] / sf).tolist()
    else:
        for row_idx, result in results.items():
            label = row_labels[row_idx]
            df.at[label, id_col] = int(result["ids"][0])
            df.at[label, dist_col] = float(result["distances"][0] / sf)
398
+
399
def assign_to_organelles(self):
    """Run the configured assignment on every loaded table, in place.

    Each table first gets an ID column initialised to 0. With
    ``assignment_type == 0`` the containing-organelle assignment runs;
    otherwise a distance column initialised to 0.0 is added and the
    n-nearest assignment runs with ``n = assignment_type``.
    """
    with io_util.TimingMessager("Assigning objects to organelles", logger):
        for table in self.organelle_info_dict.values():
            table[f"{self.organelle_name} ID"] = 0
            if self.assignment_type == 0:
                self.assign_to_containing_organelle(table, self.organelle_name)
            else:
                table[f"{self.organelle_name} Distance (nm)"] = 0.0
                self.assign_to_n_nearest_organelles(
                    table, self.assignment_type, self.organelle_name
                )
414
+
415
def write_updated_csvs(self):
    """Write every table in ``self.organelle_info_dict`` to a new CSV.

    Output files are named ``<input name>_assigned_to_<mode>_<organelle>``
    with a ".csv" extension, where the mode depends on
    ``self.assignment_type`` (0: containing, 1: nearest, otherwise
    "<n>_nearest" with a plural organelle name). Tables whose input name
    ends in "contacts" go to the path returned by
    ``get_output_path_from_input_path`` for "/contact_sites"; all others go
    to ``self.output_path``. The "Object ID" column is cast to int before
    writing.
    """
    name = self.organelle_name.lower()
    with io_util.TimingMessager("Writing out updated dataframes", logger):
        os.makedirs(self.output_path, exist_ok=True)
        for csv_path, df in self.organelle_info_dict.items():
            # Take the file name first and strip ".csv" from it only. If
            # the whole path is split on ".csv", a directory name that
            # contains ".csv" yields the wrong name.
            csv_name = os.path.basename(csv_path).split(".csv")[0]
            output_path = self.output_path
            if csv_name.endswith("contacts"):  # pragma: no cover
                # Use helper function to generate contact_sites path
                # (handles root datasets correctly).
                output_path = get_output_path_from_input_path(
                    self.output_path, "/contact_sites"
                )
                os.makedirs(output_path, exist_ok=True)

            if self.assignment_type == 0:
                output_name = (
                    f"{output_path}/{csv_name}_assigned_to_containing_{name}"
                )
            elif self.assignment_type == 1:
                output_name = (
                    f"{output_path}/{csv_name}_assigned_to_nearest_{name}"
                )
            else:
                output_name = f"{output_path}/{csv_name}_assigned_to_{self.assignment_type}_nearest_{name}s"
            # In case the column was converted to float along the way.
            df["Object ID"] = df["Object ID"].astype(int)
            df.to_csv(output_name + ".csv", index=False)
443
+
444
def get_organelle_assignments(self):
    """Run the whole pipeline: assign organelles, then write the updated CSVs."""
    for stage in (self.assign_to_organelles, self.write_updated_csvs):
        stage()