Simple-Track 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,962 @@
1
+ from typing import Union
2
+
3
+ import numpy as np
4
+ from numpy.typing import NDArray
5
+
6
+ from simpletrack.feature import Feature
7
+ from simpletrack.frame import Frame
8
+ from simpletrack.utils import check_arrays, check_valid_ids, native
9
+
10
+
11
+ class FrameTracker:
12
+ """
13
+ Handles tracking of Features between Frames. This is the main class that will
14
+ match features between Frames, ensure consistent feature ids are assigned to
15
+ matched features, give new features new ids, identify merging and splitting
16
+ features, and update feature and lifetime fields in the current Frame
17
+ accordingly.
18
+ """
19
+
20
def __init__(
    self,
    overlap_nbhood: int = 5,
    overlap_threshold: float = 0.6,
    retain_lifetime_on_split: bool = True,
    _nbhood_coeff_test=False,
):
    """
    Configure how Features are matched between consecutive Frames.

    Args:
        overlap_nbhood (int, optional):
            Radius, in pixels, of the neighbourhood (nbhood) placed around a
            Feature centroid when the direct overlap between the advected and
            current Frames is not sufficient. Converted with int() before it
            is stored.
            Defaults to 5.
        overlap_threshold (float, optional):
            Minimum normalised overlap between a Feature in the advected Frame
            and a Feature in the current Frame for the pair to count as a
            match.
            Defaults to 0.6.
        retain_lifetime_on_split (bool, optional):
            When a child Feature splits from its parent, True makes the child
            take the parent's lifetime and False resets the child's lifetime
            to 1.
            Defaults to True.
        _nbhood_coeff_test (bool, optional):
            If True, overlap_nbhood is multiplied by the feature size to get
            the radial mask size instead of being used directly as the radius.
            Defaults to False.
    """
    # Convert the radius first so a value int() rejects fails before any
    # attribute is written
    nbhood_radius = int(overlap_nbhood)

    self.overlap_nbhood = nbhood_radius
    self.overlap_threshold = overlap_threshold
    self.retain_lifetime_on_split = retain_lifetime_on_split
    self._nbhood_coeff_test = _nbhood_coeff_test
52
+
53
def run(self, prev_frame: Frame, current_frame: Frame) -> None:
    """
    Track Features from prev_frame into current_frame, updating both in place.

    The procedure is a fixed sequence of seven steps:

    1. Advect the Features of prev_frame with the flow field returned by
       current_frame.get_flow(), giving a best guess of where they should sit
       at the current timestep.
    2. Match Features between the advected Frame and the current Frame. Each
       current Feature gets a provisional id: inherited (with lifetime) from
       the advected Feature it overlaps, or a new id if nothing matches.
       Accreted features are recorded during this matching.
    3. Remove any accreted id that is still present as a provisional id.
    4. Where several current Features share a provisional id, keep the id on
       the most appropriate match and give the others (children) new ids.
    5. Rebuild the feature and lifetime fields of the current Frame from the
       provisional ids.
    6. Promote provisional ids to final ids in the current Frame.
    7. Flag Features of prev_frame that have no match in the current Frame,
       which is useful for output statistics.

    Args:
        prev_frame (Frame):
            Frame containing Features at the previous timestep
        current_frame (Frame):
            Frame containing Features at the current timestep; its get_flow()
            supplies the (y_flow, x_flow) pair used for advection

    Raises:
        TypeError: If prev_frame or current_frame is not a Frame
    """
    both_are_frames = isinstance(prev_frame, Frame) and isinstance(
        current_frame, Frame
    )
    if not both_are_frames:
        raise TypeError(
            f"Expected type Frame, got {type(prev_frame)} and {type(current_frame)}"
        )

    # Step 1: move the previous Features to where the flow says they should be
    y_flow, x_flow = current_frame.get_flow()
    advected_frame = self.advect_frame(prev_frame, y_flow, x_flow)

    # Step 2: ids in the current Frame are so far unrelated to the previous
    # Frame; assign provisional ids from overlap with the advected Frame
    self.match_advected_and_current_frame_features(
        advected_frame, current_frame, prev_frame
    )

    # Step 3: an accreted id must not also survive as a provisional id
    self.check_for_accreted_ids_still_in_domain(current_frame)

    # Step 4: split duplicated provisional ids into one parent plus children
    self.resolve_provisional_id_conflicts(advected_frame, current_frame)

    # Step 5: the provisional ids are now self consistent, rebuild the fields
    current_frame.update_fields_using_provisional_ids()

    # Step 6: provisional ids become the final ids
    current_frame.promote_provisional_ids()

    # Step 7: record previous Features that did not carry over
    self.identify_unmatched_features_in_prev_frame(prev_frame, current_frame)
132
+
133
def advect_frame(self, frame: Frame, y_flow: NDArray, x_flow: NDArray) -> Frame:
    """
    Build a new Frame whose Features are those of the input Frame advected by
    the given flow field.

    If either flow component is None the input Frame itself is returned
    unchanged, after printing both components and a notice.

    Args:
        frame (Frame): Frame containing the Features to advect
        y_flow (NDArray):
            2D array of same shape as frame feature field, containing
            y motion vectors
        x_flow (NDArray):
            2D array of same shape as frame feature field, containing
            x motion vectors

    Returns:
        Frame: advected Frame, or the input Frame when no flow is available

    Raises:
        TypeError: If frame is not a Frame
    """
    if not isinstance(frame, Frame):
        raise TypeError(f"Expected 'Frame', got {type(frame)}")

    # Without a complete flow field there is nothing to advect with
    flow_is_missing = y_flow is None or x_flow is None
    if flow_is_missing:
        print(f"y_flow: {y_flow}")
        print(f"x_flow: {x_flow}")
        print("Continuing with unadvected Frame")
        return frame

    shifted_field = advect_field_using_motion_vectors(
        frame.feature_field, y_flow, x_flow
    )

    advected_frame = Frame()
    advected_frame.feature_field = shifted_field
    advected_frame.populate_features()

    # Each advected Feature takes the lifetime of the Feature with the same id
    # in the source Frame
    for advected_feature in advected_frame.features.values():
        source_feature = frame.get_feature(advected_feature.id)
        advected_feature.lifetime = source_feature.lifetime

    return advected_frame
176
+
177
def match_advected_and_current_frame_features(
    self, advected_frame: Frame, current_frame: Frame, prev_frame: Frame
) -> None:
    """
    Give every Feature in the current Frame a provisional id by comparing it
    against the advected Frame.

    For each current Feature the normalised overlap with the advected field is
    computed, first without a neighbourhood and, if the best overlap is below
    overlap_threshold, again with a neighbourhood of radius overlap_nbhood.

    - A matched Feature takes the matching advected id as its provisional id
      and the advected lifetime plus one.
    - An unmatched Feature takes the next available id from the current Frame
      and a lifetime of 1.
    - Other advected ids with a sufficient overlap that were not chosen as the
      best match are added to the accreted ids of the current Feature. The
      Feature with that id in prev_frame has accreted_in_next_frame_by set to
      the current Feature's id and is set as a final timestep.

    Args:
        advected_frame (Frame):
            Frame containing advected Features from previous timestep
        current_frame (Frame):
            Frame containing Features at current timestep
        prev_frame (Frame):
            Frame at the previous timestep whose accreted Features are flagged

    Raises:
        TypeError: If an entry of current_frame.features is not a Feature
    """
    advected_feature_field = advected_frame.feature_field
    current_feature_field = current_frame.feature_field

    for current_feature in current_frame.features.values():
        if not isinstance(current_feature, Feature):
            raise TypeError(f"Expected Feature, got {type(current_feature)}")

        feature_id = current_feature.id

        # First pass: overlap restricted to the pixels of the feature itself
        overlap_hist = self.calculate_overlap_histogram(
            advected_feature_field, current_feature_field, feature_id, nbhood=0
        )

        # Second pass, only when needed: widen the search with a nbhood
        # surrounding the feature centroid
        best_overlap_too_small = np.max(overlap_hist) < self.overlap_threshold
        if best_overlap_too_small:
            overlap_hist = self.calculate_overlap_histogram(
                advected_feature_field,
                current_feature_field,
                feature_id,
                nbhood=self.overlap_nbhood,
            )

        # Best matching advected id, plus any other ids with sufficient overlap
        matching_id, other_sufficient_ids = self.find_ids_of_closest_overlaps(
            overlap_hist, advected_feature_field, current_feature_field, feature_id
        )

        if matching_id is not None:
            # Continue the lifetime of the matched advected Feature
            matched_lifetime = advected_frame.get_feature(matching_id).lifetime
            current_feature.lifetime = matched_lifetime + 1
        else:
            # Nothing matched, so this is a new Feature
            matching_id = current_frame.get_next_available_feature_id()
            current_feature.lifetime = 1

        current_feature.provisional_id = matching_id

        if other_sufficient_ids is None:
            continue

        # The remaining sufficient overlaps were accreted by this Feature
        current_feature.accrete_ids(other_sufficient_ids)
        for accreted_id in other_sufficient_ids:
            accreted_feature = prev_frame.get_feature(accreted_id)
            accreted_feature.accreted_in_next_frame_by = feature_id
            accreted_feature.set_as_final_timestep()
255
+
256
def check_for_accreted_ids_still_in_domain(self, frame: Frame) -> None:
    """
    Drop accreted ids that are still in use as a provisional id.

    A feature that has been accreted by another feature should no longer
    exist, so its id should not appear as a provisional id in the Frame.
    Every Feature with an accreted list has that list replaced by the subset
    of ids that are not a provisional id of any Feature in the Frame.

    Args:
        frame (Frame): Frame to inspect accreted ids

    Raises:
        TypeError:
            If frame is not a Frame, or a Feature's accreted attribute is
            neither None nor a list
    """
    if not isinstance(frame, Frame):
        raise TypeError(f"Expected type Frame, got {type(frame)}")

    all_features = frame.features.values()
    # A set gives constant-time membership tests in the loop below
    all_provisional_ids = {feature.provisional_id for feature in all_features}

    for feature in all_features:
        if feature.accreted is None:
            continue
        if not isinstance(feature.accreted, list):
            raise TypeError(f"Expected list, got {type(feature.accreted)}")

        # Keep only accreted ids that are not also a provisional id
        remaining_accreted = [
            acc_id
            for acc_id in feature.accreted
            if acc_id not in all_provisional_ids
        ]
        # Reset the accreted feature id list
        feature.accrete_ids(remaining_accreted, replace=True)
288
+
289
def resolve_provisional_id_conflicts(
    self, advected_frame: Frame, current_frame: Frame
) -> None:
    """
    Split groups of current Features that share a provisional id into one
    parent and its children.

    After matching, several Features in the current Frame may have been
    matched to the same previous feature and so carry the same provisional
    id. For each such id, identify_parent_and_child_features picks the parent,
    which keeps the id. Every other Feature in the group becomes a child: its
    parent attribute is set to the shared id, it receives the next available
    id from the current Frame, and its lifetime is either copied from the
    parent (retain_lifetime_on_split True) or reset to 1. The parent's spawned
    ids are then replaced by the new child ids.

    Args:
        advected_frame (Frame):
            Frame containing advected Features from previous timestep
        current_frame (Frame):
            Frame containing Features at current timestep

    Raises:
        TypeError: If a Feature sharing a provisional id is not a Feature
    """
    all_features = current_frame.features.values()

    # Provisional ids that occur more than once are the conflicts
    provisional_ids = [feature.provisional_id for feature in all_features]
    unique_ids, counts = np.unique(provisional_ids, return_counts=True)

    for conflicting_id in unique_ids[counts > 1]:
        matching_features = [
            feature
            for feature in all_features
            if feature.provisional_id == conflicting_id
        ]

        if any(not isinstance(feature, Feature) for feature in matching_features):
            raise TypeError("Expected all matching features to be of type Feature")

        parent_feature, child_features = self.identify_parent_and_child_features(
            conflicting_id,
            matching_features,
            advected_frame.feature_field,
            current_frame.feature_field,
        )

        # TODO: should some of this functionality be moved to Feature?
        # The parent keeps the provisional id; each child points back at it and
        # moves to a new id
        for feature in child_features:
            feature.parent = conflicting_id
            feature.provisional_id = current_frame.get_next_available_feature_id()
            # Lifetime handling depends on the init input
            feature.lifetime = (
                parent_feature.lifetime if self.retain_lifetime_on_split else 1
            )

        # Record the new child ids on the parent
        child_ids = [feature.provisional_id for feature in child_features]
        parent_feature.spawns(child_ids, replace=True)
358
+
359
def identify_unmatched_features_in_prev_frame(
    self, prev_frame: Frame, current_frame: Frame
) -> None:
    """
    Flag Features of the previous Frame whose id is absent from the current
    Frame.

    This is useful for output statistics, e.g., for tracing dissipation
    events. A previous Feature counts as unmatched when its key in
    prev_frame.features is not the id of any Feature in current_frame; each
    such Feature has set_as_final_timestep() called on it.

    Args:
        prev_frame (Frame): Frame containing Features at previous timestep
        current_frame (Frame): Frame containing Features at current timestep

    Raises:
        TypeError: If prev_frame or current_frame is not a Frame
    """
    if not isinstance(prev_frame, Frame) or not isinstance(current_frame, Frame):
        raise TypeError("Expected type Frame for both prev_frame and current_frame")

    # A set keeps the membership test constant time per previous Feature
    current_frame_ids = {feature.id for feature in current_frame.features.values()}
    for feature_id, feature in prev_frame.features.items():
        if feature_id not in current_frame_ids:
            feature.set_as_final_timestep()
380
+
381
def identify_parent_and_child_features(
    self,
    parent_id: int,
    matching_features: list[Feature],
    advected_feature_field: NDArray,
    current_feature_field: NDArray,
) -> tuple[Feature, list[Feature]]:
    """
    For a given target parent_id and list of matching Features (that all share
    this provisional parent_id), identify which Feature is the best match to
    be the parent. All others are identified as children.

    Selection order:

    1. The Feature with the largest overlap with the advected feature with id
       parent_id. If no candidate overlaps at all, the overlaps are recomputed
       with a neighbourhood of radius overlap_nbhood.
    2. If several Features share that largest overlap, the one among them
       whose centroid is closest to the advected feature.
    3. If several of those are equidistant, the one with the lowest id.

    The input list is not modified; the children are returned in a new list.

    Args:
        parent_id (int):
            Provisional feature id to identify parent for
        matching_features (list[Feature]):
            List of matching Features sharing the provisional parent_id
        advected_feature_field (NDArray):
            Array of features advected from previous timestep
        current_feature_field (NDArray):
            Array of features from current timestep

    Returns:
        tuple[Feature, list[Feature]]:
            (parent_feature, list of child_features)

    Raises:
        TypeError: If any entry of matching_features is not a Feature
        ValueError:
            If parent_id is not in the advected feature field, or no candidate
            overlaps the advected feature even with the neighbourhood applied
    """
    parent_id = check_valid_ids(parent_id)

    advected_feature_field, current_feature_field = check_arrays(
        advected_feature_field,
        current_feature_field,
        ndim=2,
        equal_shape=True,
        dtype=int,
    )

    if not all(isinstance(feature, Feature) for feature in matching_features):
        raise TypeError(
            "Expected all values in matching_features to be type Feature"
        )

    # Check the parent_id is present in the advected feature field
    if not np.isin(parent_id, advected_feature_field):
        raise ValueError(
            f"Parent id {parent_id} not found in advected feature field"
        )

    # The candidates share a provisional id, so overlaps are measured using
    # their unique "id" in the current feature field
    matching_feature_ids = [feature.id for feature in matching_features]
    overlap_sizes = self._get_overlap_sizes(
        advected_feature_field=advected_feature_field,
        current_feature_field=current_feature_field,
        advected_id=parent_id,
        matching_ids=matching_feature_ids,
        nbhood=0,
    )

    # No overlap at all implies a halo was used to match these features, so
    # apply the halo again here
    # TODO: think about a more rigorous way of deciding whether halo is needed here.
    if all(size == 0 for size in overlap_sizes):
        overlap_sizes = self._get_overlap_sizes(
            advected_feature_field=advected_feature_field,
            current_feature_field=current_feature_field,
            advected_id=parent_id,
            matching_ids=matching_feature_ids,
            nbhood=self.overlap_nbhood,
        )

    # At least one candidate is expected to overlap at this stage
    if all(size == 0 for size in overlap_sizes):
        raise ValueError(
            f"No overlapping features found for provisional id {parent_id}"
        )

    # Only the candidates tied on the largest overlap compete further
    max_overlap = max(overlap_sizes)
    tied_ids = [
        feature_id
        for feature_id, size in zip(matching_feature_ids, overlap_sizes)
        if size == max_overlap
    ]

    if len(tied_ids) == 1:
        parent_feature_id = tied_ids[0]
    else:
        # Break the tie on centroid distance to the advected feature,
        # considering the tied candidates only
        closest_centroid_ids = self.find_ids_of_closest_centroid(
            advected_feature_field,
            current_feature_field,
            parent_id,
            tied_ids,
        )
        # Equidistant candidates: the lowest id wins
        parent_feature_id = min(closest_centroid_ids)

    parent_idx = matching_feature_ids.index(parent_feature_id)

    # Work on a copy so the caller's list is left untouched; whatever is not
    # the parent is a child
    child_features = list(matching_features)
    parent_feature = child_features.pop(parent_idx)
    return parent_feature, child_features
489
+
490
def find_ids_of_closest_overlaps(
    self,
    overlap_hist: NDArray,
    advected_feature_field: NDArray,
    current_feature_field: NDArray,
    current_feature_id: int,
) -> tuple[Union[int, None], Union[NDArray, None]]:
    """
    Use the overlap histogram to find the closest matching feature id in the
    advected field for current_feature_id in the current field. Any other ids
    with a sufficient overlap are returned separately.

    An overlap is sufficient when its histogram value is at least
    overlap_threshold.

    - No sufficient overlap: returns (None, None).
    - One sufficient overlap: returns (that id, None).
    - More than one: the candidate whose size is closest to the current
      Feature is chosen. If several share the closest size, the one with the
      closest centroid is chosen, and if there is still more than one, the
      first of them is taken. All sufficient ids not chosen are returned in an
      NDArray.

    Args:
        overlap_hist (NDArray):
            Histogram of overlaps produced using calculate_overlap_histogram
        advected_feature_field (NDArray):
            Feature field from previous timestep advected by flow
        current_feature_field (NDArray):
            Feature field from current timestep
        current_feature_id (int):
            Feature ID in the current field to match with the previous field

    Returns:
        Union[int, None]:
            The new label to assign to the Feature. If None, there is no
            sufficient overlap
        Union[NDArray, None]:
            1D array of other labels that were a sufficient match but were not
            chosen as the best overlap. If None, there are no other
            sufficient ids.
    """
    overlap_hist = check_arrays(overlap_hist, ndim=1, non_negative=True)
    advected_feature_field, current_feature_field = check_arrays(
        advected_feature_field,
        current_feature_field,
        equal_shape=True,
        non_negative=True,
        dtype=int,
    )
    current_feature_id = check_valid_ids(current_feature_id)

    # Histogram index == advected feature id, so the indices meeting the
    # threshold are the candidate ids
    sufficient_overlaps = overlap_hist >= self.overlap_threshold
    ids_of_sufficient_overlaps = np.flatnonzero(sufficient_overlaps)

    if ids_of_sufficient_overlaps.size == 0:
        return None, None

    if ids_of_sufficient_overlaps.size == 1:
        # Returned as a native int, like the ids produced by the branch below
        return int(ids_of_sufficient_overlaps[0]), None

    # Several candidates: prefer the one closest in size to the current feature
    closest_size_ids = self.find_ids_of_closest_size(
        field_with_id=current_feature_field,
        field_to_search=advected_feature_field,
        target_feature_id=current_feature_id,
        candidate_ids=ids_of_sufficient_overlaps.tolist(),
    )

    if len(closest_size_ids) == 1:
        matching_id = closest_size_ids[0]
    else:
        # Sizes tie, so fall back to centroid distance; if that ties as well
        # the first candidate is taken
        closest_centroid_ids = self.find_ids_of_closest_centroid(
            field_with_id=current_feature_field,
            field_to_search=advected_feature_field,
            target_feature_id=current_feature_id,
            candidate_ids=closest_size_ids,
        )
        matching_id = closest_centroid_ids[0]

    # Everything sufficient except the chosen id is reported separately; the
    # result stays a 1D array even when a single id remains
    sufficient_overlaps[matching_id] = False
    other_sufficient_ids = np.flatnonzero(sufficient_overlaps)

    return matching_id, other_sufficient_ids
595
+
596
def find_ids_of_closest_size(
    self,
    field_with_id: NDArray,
    field_to_search: NDArray,
    target_feature_id: int,
    candidate_ids: list[int],
) -> list[int]:
    """
    Given a list of candidate ids, find the ids whose size is closest to the
    size of the feature with target_feature_id in field_with_id.

    Size is the number of elements of a field equal to the id.

    Args:
        field_with_id (NDArray):
            Feature field containing the feature with target_feature_id
        field_to_search (NDArray):
            Feature field containing the candidate ids
        target_feature_id (int):
            Id of the feature in field_with_id to compare sizes against
        candidate_ids (list):
            List of candidate ids in field_to_search to compare sizes against

    Returns:
        list[int]:
            List of candidate ids that have the closest size to the target
            feature (more than one when sizes tie)
    """
    field_with_id, field_to_search = check_arrays(
        field_with_id,
        field_to_search,
        ndim=2,
        equal_shape=True,
        dtype=int,
    )
    target_feature_id, candidate_ids = check_valid_ids(
        target_feature_id, candidate_ids
    )

    target_size = np.count_nonzero(field_with_id == target_feature_id)

    # Absolute size difference to the target, per candidate
    size_difference = {}
    for candidate_id in candidate_ids:
        candidate_size = np.count_nonzero(field_to_search == candidate_id)
        size_difference[candidate_id] = abs(target_size - candidate_size)

    smallest_difference = np.min(list(size_difference.values()))

    # Every candidate achieving the smallest difference is returned
    return [
        candidate_id
        for candidate_id, difference in size_difference.items()
        if difference == smallest_difference
    ]
649
+
650
def find_ids_of_closest_centroid(
    self,
    field_with_id: NDArray,
    field_to_search: NDArray,
    target_feature_id: int,
    candidate_ids: list[int],
) -> list[int]:
    """
    Given a list of candidate ids, find the ids whose centroid is closest to
    the centroid of the feature with target_feature_id in field_with_id.

    Distance is the norm (np.linalg.norm) of the difference between the two
    centroids returned by get_centroid.

    Args:
        field_with_id (NDArray):
            Feature field containing the feature with target_feature_id
        field_to_search (NDArray):
            Feature field containing the candidate ids
        target_feature_id (int):
            Id of the feature in field_with_id to measure distances from
        candidate_ids (list):
            List of candidate ids in field_to_search to measure distances to

    Returns:
        list[int]:
            Candidate ids whose centroid is closest to the target feature
            (more than one when distances tie)
    """
    field_with_id, field_to_search = check_arrays(
        field_with_id,
        field_to_search,
        ndim=2,
        equal_shape=True,
        dtype=int,
    )
    target_feature_id, candidate_ids = check_valid_ids(
        target_feature_id, candidate_ids
    )

    # Distance from the target centroid to each candidate centroid
    target_centroid = get_centroid(field_with_id, target_feature_id)
    centroid_distances = {}
    for cand_id in candidate_ids:
        cand_id_centroid = get_centroid(field_to_search, cand_id)
        centroid_distances[cand_id] = np.linalg.norm(
            target_centroid - cand_id_centroid
        )

    # The minimum is one of the stored values, so exact equality is safe here
    min_distance = np.min(list(centroid_distances.values()))
    closest_distance_ids = [
        candidate_id
        for candidate_id, distance in centroid_distances.items()
        if distance == min_distance
    ]
    return closest_distance_ids
700
+
701
def calculate_overlap_histogram(
    self,
    advected_feature_field: NDArray,
    current_feature_field: NDArray,
    feature_id: int,
    nbhood: int = 0,
) -> NDArray:
    """
    Calculate the amount of overlap between two feature fields at the
    requested feature id.

    A mask is built from the locations of feature_id in the current feature
    field, optionally expanded by a radial mask around the centroid of that
    feature. The mask selects the same locations in the advected feature
    field, and a histogram counts how often each advected id occurs there.
    The background bin (id 0) is zeroed, and each count is divided by the
    total pixel count of that id in the advected field to give the degree of
    overlap.

    Args:
        advected_feature_field (NDArray):
            Field containing advected features
        current_feature_field (NDArray):
            Field containing current features
        feature_id (int):
            Feature ID in the current feature field to calculate overlap for
        nbhood (int, optional):
            If nonzero, a radial mask surrounding the feature centroid of the
            current_feature_field is added to expand the overlap calculation.
            Used as the mask size directly, or multiplied by the feature's
            pixel count when _nbhood_coeff_test is set.
            Defaults to 0.

    Returns:
        NDArray: Array containing normalised overlap values, indexed by
        feature id, with one entry for every id from 0 up to the largest
        value found in either field

    Raises:
        TypeError: If nbhood is not an integer
        ValueError: If nbhood is negative
    """
    advected_feature_field, current_feature_field = check_arrays(
        advected_feature_field,
        current_feature_field,
        ndim=2,
        equal_shape=True,
        dtype=int,
    )
    feature_id = check_valid_ids(feature_id)
    if not isinstance(nbhood, (int, np.integer)):
        raise TypeError(f"Expected int, got {type(nbhood)}")
    if nbhood < 0:
        raise ValueError(f"Expected non-negative nbhood, got {nbhood}")

    # Boolean mask of the requested feature, optionally expanded by a nbhood
    feature_mask = current_feature_field == feature_id
    if nbhood:
        centroid = get_centroid(current_feature_field, feature_id)
        if self._nbhood_coeff_test:
            radial_mask_size = nbhood * np.count_nonzero(feature_mask)
        else:
            radial_mask_size = nbhood
        feature_mask += generate_radial_mask(
            current_feature_field, centroid, radial_mask_size
        )

    # One unit-width bin per id, up to the largest id in either field
    max_val = max(np.max(advected_feature_field), np.max(current_feature_field))
    bins = np.arange(int(max_val) + 2)

    # Count the advected ids found under the mask of the current feature
    overlap_hist = np.histogram(advected_feature_field[feature_mask], bins)[0]

    # The first bin represents the background, which is never a match
    overlap_hist[0] = 0

    # Total size of each advected feature, from a single pass over the field
    # using the same bins as the overlap counts
    norm_sizes = np.histogram(advected_feature_field, bins)[0]
    # Replace any zero sizes with 1 to avoid division by zero
    norm_sizes = np.where(norm_sizes == 0, 1, norm_sizes)

    return overlap_hist / norm_sizes
784
+
785
def _get_overlap_sizes(
    self,
    advected_feature_field: NDArray,
    current_feature_field: NDArray,
    advected_id: int,
    matching_ids: list,
    nbhood: int = 0,
) -> list:
    """
    Count the overlapping pixels between one advected feature and each of a
    list of features in the current field.

    Args:
        advected_feature_field (NDArray):
            Field containing advected features
        current_feature_field (NDArray):
            Field containing current features
        advected_id (int):
            Feature ID in the advected feature field
        matching_ids (list):
            Feature IDs in the current feature field to compare against
        nbhood (int, optional):
            If greater than zero, each feature mask is expanded with a radial
            mask centred on the feature centroid before the overlap is counted.
            The radius is nbhood, or nbhood multiplied by the feature size when
            self._nbhood_coeff_test is set.
            Defaults to 0.

    Returns:
        list: Number of overlapping pixels for each id in matching_ids, in the
        same order
    """
    advected_id, matching_ids = check_valid_ids(advected_id, matching_ids)

    def expanded_mask(feature_field: NDArray, feature_id: int) -> NDArray:
        # Boolean mask of the feature, optionally grown by a radial nbhood
        mask = feature_field == feature_id
        if nbhood > 0:
            if self._nbhood_coeff_test:
                radius = nbhood * np.count_nonzero(mask)
            else:
                radius = nbhood
            mask |= generate_radial_mask(
                feature_field,
                get_centroid(feature_field, feature_id),
                radius,
            )
        return mask

    # The advected mask does not depend on the matching id, so it is built only
    # once. It is built lazily so that an empty matching_ids does no work at all.
    advected_feature_mask = None
    overlap_sizes = []
    for feature_id in matching_ids:
        if advected_feature_mask is None:
            advected_feature_mask = expanded_mask(
                advected_feature_field, advected_id
            )
        current_feature_mask = expanded_mask(current_feature_field, feature_id)

        overlap_sizes.append(
            np.count_nonzero(advected_feature_mask & current_feature_mask)
        )
    return overlap_sizes
823
+
824
+
825
def advect_field_using_motion_vectors(
    field: NDArray, y_flow: NDArray, x_flow: NDArray
) -> NDArray:
    """
    A function that takes features (non-zero elements) in the 2D input field and
    advects them using the given motion vectors. This function performs this
    advection feature by feature, i.e., moves all elements sharing a label by the
    same amount, retaining their shape. Elements advected outside the field are
    dropped. Conflicts from multiple non-zero elements being advected to the same
    position are resolved by keeping the label whose centroid (measured in the
    un-advected input field) is closest to that position; on a tie the label
    already placed is kept.

    Args:
        field (NDArray):
            Field containing feature labels to be advected.
            Assumes 0 is the background value
        y_flow (NDArray):
            2D array of same shape as field containing y motion vectors
        x_flow (NDArray):
            2D array of same shape as field containing x motion vectors

    Returns:
        NDArray:
            Advected field
    """
    field, y_flow, x_flow = check_arrays(
        field, y_flow, x_flow, ndim=2, equal_shape=True
    )

    n_rows, n_cols = field.shape
    advected_field = np.zeros_like(field)

    # Centroid of every feature processed so far, keyed by feature id. Any label
    # already present in advected_field was placed by an earlier feature, so its
    # centroid is always available here when a conflict needs resolving.
    centroids = {}

    # Loop over all features (non-zero elements) in field and advect by
    # mean flow across the feature. Background field is assumed to be 0
    for feature_id in range(1, np.max(field) + 1):
        feature_mask = np.where(field == feature_id)
        # If mask is empty, this Feature is not in the current field
        if np.size(feature_mask) == 0:
            continue

        # (y, x) centroid of this feature in the input field
        centroids[feature_id] = np.mean(feature_mask, axis=1)

        # For the purposes of advecting features, need dy, dx to align with grid
        # points, so the mean is taken with an integer dtype.
        # NOTE(review): for float flows dtype=int appears to truncate values
        # rather than round to nearest - confirm this is the intended behaviour.
        dy = np.mean(y_flow[feature_mask], dtype=int)
        dx = np.mean(x_flow[feature_mask], dtype=int)

        # Now, advect the feature to the new position
        for y_coord, x_coord in zip(*feature_mask):
            advected_y_coord = y_coord + dy
            advected_x_coord = x_coord + dx

            # If this coordinate is out of bounds of the field, no further action
            # needed. Negative indices must be rejected explicitly, otherwise
            # they would wrap around to the opposite edge of the field.
            in_bounds_y = 0 <= advected_y_coord < n_rows
            in_bounds_x = 0 <= advected_x_coord < n_cols
            if not (in_bounds_y and in_bounds_x):
                continue

            # If there is no label already at this position, can go ahead and place
            # this feature label here.
            id_at_coord = advected_field[advected_y_coord, advected_x_coord]
            if id_at_coord == 0:
                advected_field[advected_y_coord, advected_x_coord] = feature_id
                continue

            # Otherwise, need to handle conflicting features. Do this by finding
            # distances between the advected coordinate and the centroids of
            # existing and iterating ids. Choose the feature that is closest to
            # its centroid
            advected_coords = np.array([advected_y_coord, advected_x_coord])
            existing_id_centroid_distance = np.linalg.norm(
                centroids[int(id_at_coord)] - advected_coords
            )
            iterating_id_centroid_distance = np.linalg.norm(
                centroids[feature_id] - advected_coords
            )

            if iterating_id_centroid_distance < existing_id_centroid_distance:
                advected_field[advected_y_coord, advected_x_coord] = feature_id

    return advected_field
902
+
903
+
904
def generate_radial_mask(field: NDArray, coord: NDArray, mask_radius: int) -> NDArray:
    """
    Build a circular boolean mask with the same shape as the input field.

    A grid point is included in the mask when its distance from the (y, x)
    coord is strictly less than mask_radius.

    Args:
        field (NDArray):
            2D field whose shape the mask will take
        coord (NDArray):
            (y, x) coordinate the mask is centred on
        mask_radius (int):
            Radius of the circular mask. Must be a positive, nonzero integer.

    Raises:
        TypeError: If mask_radius is not an int
        ValueError: If mask_radius is less than 1

    Returns:
        NDArray: Boolean mask, True inside the circle.
    """
    field = check_arrays(field, ndim=2)
    coord = check_arrays(coord, shape=(2,))

    # Numpy integers are converted to native ints before the type check
    mask_radius = native(mask_radius)
    if not isinstance(mask_radius, int):
        raise TypeError(f"Expected mask_radius to be int, got {type(mask_radius)}")
    if mask_radius < 1:
        raise ValueError(
            f"Expected mask_radius to be a positive, nonzero integer: got {mask_radius}"
        )

    n_rows, n_cols = field.shape
    centre_y, centre_x = coord

    # Column vector of row indices and row vector of column indices, so that the
    # sum of the squared offsets broadcasts to the full field shape
    row_offsets = np.arange(n_rows).reshape(-1, 1) - centre_y
    col_offsets = np.arange(n_cols).reshape(1, -1) - centre_x
    squared_distance = row_offsets**2 + col_offsets**2
    return squared_distance < mask_radius**2
937
+
938
+
939
def get_centroid(field: NDArray, value: int) -> NDArray:
    """
    Find the centroid of all elements in a 2D field that equal a given value.

    Args:
        field (NDArray):
            Field containing the feature to find the centroid for
        value (int):
            Value in field whose centroid is wanted

    Raises:
        TypeError: If value is not an integer type

    Returns:
        NDArray:
            (y, x) centroid, i.e. the mean row and column index of the matching
            elements. If no element matches, the mean is taken over an empty
            selection and the result is NaN.
    """
    field = check_arrays(field, ndim=2)
    value = check_valid_ids(value)

    value_type = type(value)
    if not np.issubdtype(value_type, np.integer):
        raise TypeError(f"Expected int, got {value_type}")

    # One (y, x) row per matching element; averaging down the rows gives the
    # centroid
    matching_coords = np.argwhere(field == value)
    return matching_coords.mean(axis=0)