dgenerate-ultralytics-headless 8.3.194-py3-none-any.whl → 8.3.196-py3-none-any.whl

This diff covers publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.
Files changed (107)
  1. {dgenerate_ultralytics_headless-8.3.194.dist-info → dgenerate_ultralytics_headless-8.3.196.dist-info}/METADATA +1 -2
  2. {dgenerate_ultralytics_headless-8.3.194.dist-info → dgenerate_ultralytics_headless-8.3.196.dist-info}/RECORD +107 -106
  3. tests/test_python.py +1 -1
  4. ultralytics/__init__.py +1 -1
  5. ultralytics/cfg/__init__.py +9 -8
  6. ultralytics/cfg/default.yaml +1 -0
  7. ultralytics/data/annotator.py +1 -1
  8. ultralytics/data/augment.py +76 -76
  9. ultralytics/data/base.py +12 -12
  10. ultralytics/data/build.py +5 -1
  11. ultralytics/data/converter.py +4 -4
  12. ultralytics/data/dataset.py +7 -7
  13. ultralytics/data/loaders.py +15 -15
  14. ultralytics/data/split_dota.py +10 -10
  15. ultralytics/data/utils.py +12 -12
  16. ultralytics/engine/exporter.py +19 -31
  17. ultralytics/engine/model.py +13 -13
  18. ultralytics/engine/predictor.py +16 -14
  19. ultralytics/engine/results.py +21 -21
  20. ultralytics/engine/trainer.py +15 -4
  21. ultralytics/engine/validator.py +6 -2
  22. ultralytics/hub/google/__init__.py +2 -2
  23. ultralytics/hub/session.py +7 -7
  24. ultralytics/models/fastsam/model.py +5 -5
  25. ultralytics/models/fastsam/predict.py +11 -11
  26. ultralytics/models/nas/model.py +1 -1
  27. ultralytics/models/rtdetr/predict.py +2 -2
  28. ultralytics/models/rtdetr/val.py +4 -4
  29. ultralytics/models/sam/amg.py +6 -6
  30. ultralytics/models/sam/build.py +9 -9
  31. ultralytics/models/sam/model.py +7 -7
  32. ultralytics/models/sam/modules/blocks.py +6 -6
  33. ultralytics/models/sam/modules/decoders.py +1 -1
  34. ultralytics/models/sam/modules/encoders.py +27 -27
  35. ultralytics/models/sam/modules/sam.py +4 -4
  36. ultralytics/models/sam/modules/tiny_encoder.py +18 -18
  37. ultralytics/models/sam/modules/utils.py +8 -8
  38. ultralytics/models/sam/predict.py +63 -63
  39. ultralytics/models/utils/loss.py +22 -22
  40. ultralytics/models/utils/ops.py +8 -8
  41. ultralytics/models/yolo/classify/predict.py +2 -2
  42. ultralytics/models/yolo/classify/train.py +9 -19
  43. ultralytics/models/yolo/classify/val.py +4 -4
  44. ultralytics/models/yolo/detect/predict.py +3 -3
  45. ultralytics/models/yolo/detect/train.py +38 -12
  46. ultralytics/models/yolo/detect/val.py +38 -37
  47. ultralytics/models/yolo/model.py +6 -6
  48. ultralytics/models/yolo/obb/train.py +1 -10
  49. ultralytics/models/yolo/obb/val.py +13 -13
  50. ultralytics/models/yolo/pose/train.py +1 -9
  51. ultralytics/models/yolo/pose/val.py +12 -12
  52. ultralytics/models/yolo/segment/predict.py +4 -4
  53. ultralytics/models/yolo/segment/train.py +2 -10
  54. ultralytics/models/yolo/segment/val.py +15 -15
  55. ultralytics/models/yolo/world/train.py +13 -13
  56. ultralytics/models/yolo/world/train_world.py +3 -3
  57. ultralytics/models/yolo/yoloe/predict.py +4 -4
  58. ultralytics/models/yolo/yoloe/train.py +7 -16
  59. ultralytics/models/yolo/yoloe/val.py +0 -7
  60. ultralytics/nn/autobackend.py +2 -2
  61. ultralytics/nn/modules/block.py +6 -6
  62. ultralytics/nn/modules/conv.py +2 -2
  63. ultralytics/nn/modules/head.py +6 -5
  64. ultralytics/nn/tasks.py +17 -15
  65. ultralytics/nn/text_model.py +3 -3
  66. ultralytics/solutions/ai_gym.py +2 -2
  67. ultralytics/solutions/analytics.py +3 -3
  68. ultralytics/solutions/config.py +5 -5
  69. ultralytics/solutions/distance_calculation.py +2 -2
  70. ultralytics/solutions/heatmap.py +1 -1
  71. ultralytics/solutions/instance_segmentation.py +4 -4
  72. ultralytics/solutions/object_counter.py +4 -4
  73. ultralytics/solutions/parking_management.py +7 -7
  74. ultralytics/solutions/queue_management.py +3 -3
  75. ultralytics/solutions/region_counter.py +4 -4
  76. ultralytics/solutions/similarity_search.py +2 -2
  77. ultralytics/solutions/solutions.py +48 -48
  78. ultralytics/solutions/streamlit_inference.py +1 -1
  79. ultralytics/solutions/trackzone.py +4 -4
  80. ultralytics/solutions/vision_eye.py +1 -1
  81. ultralytics/trackers/byte_tracker.py +11 -11
  82. ultralytics/trackers/utils/gmc.py +3 -3
  83. ultralytics/trackers/utils/matching.py +5 -5
  84. ultralytics/utils/__init__.py +30 -19
  85. ultralytics/utils/autodevice.py +2 -2
  86. ultralytics/utils/benchmarks.py +10 -10
  87. ultralytics/utils/callbacks/clearml.py +1 -1
  88. ultralytics/utils/callbacks/comet.py +5 -5
  89. ultralytics/utils/callbacks/tensorboard.py +2 -2
  90. ultralytics/utils/checks.py +7 -5
  91. ultralytics/utils/cpu.py +90 -0
  92. ultralytics/utils/dist.py +1 -1
  93. ultralytics/utils/downloads.py +2 -2
  94. ultralytics/utils/export.py +5 -5
  95. ultralytics/utils/instance.py +2 -2
  96. ultralytics/utils/loss.py +14 -8
  97. ultralytics/utils/metrics.py +35 -35
  98. ultralytics/utils/nms.py +4 -4
  99. ultralytics/utils/ops.py +1 -1
  100. ultralytics/utils/patches.py +2 -2
  101. ultralytics/utils/plotting.py +10 -9
  102. ultralytics/utils/torch_utils.py +113 -15
  103. ultralytics/utils/triton.py +5 -5
  104. {dgenerate_ultralytics_headless-8.3.194.dist-info → dgenerate_ultralytics_headless-8.3.196.dist-info}/WHEEL +0 -0
  105. {dgenerate_ultralytics_headless-8.3.194.dist-info → dgenerate_ultralytics_headless-8.3.196.dist-info}/entry_points.txt +0 -0
  106. {dgenerate_ultralytics_headless-8.3.194.dist-info → dgenerate_ultralytics_headless-8.3.196.dist-info}/licenses/LICENSE +0 -0
  107. {dgenerate_ultralytics_headless-8.3.194.dist-info → dgenerate_ultralytics_headless-8.3.196.dist-info}/top_level.txt +0 -0
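Nearly every hunk below is a docstring type-annotation update: the typing-module generics `List`, `Dict`, and `Tuple` are replaced with the built-in generics (`list`, `dict`, `tuple`) standardized by PEP 585 and usable since Python 3.9. A minimal before/after sketch of the convention (illustrative function, not taken from the package):

    from typing import Dict, List


    def old_style(points: List[int]) -> Dict[str, int]:
        """Count points.

        Args:
            points (List[int]): Input values.  # pre-PEP 585 docstring style
        """
        return {"count": len(points)}


    def new_style(points: list[int]) -> dict[str, int]:
        """Count points.

        Args:
            points (list[int]): Input values.  # built-in generics, Python >= 3.9
        """
        return {"count": len(points)}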
ultralytics/models/sam/predict.py
@@ -51,7 +51,7 @@ class Predictor(BasePredictor):
  device (torch.device): The device (CPU or GPU) on which the model is loaded.
  im (torch.Tensor): The preprocessed input image.
  features (torch.Tensor): Extracted image features.
- prompts (Dict[str, Any]): Dictionary to store various types of prompts (e.g., bboxes, points, masks).
+ prompts (dict[str, Any]): Dictionary to store various types of prompts (e.g., bboxes, points, masks).
  segment_all (bool): Flag to indicate if full image segmentation should be performed.
  mean (torch.Tensor): Mean values for image normalization.
  std (torch.Tensor): Standard deviation values for image normalization.
@@ -116,7 +116,7 @@ class Predictor(BasePredictor):
  torch.Tensor and list of np.ndarray as input formats.

  Args:
- im (torch.Tensor | List[np.ndarray]): Input image(s) in BCHW tensor format or list of HWC numpy arrays.
+ im (torch.Tensor | list[np.ndarray]): Input image(s) in BCHW tensor format or list of HWC numpy arrays.

  Returns:
  (torch.Tensor): The preprocessed image tensor, normalized and converted to the appropriate dtype.
@@ -149,10 +149,10 @@ class Predictor(BasePredictor):
  Currently, batched inference is not supported; hence the list length should be 1.

  Args:
- im (List[np.ndarray]): List containing a single image in HWC numpy array format.
+ im (list[np.ndarray]): List containing a single image in HWC numpy array format.

  Returns:
- (List[np.ndarray]): List containing the transformed image.
+ (list[np.ndarray]): List containing the transformed image.

  Raises:
  AssertionError: If the input list contains more than one image.
@@ -177,9 +177,9 @@ class Predictor(BasePredictor):

  Args:
  im (torch.Tensor): The preprocessed input image in tensor format, with shape (N, C, H, W).
- bboxes (np.ndarray | List | None): Bounding boxes with shape (N, 4), in XYXY format.
- points (np.ndarray | List | None): Points indicating object locations with shape (N, 2), in pixels.
- labels (np.ndarray | List | None): Labels for point prompts, shape (N,). 1 = foreground, 0 = background.
+ bboxes (np.ndarray | list | None): Bounding boxes with shape (N, 4), in XYXY format.
+ points (np.ndarray | list | None): Points indicating object locations with shape (N, 2), in pixels.
+ labels (np.ndarray | list | None): Labels for point prompts, shape (N,). 1 = foreground, 0 = background.
  masks (np.ndarray | None): Low-resolution masks from previous predictions, shape (N, H, W). For SAM H=W=256.
  multimask_output (bool): Flag to return multiple masks. Helpful for ambiguous prompts.
  *args (Any): Additional positional arguments.
@@ -215,9 +215,9 @@ class Predictor(BasePredictor):

  Args:
  im (torch.Tensor): Preprocessed input image tensor with shape (N, C, H, W).
- bboxes (np.ndarray | List | None): Bounding boxes in XYXY format with shape (N, 4).
- points (np.ndarray | List | None): Points indicating object locations with shape (N, 2) or (N, num_points, 2), in pixels.
- labels (np.ndarray | List | None): Point prompt labels with shape (N) or (N, num_points). 1 for foreground, 0 for background.
+ bboxes (np.ndarray | list | None): Bounding boxes in XYXY format with shape (N, 4).
+ points (np.ndarray | list | None): Points indicating object locations with shape (N, 2) or (N, num_points, 2), in pixels.
+ labels (np.ndarray | list | None): Point prompt labels with shape (N) or (N, num_points). 1 for foreground, 0 for background.
  masks (np.ndarray | None): Low-res masks from previous predictions with shape (N, H, W). For SAM, H=W=256.
  multimask_output (bool): Flag to return multiple masks for ambiguous prompts.

@@ -250,10 +250,10 @@ class Predictor(BasePredictor):

  Args:
  features (torch.Tensor): Extracted image features with shape (B, C, H, W) from the SAM model image encoder.
- bboxes (np.ndarray | List[List[float]] | None): Bounding boxes in XYXY format with shape (N, 4).
- points (np.ndarray | List[List[float]] | None): Object location points with shape (N, 2), in pixels.
- labels (np.ndarray | List[int] | None): Point prompt labels with shape (N,). 1 = foreground, 0 = background.
- masks (List[np.ndarray] | np.ndarray | None): Masks for the objects, where each mask is a 2D array.
+ bboxes (np.ndarray | list[list[float]] | None): Bounding boxes in XYXY format with shape (N, 4).
+ points (np.ndarray | list[list[float]] | None): Object location points with shape (N, 2), in pixels.
+ labels (np.ndarray | list[int] | None): Point prompt labels with shape (N,). 1 = foreground, 0 = background.
+ masks (list[np.ndarray] | np.ndarray | None): Masks for the objects, where each mask is a 2D array.
  multimask_output (bool): Flag to return multiple masks for ambiguous prompts.

  Returns:
@@ -282,12 +282,12 @@ class Predictor(BasePredictor):
  Prepare and transform the input prompts for processing based on the destination shape.

  Args:
- dst_shape (Tuple[int, int]): The target shape (height, width) for the prompts.
- src_shape (Tuple[int, int]): The source shape (height, width) of the input image.
- bboxes (np.ndarray | List | None): Bounding boxes in XYXY format with shape (N, 4).
- points (np.ndarray | List | None): Points indicating object locations with shape (N, 2) or (N, num_points, 2), in pixels.
- labels (np.ndarray | List | None): Point prompt labels with shape (N) or (N, num_points). 1 for foreground, 0 for background.
- masks (List[np.ndarray] | np.ndarray | None): Masks for the objects, where each mask is a 2D array with shape (H, W).
+ dst_shape (tuple[int, int]): The target shape (height, width) for the prompts.
+ src_shape (tuple[int, int]): The source shape (height, width) of the input image.
+ bboxes (np.ndarray | list | None): Bounding boxes in XYXY format with shape (N, 4).
+ points (np.ndarray | list | None): Points indicating object locations with shape (N, 2) or (N, num_points, 2), in pixels.
+ labels (np.ndarray | list | None): Point prompt labels with shape (N) or (N, num_points). 1 for foreground, 0 for background.
+ masks (list[np.ndarray] | np.ndarray | None): Masks for the objects, where each mask is a 2D array with shape (H, W).

  Returns:
  bboxes (torch.Tensor | None): Transformed bounding boxes.
@@ -351,7 +351,7 @@ class Predictor(BasePredictor):
  crop_n_layers (int): Number of layers for additional mask predictions on image crops.
  crop_overlap_ratio (float): Overlap between crops, scaled down in subsequent layers.
  crop_downscale_factor (int): Scaling factor for sampled points-per-side in each layer.
- point_grids (List[np.ndarray] | None): Custom grids for point sampling normalized to [0,1].
+ point_grids (list[np.ndarray] | None): Custom grids for point sampling normalized to [0,1].
  points_stride (int): Number of points to sample along each side of the image.
  points_batch_size (int): Batch size for the number of points processed simultaneously.
  conf_thres (float): Confidence threshold [0,1] for filtering based on mask quality prediction.
@@ -490,10 +490,10 @@ class Predictor(BasePredictor):
  - pred_scores (torch.Tensor): Confidence scores for each mask with shape (N, 1).
  - pred_bboxes (torch.Tensor, optional): Predicted bounding boxes if segment_all is True.
  img (torch.Tensor): The processed input image tensor with shape (C, H, W).
- orig_imgs (List[np.ndarray] | torch.Tensor): The original, unprocessed images.
+ orig_imgs (list[np.ndarray] | torch.Tensor): The original, unprocessed images.

  Returns:
- (List[Results]): List of Results objects containing detection masks, bounding boxes, and other
+ (list[Results]): List of Results objects containing detection masks, bounding boxes, and other
  metadata for each processed image.

  Examples:
@@ -623,7 +623,7 @@ class Predictor(BasePredictor):

  Returns:
  new_masks (torch.Tensor): Processed masks with small regions removed, shape (N, H, W).
- keep (List[int]): Indices of remaining masks after NMS, for filtering corresponding boxes.
+ keep (list[int]): Indices of remaining masks after NMS, for filtering corresponding boxes.

  Examples:
  >>> masks = torch.rand(5, 640, 640) > 0.5 # 5 random binary masks
@@ -673,13 +673,13 @@ class Predictor(BasePredictor):
  Perform prompts preprocessing and inference on provided image features using the SAM model.

  Args:
- features (torch.Tensor | Dict[str, Any]): Extracted image features from the SAM/SAM2 model image encoder.
- src_shape (Tuple[int, int]): The source shape (height, width) of the input image.
- dst_shape (Tuple[int, int] | None): The target shape (height, width) for the prompts. If None, defaults to (imgsz, imgsz).
- bboxes (np.ndarray | List[List[float]] | None): Bounding boxes in xyxy format with shape (N, 4).
- points (np.ndarray | List[List[float]] | None): Points indicating object locations with shape (N, 2), in pixels.
- labels (np.ndarray | List[int] | None): Point prompt labels with shape (N, ).
- masks (List[np.ndarray] | np.ndarray | None): Masks for the objects, where each mask is a 2D array.
+ features (torch.Tensor | dict[str, Any]): Extracted image features from the SAM/SAM2 model image encoder.
+ src_shape (tuple[int, int]): The source shape (height, width) of the input image.
+ dst_shape (tuple[int, int] | None): The target shape (height, width) for the prompts. If None, defaults to (imgsz, imgsz).
+ bboxes (np.ndarray | list[list[float]] | None): Bounding boxes in xyxy format with shape (N, 4).
+ points (np.ndarray | list[list[float]] | None): Points indicating object locations with shape (N, 2), in pixels.
+ labels (np.ndarray | list[int] | None): Point prompt labels with shape (N, ).
+ masks (list[np.ndarray] | np.ndarray | None): Masks for the objects, where each mask is a 2D array.
  multimask_output (bool): Flag to return multiple masks for ambiguous prompts.

  Returns:
@@ -688,7 +688,7 @@ class Predictor(BasePredictor):
  Each box is in xyxy format with additional columns for score and class.

  Notes:
- - The input features is a torch.Tensor of shape (B, C, H, W) if performing on SAM, or a Dict[str, Any] if performing on SAM2.
+ - The input features is a torch.Tensor of shape (B, C, H, W) if performing on SAM, or a dict[str, Any] if performing on SAM2.
  """
  dst_shape = dst_shape or (self.args.imgsz, self.args.imgsz)
  prompts = self._prepare_prompts(dst_shape, src_shape, bboxes, points, labels, masks)
@@ -714,12 +714,12 @@ class SAM2Predictor(Predictor):
  prompt-based inference.

  Attributes:
- _bb_feat_sizes (List[tuple]): Feature sizes for different backbone levels.
+ _bb_feat_sizes (list[tuple]): Feature sizes for different backbone levels.
  model (torch.nn.Module): The loaded SAM2 model.
  device (torch.device): The device (CPU or GPU) on which the model is loaded.
  features (dict): Cached image features for efficient inference.
  segment_all (bool): Flag to indicate if all segments should be predicted.
- prompts (Dict[str, Any]): Dictionary to store various types of prompts for inference.
+ prompts (dict[str, Any]): Dictionary to store various types of prompts for inference.

  Methods:
  get_model: Retrieve and initialize the SAM2 model.
@@ -752,12 +752,12 @@ class SAM2Predictor(Predictor):
  Prepare and transform the input prompts for processing based on the destination shape.

  Args:
- dst_shape (Tuple[int, int]): The target shape (height, width) for the prompts.
- src_shape (Tuple[int, int]): The source shape (height, width) of the input image.
- bboxes (np.ndarray | List | None): Bounding boxes in XYXY format with shape (N, 4).
- points (np.ndarray | List | None): Points indicating object locations with shape (N, 2) or (N, num_points, 2), in pixels.
- labels (np.ndarray | List | None): Point prompt labels with shape (N,) or (N, num_points). 1 for foreground, 0 for background.
- masks (List | np.ndarray | None): Masks for the objects, where each mask is a 2D array.
+ dst_shape (tuple[int, int]): The target shape (height, width) for the prompts.
+ src_shape (tuple[int, int]): The source shape (height, width) of the input image.
+ bboxes (np.ndarray | list | None): Bounding boxes in XYXY format with shape (N, 4).
+ points (np.ndarray | list | None): Points indicating object locations with shape (N, 2) or (N, num_points, 2), in pixels.
+ labels (np.ndarray | list | None): Point prompt labels with shape (N,) or (N, num_points). 1 for foreground, 0 for background.
+ masks (list | np.ndarray | None): Masks for the objects, where each mask is a 2D array.

  Returns:
  points (torch.Tensor | None): Transformed points.
@@ -842,13 +842,13 @@ class SAM2Predictor(Predictor):
  Perform inference on image features using the SAM2 model.

  Args:
- features (torch.Tensor | Dict[str, Any]): Extracted image features with shape (B, C, H, W) from the SAM2 model image encoder, it
+ features (torch.Tensor | dict[str, Any]): Extracted image features with shape (B, C, H, W) from the SAM2 model image encoder, it
  could also be a dictionary including:
  - image_embed (torch.Tensor): Image embedding with shape (B, C, H, W).
- - high_res_feats (List[torch.Tensor]): List of high-resolution feature maps from the backbone, each with shape (B, C, H, W).
- points (np.ndarray | List[List[float]] | None): Object location points with shape (N, 2), in pixels.
- labels (np.ndarray | List[int] | None): Point prompt labels with shape (N,). 1 = foreground, 0 = background.
- masks (List[np.ndarray] | np.ndarray | None): Masks for the objects, where each mask is a 2D array.
+ - high_res_feats (list[torch.Tensor]): List of high-resolution feature maps from the backbone, each with shape (B, C, H, W).
+ points (np.ndarray | list[list[float]] | None): Object location points with shape (N, 2), in pixels.
+ labels (np.ndarray | list[int] | None): Point prompt labels with shape (N,). 1 = foreground, 0 = background.
+ masks (list[np.ndarray] | np.ndarray | None): Masks for the objects, where each mask is a 2D array.
  multimask_output (bool): Flag to return multiple masks for ambiguous prompts.
  img_idx (int): Index of the image in the batch to process.

@@ -962,9 +962,9 @@ class SAM2VideoPredictor(SAM2Predictor):

  Args:
  im (torch.Tensor): The preprocessed input image in tensor format, with shape (N, C, H, W).
- bboxes (np.ndarray | List, optional): Bounding boxes with shape (N, 4), in XYXY format.
- points (np.ndarray | List, optional): Points indicating object locations with shape (N, 2), in pixels.
- labels (np.ndarray | List, optional): Labels for point prompts, shape (N, ). 1 = foreground, 0 = background.
+ bboxes (np.ndarray | list, optional): Bounding boxes with shape (N, 4), in XYXY format.
+ points (np.ndarray | list, optional): Points indicating object locations with shape (N, 2), in pixels.
+ labels (np.ndarray | list, optional): Labels for point prompts, shape (N, ). 1 = foreground, 0 = background.
  masks (np.ndarray, optional): Low-resolution masks from previous predictions shape (N,H,W). For SAM H=W=256.

  Returns:
@@ -1036,9 +1036,9 @@ class SAM2VideoPredictor(SAM2Predictor):
  the masks do not overlap, which can be useful for certain applications.

  Args:
- preds (Tuple[torch.Tensor, torch.Tensor]): The predicted masks and scores from the model.
+ preds (tuple[torch.Tensor, torch.Tensor]): The predicted masks and scores from the model.
  img (torch.Tensor): The processed image tensor.
- orig_imgs (List[np.ndarray]): The original images before processing.
+ orig_imgs (list[np.ndarray]): The original images before processing.

  Returns:
  (list): The post-processed predictions.
@@ -1286,7 +1286,7 @@ class SAM2VideoPredictor(SAM2Predictor):
  Returns:
  vis_feats (torch.Tensor): The visual features extracted from the image.
  vis_pos_embed (torch.Tensor): The positional embeddings for the visual features.
- feat_sizes (List[tuple]): A list containing the sizes of the extracted features.
+ feat_sizes (list[tuple]): A list containing the sizes of the extracted features.

  Note:
  - If `batch` is greater than 1, the features are expanded to fit the batch size.
@@ -1442,11 +1442,11 @@ class SAM2VideoPredictor(SAM2Predictor):
  the current batch size.

  Args:
- out_maskmem_pos_enc (List[torch.Tensor] | None): The positional encoding for mask memory.
+ out_maskmem_pos_enc (list[torch.Tensor] | None): The positional encoding for mask memory.
  Should be a list of tensors or None.

  Returns:
- (List[torch.Tensor]): The positional encoding for mask memory, either cached or expanded.
+ (list[torch.Tensor]): The positional encoding for mask memory, either cached or expanded.

  Note:
  - The method assumes that `out_maskmem_pos_enc` is a list of tensors or None.
@@ -1730,10 +1730,10 @@ class SAM2DynamicInteractivePredictor(SAM2Predictor):
  specified overrides

  Args:
- cfg (Dict[str, Any]): Configuration dictionary containing default settings.
- overrides (Dict[str, Any] | None): Dictionary of values to override default configuration.
+ cfg (dict[str, Any]): Configuration dictionary containing default settings.
+ overrides (dict[str, Any] | None): Dictionary of values to override default configuration.
  max_obj_num (int): Maximum number of objects to track. Default is 3. this is set to keep fix feature size for the model.
- _callbacks (Dict[str, Any] | None): Dictionary of callback functions to customize behavior.
+ _callbacks (dict[str, Any] | None): Dictionary of callback functions to customize behavior.

  Examples:
  >>> predictor = SAM2DynamicInteractivePredictor(cfg=DEFAULT_CFG)
@@ -1778,11 +1778,11 @@ class SAM2DynamicInteractivePredictor(SAM2Predictor):

  Args:
  im (torch.Tensor | np.ndarray): The input image tensor or numpy array.
- bboxes (List[List[float]] | None): Optional list of bounding boxes to update the memory.
- masks (List[torch.Tensor | np.ndarray] | None): Optional masks to update the memory.
- points (List[List[float]] | None): Optional list of points to update the memory, each point is [x, y].
- labels (List[int] | None): Optional list of object IDs corresponding to the points (>0 for positive, 0 for negative).
- obj_ids (List[int] | None): Optional list of object IDs corresponding to the prompts.
+ bboxes (list[list[float]] | None): Optional list of bounding boxes to update the memory.
+ masks (list[torch.Tensor | np.ndarray] | None): Optional masks to update the memory.
+ points (list[list[float]] | None): Optional list of points to update the memory, each point is [x, y].
+ labels (list[int] | None): Optional list of object IDs corresponding to the points (>0 for positive, 0 for negative).
+ obj_ids (list[int] | None): Optional list of object IDs corresponding to the prompts.
  update_memory (bool): Flag to indicate whether to update the memory with new objects.

  Returns:
@@ -1855,7 +1855,7 @@ class SAM2DynamicInteractivePredictor(SAM2Predictor):
  Append the imgState to the memory_bank and update the memory for the model.

  Args:
- obj_ids (List[int]): List of object IDs corresponding to the prompts.
+ obj_ids (list[int]): List of object IDs corresponding to the prompts.
  points (torch.Tensor | None): Tensor of shape (B, N, 2) representing the input points for N objects.
  labels (torch.Tensor | None): Tensor of shape (B, N) representing the labels for the input points.
  masks (torch.Tensor | None): Optional tensor of shape (N, H, W) representing the input masks for N objects.
@@ -2009,7 +2009,7 @@ class SAM2DynamicInteractivePredictor(SAM2Predictor):
  mask (torch.Tensor | None): The mask input for the object with shape (H, W).

  Returns:
- current_out (Dict[str, Any]): A dictionary containing the current output with mask predictions and object pointers.
+ current_out (dict[str, Any]): A dictionary containing the current output with mask predictions and object pointers.
  Keys include 'point_inputs', 'mask_inputs', 'pred_masks', 'pred_masks_high_res', 'obj_ptr', 'object_score_logits'.
  """
  if mask is not None and self.model.use_mask_input_as_output_without_sam:
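The hunks above all touch the SAM/SAM2 predictors' prompt-handling docstrings (bboxes, points, labels, masks). For orientation, prompted inference through the public API looks roughly like this (standard Ultralytics SAM usage; the checkpoint and image names are illustrative):

    from ultralytics import SAM

    model = SAM("sam2.1_b.pt")  # illustrative checkpoint name
    # One box prompt plus one positive point (label 1 = foreground, 0 = background)
    results = model("image.jpg", bboxes=[100, 100, 400, 400], points=[[250, 250]], labels=[1])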
ultralytics/models/utils/loss.py
@@ -23,7 +23,7 @@ class DETRLoss(nn.Module):

  Attributes:
  nc (int): Number of classes.
- loss_gain (Dict[str, float]): Coefficients for different loss components.
+ loss_gain (dict[str, float]): Coefficients for different loss components.
  aux_loss (bool): Whether to compute auxiliary losses.
  use_fl (bool): Whether to use FocalLoss.
  use_vfl (bool): Whether to use VarifocalLoss.
@@ -55,7 +55,7 @@ class DETRLoss(nn.Module):

  Args:
  nc (int): Number of classes.
- loss_gain (Dict[str, float], optional): Coefficients for different loss components.
+ loss_gain (dict[str, float], optional): Coefficients for different loss components.
  aux_loss (bool): Whether to use auxiliary losses from each decoder layer.
  use_fl (bool): Whether to use FocalLoss.
  use_vfl (bool): Whether to use VarifocalLoss.
@@ -93,7 +93,7 @@ class DETRLoss(nn.Module):
  postfix (str, optional): String to append to the loss name for identification in multi-loss scenarios.

  Returns:
- (Dict[str, torch.Tensor]): Dictionary containing classification loss value.
+ (dict[str, torch.Tensor]): Dictionary containing classification loss value.

  Notes:
  The function supports different classification loss types:
@@ -133,7 +133,7 @@ class DETRLoss(nn.Module):
  postfix (str, optional): String to append to the loss names for identification in multi-loss scenarios.

  Returns:
- (Dict[str, torch.Tensor]): Dictionary containing:
+ (dict[str, torch.Tensor]): Dictionary containing:
  - loss_bbox{postfix}: L1 loss between predicted and ground truth boxes, scaled by the bbox loss gain.
  - loss_giou{postfix}: GIoU loss between predicted and ground truth boxes, scaled by the giou loss gain.

@@ -207,14 +207,14 @@ class DETRLoss(nn.Module):
  pred_scores (torch.Tensor): Predicted scores from auxiliary layers.
  gt_bboxes (torch.Tensor): Ground truth bounding boxes.
  gt_cls (torch.Tensor): Ground truth classes.
- gt_groups (List[int]): Number of ground truths per image.
- match_indices (List[Tuple], optional): Pre-computed matching indices.
+ gt_groups (list[int]): Number of ground truths per image.
+ match_indices (list[tuple], optional): Pre-computed matching indices.
  postfix (str, optional): String to append to loss names.
  masks (torch.Tensor, optional): Predicted masks if using segmentation.
  gt_mask (torch.Tensor, optional): Ground truth masks if using segmentation.

  Returns:
- (Dict[str, torch.Tensor]): Dictionary of auxiliary losses.
+ (dict[str, torch.Tensor]): Dictionary of auxiliary losses.
  """
  # NOTE: loss class, bbox, giou, mask, dice
  loss = torch.zeros(5 if masks is not None else 3, device=pred_bboxes.device)
@@ -265,10 +265,10 @@ class DETRLoss(nn.Module):
  Extract batch indices, source indices, and destination indices from match indices.

  Args:
- match_indices (List[Tuple]): List of tuples containing matched indices.
+ match_indices (list[tuple]): List of tuples containing matched indices.

  Returns:
- batch_idx (Tuple[torch.Tensor, torch.Tensor]): Tuple containing (batch_idx, src_idx).
+ batch_idx (tuple[torch.Tensor, torch.Tensor]): Tuple containing (batch_idx, src_idx).
  dst_idx (torch.Tensor): Destination indices.
  """
  batch_idx = torch.cat([torch.full_like(src, i) for i, (src, _) in enumerate(match_indices)])
@@ -285,7 +285,7 @@ class DETRLoss(nn.Module):
  Args:
  pred_bboxes (torch.Tensor): Predicted bounding boxes.
  gt_bboxes (torch.Tensor): Ground truth bounding boxes.
- match_indices (List[Tuple]): List of tuples containing matched indices.
+ match_indices (list[tuple]): List of tuples containing matched indices.

  Returns:
  pred_assigned (torch.Tensor): Assigned predicted bounding boxes.
@@ -325,14 +325,14 @@ class DETRLoss(nn.Module):
  pred_scores (torch.Tensor): Predicted class scores.
  gt_bboxes (torch.Tensor): Ground truth bounding boxes.
  gt_cls (torch.Tensor): Ground truth classes.
- gt_groups (List[int]): Number of ground truths per image.
+ gt_groups (list[int]): Number of ground truths per image.
  masks (torch.Tensor, optional): Predicted masks if using segmentation.
  gt_mask (torch.Tensor, optional): Ground truth masks if using segmentation.
  postfix (str, optional): String to append to loss names.
- match_indices (List[Tuple], optional): Pre-computed matching indices.
+ match_indices (list[tuple], optional): Pre-computed matching indices.

  Returns:
- (Dict[str, torch.Tensor]): Dictionary of losses.
+ (dict[str, torch.Tensor]): Dictionary of losses.
  """
  if match_indices is None:
  match_indices = self.matcher(
@@ -370,12 +370,12 @@ class DETRLoss(nn.Module):
  Args:
  pred_bboxes (torch.Tensor): Predicted bounding boxes, shape (L, B, N, 4).
  pred_scores (torch.Tensor): Predicted class scores, shape (L, B, N, C).
- batch (Dict[str, Any]): Batch information containing cls, bboxes, and gt_groups.
+ batch (dict[str, Any]): Batch information containing cls, bboxes, and gt_groups.
  postfix (str, optional): Postfix for loss names.
  **kwargs (Any): Additional arguments, may include 'match_indices'.

  Returns:
- (Dict[str, torch.Tensor]): Computed losses, including main and auxiliary (if enabled).
+ (dict[str, torch.Tensor]): Computed losses, including main and auxiliary (if enabled).

  Notes:
  Uses last elements of pred_bboxes and pred_scores for main loss, and the rest for auxiliary losses if
@@ -419,14 +419,14 @@ class RTDETRDetectionLoss(DETRLoss):
  Forward pass to compute detection loss with optional denoising loss.

  Args:
- preds (Tuple[torch.Tensor, torch.Tensor]): Tuple containing predicted bounding boxes and scores.
- batch (Dict[str, Any]): Batch data containing ground truth information.
+ preds (tuple[torch.Tensor, torch.Tensor]): Tuple containing predicted bounding boxes and scores.
+ batch (dict[str, Any]): Batch data containing ground truth information.
  dn_bboxes (torch.Tensor, optional): Denoising bounding boxes.
  dn_scores (torch.Tensor, optional): Denoising scores.
- dn_meta (Dict[str, Any], optional): Metadata for denoising.
+ dn_meta (dict[str, Any], optional): Metadata for denoising.

  Returns:
- (Dict[str, torch.Tensor]): Dictionary containing total loss and denoising loss if applicable.
+ (dict[str, torch.Tensor]): Dictionary containing total loss and denoising loss if applicable.
  """
  pred_bboxes, pred_scores = preds
  total_loss = super().forward(pred_bboxes, pred_scores, batch)
@@ -456,12 +456,12 @@ class RTDETRDetectionLoss(DETRLoss):
  Get match indices for denoising.

  Args:
- dn_pos_idx (List[torch.Tensor]): List of tensors containing positive indices for denoising.
+ dn_pos_idx (list[torch.Tensor]): List of tensors containing positive indices for denoising.
  dn_num_group (int): Number of denoising groups.
- gt_groups (List[int]): List of integers representing number of ground truths per image.
+ gt_groups (list[int]): List of integers representing number of ground truths per image.

  Returns:
- (List[Tuple[torch.Tensor, torch.Tensor]]): List of tuples containing matched indices for denoising.
+ (list[tuple[torch.Tensor, torch.Tensor]]): List of tuples containing matched indices for denoising.
  """
  dn_match_indices = []
  idx_groups = torch.as_tensor([0, *gt_groups[:-1]]).cumsum_(0)
ultralytics/models/utils/ops.py
@@ -22,7 +22,7 @@ class HungarianMatcher(nn.Module):
  used in end-to-end object detection models like DETR.

  Attributes:
- cost_gain (Dict[str, float]): Dictionary of cost coefficients for 'class', 'bbox', 'giou', 'mask', and 'dice'
+ cost_gain (dict[str, float]): Dictionary of cost coefficients for 'class', 'bbox', 'giou', 'mask', and 'dice'
  components.
  use_fl (bool): Whether to use Focal Loss for classification cost calculation.
  with_mask (bool): Whether the model makes mask predictions.
@@ -60,7 +60,7 @@ class HungarianMatcher(nn.Module):
  Initialize HungarianMatcher for optimal assignment of predicted and ground truth bounding boxes.

  Args:
- cost_gain (Dict[str, float], optional): Dictionary of cost coefficients for different matching cost
+ cost_gain (dict[str, float], optional): Dictionary of cost coefficients for different matching cost
  components. Should contain keys 'class', 'bbox', 'giou', 'mask', and 'dice'.
  use_fl (bool): Whether to use Focal Loss for classification cost calculation.
  with_mask (bool): Whether the model makes mask predictions.
@@ -100,12 +100,12 @@ class HungarianMatcher(nn.Module):
  num_classes).
  gt_bboxes (torch.Tensor): Ground truth bounding boxes with shape (num_gts, 4).
  gt_cls (torch.Tensor): Ground truth class labels with shape (num_gts,).
- gt_groups (List[int]): Number of ground truth boxes for each image in the batch.
+ gt_groups (list[int]): Number of ground truth boxes for each image in the batch.
  masks (torch.Tensor, optional): Predicted masks with shape (batch_size, num_queries, height, width).
- gt_mask (List[torch.Tensor], optional): Ground truth masks, each with shape (num_masks, Height, Width).
+ gt_mask (list[torch.Tensor], optional): Ground truth masks, each with shape (num_masks, Height, Width).

  Returns:
- (List[Tuple[torch.Tensor, torch.Tensor]]): A list of size batch_size, each element is a tuple
+ (list[tuple[torch.Tensor, torch.Tensor]]): A list of size batch_size, each element is a tuple
  (index_i, index_j), where index_i is the tensor of indices of the selected predictions (in order)
  and index_j is the tensor of indices of the corresponding selected ground truth targets (in order).
  For each batch element, it holds: len(index_i) = len(index_j) = min(num_queries, num_target_boxes).
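The forward method documented above solves a one-to-one assignment problem over a cost matrix; a toy illustration of that matching step using SciPy's linear_sum_assignment (which Hungarian matchers of this kind typically delegate to; not Ultralytics code):

    import numpy as np
    from scipy.optimize import linear_sum_assignment

    # Rows = predicted queries, columns = ground-truth boxes; entries are matching
    # costs (in the matcher, a weighted sum of class, bbox L1, and GIoU costs
    # combined via the cost_gain coefficients).
    cost = np.array([[0.9, 0.1, 0.5],
                     [0.4, 0.8, 0.2]])
    row_ind, col_ind = linear_sum_assignment(cost)  # optimal one-to-one assignment
    print([(int(r), int(c)) for r, c in zip(row_ind, col_ind)])  # [(0, 1), (1, 2)]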
@@ -205,8 +205,8 @@ def get_cdn_group(
  bounding boxes and class labels. It generates both positive and negative samples to improve model robustness.

  Args:
- batch (Dict[str, Any]): Batch dictionary containing 'gt_cls' (torch.Tensor with shape (num_gts,)),
- 'gt_bboxes' (torch.Tensor with shape (num_gts, 4)), and 'gt_groups' (List[int]) indicating number of
+ batch (dict[str, Any]): Batch dictionary containing 'gt_cls' (torch.Tensor with shape (num_gts,)),
+ 'gt_bboxes' (torch.Tensor with shape (num_gts, 4)), and 'gt_groups' (list[int]) indicating number of
  ground truths per image.
  num_classes (int): Total number of object classes.
  num_queries (int): Number of object queries.
@@ -220,7 +220,7 @@ def get_cdn_group(
  padding_cls (torch.Tensor | None): Modified class embeddings for denoising with shape (bs, num_dn, embed_dim).
  padding_bbox (torch.Tensor | None): Modified bounding boxes for denoising with shape (bs, num_dn, 4).
  attn_mask (torch.Tensor | None): Attention mask for denoising with shape (tgt_size, tgt_size).
- dn_meta (Dict[str, Any] | None): Meta information dictionary containing denoising parameters.
+ dn_meta (dict[str, Any] | None): Meta information dictionary containing denoising parameters.

  Examples:
  Generate denoising group for training
ultralytics/models/yolo/classify/predict.py
@@ -78,10 +78,10 @@ class ClassificationPredictor(BasePredictor):
  Args:
  preds (torch.Tensor): Raw predictions from the model.
  img (torch.Tensor): Input images after preprocessing.
- orig_imgs (List[np.ndarray] | torch.Tensor): Original images before preprocessing.
+ orig_imgs (list[np.ndarray] | torch.Tensor): Original images before preprocessing.

  Returns:
- (List[Results]): List of Results objects containing classification results for each image.
+ (list[Results]): List of Results objects containing classification results for each image.
  """
  if not isinstance(orig_imgs, list): # Input images are a torch.Tensor, not a list
  orig_imgs = ops.convert_torch2numpy_batch(orig_imgs)
ultralytics/models/yolo/classify/train.py
@@ -25,8 +25,8 @@ class ClassificationTrainer(BaseTrainer):

  Attributes:
  model (ClassificationModel): The classification model to be trained.
- data (Dict[str, Any]): Dictionary containing dataset information including class names and number of classes.
- loss_names (List[str]): Names of the loss functions used during training.
+ data (dict[str, Any]): Dictionary containing dataset information including class names and number of classes.
+ loss_names (list[str]): Names of the loss functions used during training.
  validator (ClassificationValidator): Validator instance for model evaluation.

  Methods:
@@ -55,20 +55,10 @@ class ClassificationTrainer(BaseTrainer):
  """
  Initialize a ClassificationTrainer object.

- This constructor sets up a trainer for image classification tasks, configuring the task type and default
- image size if not specified.
-
  Args:
- cfg (Dict[str, Any], optional): Default configuration dictionary containing training parameters.
- overrides (Dict[str, Any], optional): Dictionary of parameter overrides for the default configuration.
- _callbacks (List[Any], optional): List of callback functions to be executed during training.
-
- Examples:
- Create a trainer with custom configuration
- >>> from ultralytics.models.yolo.classify import ClassificationTrainer
- >>> args = dict(model="yolo11n-cls.pt", data="imagenet10", epochs=3)
- >>> trainer = ClassificationTrainer(overrides=args)
- >>> trainer.train()
+ cfg (dict[str, Any], optional): Default configuration dictionary containing training parameters.
+ overrides (dict[str, Any], optional): Dictionary of parameter overrides for the default configuration.
+ _callbacks (list[Any], optional): List of callback functions to be executed during training.
  """
  if overrides is None:
  overrides = {}
@@ -155,7 +145,7 @@ class ClassificationTrainer(BaseTrainer):
  with torch_distributed_zero_first(rank): # init dataset *.cache only once if DDP
  dataset = self.build_dataset(dataset_path, mode)

- loader = build_dataloader(dataset, batch_size, self.args.workers, rank=rank)
+ loader = build_dataloader(dataset, batch_size, self.args.workers, rank=rank, drop_last=self.args.compile)
  # Attach inference transforms
  if mode != "train":
  if is_parallel(self.model):
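The one behavioral change visible in these hunks is the new `drop_last=self.args.compile` argument above: when `compile` is enabled, the final incomplete batch is dropped so every batch keeps a fixed size, presumably to avoid shape-change recompilations under `torch.compile`. A minimal sketch of the effect with a plain PyTorch DataLoader (hypothetical tensor sizes, not the Ultralytics `build_dataloader` wrapper):

    import torch
    from torch.utils.data import DataLoader, TensorDataset

    dataset = TensorDataset(torch.randn(10, 3, 224, 224))  # 10 samples, illustrative shapes

    # batch_size=4 without drop_last yields batches of 4, 4, 2; the trailing
    # odd-sized batch changes the input shape, which can trigger a recompilation
    # when the model is wrapped with torch.compile.
    print([b[0].shape[0] for b in DataLoader(dataset, batch_size=4)])  # [4, 4, 2]
    print([b[0].shape[0] for b in DataLoader(dataset, batch_size=4, drop_last=True)])  # [4, 4]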
@@ -196,8 +186,8 @@ class ClassificationTrainer(BaseTrainer):
  prefix (str, optional): Prefix to prepend to loss names.

  Returns:
- keys (List[str]): List of loss keys if loss_items is None.
- loss_dict (Dict[str, float]): Dictionary of loss items if loss_items is provided.
+ keys (list[str]): List of loss keys if loss_items is None.
+ loss_dict (dict[str, float]): Dictionary of loss items if loss_items is provided.
  """
  keys = [f"{prefix}/{x}" for x in self.loss_names]
  if loss_items is None:
@@ -227,7 +217,7 @@ class ClassificationTrainer(BaseTrainer):
  Plot training samples with their annotations.

  Args:
- batch (Dict[str, torch.Tensor]): Batch containing images and class labels.
+ batch (dict[str, torch.Tensor]): Batch containing images and class labels.
  ni (int): Number of iterations.
  """
  batch["batch_idx"] = torch.arange(len(batch["img"])) # add batch index for plotting
ultralytics/models/yolo/classify/val.py
@@ -22,8 +22,8 @@ class ClassificationValidator(BaseValidator):
  confusion matrix generation, and visualization of results.

  Attributes:
- targets (List[torch.Tensor]): Ground truth class labels.
- pred (List[torch.Tensor]): Model predictions.
+ targets (list[torch.Tensor]): Ground truth class labels.
+ pred (list[torch.Tensor]): Model predictions.
  metrics (ClassifyMetrics): Object to calculate and store classification metrics.
  names (dict): Mapping of class indices to class names.
  nc (int): Number of classes.
@@ -170,7 +170,7 @@ class ClassificationValidator(BaseValidator):
  Plot validation image samples with their ground truth labels.

  Args:
- batch (Dict[str, Any]): Dictionary containing batch data with 'img' (images) and 'cls' (class labels).
+ batch (dict[str, Any]): Dictionary containing batch data with 'img' (images) and 'cls' (class labels).
  ni (int): Batch index used for naming the output file.

  Examples:
@@ -191,7 +191,7 @@ class ClassificationValidator(BaseValidator):
  Plot images with their predicted class labels and save the visualization.

  Args:
- batch (Dict[str, Any]): Batch data containing images and other information.
+ batch (dict[str, Any]): Batch data containing images and other information.
  preds (torch.Tensor): Model predictions with shape (batch_size, num_classes).
  ni (int): Batch index used for naming the output file.

ultralytics/models/yolo/detect/predict.py
@@ -96,12 +96,12 @@ class DetectionPredictor(BasePredictor):
  Construct a list of Results objects from model predictions.

  Args:
- preds (List[torch.Tensor]): List of predicted bounding boxes and scores for each image.
+ preds (list[torch.Tensor]): List of predicted bounding boxes and scores for each image.
  img (torch.Tensor): Batch of preprocessed images used for inference.
- orig_imgs (List[np.ndarray]): List of original images before preprocessing.
+ orig_imgs (list[np.ndarray]): List of original images before preprocessing.

  Returns:
- (List[Results]): List of Results objects containing detection information for each image.
+ (list[Results]): List of Results objects containing detection information for each image.
  """
  return [
  self.construct_result(pred, img, orig_img, img_path)
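The `construct_results` docstring above describes the `list[Results]` returned by a standard detection predict call; typical downstream usage looks like this (standard Ultralytics API, illustrative file names):

    from ultralytics import YOLO

    model = YOLO("yolo11n.pt")
    results = model.predict("image.jpg")  # -> list[Results], one per image
    for r in results:
        print(r.boxes.xyxy, r.boxes.conf, r.boxes.cls)  # boxes, scores, class ids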