autogluon.multimodal 1.1.2b20241107__py3-none-any.whl → 1.1.2b20241109__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- autogluon/multimodal/configs/pretrain/detection/dino/dino-4scale_r50_8xb2-12e_coco.py +1 -1
- autogluon/multimodal/learners/object_detection.py +62 -31
- autogluon/multimodal/predictor.py +8 -2
- autogluon/multimodal/utils/__init__.py +1 -1
- autogluon/multimodal/utils/hpo.py +14 -5
- autogluon/multimodal/utils/object_detection.py +110 -33
- autogluon/multimodal/version.py +1 -1
- {autogluon.multimodal-1.1.2b20241107.dist-info → autogluon.multimodal-1.1.2b20241109.dist-info}/METADATA +4 -4
- {autogluon.multimodal-1.1.2b20241107.dist-info → autogluon.multimodal-1.1.2b20241109.dist-info}/RECORD +16 -16
- {autogluon.multimodal-1.1.2b20241107.dist-info → autogluon.multimodal-1.1.2b20241109.dist-info}/WHEEL +1 -1
- /autogluon.multimodal-1.1.2b20241107-py3.8-nspkg.pth → /autogluon.multimodal-1.1.2b20241109-py3.8-nspkg.pth +0 -0
- {autogluon.multimodal-1.1.2b20241107.dist-info → autogluon.multimodal-1.1.2b20241109.dist-info}/LICENSE +0 -0
- {autogluon.multimodal-1.1.2b20241107.dist-info → autogluon.multimodal-1.1.2b20241109.dist-info}/NOTICE +0 -0
- {autogluon.multimodal-1.1.2b20241107.dist-info → autogluon.multimodal-1.1.2b20241109.dist-info}/namespace_packages.txt +0 -0
- {autogluon.multimodal-1.1.2b20241107.dist-info → autogluon.multimodal-1.1.2b20241109.dist-info}/top_level.txt +0 -0
- {autogluon.multimodal-1.1.2b20241107.dist-info → autogluon.multimodal-1.1.2b20241109.dist-info}/zip-safe +0 -0
@@ -84,7 +84,7 @@ model = dict(
|
|
84
84
|
# train_pipeline, NOTE the img_scale and the Pad's size_divisor is different
|
85
85
|
# from the default setting in mmdet.
|
86
86
|
train_pipeline = [
|
87
|
-
dict(type="LoadImageFromFile", backend_args={{_base_.backend_args}}),
|
87
|
+
dict(type="LoadImageFromFile", backend_args={{_base_.backend_args}}), # nosec
|
88
88
|
dict(type="LoadAnnotations", with_bbox=True),
|
89
89
|
dict(type="RandomFlip", prob=0.5),
|
90
90
|
dict(
|
@@ -15,12 +15,13 @@ from ..utils import (
|
|
15
15
|
check_if_packages_installed,
|
16
16
|
cocoeval,
|
17
17
|
convert_pred_to_xywh,
|
18
|
+
convert_result_df,
|
18
19
|
create_fusion_model,
|
19
20
|
extract_from_output,
|
20
21
|
from_coco_or_voc,
|
21
22
|
get_detection_classes,
|
22
23
|
object_detection_data_to_df,
|
23
|
-
|
24
|
+
save_result_coco_format,
|
24
25
|
setup_save_path,
|
25
26
|
split_train_tuning_data,
|
26
27
|
)
|
@@ -39,8 +40,9 @@ class ObjectDetectionLearner(BaseLearner):
|
|
39
40
|
hyperparameters: Optional[dict] = None,
|
40
41
|
path: Optional[str] = None,
|
41
42
|
verbosity: Optional[int] = 2,
|
42
|
-
num_classes: Optional[int] = None,
|
43
|
-
classes: Optional[list] = None,
|
43
|
+
num_classes: Optional[int] = None,
|
44
|
+
classes: Optional[list] = None,
|
45
|
+
category_ids: Optional[list] = None,
|
44
46
|
warn_if_exist: Optional[bool] = True,
|
45
47
|
enable_progress_bar: Optional[bool] = None,
|
46
48
|
pretrained: Optional[bool] = True,
|
@@ -78,12 +80,13 @@ class ObjectDetectionLearner(BaseLearner):
|
|
78
80
|
|
79
81
|
self._output_shape = num_classes
|
80
82
|
self._classes = classes
|
83
|
+
self._category_ids = category_ids
|
81
84
|
self._sample_data_path = sample_data_path
|
82
85
|
|
83
86
|
# TODO: merge object detection and open vocabulary object detection
|
84
87
|
self._label_column = "label"
|
85
88
|
if self._sample_data_path is not None:
|
86
|
-
self._classes = get_detection_classes(self._sample_data_path)
|
89
|
+
self._classes, self._category_ids = get_detection_classes(self._sample_data_path)
|
87
90
|
self._output_shape = len(self._classes)
|
88
91
|
|
89
92
|
# TODO: merge _detection_anno_train and detection_anno_train?
|
@@ -99,7 +102,16 @@ class ObjectDetectionLearner(BaseLearner):
|
|
99
102
|
"""
|
100
103
|
Return the classes of object detection.
|
101
104
|
"""
|
102
|
-
|
105
|
+
if self._model.model.CLASSES is not None and self._classes is not None:
|
106
|
+
assert self._classes == self._model.model.CLASSES, f"{self._classes}\n{self._model.model.CLASSES}"
|
107
|
+
return self._classes if self._classes is not None else self._model.model.CLASSES
|
108
|
+
|
109
|
+
@property
|
110
|
+
def category_ids(self):
|
111
|
+
"""
|
112
|
+
Return the classes of object detection.
|
113
|
+
"""
|
114
|
+
return self._category_ids
|
103
115
|
|
104
116
|
def setup_detection_train_tuning_data(self, max_num_tuning_data, seed, train_data, tuning_data):
|
105
117
|
if isinstance(train_data, str):
|
@@ -669,6 +681,7 @@ class ObjectDetectionLearner(BaseLearner):
|
|
669
681
|
self,
|
670
682
|
data: Union[pd.DataFrame, dict, list, str],
|
671
683
|
as_pandas: Optional[bool] = None,
|
684
|
+
as_coco: Optional[bool] = True,
|
672
685
|
realtime: Optional[bool] = False,
|
673
686
|
save_results: Optional[bool] = None,
|
674
687
|
**kwargs,
|
@@ -683,6 +696,8 @@ class ObjectDetectionLearner(BaseLearner):
|
|
683
696
|
follow same format (except for the `label` column).
|
684
697
|
as_pandas
|
685
698
|
Whether to return the output as a pandas DataFrame(Series) (True) or numpy array (False).
|
699
|
+
as_coco
|
700
|
+
Whether to save the output as a COCO json file (True) or pandas DataFrame (False).
|
686
701
|
realtime
|
687
702
|
Whether to do realtime inference, which is efficient for small data (default False).
|
688
703
|
If provided None, we would infer it on based on the data modalities
|
@@ -695,49 +710,65 @@ class ObjectDetectionLearner(BaseLearner):
|
|
695
710
|
Array of predictions, one corresponding to each row in given dataset.
|
696
711
|
"""
|
697
712
|
self.ensure_predict_ready()
|
713
|
+
if as_pandas is None and isinstance(data, pd.DataFrame):
|
714
|
+
as_pandas = True
|
715
|
+
|
698
716
|
ret_type = BBOX
|
717
|
+
|
718
|
+
# only supports coco/voc format for OBJECT_DETECTION
|
699
719
|
if self._problem_type == OBJECT_DETECTION:
|
700
|
-
|
701
|
-
|
720
|
+
data_path = data
|
721
|
+
data_df = object_detection_data_to_df(
|
722
|
+
data_path,
|
702
723
|
coco_root=self._config.model.mmdet_image.coco_root,
|
703
724
|
)
|
704
|
-
if self._label_column not in
|
725
|
+
if self._label_column not in data_df:
|
705
726
|
self._label_column = None
|
706
727
|
|
707
728
|
outputs = self.predict_per_run(
|
708
|
-
data=
|
729
|
+
data=data_df,
|
709
730
|
realtime=realtime,
|
710
731
|
requires_label=False,
|
711
732
|
)
|
712
733
|
pred = extract_from_output(outputs=outputs, ret_type=ret_type)
|
713
|
-
if self._problem_type == OBJECT_DETECTION:
|
714
|
-
if self._model.output_bbox_format == XYWH:
|
715
|
-
pred = convert_pred_to_xywh(pred)
|
716
734
|
|
717
|
-
|
735
|
+
self._save_path = setup_save_path(
|
736
|
+
old_save_path=self._save_path,
|
737
|
+
warn_if_exist=False,
|
738
|
+
)
|
739
|
+
result_path = os.path.join(self._save_path, "result.txt")
|
740
|
+
|
741
|
+
pred_df = convert_result_df(
|
742
|
+
pred=convert_pred_to_xywh(pred) if self._model.output_bbox_format == XYWH else pred,
|
743
|
+
data=data_df,
|
744
|
+
detection_classes=self.classes,
|
745
|
+
result_path=result_path,
|
746
|
+
)
|
747
|
+
|
748
|
+
if save_results:
|
718
749
|
self._save_path = setup_save_path(
|
719
750
|
old_save_path=self._save_path,
|
720
751
|
warn_if_exist=False,
|
721
752
|
)
|
722
|
-
|
723
|
-
|
724
|
-
|
725
|
-
|
726
|
-
|
727
|
-
|
728
|
-
|
729
|
-
|
730
|
-
|
731
|
-
|
732
|
-
|
733
|
-
|
734
|
-
pred=pred,
|
735
|
-
data=data,
|
736
|
-
detection_classes=self._model.model.CLASSES,
|
737
|
-
result_path=None,
|
738
|
-
)
|
753
|
+
if as_coco:
|
754
|
+
result_path = os.path.join(self._save_path, "result.json")
|
755
|
+
save_result_coco_format(
|
756
|
+
data_path=data_path,
|
757
|
+
pred=pred,
|
758
|
+
category_ids=self.category_ids,
|
759
|
+
result_path=result_path,
|
760
|
+
coco_root=self._config.model.mmdet_image.coco_root,
|
761
|
+
)
|
762
|
+
else:
|
763
|
+
pred_df.to_csv(result_path, index=False)
|
764
|
+
logger.info(f"Saved detection results {'as coco' if as_coco else 'as dataframe'} to {result_path}")
|
739
765
|
|
740
|
-
|
766
|
+
if as_pandas:
|
767
|
+
return pred_df
|
768
|
+
else:
|
769
|
+
if self._model.output_bbox_format == XYWH:
|
770
|
+
pred = convert_pred_to_xywh(pred)
|
771
|
+
return pred
|
741
772
|
|
742
773
|
def predict_proba(
|
743
774
|
self,
|
@@ -605,6 +605,7 @@ class MultiModalPredictor:
|
|
605
605
|
as_pandas: Optional[bool] = None,
|
606
606
|
realtime: Optional[bool] = False,
|
607
607
|
save_results: Optional[bool] = None,
|
608
|
+
**kwargs,
|
608
609
|
):
|
609
610
|
"""
|
610
611
|
Predict the label column values for new data.
|
@@ -617,8 +618,8 @@ class MultiModalPredictor:
|
|
617
618
|
candidate_data
|
618
619
|
The candidate data from which to search the query data's matches.
|
619
620
|
id_mappings
|
620
|
-
|
621
|
-
|
621
|
+
Id-to-content mappings. The contents can be text, image, etc.
|
622
|
+
This is used when data contain the query/response identifiers instead of their contents.
|
622
623
|
as_pandas
|
623
624
|
Whether to return the output as a pandas DataFrame(Series) (True) or numpy array (False).
|
624
625
|
realtime
|
@@ -627,10 +628,14 @@ class MultiModalPredictor:
|
|
627
628
|
and sample number.
|
628
629
|
save_results
|
629
630
|
Whether to save the prediction results (only works for detection now)
|
631
|
+
**kwargs
|
632
|
+
Additional keyword arguments to pass to the underlying learner's predict method.
|
633
|
+
For example, `as_coco` for object detection tasks.
|
630
634
|
|
631
635
|
Returns
|
632
636
|
-------
|
633
637
|
Array of predictions, one corresponding to each row in given dataset.
|
638
|
+
Format depends on the specific learner and provided arguments.
|
634
639
|
"""
|
635
640
|
return self._learner.predict(
|
636
641
|
data=data,
|
@@ -639,6 +644,7 @@ class MultiModalPredictor:
|
|
639
644
|
realtime=realtime,
|
640
645
|
save_results=save_results,
|
641
646
|
id_mappings=id_mappings,
|
647
|
+
**kwargs,
|
642
648
|
)
|
643
649
|
|
644
650
|
def predict_proba(
|
@@ -82,6 +82,7 @@ from .object_detection import (
|
|
82
82
|
bbox_xyxy_to_xywh,
|
83
83
|
cocoeval,
|
84
84
|
convert_pred_to_xywh,
|
85
|
+
convert_result_df,
|
85
86
|
from_coco,
|
86
87
|
from_coco_or_voc,
|
87
88
|
from_dict,
|
@@ -90,7 +91,6 @@ from .object_detection import (
|
|
90
91
|
object_detection_data_to_df,
|
91
92
|
object_detection_df_to_coco,
|
92
93
|
save_result_coco_format,
|
93
|
-
save_result_df,
|
94
94
|
save_result_voc_format,
|
95
95
|
visualize_detection,
|
96
96
|
)
|
@@ -50,9 +50,9 @@ def hpo_trial(sampled_hyperparameters, learner, checkpoint_dir=None, **_fit_args
|
|
50
50
|
resources = context.get_trial_resources().required_resources
|
51
51
|
num_cpus = int(resources.get("CPU"))
|
52
52
|
|
53
|
-
|
54
|
-
|
55
|
-
|
53
|
+
# The original hyperparameters is the search space, replace it with the hyperparameters sampled
|
54
|
+
_fit_args["hyperparameters"] = sampled_hyperparameters
|
55
|
+
|
56
56
|
_fit_args["save_path"] = context.get_trial_dir() # We want to save each trial to a separate directory
|
57
57
|
logger.debug(f"hpo trial save_path: {_fit_args['save_path']}")
|
58
58
|
if checkpoint_dir is not None:
|
@@ -62,7 +62,15 @@ def hpo_trial(sampled_hyperparameters, learner, checkpoint_dir=None, **_fit_args
|
|
62
62
|
learner.fit_per_run(**_fit_args)
|
63
63
|
|
64
64
|
|
65
|
-
def build_final_learner(
|
65
|
+
def build_final_learner(
|
66
|
+
learner,
|
67
|
+
best_trial_path,
|
68
|
+
save_path,
|
69
|
+
last_ckpt_path,
|
70
|
+
is_matching,
|
71
|
+
standalone,
|
72
|
+
clean_ckpts,
|
73
|
+
):
|
66
74
|
"""
|
67
75
|
Build the final learner after HPO is finished.
|
68
76
|
|
@@ -167,12 +175,13 @@ def hyperparameter_tune(hyperparameter_tune_kwargs, resources, is_matching=False
|
|
167
175
|
mode = _fit_args.get("learner")._minmax_mode
|
168
176
|
save_path = _fit_args.get("save_path")
|
169
177
|
time_budget_s = _fit_args.get("max_time")
|
178
|
+
num_to_keep = hyperparameter_tune_kwargs.pop("num_to_keep", 3)
|
170
179
|
if time_budget_s is not None:
|
171
180
|
time_budget_s *= 0.95 # give some buffer time to ray
|
172
181
|
try:
|
173
182
|
run_config_kwargs = {
|
174
183
|
"checkpoint_config": CheckpointConfig(
|
175
|
-
num_to_keep=
|
184
|
+
num_to_keep=num_to_keep,
|
176
185
|
checkpoint_score_attribute=metric,
|
177
186
|
),
|
178
187
|
}
|
@@ -9,6 +9,7 @@ import defusedxml.ElementTree as ET
|
|
9
9
|
import numpy as np
|
10
10
|
import pandas as pd
|
11
11
|
import PIL
|
12
|
+
import torch
|
12
13
|
from torchmetrics.detection.mean_ap import MeanAveragePrecision
|
13
14
|
|
14
15
|
from ..constants import (
|
@@ -58,24 +59,43 @@ def _get_image_info(image_path: str):
|
|
58
59
|
|
59
60
|
def get_df_unique_classes(data: pd.DataFrame):
|
60
61
|
"""
|
61
|
-
Get the unique classes
|
62
|
+
Get the unique classes and their category IDs from the dataframe for object detection.
|
63
|
+
|
62
64
|
Parameters
|
63
65
|
----------
|
64
|
-
data
|
65
|
-
|
66
|
+
data : pd.DataFrame
|
67
|
+
DataFrame holding the data for object detection. Each row should contain a 'rois'
|
68
|
+
column with detection boxes and class labels.
|
69
|
+
|
66
70
|
Returns
|
67
71
|
-------
|
68
|
-
|
72
|
+
tuple
|
73
|
+
A tuple containing (class_names, category_ids) where:
|
74
|
+
- class_names: list of unique class name strings
|
75
|
+
- category_ids: dict mapping class names to their numeric IDs
|
69
76
|
"""
|
70
77
|
unique_classes = {}
|
78
|
+
|
79
|
+
# Iterate through all rows in the dataframe
|
71
80
|
for idx in range(data.shape[0]):
|
72
81
|
row = data.iloc[idx]
|
73
82
|
rois = row["rois"]
|
83
|
+
|
84
|
+
# Process each ROI in the current row
|
74
85
|
for roi in rois:
|
75
|
-
|
86
|
+
# Unpack ROI values (assuming last element is class label)
|
87
|
+
*_, class_label = roi
|
88
|
+
|
89
|
+
# Add new classes to the dictionary with auto-incrementing IDs
|
76
90
|
if class_label not in unique_classes:
|
77
|
-
|
78
|
-
|
91
|
+
# Start IDs from 1, as 0 is often reserved for background
|
92
|
+
unique_classes[class_label] = len(unique_classes) + 1
|
93
|
+
|
94
|
+
# Create the output lists/dicts
|
95
|
+
class_names = list(unique_classes.keys())
|
96
|
+
category_ids = unique_classes
|
97
|
+
|
98
|
+
return class_names, category_ids
|
79
99
|
|
80
100
|
|
81
101
|
def object_detection_df_to_coco(data: pd.DataFrame, save_path: Optional[str] = None):
|
@@ -305,7 +325,7 @@ def from_voc(
|
|
305
325
|
rpath = Path(root).expanduser()
|
306
326
|
img_list = []
|
307
327
|
|
308
|
-
class_names = get_detection_classes(root)
|
328
|
+
class_names, _ = get_detection_classes(root)
|
309
329
|
|
310
330
|
NAME_TO_IDX = dict(zip(class_names, range(len(class_names))))
|
311
331
|
name_to_index = lambda name: NAME_TO_IDX[name]
|
@@ -814,7 +834,7 @@ def get_image_filename(path: str):
|
|
814
834
|
class COCODataset:
|
815
835
|
# The class that load/save COCO data format.
|
816
836
|
# TODO: refactor data loading into here
|
817
|
-
def __init__(self, anno_file: str):
|
837
|
+
def __init__(self, anno_file: str, category_ids: Optional[List] = None):
|
818
838
|
"""
|
819
839
|
Parameters
|
820
840
|
----------
|
@@ -833,7 +853,13 @@ class COCODataset:
|
|
833
853
|
img_id_list.append(int(img["id"]))
|
834
854
|
self.image_filename_to_id = dict(zip(img_filename_list, img_id_list))
|
835
855
|
|
836
|
-
|
856
|
+
if category_ids is None:
|
857
|
+
if "categories" in d:
|
858
|
+
self.category_ids = [cat["id"] for cat in d["categories"]]
|
859
|
+
else:
|
860
|
+
self.category_ids = range(9999) # TODO: remove hardcoding here
|
861
|
+
else:
|
862
|
+
self.category_ids = category_ids
|
837
863
|
|
838
864
|
def get_image_id_from_path(self, image_path: str):
|
839
865
|
"""
|
@@ -1177,49 +1203,63 @@ def from_coco_or_voc(file_path: str, splits: Optional[Union[str]] = None, coco_r
|
|
1177
1203
|
|
1178
1204
|
def get_coco_format_classes(sample_data_path: str):
|
1179
1205
|
"""
|
1180
|
-
|
1206
|
+
Get class names and category IDs for COCO format data.
|
1181
1207
|
|
1182
1208
|
Parameters
|
1183
1209
|
----------
|
1184
|
-
sample_data_path
|
1210
|
+
sample_data_path : str
|
1185
1211
|
The path to COCO format json annotation file. Could be any split, e.g. train/val/test/....
|
1186
1212
|
|
1187
1213
|
Returns
|
1188
1214
|
-------
|
1189
|
-
|
1215
|
+
tuple
|
1216
|
+
A tuple containing (class_names, category_ids) where:
|
1217
|
+
- class_names: list of class name strings
|
1218
|
+
- category_ids: dict mapping class names to their COCO category IDs
|
1190
1219
|
"""
|
1191
1220
|
try:
|
1192
1221
|
with open(sample_data_path, "r") as f:
|
1193
1222
|
annotation = json.load(f)
|
1194
1223
|
except:
|
1195
1224
|
raise ValueError(f"Failed to load json from provided json file: {sample_data_path}.")
|
1196
|
-
|
1225
|
+
|
1226
|
+
# Extract both names and IDs from categories
|
1227
|
+
class_names = [cat["name"] for cat in annotation["categories"]]
|
1228
|
+
|
1229
|
+
# Create mapping of names to their original COCO category IDs
|
1230
|
+
category_ids = [cat["id"] for cat in annotation["categories"]]
|
1231
|
+
|
1232
|
+
return class_names, category_ids
|
1197
1233
|
|
1198
1234
|
|
1199
1235
|
def get_voc_format_classes(root: str):
|
1200
1236
|
"""
|
1201
|
-
|
1237
|
+
Get class names and category IDs for VOC format data.
|
1202
1238
|
|
1203
1239
|
Parameters
|
1204
1240
|
----------
|
1205
|
-
root
|
1241
|
+
root : str
|
1206
1242
|
The path to the root directory of VOC data.
|
1207
1243
|
|
1208
1244
|
Returns
|
1209
1245
|
-------
|
1210
|
-
|
1246
|
+
tuple
|
1247
|
+
A tuple containing (class_names, category_ids) where:
|
1248
|
+
- class_names: list of class name strings
|
1249
|
+
- category_ids: dict mapping class names to their numeric IDs
|
1211
1250
|
"""
|
1212
1251
|
if is_url(root):
|
1213
1252
|
root = download(root)
|
1214
|
-
rpath = Path(root).expanduser()
|
1215
1253
|
|
1254
|
+
rpath = Path(root).expanduser()
|
1216
1255
|
labels_file = os.path.join(rpath, "labels.txt")
|
1256
|
+
|
1217
1257
|
if os.path.exists(labels_file):
|
1218
1258
|
with open(labels_file) as f:
|
1219
1259
|
class_names = [line.rstrip().lower() for line in f]
|
1220
1260
|
print(f"using class_names in labels.txt: {class_names}")
|
1221
1261
|
else:
|
1222
|
-
|
1262
|
+
# read the class names and save results
|
1223
1263
|
logger.warning(
|
1224
1264
|
"labels.txt does not exist, using default VOC names. "
|
1225
1265
|
"Creating labels.txt by scanning the directory: {}".format(os.path.join(root, "Annotations"))
|
@@ -1228,28 +1268,46 @@ def get_voc_format_classes(root: str):
|
|
1228
1268
|
voc_annotation_path=os.path.join(root, "Annotations"), voc_class_names_output_path=labels_file
|
1229
1269
|
)
|
1230
1270
|
|
1231
|
-
|
1271
|
+
# There are no category IDs in VOC format
|
1272
|
+
# Create category IDs dictionary starting from 1
|
1273
|
+
# 0 is typically reserved for background in many frameworks
|
1274
|
+
category_ids = [idx + 1 for idx, name in enumerate(class_names)]
|
1275
|
+
|
1276
|
+
return class_names, category_ids
|
1232
1277
|
|
1233
1278
|
|
1234
1279
|
def get_detection_classes(sample_data_path):
|
1235
1280
|
"""
|
1236
|
-
|
1281
|
+
Get class names and category IDs from detection dataset in various formats.
|
1237
1282
|
|
1238
1283
|
Parameters
|
1239
1284
|
----------
|
1240
|
-
sample_data_path
|
1241
|
-
|
1242
|
-
|
1285
|
+
sample_data_path : Union[str, pd.DataFrame]
|
1286
|
+
The input can be one of:
|
1287
|
+
- str (directory): Path to root directory of VOC format data
|
1288
|
+
- str (file): Path to COCO format JSON annotation file
|
1289
|
+
- pd.DataFrame: DataFrame containing detection data with 'rois' column
|
1243
1290
|
|
1244
1291
|
Returns
|
1245
1292
|
-------
|
1246
|
-
|
1293
|
+
tuple
|
1294
|
+
A tuple containing (class_names, category_ids) where:
|
1295
|
+
- class_names: list of class name strings
|
1296
|
+
- category_ids: dict mapping class names to their numeric IDs
|
1297
|
+
|
1298
|
+
For VOC: IDs start from 1
|
1299
|
+
For COCO: Original category IDs from annotation file
|
1300
|
+
For DataFrame: Sequential IDs starting from 1
|
1247
1301
|
"""
|
1302
|
+
# Handle string paths for VOC and COCO formats
|
1248
1303
|
if isinstance(sample_data_path, str):
|
1249
1304
|
if os.path.isdir(sample_data_path):
|
1305
|
+
# Directory path indicates VOC format
|
1250
1306
|
return get_voc_format_classes(sample_data_path)
|
1251
1307
|
else:
|
1308
|
+
# File path indicates COCO format JSON
|
1252
1309
|
return get_coco_format_classes(sample_data_path)
|
1310
|
+
# Handle DataFrame format
|
1253
1311
|
elif isinstance(sample_data_path, pd.DataFrame):
|
1254
1312
|
return get_df_unique_classes(sample_data_path)
|
1255
1313
|
|
@@ -1497,7 +1555,7 @@ def get_color(idx):
|
|
1497
1555
|
return color
|
1498
1556
|
|
1499
1557
|
|
1500
|
-
def
|
1558
|
+
def convert_result_df(
|
1501
1559
|
pred: Iterable, data: Union[pd.DataFrame, Dict], detection_classes: List[str], result_path: Optional[str] = None
|
1502
1560
|
):
|
1503
1561
|
"""
|
@@ -1544,13 +1602,11 @@ def save_result_df(
|
|
1544
1602
|
return result_df
|
1545
1603
|
|
1546
1604
|
|
1547
|
-
def save_result_coco_format(
|
1548
|
-
coco_dataset = COCODataset(
|
1605
|
+
def save_result_coco_format(data_path, pred, category_ids, result_path, coco_root: Optional[str] = None):
|
1606
|
+
coco_dataset = COCODataset(data_path, category_ids=category_ids)
|
1549
1607
|
result_name, _ = os.path.splitext(result_path)
|
1550
1608
|
result_path = result_name + ".json"
|
1551
|
-
coco_dataset.save_result(
|
1552
|
-
pred, from_coco_or_voc(detection_data_path, "test", coco_root=coco_root), save_path=result_path
|
1553
|
-
)
|
1609
|
+
coco_dataset.save_result(pred, from_coco_or_voc(data_path, "test", coco_root=coco_root), save_path=result_path)
|
1554
1610
|
logger.info(25, f"Saved detection result to {result_path}")
|
1555
1611
|
|
1556
1612
|
|
@@ -1561,9 +1617,30 @@ def save_result_voc_format(pred, result_path):
|
|
1561
1617
|
logger.info(25, f"Saved detection result to {result_path}")
|
1562
1618
|
|
1563
1619
|
|
1564
|
-
def convert_pred_to_xywh(pred: Optional[List]):
|
1620
|
+
def convert_pred_to_xywh(pred: Optional[List]) -> Optional[List]:
|
1621
|
+
"""
|
1622
|
+
Convert prediction bounding boxes from XYXY to XYWH format.
|
1623
|
+
|
1624
|
+
Args:
|
1625
|
+
pred: List of predictions, where each prediction contains 'bboxes' that can be
|
1626
|
+
either a torch.Tensor or numpy.ndarray
|
1627
|
+
|
1628
|
+
Returns:
|
1629
|
+
Modified list of predictions with bboxes in XYWH format
|
1630
|
+
"""
|
1565
1631
|
if not pred:
|
1566
1632
|
return pred
|
1633
|
+
|
1567
1634
|
for i, pred_per_image in enumerate(pred):
|
1568
|
-
|
1635
|
+
bboxes = pred_per_image["bboxes"]
|
1636
|
+
|
1637
|
+
# Handle numpy array case
|
1638
|
+
if isinstance(bboxes, np.ndarray):
|
1639
|
+
pred[i]["bboxes"] = bbox_xyxy_to_xywh(bboxes)
|
1640
|
+
# Handle torch.Tensor case
|
1641
|
+
elif torch.is_tensor(bboxes):
|
1642
|
+
pred[i]["bboxes"] = bbox_xyxy_to_xywh(bboxes.detach().numpy())
|
1643
|
+
else:
|
1644
|
+
raise TypeError(f"Unsupported bbox type: {type(bboxes)}. Expected numpy.ndarray or torch.Tensor")
|
1645
|
+
|
1569
1646
|
return pred
|
autogluon/multimodal/version.py
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: autogluon.multimodal
|
3
|
-
Version: 1.1.
|
3
|
+
Version: 1.1.2b20241109
|
4
4
|
Summary: Fast and Accurate ML in 3 Lines of Code
|
5
5
|
Home-page: https://github.com/autogluon/autogluon
|
6
6
|
Author: AutoGluon Community
|
@@ -56,9 +56,9 @@ Requires-Dist: text-unidecode<1.4,>=1.3
|
|
56
56
|
Requires-Dist: torchmetrics<1.3.0,>=1.2.0
|
57
57
|
Requires-Dist: nptyping<2.5.0,>=1.4.4
|
58
58
|
Requires-Dist: omegaconf<2.3.0,>=2.1.1
|
59
|
-
Requires-Dist: autogluon.core[raytune]==1.1.
|
60
|
-
Requires-Dist: autogluon.features==1.1.
|
61
|
-
Requires-Dist: autogluon.common==1.1.
|
59
|
+
Requires-Dist: autogluon.core[raytune]==1.1.2b20241109
|
60
|
+
Requires-Dist: autogluon.features==1.1.2b20241109
|
61
|
+
Requires-Dist: autogluon.common==1.1.2b20241109
|
62
62
|
Requires-Dist: pytorch-metric-learning<2.4,>=1.3.0
|
63
63
|
Requires-Dist: nlpaug<1.2.0,>=1.1.10
|
64
64
|
Requires-Dist: nltk<3.9,>=3.4.5
|
@@ -1,11 +1,11 @@
|
|
1
|
-
autogluon.multimodal-1.1.
|
1
|
+
autogluon.multimodal-1.1.2b20241109-py3.8-nspkg.pth,sha256=cQGwpuGPqg1GXscIwt-7PmME1OnSpD-7ixkikJ31WAY,554
|
2
2
|
autogluon/multimodal/__init__.py,sha256=EuWb-QmtFBKePJw4_4Kpp9dKrabv121haYw_Oiu2jfI,238
|
3
3
|
autogluon/multimodal/constants.py,sha256=lFA03ZQeZlp8mwuXLuMOgeByljV5wPYBjN_hi6Xc8zg,8498
|
4
|
-
autogluon/multimodal/predictor.py,sha256=
|
4
|
+
autogluon/multimodal/predictor.py,sha256=beV2gOcTnviYtU8UWTWdqWYTbuk5sC6Sba-pAEaFQyg,40936
|
5
5
|
autogluon/multimodal/presets.py,sha256=VR_arn7X4eiQcGcvJVmwxDopPJGvYP1W1cBZ2AOcdJM,25882
|
6
6
|
autogluon/multimodal/problem_types.py,sha256=JPSoV3Fg-NGQansRlyT2bPZG3iIkgWo7eB8oDoAfg90,9201
|
7
7
|
autogluon/multimodal/registry.py,sha256=vqvONWweZP44aBo1jCvlqLdL0Agn2kyTK8uXUwagZhs,3670
|
8
|
-
autogluon/multimodal/version.py,sha256=
|
8
|
+
autogluon/multimodal/version.py,sha256=9nkI2OUdF0h6sRq5pouT1JYNVvDZ204TllaIaUV7hCs,90
|
9
9
|
autogluon/multimodal/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
10
10
|
autogluon/multimodal/cli/prepare_detection_dataset.py,sha256=9NCYmCUMPRWbxxbN_C7YQjMYlrAm8gbwJ3Qd-79JWH4,5218
|
11
11
|
autogluon/multimodal/cli/voc2coco.py,sha256=LXVu9RIfOZs_1URrzO-_3Nrvb9uGEgPxCY4-B6m1coc,9605
|
@@ -21,7 +21,7 @@ autogluon/multimodal/configs/pretrain/detection/__init__.py,sha256=47DEQpj8HBSa-
|
|
21
21
|
autogluon/multimodal/configs/pretrain/detection/coco_detection.py,sha256=UlSwkWAkST_96RTzPZMuPuqIfv72U03-JdqwPd-NjiQ,3171
|
22
22
|
autogluon/multimodal/configs/pretrain/detection/default_runtime.py,sha256=9hJmjxb6fIo-kbbejQlJy4ayopRFUyA_w95plhAUFDw,793
|
23
23
|
autogluon/multimodal/configs/pretrain/detection/schedule_1x.py,sha256=VhZ8HT-ryeGW-GzxiVsDEIYf9Bw6ImOdPucFVJaN0Os,298
|
24
|
-
autogluon/multimodal/configs/pretrain/detection/dino/dino-4scale_r50_8xb2-12e_coco.py,sha256=
|
24
|
+
autogluon/multimodal/configs/pretrain/detection/dino/dino-4scale_r50_8xb2-12e_coco.py,sha256=r51gu03JBHrTjDXkT2CdNKTzEeWx2qxk8t-TEHgKwSQ,5919
|
25
25
|
autogluon/multimodal/configs/pretrain/detection/dino/dino-5scale_swin-l_8xb2-12e_coco.py,sha256=VYvjELfzX2jYmsQLFvMYWeixx2LmB_LwbPmzgLAPUwo,1137
|
26
26
|
autogluon/multimodal/configs/pretrain/detection/dino/dino-5scale_swin-l_8xb2-36e_coco.py,sha256=OL4Za_hd5IhQU8iHEAVrsFu-MnSmOPu1_WRa39i3QYA,266
|
27
27
|
autogluon/multimodal/configs/pretrain/detection/dino/dino_swinl_tta.py,sha256=vi5rhbaT3mgycIm1W8jQ6l-KuFoL24OiiF-rdj2CTNg,68
|
@@ -71,7 +71,7 @@ autogluon/multimodal/learners/base.py,sha256=IUHRBzwrKqAwo9nDsqzg0rBQaFiVxfyeMnd
|
|
71
71
|
autogluon/multimodal/learners/few_shot_svm.py,sha256=TXauhXr_2hWqaEwO8XhFxWRRPXDYxLpmmKYaCrxFWPM,23934
|
72
72
|
autogluon/multimodal/learners/matching.py,sha256=gueWrqy0g9gVbyBvQOAj03JgBwbJsBXeLLtKsiTzrnU,89891
|
73
73
|
autogluon/multimodal/learners/ner.py,sha256=0VZl_Z1O98A5mOSw8Ee8F9foaCT684DT0j1ALx-8RU4,19035
|
74
|
-
autogluon/multimodal/learners/object_detection.py,sha256=
|
74
|
+
autogluon/multimodal/learners/object_detection.py,sha256=NmaQWaakpnIA1owZNvGeO1B3Sk3p3ngWnNJM21EeeVM,30846
|
75
75
|
autogluon/multimodal/learners/semantic_segmentation.py,sha256=cy2ALYTtqeSnPo75htgr5STZ_utgkzeGxp5j4J1mScc,20183
|
76
76
|
autogluon/multimodal/models/__init__.py,sha256=wynO5U5zY_vElZPGL10Oqb7OVkj2iovqzml22MRL3iE,842
|
77
77
|
autogluon/multimodal/models/adaptation_layers.py,sha256=NuzwU_ghk8D2axmDuD8UEZ_HamoMSCcKMV9DB1AYWAg,38425
|
@@ -109,7 +109,7 @@ autogluon/multimodal/optimization/losses.py,sha256=n1nXpXgGmYfLv0b-51yDFp99szy3j
|
|
109
109
|
autogluon/multimodal/optimization/lr_scheduler.py,sha256=i3GG7T8ZyPXyS7feUVe7W3o6eSLIh_Ei7XujJL50uxw,5829
|
110
110
|
autogluon/multimodal/optimization/semantic_seg_metrics.py,sha256=tIbSk3iyBRRx7HnZdqIxltRBtDiBt-GX_zBxkMOFxQg,32894
|
111
111
|
autogluon/multimodal/optimization/utils.py,sha256=X6UknHcN2XID9WO2N2Of3-7MbWUfZO7ydNOktwwtbiU,34415
|
112
|
-
autogluon/multimodal/utils/__init__.py,sha256=
|
112
|
+
autogluon/multimodal/utils/__init__.py,sha256=GQiubE17Z5FVG7o5kZgwekmTHkYd6aL1MliZF21kbFI,2886
|
113
113
|
autogluon/multimodal/utils/cache.py,sha256=XiLB5xNUYklDc8R9M-2RUD0Y6NEqrZIZx6O1PpRIXxI,7766
|
114
114
|
autogluon/multimodal/utils/checkpoint.py,sha256=Khx4KXqF0S9Aw193kyUWNvJX2XkFv6y4IGYe9-txLJE,7699
|
115
115
|
autogluon/multimodal/utils/cloud_io.py,sha256=FhIJ_oEerJ8QOZz82SzP3tg-Z1mo_vHqlmcShHe077s,3031
|
@@ -120,7 +120,7 @@ autogluon/multimodal/utils/distillation.py,sha256=VYSY_excOESa84Q0w6IbV4wL_ER27W
|
|
120
120
|
autogluon/multimodal/utils/download.py,sha256=eOCw4b_EHBvHB9EcGesyzTm1f2AUrzz8KcZQ6j_D7-Y,10364
|
121
121
|
autogluon/multimodal/utils/environment.py,sha256=J1YYBcIL-YzAnoN5GC1DMF_Rpt0DxSpJp3NZxJZ_q6g,12814
|
122
122
|
autogluon/multimodal/utils/export.py,sha256=h7PizWsMaxMnlY7ssRNJxbExgt7B4XFkKY8hf7M-j4Y,11964
|
123
|
-
autogluon/multimodal/utils/hpo.py,sha256=
|
123
|
+
autogluon/multimodal/utils/hpo.py,sha256=KIpO7DmvowT-xua78yK5akKOFrJFKDLQajwaFkX0WWU,8813
|
124
124
|
autogluon/multimodal/utils/inference.py,sha256=VQAda55sf6rbuuxUS3MGJXyCAgb_xugLv1glCV2NlZk,12349
|
125
125
|
autogluon/multimodal/utils/label_studio.py,sha256=7lFl75zztIy6VCuCbyZkN-BLbtr0j1S4F42zJteGVYY,13437
|
126
126
|
autogluon/multimodal/utils/load.py,sha256=rzfADn6obbZL20QZc034IPhIiza7SA8f5MPpd9hPsAE,5106
|
@@ -131,15 +131,15 @@ autogluon/multimodal/utils/misc.py,sha256=WaDWN-6xCCL4tCkxMr4VMb5oiNmmBLrWo5FC3b
|
|
131
131
|
autogluon/multimodal/utils/mmcv.py,sha256=Jjg5PiPqiRNJk6yWkQQlNiqT7qhStN94QjqQsZO3uVw,922
|
132
132
|
autogluon/multimodal/utils/model.py,sha256=Z_9bev8nRk92cUZjPggVAM3R3CHFlecU-YzjkMGPUsE,21963
|
133
133
|
autogluon/multimodal/utils/nlpaug.py,sha256=rWztiOZDbtEGBdyjkXZ0DoSEpXGKX9akpDpFnz4xIMw,2557
|
134
|
-
autogluon/multimodal/utils/object_detection.py,sha256=
|
134
|
+
autogluon/multimodal/utils/object_detection.py,sha256=fHZxon5LoYRmNu_7jm_pDjesVxTa72nzZwgwP-5Fft8,53535
|
135
135
|
autogluon/multimodal/utils/onnx.py,sha256=rblWnphKTsfbosbieJu8PsH6SMDw4on9BS8bR1plL2U,5607
|
136
136
|
autogluon/multimodal/utils/save.py,sha256=zYIO3mYMGBvHfZcmCUaLpsQa14nVq1LPv2F76uaz89w,3951
|
137
137
|
autogluon/multimodal/utils/visualizer.py,sha256=qAc4_36r3MBDPq1R1jBeb_gcfzIrsylL1S31sRf3wuw,22562
|
138
|
-
autogluon.multimodal-1.1.
|
139
|
-
autogluon.multimodal-1.1.
|
140
|
-
autogluon.multimodal-1.1.
|
141
|
-
autogluon.multimodal-1.1.
|
142
|
-
autogluon.multimodal-1.1.
|
143
|
-
autogluon.multimodal-1.1.
|
144
|
-
autogluon.multimodal-1.1.
|
145
|
-
autogluon.multimodal-1.1.
|
138
|
+
autogluon.multimodal-1.1.2b20241109.dist-info/LICENSE,sha256=CeipvOyAZxBGUsFoaFqwkx54aPnIKEtm9a5u2uXxEws,10142
|
139
|
+
autogluon.multimodal-1.1.2b20241109.dist-info/METADATA,sha256=Z4r9fqRDj7_1aa26OhNowd_pJ0qsBnr8Fi8v9jHe9gM,12880
|
140
|
+
autogluon.multimodal-1.1.2b20241109.dist-info/NOTICE,sha256=7nPQuj8Kp-uXsU0S5so3-2dNU5EctS5hDXvvzzehd7E,114
|
141
|
+
autogluon.multimodal-1.1.2b20241109.dist-info/WHEEL,sha256=bFJAMchF8aTQGUgMZzHJyDDMPTO3ToJ7x23SLJa1SVo,92
|
142
|
+
autogluon.multimodal-1.1.2b20241109.dist-info/namespace_packages.txt,sha256=giERA4R78OkJf2ijn5slgjURlhRPzfLr7waIcGkzYAo,10
|
143
|
+
autogluon.multimodal-1.1.2b20241109.dist-info/top_level.txt,sha256=giERA4R78OkJf2ijn5slgjURlhRPzfLr7waIcGkzYAo,10
|
144
|
+
autogluon.multimodal-1.1.2b20241109.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
|
145
|
+
autogluon.multimodal-1.1.2b20241109.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|