autogluon.multimodal 1.2.1b20250302__py3-none-any.whl → 1.2.1b20250304__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (126)
  1. autogluon/multimodal/__init__.py +4 -2
  2. autogluon/multimodal/configs/data/default.yaml +4 -2
  3. autogluon/multimodal/configs/{environment → env}/default.yaml +2 -3
  4. autogluon/multimodal/configs/model/default.yaml +58 -11
  5. autogluon/multimodal/configs/{optimization → optim}/default.yaml +21 -4
  6. autogluon/multimodal/constants.py +16 -5
  7. autogluon/multimodal/data/__init__.py +14 -2
  8. autogluon/multimodal/data/dataset.py +2 -2
  9. autogluon/multimodal/data/infer_types.py +16 -2
  10. autogluon/multimodal/data/label_encoder.py +3 -3
  11. autogluon/multimodal/{utils → data}/nlpaug.py +4 -4
  12. autogluon/multimodal/data/preprocess_dataframe.py +55 -38
  13. autogluon/multimodal/data/process_categorical.py +35 -6
  14. autogluon/multimodal/data/process_document.py +59 -33
  15. autogluon/multimodal/data/process_image.py +198 -163
  16. autogluon/multimodal/data/process_label.py +7 -3
  17. autogluon/multimodal/data/process_mmlab/process_mmdet.py +1 -8
  18. autogluon/multimodal/data/process_mmlab/process_mmlab_base.py +2 -9
  19. autogluon/multimodal/data/process_mmlab/process_mmocr.py +1 -9
  20. autogluon/multimodal/data/process_ner.py +192 -4
  21. autogluon/multimodal/data/process_numerical.py +32 -5
  22. autogluon/multimodal/data/process_semantic_seg_img.py +23 -28
  23. autogluon/multimodal/data/process_text.py +95 -58
  24. autogluon/multimodal/data/template_engine.py +7 -9
  25. autogluon/multimodal/data/templates.py +0 -2
  26. autogluon/multimodal/data/trivial_augmenter.py +2 -2
  27. autogluon/multimodal/data/utils.py +564 -338
  28. autogluon/multimodal/learners/__init__.py +2 -1
  29. autogluon/multimodal/learners/base.py +189 -189
  30. autogluon/multimodal/learners/ensemble.py +748 -0
  31. autogluon/multimodal/learners/few_shot_svm.py +6 -15
  32. autogluon/multimodal/learners/matching.py +59 -84
  33. autogluon/multimodal/learners/ner.py +23 -22
  34. autogluon/multimodal/learners/object_detection.py +26 -21
  35. autogluon/multimodal/learners/semantic_segmentation.py +16 -18
  36. autogluon/multimodal/models/__init__.py +12 -3
  37. autogluon/multimodal/models/augmenter.py +175 -0
  38. autogluon/multimodal/models/categorical_mlp.py +13 -8
  39. autogluon/multimodal/models/clip.py +92 -18
  40. autogluon/multimodal/models/custom_transformer.py +75 -75
  41. autogluon/multimodal/models/document_transformer.py +23 -9
  42. autogluon/multimodal/models/ft_transformer.py +40 -35
  43. autogluon/multimodal/models/fusion/base.py +2 -4
  44. autogluon/multimodal/models/fusion/fusion_mlp.py +82 -18
  45. autogluon/multimodal/models/fusion/fusion_ner.py +1 -1
  46. autogluon/multimodal/models/fusion/fusion_transformer.py +23 -23
  47. autogluon/multimodal/models/{huggingface_text.py → hf_text.py} +21 -2
  48. autogluon/multimodal/models/meta_transformer.py +336 -0
  49. autogluon/multimodal/models/mlp.py +6 -6
  50. autogluon/multimodal/models/mmocr_text_detection.py +1 -1
  51. autogluon/multimodal/models/mmocr_text_recognition.py +0 -1
  52. autogluon/multimodal/models/ner_text.py +1 -8
  53. autogluon/multimodal/models/numerical_mlp.py +14 -8
  54. autogluon/multimodal/models/sam.py +12 -2
  55. autogluon/multimodal/models/t_few.py +21 -5
  56. autogluon/multimodal/models/timm_image.py +74 -32
  57. autogluon/multimodal/models/utils.py +877 -16
  58. autogluon/multimodal/optim/__init__.py +17 -0
  59. autogluon/multimodal/{optimization → optim}/lit_distiller.py +2 -1
  60. autogluon/multimodal/{optimization → optim}/lit_matcher.py +4 -10
  61. autogluon/multimodal/{optimization → optim}/lit_mmdet.py +2 -10
  62. autogluon/multimodal/{optimization → optim}/lit_module.py +139 -14
  63. autogluon/multimodal/{optimization → optim}/lit_ner.py +3 -3
  64. autogluon/multimodal/{optimization → optim}/lit_semantic_seg.py +1 -1
  65. autogluon/multimodal/optim/losses/__init__.py +14 -0
  66. autogluon/multimodal/optim/losses/bce_loss.py +25 -0
  67. autogluon/multimodal/optim/losses/focal_loss.py +81 -0
  68. autogluon/multimodal/optim/losses/lemda_loss.py +39 -0
  69. autogluon/multimodal/optim/losses/rkd_loss.py +103 -0
  70. autogluon/multimodal/optim/losses/softmax_losses.py +177 -0
  71. autogluon/multimodal/optim/losses/structure_loss.py +26 -0
  72. autogluon/multimodal/optim/losses/utils.py +313 -0
  73. autogluon/multimodal/optim/lr/__init__.py +1 -0
  74. autogluon/multimodal/optim/lr/utils.py +332 -0
  75. autogluon/multimodal/optim/metrics/__init__.py +4 -0
  76. autogluon/multimodal/optim/metrics/coverage_metrics.py +42 -0
  77. autogluon/multimodal/optim/metrics/hit_rate_metrics.py +78 -0
  78. autogluon/multimodal/optim/metrics/ranking_metrics.py +231 -0
  79. autogluon/multimodal/optim/metrics/utils.py +359 -0
  80. autogluon/multimodal/optim/utils.py +284 -0
  81. autogluon/multimodal/predictor.py +51 -12
  82. autogluon/multimodal/utils/__init__.py +19 -45
  83. autogluon/multimodal/utils/cache.py +23 -2
  84. autogluon/multimodal/utils/checkpoint.py +58 -5
  85. autogluon/multimodal/utils/config.py +127 -55
  86. autogluon/multimodal/utils/device.py +120 -0
  87. autogluon/multimodal/utils/distillation.py +8 -8
  88. autogluon/multimodal/utils/download.py +1 -1
  89. autogluon/multimodal/utils/env.py +22 -0
  90. autogluon/multimodal/utils/export.py +3 -3
  91. autogluon/multimodal/utils/hpo.py +5 -5
  92. autogluon/multimodal/utils/inference.py +37 -4
  93. autogluon/multimodal/utils/install.py +91 -0
  94. autogluon/multimodal/utils/load.py +52 -47
  95. autogluon/multimodal/utils/log.py +6 -41
  96. autogluon/multimodal/utils/matcher.py +3 -2
  97. autogluon/multimodal/utils/onnx.py +0 -4
  98. autogluon/multimodal/utils/path.py +10 -0
  99. autogluon/multimodal/utils/precision.py +130 -0
  100. autogluon/multimodal/{presets.py → utils/presets.py} +259 -66
  101. autogluon/multimodal/{problem_types.py → utils/problem_types.py} +30 -1
  102. autogluon/multimodal/utils/save.py +47 -29
  103. autogluon/multimodal/utils/strategy.py +24 -0
  104. autogluon/multimodal/version.py +1 -1
  105. {autogluon.multimodal-1.2.1b20250302.dist-info → autogluon.multimodal-1.2.1b20250304.dist-info}/METADATA +5 -5
  106. autogluon.multimodal-1.2.1b20250304.dist-info/RECORD +163 -0
  107. autogluon/multimodal/optimization/__init__.py +0 -16
  108. autogluon/multimodal/optimization/losses.py +0 -394
  109. autogluon/multimodal/optimization/utils.py +0 -1054
  110. autogluon/multimodal/utils/cloud_io.py +0 -80
  111. autogluon/multimodal/utils/data.py +0 -701
  112. autogluon/multimodal/utils/environment.py +0 -395
  113. autogluon/multimodal/utils/metric.py +0 -500
  114. autogluon/multimodal/utils/model.py +0 -558
  115. autogluon.multimodal-1.2.1b20250302.dist-info/RECORD +0 -145
  116. /autogluon/multimodal/{optimization → optim}/deepspeed.py +0 -0
  117. /autogluon/multimodal/{optimization/lr_scheduler.py → optim/lr/lr_schedulers.py} +0 -0
  118. /autogluon/multimodal/{optimization → optim/metrics}/semantic_seg_metrics.py +0 -0
  119. /autogluon/multimodal/{registry.py → utils/registry.py} +0 -0
  120. /autogluon.multimodal-1.2.1b20250302-py3.9-nspkg.pth → /autogluon.multimodal-1.2.1b20250304-py3.9-nspkg.pth +0 -0
  121. {autogluon.multimodal-1.2.1b20250302.dist-info → autogluon.multimodal-1.2.1b20250304.dist-info}/LICENSE +0 -0
  122. {autogluon.multimodal-1.2.1b20250302.dist-info → autogluon.multimodal-1.2.1b20250304.dist-info}/NOTICE +0 -0
  123. {autogluon.multimodal-1.2.1b20250302.dist-info → autogluon.multimodal-1.2.1b20250304.dist-info}/WHEEL +0 -0
  124. {autogluon.multimodal-1.2.1b20250302.dist-info → autogluon.multimodal-1.2.1b20250304.dist-info}/namespace_packages.txt +0 -0
  125. {autogluon.multimodal-1.2.1b20250302.dist-info → autogluon.multimodal-1.2.1b20250304.dist-info}/top_level.txt +0 -0
  126. {autogluon.multimodal-1.2.1b20250302.dist-info → autogluon.multimodal-1.2.1b20250304.dist-info}/zip-safe +0 -0
@@ -1,8 +1,10 @@
1
+ from autogluon.common.utils.log_utils import _add_stream_handler
2
+
1
3
  try:
2
4
  from .version import __version__
3
5
  except ImportError:
4
6
  pass
5
7
 
6
- from . import constants, data, learners, models, optimization, predictor, problem_types, utils
7
8
  from .predictor import MultiModalPredictor
8
- from .utils import download
9
+
10
+ _add_stream_handler()
@@ -7,6 +7,7 @@ data:
7
7
  minimum_cat_count: 100 # The minimum number of occurrences a category must have in the training data to avoid being considered a rare category.
8
8
  maximum_num_cat: 20 # The maximum amount of categories that can be considered non-rare.
9
9
  convert_to_text: False # Whether to convert the feature to text.
10
+ convert_to_text_template: "latex" # The template used to convert categorical to text. Choices are: "direct", "list", "text", "latex".
10
11
  numerical:
11
12
  convert_to_text: False # Whether to convert the feature to text.
12
13
  scaler_with_mean: True # Whether to normalize with mean.
@@ -14,7 +15,7 @@ data:
14
15
  document:
15
16
  missing_value_strategy: "zero" # How to deal with missing documents. By default, we use a zero document image to replace a missing document. We also support "skip", i.e., skipping a sample with missing documents.
16
17
  label:
17
- numerical_label_preprocessing: "standardscaler" # The mode of label preprocessing for . Support "standardscaler" or "minmaxscaler" or "none" / None (means no transform).
18
+ numerical_preprocessing: "standardscaler" # The mode of numerical label preprocessing. Supports "standardscaler", "minmaxscaler", or None (meaning no transform).
18
19
  pos_label: # The name of binary classification's positive class. It's used in computing some metrics, e.g., roc_auc. If not provided, then use label_encoder.classes_[1].
19
20
  column_features_pooling_mode: "concat" # How to pool multi-column features into one feature vector. Currently only support "concat" or "mean" for few shot classification.
20
21
  mixup:
@@ -22,11 +23,12 @@ data:
22
23
  mixup_alpha: 0.8 # Mixup alpha.
23
24
  cutmix_alpha: 1.0 # Cutmix alpha.
24
25
  cutmix_minmax: # Cutmix min/max ratio, it will override cutmix alpha if set, a list/tuple with size two.
25
- prob: 1.0 # The probability of conducting mixup/cutmix if enable.
26
+ prob: 1.0 # The probability of conducting mixup/cutmix if enabled.
26
27
  switch_prob: 0.5 # The probability of switching mixup to cutmix if both are enabled.
27
28
  mode: "batch" # Perform mixup/cutmix on "batch" or "pair" or "elem".
28
29
  turn_off_epoch: 5 # The epoch when the mixup will be turned off.
29
30
  label_smoothing: 0.1 # Label smoothing.
31
+ modality_dropout: 0
30
32
  templates:
31
33
  turn_on: False
32
34
  num_templates: 30 # The number of templates to sample from uniformly.
@@ -3,11 +3,10 @@ env:
3
3
  num_nodes: 1
4
4
  batch_size: 128 # this is a desired batch size; pl trainer will accumulate gradients when per step batch is smaller.
5
5
  per_gpu_batch_size: 8 # training per gpu batch size
6
- eval_batch_size_ratio: 4 # per_gpu_batch_size_evaluation = per_gpu_batch_size * eval_batch_size_ratio
7
- per_gpu_batch_size_evaluation: # This is deprecated. Use eval_batch_size_ratio instead.
6
+ inference_batch_size_ratio: 4 # per_gpu_batch_size_for_inference = per_gpu_batch_size * inference_batch_size_ratio
8
7
  precision: "16-mixed" # training precision. Refer to https://lightning.ai/docs/pytorch/stable/common/trainer.html#precision
9
8
  num_workers: 2 # pytorch training dataloader workers.
10
- num_workers_evaluation: 2 # pytorch prediction/test dataloader workers.
9
+ num_workers_inference: 2 # pytorch prediction/test dataloader workers.
11
10
  accelerator: "auto" # "cpu", "gpu", or "auto"
12
11
  fast_dev_run: False
13
12
  deterministic: False
@@ -4,21 +4,23 @@ model:
4
4
  hidden_size: 64
5
5
  activation: "leaky_relu"
6
6
  num_layers: 1
7
- drop_rate: 0.1
7
+ dropout: 0.1
8
8
  normalization: "layer_norm"
9
9
  data_types:
10
10
  - "categorical"
11
+
11
12
  numerical_mlp:
12
13
  hidden_size: 128
13
14
  activation: "leaky_relu"
14
15
  num_layers: 1
15
- drop_rate: 0.1
16
+ dropout: 0.1
16
17
  normalization: "layer_norm"
17
- d_token: 8
18
+ token_dim: 8
18
19
  embedding_arch:
19
20
  data_types:
20
21
  - "numerical"
21
22
  merge: "concat"
23
+
22
24
  hf_text:
23
25
  checkpoint_name: "google/electra-base-discriminator"
24
26
  gradient_checkpointing: False
@@ -39,6 +41,7 @@ model:
39
41
  # - "random_delete(0.05)" # less than 0.1 based on eda paper
40
42
  # - "syn_replacement(0.05)" # less than 0.1 based on eda paper
41
43
  # - "insert_punc(0.05)"
44
+
42
45
  ner_text:
43
46
  checkpoint_name: "bert-base-cased"
44
47
  max_text_len: 512
@@ -53,6 +56,7 @@ model:
53
56
  special_tags:
54
57
  - X # CLS, SEP, and non-first tokens of a word will be labelled as X
55
58
  - O # Outside of a named entity
59
+
56
60
  document_transformer:
57
61
  checkpoint_name: "microsoft/layoutlmv3-base" # document foundation models
58
62
  gradient_checkpointing: False
@@ -75,6 +79,7 @@ model:
75
79
  stochastic_chunk: False
76
80
  text_aug_detect_length: 10 # We perform text augmentation only if a text has more than text_aug_detect_length words. It is used to differentiate text columns versus tabular columns that are treated as text.
77
81
  text_trivial_aug_maxscale: 0.0 # augmentation magnitude randomly drawn from [0, text_trivial_aug_maxscale]
82
+
78
83
  t_few:
79
84
  checkpoint_name: "t5-small" #"bigscience/T0_3B"
80
85
  gradient_checkpointing: False
@@ -91,6 +96,8 @@ model:
91
96
  stochastic_chunk: False
92
97
  text_aug_detect_length: 10 # We perform text augmentation only if a text has more than text_aug_detect_length words. It is used to differentiate text columns versus tabular columns that are treated as text.
93
98
  text_trivial_aug_maxscale: 0.0 # augmentation magnitude randomly drawn from [0, text_trivial_aug_maxscale]
99
+ text_train_augment_types:
100
+
94
101
  timm_image:
95
102
  checkpoint_name: "swin_base_patch4_window7_224"
96
103
  mix_choice: "all_logits"
@@ -105,7 +112,9 @@ model:
105
112
  - "center_crop"
106
113
  image_norm: "imagenet"
107
114
  image_size: null
108
- max_img_num_per_col: 2
115
+ image_chan_num: 3
116
+ use_learnable_image: False
117
+ max_image_num_per_column: 1
109
118
 
110
119
  mmdet_image:
111
120
  checkpoint_name: "yolov3_mobilenetv2_8xb24-320-300e_coco"
@@ -161,7 +170,9 @@ model:
161
170
  - "center_crop"
162
171
  image_norm: "clip"
163
172
  image_size: 224
164
- max_img_num_per_col: 2
173
+ image_chan_num: 3
174
+ use_learnable_image: False
175
+ max_image_num_per_column: 1
165
176
  tokenizer_name: "clip"
166
177
  max_text_len: 77 # The maximum possible length.
167
178
  insert_sep: False
@@ -174,15 +185,17 @@ model:
174
185
  # - "random_delete(0.05)" # less than 0.1 based on eda paper
175
186
  # - "syn_replacement(0.05)" # less than 0.1 based on eda paper
176
187
  # - "insert_punc(0.05)"
188
+
177
189
  fusion_mlp:
178
- weight: 0.1
190
+ aux_loss_weight:
179
191
  adapt_in_features: "max"
180
192
  hidden_sizes:
181
193
  - 128
182
194
  activation: "leaky_relu"
183
- drop_rate: 0.1
195
+ dropout: 0.1
184
196
  normalization: "layer_norm"
185
197
  data_types:
198
+
186
199
  fusion_ner:
187
200
  weight:
188
201
  adapt_in_features: "max"
@@ -192,21 +205,24 @@ model:
192
205
  drop_rate: 0.1
193
206
  normalization: "layer_norm"
194
207
  data_types:
208
+
195
209
  fusion_transformer:
210
+ aux_loss_weight:
196
211
  hidden_size: 192
197
- n_blocks: 3
198
- attention_n_heads: 8
212
+ num_blocks: 3
213
+ attention_num_heads: 8
199
214
  adapt_in_features: "max"
200
215
  attention_dropout: 0.2
201
216
  residual_dropout: 0.0
202
217
  ffn_dropout: 0.1
203
- ffn_d_hidden: 192
218
+ ffn_hidden_size: 192
204
219
  normalization: "layer_norm"
205
220
  ffn_activation: "geglu"
206
221
  head_activation: "relu"
207
222
  data_types:
208
223
  additive_attention: False # Whether to use lightweight additive attention, can be True, False or "auto"
209
224
  share_qv_weights: False # Whether to share weight for query and value, can be True, False or "auto"
225
+
210
226
  ft_transformer:
211
227
  data_types:
212
228
  - "categorical"
@@ -216,7 +232,7 @@ model:
216
232
  token_dim: 192
217
233
  hidden_size: 192
218
234
  num_blocks: 3
219
- attention_n_heads: 8
235
+ attention_num_heads: 8
220
236
  attention_dropout: 0.2
221
237
  residual_dropout: 0.0
222
238
  ffn_dropout: 0.1
@@ -247,3 +263,34 @@ model:
247
263
  frozen_layers: ["mask_decoder.iou_prediction_head", "prompt_encoder"]
248
264
  num_mask_tokens: 1
249
265
  ignore_label: 255
266
+
267
+ meta_transformer:
268
+ data_types:
269
+ - "image"
270
+ - "text"
271
+ - "categorical"
272
+ - "numerical"
273
+ checkpoint_path: null
274
+ model_version: "base"
275
+ requires_all_dtypes: False
276
+ train_transforms:
277
+ - "resize_shorter_side"
278
+ - "center_crop"
279
+ - "trivial_augment"
280
+ val_transforms:
281
+ - "resize_shorter_side"
282
+ - "center_crop"
283
+ image_norm: "imagenet"
284
+ image_size: 224
285
+ image_chan_num: 3
286
+ use_learnable_image: False
287
+ max_image_num_per_column: 1
288
+ tokenizer_name: "hf_auto"
289
+ max_text_len: 512 # If None or <=0, then use the max length of pretrained models.
290
+ insert_sep: True
291
+ text_segment_num: 2
292
+ stochastic_chunk: False
293
+ text_aug_detect_length: 10 # We perform text augmentation only if a text has more than text_aug_detect_length words. It is used to differentiate text columns versus tabular columns that are treated as text.
294
+ text_trivial_aug_maxscale: 0.1 # augmentation magnitude randomly drawn from [0, text_trivial_aug_maxscale]
295
+ text_train_augment_types:
296
+ merge: "concat"
@@ -1,6 +1,6 @@
1
- optimization:
1
+ optim:
2
2
  optim_type: "adamw"
3
- learning_rate: 1.0e-4
3
+ lr: 1.0e-4
4
4
  weight_decay: 0.001
5
5
  lr_choice: "layerwise_decay"
6
6
  lr_decay: 0.9
@@ -18,11 +18,12 @@ optimization:
18
18
  gradient_clip_algorithm: "norm"
19
19
  track_grad_norm: -1 # Whether to check gradient norm. We can set it to 2 to check for gradient norm.
20
20
  log_every_n_steps: 10
21
+ label_smoothing: 0
21
22
  top_k: 3
22
23
  top_k_average_method:
23
24
  "greedy_soup" # We support averaging method described in https://arxiv.org/pdf/2203.05482.pdf.
24
25
  # Currently support "uniform_soup", "greedy_soup", and "best".
25
- efficient_finetune: null # Can be 'bit_fit' (only finetune bias), 'norm_fit' (finetune the normalization terms + bias terms), lora (LoRA Adaptations only), lora_bias (LoRA Adaptation + bit_fit), lora_norm (LoRA Adaptation + norm_fit), or null
26
+ peft: null # Can be 'bit_fit' (only finetune bias), 'norm_fit' (finetune the normalization terms + bias terms), lora (LoRA Adaptations only), lora_bias (LoRA Adaptation + bit_fit), lora_norm (LoRA Adaptation + norm_fit), or null
26
27
  lora:
27
28
  module_filter: null # Specify which module (if any) to adapt (e.g. ".*EncDecAttention|.*DenseReluDense"). By default, all modules in a model are considered (i.e. empty filter).
28
29
  filter: # Specify which layer in a module to adapt. Default fine-tune only query and value attention weights, recommended in https://arxiv.org/abs/2106.09685
@@ -35,7 +36,7 @@ optimization:
35
36
  r: 8
36
37
  alpha: 8
37
38
  conv_lora_expert_num: 8 # default setting for Conv-LoRA
38
- loss_function:
39
+ loss_func:
39
40
  "auto" # The replaced loss for regression. Can only support loss function in torch.nn.
40
41
  # example
41
42
  # "BCEWithLogitsLoss" or "nn.BCEWithLogitsLoss"
@@ -48,3 +49,19 @@ optimization:
48
49
  loss_mask_weight: 5.0
49
50
  loss_dice_weight: 5.0
50
51
  extra_trainable_params: []
52
+ cross_modal_align: null
53
+ cross_modal_align_weight: 0
54
+ automatic_optimization: True
55
+ lemda:
56
+ turn_on: False
57
+ arch_type: "mlp_vae"
58
+ z_dim: 8
59
+ num_layers: 6
60
+ kld_weight: 0.1
61
+ mse_weight: 0.1
62
+ adv_weight: 1.0e-4
63
+ consist_weight: 0.01
64
+ consist_threshold: 0.5
65
+ lr: 1.0e-4
66
+ optim_type: "adamw"
67
+ weight_decay: 1.0e-5
@@ -59,12 +59,17 @@ SEMANTIC_SEGMENTATION_GT = "semantic_segmentation_gt"
59
59
 
60
60
  # Output keys
61
61
  LOGITS = "logits"
62
+ ORI_LOGITS = "ori_logits"
63
+ AUG_LOGITS = "aug_logits"
62
64
  TEMPLATE_LOGITS = "template_logits"
63
65
  LM_TARGET = "lm_target"
64
66
  LOSS = "loss"
65
67
  OUTPUT = "output"
66
68
  WEIGHT = "weight"
67
69
  FEATURES = "features"
70
+ MULTIMODAL_FEATURES = "multimodal_features" # used for the adapted multimodal features before the fusion module
71
+ MULTIMODAL_FEATURES_PRE_AUG = "multimodal_features_pre_aug"
72
+ MULTIMODAL_FEATURES_POST_AUG = "multimodal_features_post_aug"
68
73
  RAW_FEATURES = "raw_features"
69
74
  MASKS = "masks"
70
75
  PROBABILITY = "probability"
@@ -73,6 +78,8 @@ BBOX = "bbox"
73
78
  ROIS = "rois"
74
79
  SCORE = "score"
75
80
  LOGIT_SCALE = "logit_scale"
81
+ VAE_MEAN = "vae_mean"
82
+ VAE_VAR = "vae_var"
76
83
 
77
84
  # Loss
78
85
  MOE_LOSS = "moe_loss"
@@ -142,6 +149,7 @@ FM = "fm"
142
149
  MAE = "mae"
143
150
  BER = "ber"
144
151
  IOU = "iou"
152
+ COVERAGE = "coverage"
145
153
  RETRIEVAL_METRICS = [NDCG, PRECISION, RECALL, MRR]
146
154
  METRIC_MODE_MAP = {
147
155
  ACC: MAX,
@@ -168,6 +176,7 @@ METRIC_MODE_MAP = {
168
176
  SM: MAX,
169
177
  IOU: MAX,
170
178
  BER: MIN,
179
+ COVERAGE: MAX,
171
180
  }
172
181
 
173
182
  MATCHING_METRICS = {
@@ -179,7 +188,7 @@ MATCHING_METRICS_WITHOUT_PROBLEM_TYPE = [RECALL, NDCG]
179
188
 
180
189
  EVALUATION_METRICS = {
181
190
  # Use evaluation metrics from METRICS for these types
182
- BINARY: METRICS[BINARY].keys(),
191
+ BINARY: list(METRICS[BINARY].keys()) + [COVERAGE],
183
192
  MULTICLASS: METRICS[MULTICLASS].keys(),
184
193
  REGRESSION: METRICS[REGRESSION].keys(),
185
194
  OBJECT_DETECTION: DETECTION_METRICS,
@@ -197,6 +206,7 @@ VALIDATION_METRICS = {
197
206
  # Training status
198
207
  TRAIN = "train"
199
208
  VALIDATE = "validate"
209
+ VAL = "val"
200
210
  TEST = "test"
201
211
  PREDICT = "predict"
202
212
 
@@ -217,11 +227,11 @@ Y_TRUE = "y_true"
217
227
  # Configuration keys
218
228
  MODEL = "model"
219
229
  DATA = "data"
220
- OPTIMIZATION = "optimization"
221
- ENVIRONMENT = "environment"
230
+ OPTIM = "optim"
231
+ ENV = "env"
222
232
  DISTILLER = "distiller"
223
233
  MATCHER = "matcher"
224
- VALID_CONFIG_KEYS = [MODEL, DATA, OPTIMIZATION, ENVIRONMENT, DISTILLER, MATCHER]
234
+ VALID_CONFIG_KEYS = [MODEL, DATA, OPTIM, ENV, DISTILLER, MATCHER]
225
235
 
226
236
  # Image normalization mean and std. This is only to normalize images for the CLIP model.
227
237
  CLIP_IMAGE_MEAN = (0.48145466, 0.4578275, 0.40821073)
@@ -275,7 +285,7 @@ PEFT_STRATEGIES = list(set(PEFT_ADDITIVE_STRATEGIES) | set(PEFT_NON_ADDITIVE_STR
275
285
  # DeepSpeed constants
276
286
  DEEPSPEED_OFFLOADING = "deepspeed_stage_3_offload"
277
287
  DEEPSPEED_STRATEGY = "deepspeed"
278
- DEEPSPEED_MODULE = "autogluon.multimodal.optimization.deepspeed"
288
+ DEEPSPEED_MODULE = "autogluon.multimodal.optim.deepspeed"
279
289
  DEEPSPEED_MIN_PL_VERSION = "1.7.1"
280
290
 
281
291
  # registered model keys. TODO: document how to add new models.
@@ -298,6 +308,7 @@ DOCUMENT_TRANSFORMER = "document_transformer"
298
308
  HF_MODELS = (HF_TEXT, T_FEW, CLIP, NER_TEXT, DOCUMENT_TRANSFORMER)
299
309
  MMLAB_MODELS = (MMDET_IMAGE, MMOCR_TEXT_DET, MMOCR_TEXT_RECOG)
300
310
  SAM = "sam"
311
+ META_TRANSFORMER = "meta_transformer"
301
312
 
302
313
  # matcher loss type
303
314
  CONTRASTIVE_LOSS = "contrastive_loss"
@@ -1,4 +1,3 @@
1
- from . import collator, infer_types, randaug, utils
2
1
  from .datamodule import BaseDataModule
3
2
  from .dataset import BaseDataset
4
3
  from .dataset_mmlab import MultiImageMixDataset
@@ -9,8 +8,9 @@ from .infer_types import (
9
8
  infer_rois_column_type,
10
9
  is_image_column,
11
10
  )
12
- from .label_encoder import CustomLabelEncoder, NerLabelEncoder
13
11
  from .mixup import MixupModule
12
+ from .infer_types import infer_column_types, infer_output_shape, infer_problem_type, is_image_column, infer_ner_column_type
13
+ from .label_encoder import CustomLabelEncoder, NerLabelEncoder
14
14
  from .preprocess_dataframe import MultiModalFeaturePreprocessor
15
15
  from .process_categorical import CategoricalProcessor
16
16
  from .process_document import DocumentProcessor
@@ -21,3 +21,15 @@ from .process_ner import NerProcessor
21
21
  from .process_numerical import NumericalProcessor
22
22
  from .process_semantic_seg_img import SemanticSegImageProcessor
23
23
  from .process_text import TextProcessor
24
+ from .utils import (
25
+ create_data_processor,
26
+ create_fusion_data_processors,
27
+ data_to_df,
28
+ get_detected_data_types,
29
+ get_mixup,
30
+ infer_dtypes_by_model_names,
31
+ infer_scarcity_mode_by_data_size,
32
+ init_df_preprocessor,
33
+ split_train_tuning_data,
34
+ turn_on_off_feature_column_info,
35
+ )
@@ -4,7 +4,7 @@ from typing import Dict, List, Optional, Union
4
4
  import pandas as pd
5
5
  import torch
6
6
 
7
- from ..constants import AUTOMM, GET_ITEM_ERROR_RETRY
7
+ from ..constants import GET_ITEM_ERROR_RETRY
8
8
  from .preprocess_dataframe import MultiModalFeaturePreprocessor
9
9
  from .utils import apply_data_processor, apply_df_preprocessor, get_per_sample_features
10
10
 
@@ -100,7 +100,7 @@ class BaseDataset(torch.utils.data.Dataset):
100
100
  per_ret = apply_data_processor(
101
101
  per_sample_features=per_sample_features,
102
102
  data_processors=per_processors_group,
103
- feature_modalities=getattr(self, f"modality_types_{group_id}"),
103
+ data_types=getattr(self, f"modality_types_{group_id}"),
104
104
  is_training=self.is_training,
105
105
  )
106
106
  ret.update(per_ret)
@@ -19,7 +19,6 @@ from ..constants import (
19
19
  DOCUMENT_IMAGE,
20
20
  DOCUMENT_PDF,
21
21
  IDENTIFIER,
22
- IMAGE,
23
22
  IMAGE_BASE64_STR,
24
23
  IMAGE_BYTEARRAY,
25
24
  IMAGE_PATH,
@@ -37,7 +36,6 @@ from ..constants import (
37
36
  TEXT,
38
37
  TEXT_NER,
39
38
  )
40
- from .utils import is_rois_input
41
39
 
42
40
  logger = logging.getLogger(__name__)
43
41
 
@@ -114,6 +112,22 @@ def is_categorical_column(
114
112
  return False
115
113
 
116
114
 
115
+ def is_rois_input(sample):
116
+ """
117
+ Check whether a sample holds rois for object detection.
118
+
119
+ Parameters
120
+ ----------
121
+ sample
122
+ The sampled data.
123
+
124
+ Returns
125
+ -------
126
+ bool, whether the sample holds rois for object detection
127
+ """
128
+ return isinstance(sample, list) and len(sample) and isinstance(sample[0], list) and len(sample[0]) == 5
129
+
130
+
117
131
  def is_rois_column(data: pd.Series) -> bool:
118
132
  """
119
133
  Identify if a column is one rois column.
@@ -9,7 +9,7 @@ import pandas as pd
9
9
  from omegaconf import DictConfig, OmegaConf
10
10
  from sklearn.preprocessing import LabelEncoder
11
11
 
12
- from ..constants import AUTOMM, END_OFFSET, ENTITY_GROUP, NER_ANNOTATION, PROBABILITY, START_OFFSET
12
+ from ..constants import END_OFFSET, ENTITY_GROUP, PROBABILITY, START_OFFSET
13
13
 
14
14
  logger = logging.getLogger(__name__)
15
15
 
@@ -137,12 +137,12 @@ class NerLabelEncoder:
137
137
  transformed_y
138
138
  A list of word level annotations.
139
139
  """
140
- from .utils import process_ner_annotations
140
+ from .process_ner import NerProcessor
141
141
 
142
142
  all_annotations, _ = self.extract_ner_annotations(y)
143
143
  transformed_y = []
144
144
  for annotation, text_snippet in zip(all_annotations, x.items()):
145
- word_label, _, _, _ = process_ner_annotations(
145
+ word_label, _, _, _ = NerProcessor.process_ner_annotations(
146
146
  annotation, text_snippet[-1], self.entity_map, tokenizer, is_eval=True
147
147
  )
148
148
  word_label_invers = []
@@ -78,14 +78,14 @@ class InsertPunctuation(Augmenter):
78
78
  new = " ".join(new)
79
79
  return new
80
80
 
81
- @classmethod
82
- def clean(cls, data):
81
+ @staticmethod
82
+ def clean(data):
83
83
  if isinstance(data, list):
84
84
  return [d.strip() if d else d for d in data]
85
85
  return data.strip()
86
86
 
87
- @classmethod
88
- def is_duplicate(cls, dataset, data):
87
+ @staticmethod
88
+ def is_duplicate(dataset, data):
89
89
  for d in dataset:
90
90
  if d == data:
91
91
  return True