autogluon.features 1.5.1b20260107__tar.gz → 1.5.1b20260109__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. {autogluon_features-1.5.1b20260107/src/autogluon.features.egg-info → autogluon_features-1.5.1b20260109}/PKG-INFO +2 -2
  2. {autogluon_features-1.5.1b20260107 → autogluon_features-1.5.1b20260109}/src/autogluon/features/generators/abstract.py +41 -0
  3. {autogluon_features-1.5.1b20260107 → autogluon_features-1.5.1b20260109}/src/autogluon/features/generators/auto_ml_pipeline.py +50 -14
  4. {autogluon_features-1.5.1b20260107 → autogluon_features-1.5.1b20260109}/src/autogluon/features/generators/bulk.py +8 -3
  5. {autogluon_features-1.5.1b20260107 → autogluon_features-1.5.1b20260109}/src/autogluon/features/generators/identity.py +6 -0
  6. {autogluon_features-1.5.1b20260107 → autogluon_features-1.5.1b20260109}/src/autogluon/features/version.py +1 -1
  7. {autogluon_features-1.5.1b20260107 → autogluon_features-1.5.1b20260109/src/autogluon.features.egg-info}/PKG-INFO +2 -2
  8. {autogluon_features-1.5.1b20260107 → autogluon_features-1.5.1b20260109}/src/autogluon.features.egg-info/requires.txt +1 -1
  9. {autogluon_features-1.5.1b20260107 → autogluon_features-1.5.1b20260109}/LICENSE +0 -0
  10. {autogluon_features-1.5.1b20260107 → autogluon_features-1.5.1b20260109}/NOTICE +0 -0
  11. {autogluon_features-1.5.1b20260107 → autogluon_features-1.5.1b20260109}/README.md +0 -0
  12. {autogluon_features-1.5.1b20260107 → autogluon_features-1.5.1b20260109}/setup.cfg +0 -0
  13. {autogluon_features-1.5.1b20260107 → autogluon_features-1.5.1b20260109}/setup.py +0 -0
  14. {autogluon_features-1.5.1b20260107 → autogluon_features-1.5.1b20260109}/src/autogluon/features/__init__.py +0 -0
  15. {autogluon_features-1.5.1b20260107 → autogluon_features-1.5.1b20260109}/src/autogluon/features/binning.py +0 -0
  16. {autogluon_features-1.5.1b20260107 → autogluon_features-1.5.1b20260109}/src/autogluon/features/generators/__init__.py +0 -0
  17. {autogluon_features-1.5.1b20260107 → autogluon_features-1.5.1b20260109}/src/autogluon/features/generators/arithmetic/__init__.py +0 -0
  18. {autogluon_features-1.5.1b20260107 → autogluon_features-1.5.1b20260109}/src/autogluon/features/generators/arithmetic/_numba_opt.py +0 -0
  19. {autogluon_features-1.5.1b20260107 → autogluon_features-1.5.1b20260109}/src/autogluon/features/generators/arithmetic/canonical_key.py +0 -0
  20. {autogluon_features-1.5.1b20260107 → autogluon_features-1.5.1b20260109}/src/autogluon/features/generators/arithmetic/combinations.py +0 -0
  21. {autogluon_features-1.5.1b20260107 → autogluon_features-1.5.1b20260109}/src/autogluon/features/generators/arithmetic/combinations_lite.py +0 -0
  22. {autogluon_features-1.5.1b20260107 → autogluon_features-1.5.1b20260109}/src/autogluon/features/generators/arithmetic/filtering.py +0 -0
  23. {autogluon_features-1.5.1b20260107 → autogluon_features-1.5.1b20260109}/src/autogluon/features/generators/arithmetic/memory.py +0 -0
  24. {autogluon_features-1.5.1b20260107 → autogluon_features-1.5.1b20260109}/src/autogluon/features/generators/arithmetic/operation.py +0 -0
  25. {autogluon_features-1.5.1b20260107 → autogluon_features-1.5.1b20260109}/src/autogluon/features/generators/arithmetic/preprocessor.py +0 -0
  26. {autogluon_features-1.5.1b20260107 → autogluon_features-1.5.1b20260109}/src/autogluon/features/generators/astype.py +0 -0
  27. {autogluon_features-1.5.1b20260107 → autogluon_features-1.5.1b20260109}/src/autogluon/features/generators/binned.py +0 -0
  28. {autogluon_features-1.5.1b20260107 → autogluon_features-1.5.1b20260109}/src/autogluon/features/generators/cat_as_num.py +0 -0
  29. {autogluon_features-1.5.1b20260107 → autogluon_features-1.5.1b20260109}/src/autogluon/features/generators/cat_int.py +0 -0
  30. {autogluon_features-1.5.1b20260107 → autogluon_features-1.5.1b20260109}/src/autogluon/features/generators/category.py +0 -0
  31. {autogluon_features-1.5.1b20260107 → autogluon_features-1.5.1b20260109}/src/autogluon/features/generators/datetime.py +0 -0
  32. {autogluon_features-1.5.1b20260107 → autogluon_features-1.5.1b20260109}/src/autogluon/features/generators/drop_duplicates.py +0 -0
  33. {autogluon_features-1.5.1b20260107 → autogluon_features-1.5.1b20260109}/src/autogluon/features/generators/drop_unique.py +0 -0
  34. {autogluon_features-1.5.1b20260107 → autogluon_features-1.5.1b20260109}/src/autogluon/features/generators/dummy.py +0 -0
  35. {autogluon_features-1.5.1b20260107 → autogluon_features-1.5.1b20260109}/src/autogluon/features/generators/fillna.py +0 -0
  36. {autogluon_features-1.5.1b20260107 → autogluon_features-1.5.1b20260109}/src/autogluon/features/generators/frequency.py +0 -0
  37. {autogluon_features-1.5.1b20260107 → autogluon_features-1.5.1b20260109}/src/autogluon/features/generators/isnan.py +0 -0
  38. {autogluon_features-1.5.1b20260107 → autogluon_features-1.5.1b20260109}/src/autogluon/features/generators/label_encoder.py +0 -0
  39. {autogluon_features-1.5.1b20260107 → autogluon_features-1.5.1b20260109}/src/autogluon/features/generators/memory_minimize.py +0 -0
  40. {autogluon_features-1.5.1b20260107 → autogluon_features-1.5.1b20260109}/src/autogluon/features/generators/one_hot_encoder.py +0 -0
  41. {autogluon_features-1.5.1b20260107 → autogluon_features-1.5.1b20260109}/src/autogluon/features/generators/oof_target_encoder.py +0 -0
  42. {autogluon_features-1.5.1b20260107 → autogluon_features-1.5.1b20260109}/src/autogluon/features/generators/pipeline.py +0 -0
  43. {autogluon_features-1.5.1b20260107 → autogluon_features-1.5.1b20260109}/src/autogluon/features/generators/rename.py +0 -0
  44. {autogluon_features-1.5.1b20260107 → autogluon_features-1.5.1b20260109}/src/autogluon/features/generators/skrub/__init__.py +0 -0
  45. {autogluon_features-1.5.1b20260107 → autogluon_features-1.5.1b20260109}/src/autogluon/features/generators/skrub/_sklearn_compat.py +0 -0
  46. {autogluon_features-1.5.1b20260107 → autogluon_features-1.5.1b20260109}/src/autogluon/features/generators/skrub/_squashing_scaler.py +0 -0
  47. {autogluon_features-1.5.1b20260107 → autogluon_features-1.5.1b20260109}/src/autogluon/features/generators/text_ngram.py +0 -0
  48. {autogluon_features-1.5.1b20260107 → autogluon_features-1.5.1b20260109}/src/autogluon/features/generators/text_special.py +0 -0
  49. {autogluon_features-1.5.1b20260107 → autogluon_features-1.5.1b20260109}/src/autogluon/features/utils.py +0 -0
  50. {autogluon_features-1.5.1b20260107 → autogluon_features-1.5.1b20260109}/src/autogluon/features/vectorizers.py +0 -0
  51. {autogluon_features-1.5.1b20260107 → autogluon_features-1.5.1b20260109}/src/autogluon.features.egg-info/SOURCES.txt +0 -0
  52. {autogluon_features-1.5.1b20260107 → autogluon_features-1.5.1b20260109}/src/autogluon.features.egg-info/dependency_links.txt +0 -0
  53. {autogluon_features-1.5.1b20260107 → autogluon_features-1.5.1b20260109}/src/autogluon.features.egg-info/namespace_packages.txt +0 -0
  54. {autogluon_features-1.5.1b20260107 → autogluon_features-1.5.1b20260109}/src/autogluon.features.egg-info/top_level.txt +0 -0
  55. {autogluon_features-1.5.1b20260107 → autogluon_features-1.5.1b20260109}/src/autogluon.features.egg-info/zip-safe +0 -0
  56. {autogluon_features-1.5.1b20260107 → autogluon_features-1.5.1b20260109}/tests/test_check_style.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: autogluon.features
3
- Version: 1.5.1b20260107
3
+ Version: 1.5.1b20260109
4
4
  Summary: Fast and Accurate ML in 3 Lines of Code
5
5
  Home-page: https://github.com/autogluon/autogluon
6
6
  Author: AutoGluon Community
@@ -38,7 +38,7 @@ License-File: NOTICE
38
38
  Requires-Dist: numpy<2.4.0,>=1.25.0
39
39
  Requires-Dist: pandas<2.4.0,>=2.0.0
40
40
  Requires-Dist: scikit-learn<1.8.0,>=1.4.0
41
- Requires-Dist: autogluon.common==1.5.1b20260107
41
+ Requires-Dist: autogluon.common==1.5.1b20260109
42
42
  Dynamic: author
43
43
  Dynamic: classifier
44
44
  Dynamic: description
@@ -558,6 +558,27 @@ class AbstractFeatureGenerator:
558
558
  def get_default_infer_features_in_args() -> dict:
559
559
  raise NotImplementedError
560
560
 
561
+ @staticmethod
562
+ def get_infer_features_in_args_to_drop() -> dict:
563
+ """Return a dict of kwargs for FeatureMetadata.get_features().
564
+
565
+ This allows specifying which features should be dropped after running this
566
+ feature generator in a feature generator group.
567
+
568
+ For example, assume you are using a feature generator to apply PCA to all
569
+ features of special type S_TEXT_EMBEDDING, then this function could return:
570
+ {
571
+ "invalid_special_types": [S_TEXT_EMBEDDING]
572
+ }
573
+ to inform the user that all S_TEXT_EMBEDDING features that are captured by PCA
574
+ should be dropped from the output of the feature generator group.
575
+ """
576
+ return {}
577
+
578
+ def estimate_output_feature_metadata(self, feature_metadata_in: FeatureMetadata, **kwargs) -> FeatureMetadata:
579
+ """Return an estimated representation of the feature metadata after fit_transform."""
580
+ raise NotImplementedError("This method is not implemented for this generator.")
581
+
561
582
  def _fit_generators(
562
583
  self, X, y, feature_metadata, generators: list["AbstractFeatureGenerator"], **kwargs
563
584
  ) -> (DataFrame, FeatureMetadata, list):
@@ -918,3 +939,23 @@ class AbstractFeatureGenerator:
918
939
  more_tags = base_class._more_tags(self)
919
940
  collected_tags.update(more_tags)
920
941
  return collected_tags
942
+
943
+
944
+ # FIXME: this logic still needs more work to become general purpose.
945
+ # - Needs to make it work for multiple feature generator groups
946
+ # - Needs to support all possible feature generators
947
+ def estimate_feature_metadata_after_generators(
948
+ *, feature_generators: list[list[AbstractFeatureGenerator]] | None, feature_metadata_in: FeatureMetadata, **kwargs
949
+ ) -> FeatureMetadata:
950
+ """Estimate the feature metadata after applying a set of feature generators."""
951
+ feature_metadata = copy.deepcopy(feature_metadata_in)
952
+ if feature_generators is not None:
953
+ for fg_group in feature_generators:
954
+ feature_metadatas = [
955
+ fg.estimate_output_feature_metadata(feature_metadata_in=feature_metadata, **kwargs) for fg in fg_group
956
+ ]
957
+ feature_metadata = FeatureMetadata.join_metadatas(
958
+ feature_metadatas,
959
+ shared_raw_features="error",
960
+ )
961
+ return feature_metadata
@@ -1,6 +1,17 @@
1
1
  import logging
2
+ from enum import Enum
2
3
 
3
- from autogluon.common.features.types import R_FLOAT, R_INT, R_OBJECT, S_IMAGE_BYTEARRAY, S_IMAGE_PATH, S_TEXT
4
+ from sklearn.feature_extraction.text import CountVectorizer
5
+
6
+ from autogluon.common.features.types import (
7
+ R_FLOAT,
8
+ R_INT,
9
+ R_OBJECT,
10
+ S_IMAGE_BYTEARRAY,
11
+ S_IMAGE_PATH,
12
+ S_TEXT,
13
+ )
14
+ from autogluon.features.generators.abstract import AbstractFeatureGenerator
4
15
 
5
16
  from .category import CategoryFeatureGenerator
6
17
  from .datetime import DatetimeFeatureGenerator
@@ -17,8 +28,8 @@ logger = logging.getLogger(__name__)
17
28
  # TODO: write out in English the full set of transformations that are applied (and eventually host page on website).
18
29
  # Also explicitly write out all of the feature-generator "hyperparameters" that might affect the results from the AutoML FeatureGenerator
19
30
  class AutoMLPipelineFeatureGenerator(PipelineFeatureGenerator):
20
- """
21
- Pipeline feature generator with simplified arguments to handle most Tabular data including text and dates adequately.
31
+ """Pipeline feature generator with simplified arguments to handle most Tabular data including text and dates adequately.
32
+
22
33
  This is the default feature generation pipeline used by AutoGluon when unspecified.
23
34
  For more customization options, refer to :class:`PipelineFeatureGenerator` and :class:`BulkFeatureGenerator`.
24
35
 
@@ -56,6 +67,17 @@ class AutoMLPipelineFeatureGenerator(PipelineFeatureGenerator):
56
67
  vectorizer : :class:`sklearn.feature_extraction.text.CountVectorizer`, default CountVectorizer(min_df=30, ngram_range=(1, 3), max_features=10000, dtype=np.uint8) # noqa
57
68
  sklearn CountVectorizer object to use in :class:`TextNgramFeatureGenerator`.
58
69
  Only used if `enable_text_ngram_features=True`.
70
+ text_ngram_params : dict, default None
71
+ Parameters besides vectorizer passed to the :class:`TextNgramFeatureGenerator`.
72
+ custom_feature_generators : list of :class:`AbstractFeatureGenerator`, default None
73
+ List of custom feature generators. This list is inserted in the first generator
74
+ step that is getting the original X data (i.e. before any other feature
75
+ generators are applied) as input.
76
+ Note, there might be an overlap of custom feature generators with the default
77
+ feature generators used in AutoMLPipelineFeatureGenerator. It is the user's
78
+ responsibility to avoid unwanted overlap and disable default generators if
79
+ needed.
80
+ If None, no custom feature generators are added.
59
81
  **kwargs :
60
82
  Refer to :class:`AbstractFeatureGenerator` documentation for details on valid key word arguments.
61
83
 
@@ -80,15 +102,16 @@ class AutoMLPipelineFeatureGenerator(PipelineFeatureGenerator):
80
102
 
81
103
  def __init__(
82
104
  self,
83
- enable_numeric_features=True,
84
- enable_categorical_features=True,
85
- enable_datetime_features=True,
86
- enable_text_special_features=True,
87
- enable_text_ngram_features=True,
88
- enable_raw_text_features=False,
89
- enable_vision_features=True,
90
- vectorizer=None,
91
- text_ngram_params=None,
105
+ enable_numeric_features: bool = True,
106
+ enable_categorical_features: bool = True,
107
+ enable_datetime_features: bool = True,
108
+ enable_text_special_features: bool = True,
109
+ enable_text_ngram_features: bool = True,
110
+ enable_raw_text_features: bool = False,
111
+ enable_vision_features: bool = True,
112
+ vectorizer: CountVectorizer | None = None,
113
+ text_ngram_params: dict | None = None,
114
+ custom_feature_generators: list[AbstractFeatureGenerator] | None = None,
92
115
  **kwargs,
93
116
  ):
94
117
  if "generators" in kwargs:
@@ -111,12 +134,15 @@ class AutoMLPipelineFeatureGenerator(PipelineFeatureGenerator):
111
134
  self.enable_raw_text_features = enable_raw_text_features
112
135
  self.enable_vision_features = enable_vision_features
113
136
  self.text_ngram_params = text_ngram_params if text_ngram_params else {}
137
+ self.custom_feature_generators = custom_feature_generators
114
138
 
115
139
  generators = self._get_default_generators(vectorizer=vectorizer)
116
140
  super().__init__(generators=generators, **kwargs)
117
141
 
142
+ # TODO: switch to / add skrub's String or Text encoders
118
143
  def _get_default_generators(self, vectorizer=None):
119
144
  generator_group = []
145
+
120
146
  if self.enable_numeric_features:
121
147
  generator_group.append(
122
148
  IdentityFeatureGenerator(infer_features_in_args=dict(valid_raw_types=[R_INT, R_FLOAT]))
@@ -125,7 +151,8 @@ class AutoMLPipelineFeatureGenerator(PipelineFeatureGenerator):
125
151
  generator_group.append(
126
152
  IdentityFeatureGenerator(
127
153
  infer_features_in_args=dict(
128
- required_special_types=[S_TEXT], invalid_special_types=[S_IMAGE_PATH, S_IMAGE_BYTEARRAY]
154
+ required_special_types=[S_TEXT],
155
+ invalid_special_types=[S_IMAGE_PATH, S_IMAGE_BYTEARRAY],
129
156
  ),
130
157
  name_suffix="_raw_text",
131
158
  )
@@ -157,6 +184,13 @@ class AutoMLPipelineFeatureGenerator(PipelineFeatureGenerator):
157
184
  )
158
185
  )
159
186
  )
187
+
188
+ if self.custom_feature_generators is not None:
189
+ generator_group = [
190
+ *generator_group,
191
+ *self.custom_feature_generators,
192
+ ]
193
+
160
194
  generators = [generator_group]
161
195
  return generators
162
196
 
@@ -167,5 +201,7 @@ class AutoMLPipelineFeatureGenerator(PipelineFeatureGenerator):
167
201
  class AutoMLInterpretablePipelineFeatureGenerator(AutoMLPipelineFeatureGenerator):
168
202
  def _get_category_feature_generator(self):
169
203
  return CategoryFeatureGenerator(
170
- minimize_memory=False, maximum_num_cat=10, post_generators=[OneHotEncoderFeatureGenerator()]
204
+ minimize_memory=False,
205
+ maximum_num_cat=10,
206
+ post_generators=[OneHotEncoderFeatureGenerator()],
171
207
  )
@@ -93,7 +93,7 @@ class BulkFeatureGenerator(AbstractFeatureGenerator):
93
93
  def __init__(
94
94
  self,
95
95
  generators: list[list[AbstractFeatureGenerator | list]],
96
- pre_generators: list[AbstractFeatureGenerator] = None,
96
+ pre_generators: list[AbstractFeatureGenerator | List[AbstractFeatureGenerator]] = None,
97
97
  remove_unused_features: bool | str = True,
98
98
  **kwargs,
99
99
  ):
@@ -143,10 +143,15 @@ class BulkFeatureGenerator(AbstractFeatureGenerator):
143
143
 
144
144
  pre_generators = [AsTypeFeatureGenerator()] + pre_generators
145
145
  self.pre_enforce_types = False
146
- pre_generators = [[pre_generator] for pre_generator in pre_generators]
146
+ pre_generators = [
147
+ pre_generator if isinstance(pre_generator, list) else [pre_generator] for pre_generator in pre_generators
148
+ ]
147
149
 
148
150
  if self._post_generators is not None:
149
- post_generators = [[post_generator] for post_generator in self._post_generators]
151
+ post_generators = [
152
+ post_generator if isinstance(post_generator, list) else [post_generator]
153
+ for post_generator in self._post_generators
154
+ ]
150
155
  self._post_generators = []
151
156
  else:
152
157
  post_generators = []
@@ -2,6 +2,8 @@ import logging
2
2
 
3
3
  from pandas import DataFrame
4
4
 
5
+ from autogluon.common.features.feature_metadata import FeatureMetadata
6
+
5
7
  from .abstract import AbstractFeatureGenerator
6
8
 
7
9
  logger = logging.getLogger(__name__)
@@ -23,3 +25,7 @@ class IdentityFeatureGenerator(AbstractFeatureGenerator):
23
25
 
24
26
  def _more_tags(self):
25
27
  return {"feature_interactions": False}
28
+
29
+ def estimate_output_feature_metadata(self, feature_metadata_in: FeatureMetadata, **kwargs) -> FeatureMetadata:
30
+ features_to_remove = feature_metadata_in.get_features(**self._infer_features_in_args)
31
+ return feature_metadata_in.keep_features(features_to_remove, inplace=False)
@@ -1,4 +1,4 @@
1
1
  """This is the autogluon version file."""
2
2
 
3
- __version__ = "1.5.1b20260107"
3
+ __version__ = "1.5.1b20260109"
4
4
  __lite__ = False
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: autogluon.features
3
- Version: 1.5.1b20260107
3
+ Version: 1.5.1b20260109
4
4
  Summary: Fast and Accurate ML in 3 Lines of Code
5
5
  Home-page: https://github.com/autogluon/autogluon
6
6
  Author: AutoGluon Community
@@ -38,7 +38,7 @@ License-File: NOTICE
38
38
  Requires-Dist: numpy<2.4.0,>=1.25.0
39
39
  Requires-Dist: pandas<2.4.0,>=2.0.0
40
40
  Requires-Dist: scikit-learn<1.8.0,>=1.4.0
41
- Requires-Dist: autogluon.common==1.5.1b20260107
41
+ Requires-Dist: autogluon.common==1.5.1b20260109
42
42
  Dynamic: author
43
43
  Dynamic: classifier
44
44
  Dynamic: description
@@ -1,4 +1,4 @@
1
1
  numpy<2.4.0,>=1.25.0
2
2
  pandas<2.4.0,>=2.0.0
3
3
  scikit-learn<1.8.0,>=1.4.0
4
- autogluon.common==1.5.1b20260107
4
+ autogluon.common==1.5.1b20260109