dataforge-ml 1.0.1__tar.gz → 2.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68)
  1. {dataforge_ml-1.0.1/src/dataforge_ml.egg-info → dataforge_ml-2.0.0}/PKG-INFO +7 -1
  2. {dataforge_ml-1.0.1 → dataforge_ml-2.0.0}/pyproject.toml +7 -1
  3. {dataforge_ml-1.0.1 → dataforge_ml-2.0.0}/src/dataforge_ml/__init__.py +2 -0
  4. dataforge_ml-2.0.0/src/dataforge_ml/config.py +343 -0
  5. {dataforge_ml-1.0.1 → dataforge_ml-2.0.0}/src/dataforge_ml/imputation/__init__.py +11 -3
  6. dataforge_ml-2.0.0/src/dataforge_ml/imputation/_config.py +610 -0
  7. dataforge_ml-2.0.0/src/dataforge_ml/imputation/_fitted_imputer.py +764 -0
  8. dataforge_ml-2.0.0/src/dataforge_ml/imputation/_numeric_imputer.py +1954 -0
  9. dataforge_ml-2.0.0/src/dataforge_ml/imputation/_regression_estimator_factory.py +81 -0
  10. dataforge_ml-2.0.0/src/dataforge_ml/imputation/_strategy_router.py +642 -0
  11. dataforge_ml-2.0.0/src/dataforge_ml/imputation/_utils.py +89 -0
  12. {dataforge_ml-1.0.1 → dataforge_ml-2.0.0}/src/dataforge_ml/imputation/orchestrator.py +68 -53
  13. {dataforge_ml-1.0.1 → dataforge_ml-2.0.0}/src/dataforge_ml/profiling/__init__.py +13 -0
  14. {dataforge_ml-1.0.1 → dataforge_ml-2.0.0}/src/dataforge_ml/profiling/_boolean_config.py +14 -0
  15. {dataforge_ml-1.0.1 → dataforge_ml-2.0.0}/src/dataforge_ml/profiling/_categorical.py +26 -19
  16. dataforge_ml-2.0.0/src/dataforge_ml/profiling/_categorical_config.py +315 -0
  17. dataforge_ml-2.0.0/src/dataforge_ml/profiling/_config.py +483 -0
  18. {dataforge_ml-1.0.1 → dataforge_ml-2.0.0}/src/dataforge_ml/profiling/_correlation_config.py +197 -1
  19. {dataforge_ml-1.0.1 → dataforge_ml-2.0.0}/src/dataforge_ml/profiling/_correlation_profiler.py +23 -22
  20. dataforge_ml-2.0.0/src/dataforge_ml/profiling/_datetime_config.py +252 -0
  21. {dataforge_ml-1.0.1 → dataforge_ml-2.0.0}/src/dataforge_ml/profiling/_datetime_profiler.py +34 -56
  22. dataforge_ml-2.0.0/src/dataforge_ml/profiling/_missingness_config.py +319 -0
  23. {dataforge_ml-1.0.1 → dataforge_ml-2.0.0}/src/dataforge_ml/profiling/_missingness_profiler.py +108 -25
  24. dataforge_ml-2.0.0/src/dataforge_ml/profiling/_nonlinearity_profiler.py +411 -0
  25. dataforge_ml-2.0.0/src/dataforge_ml/profiling/_numeric_config.py +729 -0
  26. {dataforge_ml-1.0.1 → dataforge_ml-2.0.0}/src/dataforge_ml/profiling/_numeric_profiler.py +146 -42
  27. {dataforge_ml-1.0.1 → dataforge_ml-2.0.0}/src/dataforge_ml/profiling/_target_config.py +32 -2
  28. {dataforge_ml-1.0.1 → dataforge_ml-2.0.0}/src/dataforge_ml/profiling/_target_profiler.py +3 -2
  29. {dataforge_ml-1.0.1 → dataforge_ml-2.0.0}/src/dataforge_ml/profiling/_text_config.py +15 -0
  30. dataforge_ml-2.0.0/src/dataforge_ml/profiling/_type_detection_config.py +129 -0
  31. {dataforge_ml-1.0.1 → dataforge_ml-2.0.0}/src/dataforge_ml/profiling/_type_detector.py +85 -61
  32. {dataforge_ml-1.0.1 → dataforge_ml-2.0.0}/src/dataforge_ml/profiling/orchestrator.py +139 -43
  33. dataforge_ml-2.0.0/src/dataforge_ml/splitting/__init__.py +4 -0
  34. dataforge_ml-2.0.0/src/dataforge_ml/splitting/_config.py +131 -0
  35. {dataforge_ml-1.0.1 → dataforge_ml-2.0.0}/src/dataforge_ml/splitting/_profile_signals.py +66 -24
  36. {dataforge_ml-1.0.1 → dataforge_ml-2.0.0}/src/dataforge_ml/splitting/_splitter.py +7 -3
  37. dataforge_ml-2.0.0/src/dataforge_ml/utils/_null_detection.py +49 -0
  38. dataforge_ml-2.0.0/src/dataforge_ml/utils/_null_normalization.py +130 -0
  39. {dataforge_ml-1.0.1 → dataforge_ml-2.0.0/src/dataforge_ml.egg-info}/PKG-INFO +7 -1
  40. {dataforge_ml-1.0.1 → dataforge_ml-2.0.0}/src/dataforge_ml.egg-info/SOURCES.txt +5 -0
  41. {dataforge_ml-1.0.1 → dataforge_ml-2.0.0}/src/dataforge_ml.egg-info/requires.txt +6 -0
  42. dataforge_ml-1.0.1/src/dataforge_ml/config.py +0 -143
  43. dataforge_ml-1.0.1/src/dataforge_ml/imputation/_config.py +0 -165
  44. dataforge_ml-1.0.1/src/dataforge_ml/imputation/_fitted_imputer.py +0 -290
  45. dataforge_ml-1.0.1/src/dataforge_ml/imputation/_numeric_imputer.py +0 -372
  46. dataforge_ml-1.0.1/src/dataforge_ml/profiling/_categorical_config.py +0 -119
  47. dataforge_ml-1.0.1/src/dataforge_ml/profiling/_config.py +0 -260
  48. dataforge_ml-1.0.1/src/dataforge_ml/profiling/_datetime_config.py +0 -123
  49. dataforge_ml-1.0.1/src/dataforge_ml/profiling/_missingness_config.py +0 -150
  50. dataforge_ml-1.0.1/src/dataforge_ml/profiling/_numeric_config.py +0 -152
  51. dataforge_ml-1.0.1/src/dataforge_ml/splitting/__init__.py +0 -4
  52. dataforge_ml-1.0.1/src/dataforge_ml/splitting/_config.py +0 -56
  53. dataforge_ml-1.0.1/src/dataforge_ml/utils/_null_detection.py +0 -22
  54. dataforge_ml-1.0.1/src/dataforge_ml/utils/_null_normalization.py +0 -64
  55. {dataforge_ml-1.0.1 → dataforge_ml-2.0.0}/LICENSE +0 -0
  56. {dataforge_ml-1.0.1 → dataforge_ml-2.0.0}/README.md +0 -0
  57. {dataforge_ml-1.0.1 → dataforge_ml-2.0.0}/setup.cfg +0 -0
  58. {dataforge_ml-1.0.1 → dataforge_ml-2.0.0}/src/dataforge_ml/models/__init__.py +0 -0
  59. {dataforge_ml-1.0.1 → dataforge_ml-2.0.0}/src/dataforge_ml/models/_data_structure.py +0 -0
  60. {dataforge_ml-1.0.1 → dataforge_ml-2.0.0}/src/dataforge_ml/models/_data_types.py +0 -0
  61. {dataforge_ml-1.0.1 → dataforge_ml-2.0.0}/src/dataforge_ml/profiling/_base.py +0 -0
  62. {dataforge_ml-1.0.1 → dataforge_ml-2.0.0}/src/dataforge_ml/profiling/_boolean_profiler.py +0 -0
  63. {dataforge_ml-1.0.1 → dataforge_ml-2.0.0}/src/dataforge_ml/profiling/_tabular.py +0 -0
  64. {dataforge_ml-1.0.1 → dataforge_ml-2.0.0}/src/dataforge_ml/profiling/_text_profiler.py +0 -0
  65. {dataforge_ml-1.0.1 → dataforge_ml-2.0.0}/src/dataforge_ml/utils/__init__.py +0 -0
  66. {dataforge_ml-1.0.1 → dataforge_ml-2.0.0}/src/dataforge_ml/utils/data_loader.py +0 -0
  67. {dataforge_ml-1.0.1 → dataforge_ml-2.0.0}/src/dataforge_ml.egg-info/dependency_links.txt +0 -0
  68. {dataforge_ml-1.0.1 → dataforge_ml-2.0.0}/src/dataforge_ml.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dataforge-ml
3
- Version: 1.0.1
3
+ Version: 2.0.0
4
4
  Summary: A automated feature engineering and designing pipeline library
5
5
  License: MIT
6
6
  Classifier: License :: OSI Approved :: MIT License
@@ -18,8 +18,14 @@ Requires-Dist: numpy>=2.0.0
18
18
  Requires-Dist: pandas>=2.0.0
19
19
  Requires-Dist: chardet>=5.0.0
20
20
  Requires-Dist: iterative-stratification>=0.1.9
21
+ Requires-Dist: diptest
21
22
  Provides-Extra: dev
22
23
  Requires-Dist: pytest>=8.0; extra == "dev"
24
+ Requires-Dist: sphinx>=8.0; extra == "dev"
25
+ Requires-Dist: pydata-sphinx-theme>=0.16; extra == "dev"
26
+ Requires-Dist: myst-parser>=4.0; extra == "dev"
27
+ Requires-Dist: numpydoc>=1.8; extra == "dev"
28
+ Requires-Dist: sphinx-autobuild>=2024.0; extra == "dev"
23
29
  Dynamic: license-file
24
30
 
25
31
  # DataForgeML
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "dataforge-ml"
7
- version = "1.0.1"
7
+ version = "2.0.0"
8
8
  description = "A automated feature engineering and designing pipeline library"
9
9
  readme = "README.md"
10
10
  requires-python = ">3.10"
@@ -24,11 +24,17 @@ dependencies = [
24
24
  "pandas>=2.0.0",
25
25
  "chardet>=5.0.0",
26
26
  "iterative-stratification>=0.1.9",
27
+ "diptest",
27
28
  ]
28
29
 
29
30
  [project.optional-dependencies]
30
31
  dev = [
31
32
  "pytest>=8.0",
33
+ "sphinx>=8.0",
34
+ "pydata-sphinx-theme>=0.16",
35
+ "myst-parser>=4.0",
36
+ "numpydoc>=1.8",
37
+ "sphinx-autobuild>=2024.0",
32
38
  ]
33
39
 
34
40
  [tool.pytest.ini_options]
@@ -8,6 +8,7 @@ from .profiling._config import (
8
8
  )
9
9
  from .splitting import DataSplitter, SplitResult, FoldResult
10
10
  from .utils.data_loader import DataLoader
11
+ from .imputation._config import ImputationFitDiagnostic
11
12
 
12
13
  __all__ = [
13
14
  "PipelineConfig",
@@ -23,4 +24,5 @@ __all__ = [
23
24
  "SplitResult",
24
25
  "FoldResult",
25
26
  "DataLoader",
27
+ "ImputationFitDiagnostic",
26
28
  ]
@@ -0,0 +1,343 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ from dataclasses import dataclass, field
5
+ from enum import StrEnum
6
+ from typing import TYPE_CHECKING, Union, Optional
7
+
8
+ if TYPE_CHECKING:
9
+ from dataforge_ml.profiling._config import ProfileConfig, NumericKind
10
+ from dataforge_ml.imputation._config import ImputationConfig
11
+ from dataforge_ml.splitting._config import SplitConfig
12
+
13
+
14
+ class SemanticType(StrEnum):
15
+ """The ML-level interpretation assigned to a column by the type detector.
16
+
17
+ Used throughout the pipeline to route columns to the correct sub-processors
18
+ and to determine which statistical operations apply. See CONTEXT.md §SemanticType
19
+ for the full type taxonomy and the Text vs Categorical distinction.
20
+ """
21
+
22
+ Numeric = "numeric"
23
+ Categorical = "categorical"
24
+ Datetime = "datetime"
25
+ Boolean = "boolean"
26
+ Text = "text"
27
+ Identifier = "identifier"
28
+
29
+
30
+ class Modality(StrEnum):
31
+ """The data modality the pipeline operates on.
32
+
33
+ Currently only ``Tabular`` is supported. Reserved for future expansion to
34
+ additional modalities (time-series, image, etc.).
35
+ """
36
+
37
+ Tabular = "tabular"
38
+
39
+
40
+ class PipelinePhase(StrEnum):
41
+ """The six sequential phases of the DataForgeML feature engineering pipeline.
42
+
43
+ Phase Orchestrators call ``PipelineConfig.resolve_active_columns`` with one
44
+ of these values to obtain the column set for that phase after Hard and Soft
45
+ Exclusions are applied.
46
+ """
47
+
48
+ Profiling = "profiling"
49
+ Imputation = "imputation"
50
+ OutlierDetection = "outlier_detection"
51
+ Normalization = "normalization"
52
+ Encoding = "encoding"
53
+ Scaling = "scaling"
54
+
55
+
56
+ def _default_profile_config() -> ProfileConfig:
57
+ from dataforge_ml.profiling._config import ProfileConfig
58
+ return ProfileConfig()
59
+
60
+
61
+ def _default_imputation_config() -> ImputationConfig:
62
+ from dataforge_ml.imputation._config import ImputationConfig
63
+ return ImputationConfig()
64
+
65
+
66
+ def _default_split_config() -> SplitConfig:
67
+ from dataforge_ml.splitting._config import SplitConfig
68
+ return SplitConfig()
69
+
70
+
71
+ @dataclass
72
+ class PipelineConfig:
73
+ """
74
+ Master configuration for the full 6-phase feature engineering pipeline.
75
+
76
+ Parameters
77
+ ----------
78
+ exclude_columns : list[str]
79
+ Hard exclusions — columns dropped globally from every phase.
80
+ phase_exclusions : dict[PipelinePhase, list[str]]
81
+ Soft exclusions — columns bypassed for a specific phase but retained
82
+ in the dataset.
83
+ column_overrides : dict[str, SemanticType]
84
+ Explicit semantic type assignments respected by all downstream phases.
85
+ numeric_kind_overrides : dict[str, NumericKind]
86
+ Explicit ``NumericKind`` assignments for individual columns, applied
87
+ after auto-detection in Phase 1. Only valid for columns whose final
88
+ ``SemanticType`` is ``Numeric``; raises at orchestrator time otherwise.
89
+ profiling : ProfileConfig
90
+ Phase 1-specific parameters (correlation, chunking, memory threshold).
91
+ imputation : ImputationConfig
92
+ Phase 2-specific parameters (strategy thresholds, size guards).
93
+ split : SplitConfig
94
+ Splitting thresholds (stratification signal cap, boolean minority bar).
95
+ random_seed : int, optional
96
+ Single seed for all stochastic pipeline operations, including GMM
97
+ Sampling during bimodal imputation. None produces non-deterministic
98
+ output.
99
+ """
100
+
101
+ exclude_columns: list[str] = field(default_factory=list)
102
+ phase_exclusions: dict[PipelinePhase, list[str]] = field(default_factory=dict)
103
+ column_overrides: dict[str, SemanticType] = field(default_factory=dict)
104
+ numeric_kind_overrides: dict[str, NumericKind] = field(default_factory=dict)
105
+ profiling: ProfileConfig = field(default_factory=_default_profile_config)
106
+ imputation: ImputationConfig = field(default_factory=_default_imputation_config)
107
+ split: SplitConfig = field(default_factory=_default_split_config)
108
+ random_seed: Optional[int] = None
109
+
110
+ def resolve_active_columns(
111
+ self, phase: PipelinePhase, available_columns: list[str]
112
+ ) -> list[str]:
113
+ """Return the columns the given phase should operate on.
114
+
115
+ Hard Exclusions are applied first, then phase-specific Soft Exclusions.
116
+ Columns absent from ``available_columns`` are silently ignored in both
117
+ exclusion lists.
118
+
119
+ Parameters
120
+ ----------
121
+ phase : PipelinePhase
122
+ The pipeline phase requesting the active column set.
123
+ available_columns : list[str]
124
+ The full list of columns currently present in the DataFrame.
125
+
126
+ Returns
127
+ -------
128
+ list[str]
129
+ Columns from ``available_columns`` that are not excluded by either
130
+ Hard or Soft Exclusion rules for the given phase, preserving the
131
+ original order.
132
+ """
133
+ hard_set = set(self.exclude_columns)
134
+ soft_set = set(self.phase_exclusions.get(phase, []))
135
+ excluded = hard_set | soft_set
136
+ return [c for c in available_columns if c not in excluded]
137
+
138
+ def add_exclusions(self, cols: list[str]) -> None:
139
+ """Add columns to the hard exclusion set, deduplicating automatically.
140
+
141
+ Columns already present in ``exclude_columns`` and duplicate entries
142
+ within ``cols`` are silently ignored. Calling with an empty list is a
143
+ no-op.
144
+
145
+ Parameters
146
+ ----------
147
+ cols : list[str]
148
+ Column names to register as hard exclusions. Deduplication is
149
+ handled here; callers do not need to pre-deduplicate.
150
+ """
151
+ existing = set(self.exclude_columns)
152
+ for col in cols:
153
+ if col not in existing:
154
+ self.exclude_columns.append(col)
155
+ existing.add(col)
156
+
157
+ def set_column_type(
158
+ self, column: str, semantic_type: Union[str, SemanticType]
159
+ ) -> None:
160
+ """Explicitly set the semantic type for a column, overriding auto-detection.
161
+
162
+ Parameters
163
+ ----------
164
+ column : str
165
+ Name of the column to override.
166
+ semantic_type : str or SemanticType
167
+ The desired semantic type. Accepts enum values or their string
168
+ equivalents (e.g. ``"numeric"``, ``"categorical"``).
169
+
170
+ Raises
171
+ ------
172
+ ValueError
173
+ When ``semantic_type`` is a string that does not match any
174
+ ``SemanticType`` value.
175
+ """
176
+ if isinstance(semantic_type, str):
177
+ try:
178
+ semantic_type = SemanticType(semantic_type)
179
+ except ValueError:
180
+ valid = [e.value for e in SemanticType]
181
+ raise ValueError(
182
+ f"Unknown semantic type {semantic_type!r}. "
183
+ f"Valid values: {valid}"
184
+ )
185
+ self.column_overrides[column] = semantic_type
186
+
187
+ def set_columns_type(
188
+ self, columns: list[str], semantic_type: Union[str, SemanticType]
189
+ ) -> None:
190
+ """Assign the same semantic type to every column in the list.
191
+
192
+ Parameters
193
+ ----------
194
+ columns : list[str]
195
+ Column names to override.
196
+ semantic_type : str or SemanticType
197
+ The desired semantic type applied to every column in the list.
198
+ """
199
+ for column in columns:
200
+ self.set_column_type(column, semantic_type)
201
+
202
+ def set_numeric_kind(
203
+ self, column: str, kind: Union[str, NumericKind]
204
+ ) -> None:
205
+ """Explicitly set the ``NumericKind`` for a single column.
206
+
207
+ Parameters
208
+ ----------
209
+ column : str
210
+ Name of the column to override.
211
+ kind : str or NumericKind
212
+ The desired numeric kind. Accepts enum values or their string
213
+ equivalents (``"continuous"``, ``"bounded_discrete"``).
214
+
215
+ Raises
216
+ ------
217
+ ValueError
218
+ When ``kind`` is a string that does not match any ``NumericKind``
219
+ value.
220
+ """
221
+ from dataforge_ml.profiling._config import NumericKind as _NumericKind
222
+ if isinstance(kind, str):
223
+ try:
224
+ kind = _NumericKind(kind)
225
+ except ValueError:
226
+ valid = [e.value for e in _NumericKind]
227
+ raise ValueError(
228
+ f"Unknown NumericKind {kind!r}. Valid values: {valid}"
229
+ )
230
+ self.numeric_kind_overrides[column] = kind
231
+
232
+ def set_columns_numeric_kind(
233
+ self, columns: list[str], kind: Union[str, NumericKind]
234
+ ) -> None:
235
+ """Assign the same ``NumericKind`` to every column in the list.
236
+
237
+ Parameters
238
+ ----------
239
+ columns : list[str]
240
+ Column names to override.
241
+ kind : str or NumericKind
242
+ The desired numeric kind applied to every column in the list.
243
+ """
244
+ for column in columns:
245
+ self.set_numeric_kind(column, kind)
246
+
247
+ def to_dict(self) -> dict:
248
+ """Serialise the pipeline configuration to a plain dictionary.
249
+
250
+ Returns
251
+ -------
252
+ dict
253
+ All fields serialised to JSON-compatible types; nested configs are
254
+ recursively serialised via their own ``to_dict`` methods.
255
+ """
256
+ return {
257
+ "exclude_columns": list(self.exclude_columns),
258
+ "phase_exclusions": {
259
+ str(phase): list(cols)
260
+ for phase, cols in self.phase_exclusions.items()
261
+ },
262
+ "column_overrides": {
263
+ col: str(sem_type)
264
+ for col, sem_type in self.column_overrides.items()
265
+ },
266
+ "numeric_kind_overrides": {
267
+ col: str(kind)
268
+ for col, kind in self.numeric_kind_overrides.items()
269
+ },
270
+ "profiling": self.profiling.to_dict(),
271
+ "imputation": self.imputation.to_dict(),
272
+ "split": self.split.to_dict(),
273
+ "random_seed": self.random_seed,
274
+ }
275
+
276
+ @classmethod
277
+ def from_dict(cls, data: dict) -> PipelineConfig:
278
+ """Reconstruct a ``PipelineConfig`` from a plain dictionary.
279
+
280
+ Parameters
281
+ ----------
282
+ data : dict
283
+ Dictionary as produced by ``to_dict()``.
284
+
285
+ Returns
286
+ -------
287
+ PipelineConfig
288
+ Fully populated configuration instance with all nested sub-configs
289
+ restored.
290
+ """
291
+ from dataforge_ml.profiling._config import ProfileConfig, NumericKind as _NumericKind
292
+ from dataforge_ml.imputation._config import ImputationConfig
293
+ from dataforge_ml.splitting._config import SplitConfig
294
+ return cls(
295
+ exclude_columns=list(data.get("exclude_columns", [])),
296
+ phase_exclusions={
297
+ PipelinePhase(phase_str): list(cols)
298
+ for phase_str, cols in data.get("phase_exclusions", {}).items()
299
+ },
300
+ column_overrides={
301
+ col: SemanticType(sem_str)
302
+ for col, sem_str in data.get("column_overrides", {}).items()
303
+ },
304
+ numeric_kind_overrides={
305
+ col: _NumericKind(kind_str)
306
+ for col, kind_str in data.get("numeric_kind_overrides", {}).items()
307
+ },
308
+ profiling=ProfileConfig.from_dict(data.get("profiling", {})),
309
+ imputation=ImputationConfig.from_dict(data.get("imputation", {})),
310
+ split=SplitConfig.from_dict(data.get("split", {})),
311
+ random_seed=data.get("random_seed"),
312
+ )
313
+
314
+ def to_json(self, indent: int = 2) -> str:
315
+ """Serialise the pipeline configuration to a JSON string.
316
+
317
+ Parameters
318
+ ----------
319
+ indent : int
320
+ Number of spaces used for JSON indentation.
321
+
322
+ Returns
323
+ -------
324
+ str
325
+ JSON representation of ``to_dict()``.
326
+ """
327
+ return json.dumps(self.to_dict(), indent=indent)
328
+
329
+ @classmethod
330
+ def from_json(cls, json_str: str) -> PipelineConfig:
331
+ """Reconstruct a ``PipelineConfig`` from a JSON string.
332
+
333
+ Parameters
334
+ ----------
335
+ json_str : str
336
+ JSON string as produced by ``to_json()``.
337
+
338
+ Returns
339
+ -------
340
+ PipelineConfig
341
+ Fully populated configuration instance.
342
+ """
343
+ return cls.from_dict(json.loads(json_str))
@@ -1,21 +1,29 @@
1
1
  from ._config import (
2
2
  ColumnImputationRecord,
3
3
  ImputationConfig,
4
+ ImputationFitDiagnostic,
4
5
  ImputationResult,
5
6
  ImputationStrategy,
6
7
  NumericImputationConfig,
7
8
  )
8
- from ._fitted_imputer import FittedImputer, UnfittedColumnError
9
- from .orchestrator import ImputationOrchestrator, SplitImbalanceWarning
9
+ from ._fitted_imputer import (
10
+ FittedColumnAbsentError,
11
+ FittedImputer,
12
+ UnfittedColumnError,
13
+ UnseenColumnError,
14
+ )
15
+ from .orchestrator import ImputationOrchestrator
10
16
 
11
17
  __all__ = [
12
18
  "ImputationStrategy",
13
19
  "NumericImputationConfig",
14
20
  "ImputationConfig",
21
+ "ImputationFitDiagnostic",
15
22
  "ColumnImputationRecord",
16
23
  "ImputationResult",
17
24
  "FittedImputer",
18
25
  "UnfittedColumnError",
26
+ "UnseenColumnError",
27
+ "FittedColumnAbsentError",
19
28
  "ImputationOrchestrator",
20
- "SplitImbalanceWarning",
21
29
  ]