dataforge-ml 2.0.5__tar.gz → 2.0.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. {dataforge_ml-2.0.5/src/dataforge_ml.egg-info → dataforge_ml-2.0.7}/PKG-INFO +1 -1
  2. {dataforge_ml-2.0.5 → dataforge_ml-2.0.7}/pyproject.toml +1 -1
  3. {dataforge_ml-2.0.5 → dataforge_ml-2.0.7}/src/dataforge_ml/profiling/_boolean_config.py +25 -1
  4. {dataforge_ml-2.0.5 → dataforge_ml-2.0.7}/src/dataforge_ml/profiling/_boolean_profiler.py +8 -1
  5. {dataforge_ml-2.0.5 → dataforge_ml-2.0.7}/src/dataforge_ml/profiling/_config.py +61 -0
  6. {dataforge_ml-2.0.5 → dataforge_ml-2.0.7}/src/dataforge_ml/profiling/_datetime_config.py +1 -0
  7. {dataforge_ml-2.0.5 → dataforge_ml-2.0.7}/src/dataforge_ml/profiling/_datetime_profiler.py +25 -4
  8. {dataforge_ml-2.0.5 → dataforge_ml-2.0.7}/src/dataforge_ml/profiling/_numeric_config.py +1 -0
  9. {dataforge_ml-2.0.5 → dataforge_ml-2.0.7}/src/dataforge_ml/profiling/_numeric_profiler.py +7 -0
  10. {dataforge_ml-2.0.5 → dataforge_ml-2.0.7}/src/dataforge_ml/profiling/orchestrator.py +18 -3
  11. {dataforge_ml-2.0.5 → dataforge_ml-2.0.7/src/dataforge_ml.egg-info}/PKG-INFO +1 -1
  12. {dataforge_ml-2.0.5 → dataforge_ml-2.0.7}/LICENSE +0 -0
  13. {dataforge_ml-2.0.5 → dataforge_ml-2.0.7}/README.md +0 -0
  14. {dataforge_ml-2.0.5 → dataforge_ml-2.0.7}/setup.cfg +0 -0
  15. {dataforge_ml-2.0.5 → dataforge_ml-2.0.7}/src/dataforge_ml/__init__.py +0 -0
  16. {dataforge_ml-2.0.5 → dataforge_ml-2.0.7}/src/dataforge_ml/config.py +0 -0
  17. {dataforge_ml-2.0.5 → dataforge_ml-2.0.7}/src/dataforge_ml/imputation/__init__.py +0 -0
  18. {dataforge_ml-2.0.5 → dataforge_ml-2.0.7}/src/dataforge_ml/imputation/_config.py +0 -0
  19. {dataforge_ml-2.0.5 → dataforge_ml-2.0.7}/src/dataforge_ml/imputation/_fitted_imputer.py +0 -0
  20. {dataforge_ml-2.0.5 → dataforge_ml-2.0.7}/src/dataforge_ml/imputation/_numeric_imputer.py +0 -0
  21. {dataforge_ml-2.0.5 → dataforge_ml-2.0.7}/src/dataforge_ml/imputation/_regression_estimator_factory.py +0 -0
  22. {dataforge_ml-2.0.5 → dataforge_ml-2.0.7}/src/dataforge_ml/imputation/_strategy_router.py +0 -0
  23. {dataforge_ml-2.0.5 → dataforge_ml-2.0.7}/src/dataforge_ml/imputation/_utils.py +0 -0
  24. {dataforge_ml-2.0.5 → dataforge_ml-2.0.7}/src/dataforge_ml/imputation/orchestrator.py +0 -0
  25. {dataforge_ml-2.0.5 → dataforge_ml-2.0.7}/src/dataforge_ml/models/__init__.py +0 -0
  26. {dataforge_ml-2.0.5 → dataforge_ml-2.0.7}/src/dataforge_ml/models/_data_structure.py +0 -0
  27. {dataforge_ml-2.0.5 → dataforge_ml-2.0.7}/src/dataforge_ml/models/_data_types.py +0 -0
  28. {dataforge_ml-2.0.5 → dataforge_ml-2.0.7}/src/dataforge_ml/profiling/__init__.py +0 -0
  29. {dataforge_ml-2.0.5 → dataforge_ml-2.0.7}/src/dataforge_ml/profiling/_base.py +0 -0
  30. {dataforge_ml-2.0.5 → dataforge_ml-2.0.7}/src/dataforge_ml/profiling/_categorical.py +0 -0
  31. {dataforge_ml-2.0.5 → dataforge_ml-2.0.7}/src/dataforge_ml/profiling/_categorical_config.py +0 -0
  32. {dataforge_ml-2.0.5 → dataforge_ml-2.0.7}/src/dataforge_ml/profiling/_correlation_config.py +0 -0
  33. {dataforge_ml-2.0.5 → dataforge_ml-2.0.7}/src/dataforge_ml/profiling/_correlation_profiler.py +0 -0
  34. {dataforge_ml-2.0.5 → dataforge_ml-2.0.7}/src/dataforge_ml/profiling/_missingness_config.py +0 -0
  35. {dataforge_ml-2.0.5 → dataforge_ml-2.0.7}/src/dataforge_ml/profiling/_missingness_profiler.py +0 -0
  36. {dataforge_ml-2.0.5 → dataforge_ml-2.0.7}/src/dataforge_ml/profiling/_nonlinearity_profiler.py +0 -0
  37. {dataforge_ml-2.0.5 → dataforge_ml-2.0.7}/src/dataforge_ml/profiling/_tabular.py +0 -0
  38. {dataforge_ml-2.0.5 → dataforge_ml-2.0.7}/src/dataforge_ml/profiling/_target_config.py +0 -0
  39. {dataforge_ml-2.0.5 → dataforge_ml-2.0.7}/src/dataforge_ml/profiling/_target_profiler.py +0 -0
  40. {dataforge_ml-2.0.5 → dataforge_ml-2.0.7}/src/dataforge_ml/profiling/_text_config.py +0 -0
  41. {dataforge_ml-2.0.5 → dataforge_ml-2.0.7}/src/dataforge_ml/profiling/_text_profiler.py +0 -0
  42. {dataforge_ml-2.0.5 → dataforge_ml-2.0.7}/src/dataforge_ml/profiling/_type_detection_config.py +0 -0
  43. {dataforge_ml-2.0.5 → dataforge_ml-2.0.7}/src/dataforge_ml/profiling/_type_detector.py +0 -0
  44. {dataforge_ml-2.0.5 → dataforge_ml-2.0.7}/src/dataforge_ml/splitting/__init__.py +0 -0
  45. {dataforge_ml-2.0.5 → dataforge_ml-2.0.7}/src/dataforge_ml/splitting/_config.py +0 -0
  46. {dataforge_ml-2.0.5 → dataforge_ml-2.0.7}/src/dataforge_ml/splitting/_profile_signals.py +0 -0
  47. {dataforge_ml-2.0.5 → dataforge_ml-2.0.7}/src/dataforge_ml/splitting/_splitter.py +0 -0
  48. {dataforge_ml-2.0.5 → dataforge_ml-2.0.7}/src/dataforge_ml/utils/__init__.py +0 -0
  49. {dataforge_ml-2.0.5 → dataforge_ml-2.0.7}/src/dataforge_ml/utils/_null_detection.py +0 -0
  50. {dataforge_ml-2.0.5 → dataforge_ml-2.0.7}/src/dataforge_ml/utils/_null_normalization.py +0 -0
  51. {dataforge_ml-2.0.5 → dataforge_ml-2.0.7}/src/dataforge_ml/utils/data_loader.py +0 -0
  52. {dataforge_ml-2.0.5 → dataforge_ml-2.0.7}/src/dataforge_ml.egg-info/SOURCES.txt +0 -0
  53. {dataforge_ml-2.0.5 → dataforge_ml-2.0.7}/src/dataforge_ml.egg-info/dependency_links.txt +0 -0
  54. {dataforge_ml-2.0.5 → dataforge_ml-2.0.7}/src/dataforge_ml.egg-info/requires.txt +0 -0
  55. {dataforge_ml-2.0.5 → dataforge_ml-2.0.7}/src/dataforge_ml.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dataforge-ml
3
- Version: 2.0.5
3
+ Version: 2.0.7
4
4
  Summary: A automated feature engineering and designing pipeline library
5
5
  License: MIT
6
6
  Classifier: License :: OSI Approved :: MIT License
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "dataforge-ml"
7
- version = "2.0.5"
7
+ version = "2.0.7"
8
8
  description = "A automated feature engineering and designing pipeline library"
9
9
  readme = "README.md"
10
10
  requires-python = ">3.10"
@@ -7,9 +7,14 @@ Populated by BooleanProfiler.
7
7
  from __future__ import annotations
8
8
 
9
9
  from dataclasses import dataclass, field
10
+ from enum import StrEnum
10
11
  from typing import Optional
11
12
 
12
13
 
14
+ class BooleanFlag(StrEnum):
15
+ FormatMismatch = "format_mismatch"
16
+
17
+
13
18
  @dataclass
14
19
  class BooleanStats:
15
20
  """Value distribution statistics for a single Boolean column.
@@ -24,6 +29,23 @@ class BooleanStats:
24
29
  true_ratio: float = 0.0
25
30
  false_ratio: float = 0.0
26
31
  mode: Optional[bool] = None
32
+ flags: list[BooleanFlag] = field(default_factory=list)
33
+
34
+ def has_flag(self, flag: BooleanFlag) -> bool:
35
+ """Check whether a specific ``BooleanFlag`` is set on this column.
36
+
37
+ Parameters
38
+ ----------
39
+ flag : BooleanFlag
40
+ The flag to test.
41
+
42
+ Returns
43
+ -------
44
+ bool
45
+ ``True`` if ``flag`` is present in :attr:`flags`, ``False``
46
+ otherwise.
47
+ """
48
+ return flag in self.flags
27
49
 
28
50
  def to_dict(self) -> dict:
29
51
  """Serialise the boolean statistics to a plain dictionary.
@@ -31,7 +53,8 @@ class BooleanStats:
31
53
  Returns
32
54
  -------
33
55
  dict
34
- All fields keyed by field name.
56
+ All fields keyed by field name. ``flags`` are serialised as their
57
+ string values.
35
58
  """
36
59
  return {
37
60
  "true_count": self.true_count,
@@ -39,6 +62,7 @@ class BooleanStats:
39
62
  "true_ratio": self.true_ratio,
40
63
  "false_ratio": self.false_ratio,
41
64
  "mode": self.mode,
65
+ "flags": [str(f) for f in self.flags],
42
66
  }
43
67
 
44
68
 
@@ -23,7 +23,7 @@ import polars as pl
23
23
 
24
24
  from ._base import ColumnBatchProfiler
25
25
  from ._config import BooleanStats
26
- from ._boolean_config import BooleanProfileResult
26
+ from ._boolean_config import BooleanFlag, BooleanProfileResult
27
27
  from ..models._data_types import _INT_DTYPES
28
28
 
29
29
  # ---------------------------------------------------------------------------
@@ -114,6 +114,13 @@ class BooleanProfiler(ColumnBatchProfiler[BooleanProfileResult]):
114
114
  bool_series = self._to_bool_series(series)
115
115
  non_null_count = bool_series.len()
116
116
 
117
+ # FormatMismatch: a value that is present (non-null after the
118
+ # orchestrator's Effective-Null normalization) but falls outside the
119
+ # recognized true/false vocabulary is dropped by coercion. A shortfall
120
+ # in the non-null count means the column holds dirty, uncoercible data.
121
+ if non_null_count < series.drop_nulls().len():
122
+ profile.flags.append(BooleanFlag.FormatMismatch)
123
+
117
124
  if non_null_count == 0:
118
125
  if series.drop_nulls().len() > 0 and col_name in user_overrides:
119
126
  from ._base import OverrideCoercionError
@@ -658,15 +658,18 @@ class ProfileConfig:
658
658
  numeric_sentinels: InitVar[Optional[dict[str, list[float]]]] = None
659
659
  string_sentinels: InitVar[Optional[dict[str, list[str]]]] = None
660
660
  datetime_epoch_units: InitVar[Optional[dict[str, Union[str, EpochUnit]]]] = None
661
+ datetime_formats: InitVar[Optional[dict[str, str]]] = None
661
662
  _numeric_sentinels: dict[str, list[float]] = field(default_factory=dict, init=False)
662
663
  _string_sentinels: dict[str, list[str]] = field(default_factory=dict, init=False)
663
664
  _datetime_epoch_units: dict[str, EpochUnit] = field(default_factory=dict, init=False)
665
+ _datetime_formats: dict[str, str] = field(default_factory=dict, init=False)
664
666
 
665
667
  def __post_init__(
666
668
  self,
667
669
  numeric_sentinels: Optional[dict[str, list[float]]],
668
670
  string_sentinels: Optional[dict[str, list[str]]],
669
671
  datetime_epoch_units: Optional[dict[str, Union[str, EpochUnit]]] = None,
672
+ datetime_formats: Optional[dict[str, str]] = None,
670
673
  ) -> None:
671
674
  if numeric_sentinels is not None and not isinstance(numeric_sentinels, property):
672
675
  for k, vals in numeric_sentinels.items():
@@ -677,6 +680,9 @@ class ProfileConfig:
677
680
  if datetime_epoch_units is not None and not isinstance(datetime_epoch_units, property):
678
681
  for k, val in datetime_epoch_units.items():
679
682
  self.set_datetime_epoch_unit(k, val)
683
+ if datetime_formats is not None and not isinstance(datetime_formats, property):
684
+ for k, fmt in datetime_formats.items():
685
+ self.set_datetime_format(k, fmt)
680
686
 
681
687
  @property
682
688
  def numeric_sentinels(self) -> MappingProxyType[str, list[float]]:
@@ -731,6 +737,27 @@ class ProfileConfig:
731
737
  """
732
738
  return MappingProxyType(self._datetime_epoch_units)
733
739
 
740
+ @property
741
+ def datetime_formats(self) -> MappingProxyType[str, str]:
742
+ """
743
+ Get the per-column declared datetime format strings.
744
+
745
+ Keys are column names; values are strftime-style format strings (e.g.
746
+ ``{"Year": "%Y"}``) applied by ``DatetimeProfiler`` with
747
+ ``strict=False`` when coercing that column to Datetime. A declaration
748
+ applies to any column profiled as Datetime, whether overridden or
749
+ auto-detected. Format strings are not validated against strftime
750
+ grammar at declaration time — a bad format surfaces at profiling time.
751
+ Defaults to an empty dict — columns with no declaration fall back to
752
+ Polars format inference.
753
+
754
+ Returns
755
+ -------
756
+ MappingProxyType[str, str]
757
+ Read-only mapping of column names to declared datetime formats.
758
+ """
759
+ return MappingProxyType(self._datetime_formats)
760
+
734
761
  def set_numeric_sentinel(self, column: str | list[str], values: list[float]) -> None:
735
762
  """
736
763
  Set numeric sentinel values for one or more columns.
@@ -809,6 +836,38 @@ class ProfileConfig:
809
836
  for c in columns:
810
837
  self._datetime_epoch_units[c] = enum_unit
811
838
 
839
+ def set_datetime_format(self, column: str | list[str], format: str) -> None:
840
+ """
841
+ Declare a datetime format string for one or more columns.
842
+
843
+ The format is applied by ``DatetimeProfiler`` with ``strict=False``
844
+ when coercing the column to Datetime, and is not validated against
845
+ strftime grammar or the data at declaration time — a bad format
846
+ surfaces at profiling time, consistent with ``set_column_type`` and
847
+ ``set_datetime_epoch_unit``.
848
+
849
+ Parameters
850
+ ----------
851
+ column : str or list of str
852
+ Column name or list of column names to apply the format to.
853
+ format : str
854
+ A non-empty strftime-style format string (e.g. ``"%Y"``).
855
+
856
+ Raises
857
+ ------
858
+ ValueError
859
+ If any column name is empty, or if `format` is not a non-empty
860
+ string.
861
+ """
862
+ if not isinstance(format, str) or not format:
863
+ raise ValueError("format must be a non-empty string.")
864
+
865
+ columns = [column] if isinstance(column, str) else column
866
+ for c in columns:
867
+ if not isinstance(c, str) or not c:
868
+ raise ValueError("column name must be a non-empty string.")
869
+ self._datetime_formats[c] = format
870
+
812
871
  def to_dict(self) -> dict:
813
872
  """
814
873
  Serialise the config to a plain dictionary.
@@ -837,6 +896,7 @@ class ProfileConfig:
837
896
  "numeric_sentinels": {k: list(v) for k, v in self.numeric_sentinels.items()},
838
897
  "string_sentinels": {k: list(v) for k, v in self.string_sentinels.items()},
839
898
  "datetime_epoch_units": {k: v.value for k, v in self.datetime_epoch_units.items()},
899
+ "datetime_formats": {k: v for k, v in self.datetime_formats.items()},
840
900
  }
841
901
 
842
902
  @classmethod
@@ -884,6 +944,7 @@ class ProfileConfig:
884
944
  numeric_sentinels=data.get("numeric_sentinels", {}),
885
945
  string_sentinels=data.get("string_sentinels", {}),
886
946
  datetime_epoch_units=data.get("datetime_epoch_units", {}),
947
+ datetime_formats=data.get("datetime_formats", {}),
887
948
  )
888
949
 
889
950
  return config
@@ -28,6 +28,7 @@ class DatetimeFlag(StrEnum):
28
28
  HighGapVariance = "high_gap_variance"
29
29
  MnarSuspected = "mnar_suspected"
30
30
  RecentDateMissing = "recent_date_missing"
31
+ FormatMismatch = "format_mismatch"
31
32
 
32
33
 
33
34
  @dataclass
@@ -68,9 +68,11 @@ class DatetimeProfiler(ColumnBatchProfiler[DatetimeProfileResult]):
68
68
  self,
69
69
  config: DatetimeProfileConfig | None = None,
70
70
  epoch_units: dict[str, str] | None = None,
71
+ formats: dict[str, str] | None = None,
71
72
  ) -> None:
72
73
  self._config = config if config is not None else DatetimeProfileConfig()
73
74
  self._epoch_units = epoch_units or {}
75
+ self._formats = formats or {}
74
76
 
75
77
  # ------------------------------------------------------------------
76
78
  # Public API
@@ -112,8 +114,14 @@ class DatetimeProfiler(ColumnBatchProfiler[DatetimeProfileResult]):
112
114
 
113
115
  def _coerce_to_datetime(self, series: pl.Series, col_name: str) -> pl.Series | None:
114
116
  if series.dtype in (pl.Utf8, pl.String):
117
+ declared_format = self._formats.get(col_name)
115
118
  try:
116
- coerced = series.str.to_datetime(strict=False)
119
+ if declared_format is not None:
120
+ coerced = series.str.to_datetime(
121
+ format=declared_format, strict=False
122
+ )
123
+ else:
124
+ coerced = series.str.to_datetime(strict=False)
117
125
  return coerced if coerced.drop_nulls().len() > 0 else None
118
126
  except pl.exceptions.ComputeError:
119
127
  return None
@@ -139,22 +147,35 @@ class DatetimeProfiler(ColumnBatchProfiler[DatetimeProfileResult]):
139
147
 
140
148
  available = []
141
149
  coerced_cache = {}
150
+ format_mismatch: dict[str, bool] = {}
142
151
  for col_name in self._resolve_columns(df.columns, columns):
143
- series = self._coerce_to_datetime(df[col_name], col_name)
152
+ original = df[col_name]
153
+ series = self._coerce_to_datetime(original, col_name)
144
154
  if series is not None:
145
155
  available.append(col_name)
146
156
  coerced_cache[col_name] = series
157
+ # FormatMismatch: a value that is present (non-null after the
158
+ # orchestrator's Effective-Null normalization) but fails
159
+ # coercion becomes null here. Compare non-null counts before
160
+ # and after coercion; a shortfall means dirty, uncoercible data.
161
+ format_mismatch[col_name] = (
162
+ series.drop_nulls().len() < original.drop_nulls().len()
163
+ )
147
164
  elif col_name in user_overrides:
148
- if df[col_name].drop_nulls().len() > 0:
165
+ if original.drop_nulls().len() > 0:
149
166
  from ._base import OverrideCoercionError
150
167
  raise OverrideCoercionError(
151
- f"Column {col_name!r} with TypeFlag.UserOverride completely failed coercion to Datetime."
168
+ f"Column {col_name!r} with TypeFlag.UserOverride completely failed coercion to Datetime. "
169
+ f"If Polars cannot infer the format, declare one explicitly via "
170
+ f"ProfileConfig.set_datetime_format({col_name!r}, <format>) (e.g. '%Y' for bare years)."
152
171
  )
153
172
 
154
173
  result.analysed_columns = available
155
174
 
156
175
  for col_name in available:
157
176
  profile = self._profile_column(coerced_cache[col_name], df.height, now)
177
+ if format_mismatch.get(col_name):
178
+ profile.flags.append(DatetimeFlag.FormatMismatch)
158
179
  result.columns[col_name] = profile
159
180
 
160
181
  return result
@@ -170,6 +170,7 @@ class NumericFlag(StrEnum):
170
170
  NearConstant = "near_constant"
171
171
  Bimodal = "bimodal"
172
172
  HighOutlierDensity = "high_outlier_density"
173
+ FormatMismatch = "format_mismatch"
173
174
 
174
175
 
175
176
  @dataclass
@@ -144,6 +144,13 @@ class NumericProfiler(ColumnBatchProfiler[NumericProfileResult]):
144
144
  clean = f64.drop_nulls()
145
145
  profile = NumericStats()
146
146
 
147
+ # FormatMismatch: a value that is present (non-null after the
148
+ # orchestrator's Effective-Null normalization) but fails the
149
+ # Float64 cast becomes null here. A shortfall in the non-null
150
+ # count means the column holds dirty, uncoercible data.
151
+ if clean.len() < series.drop_nulls().len():
152
+ profile.flags.append(NumericFlag.FormatMismatch)
153
+
147
154
  if clean.len() == 0:
148
155
  if series.drop_nulls().len() > 0 and col in user_overrides:
149
156
  from ._base import OverrideCoercionError
@@ -28,7 +28,7 @@ from typing import Any
28
28
  import numpy as np
29
29
  import polars as pl
30
30
 
31
- from ._base import ModalityProfiler, ColumnBatchProfiler
31
+ from ._base import ModalityProfiler, ColumnBatchProfiler, OverrideCoercionError
32
32
  from ._tabular import TabularProfiler
33
33
  from ._categorical import CategoricalProfiler
34
34
  from ._datetime_profiler import DatetimeProfiler
@@ -114,6 +114,10 @@ class StructuralProfiler:
114
114
  ------
115
115
  TypeError
116
116
  When ``data`` is not a ``polars.DataFrame``.
117
+ OverrideCoercionError
118
+ When a column carrying ``TypeFlag.UserOverride`` completely fails
119
+ coercion to its overridden ``SemanticType`` (zero usable values
120
+ remain despite the original column having non-null data).
117
121
  """
118
122
  if not isinstance(data, pl.DataFrame):
119
123
  raise TypeError(
@@ -220,13 +224,22 @@ class StructuralProfiler:
220
224
  type_to_cols.setdefault(sem_type, []).append(col_name)
221
225
 
222
226
  pc = self.config.profiling
227
+ profiling_frame = _resolve_effective_nulls(
228
+ data,
229
+ numeric_sentinels=dict(pc.numeric_sentinels),
230
+ string_sentinels=dict(pc.string_sentinels),
231
+ )
223
232
  for sem_type, cols in type_to_cols.items():
224
233
  if sem_type == SemanticType.Numeric:
225
234
  profiler = NumericProfiler(config=pc.numeric)
226
235
  elif sem_type == SemanticType.Categorical:
227
236
  profiler = CategoricalProfiler(config=pc.categorical)
228
237
  elif sem_type == SemanticType.Datetime:
229
- profiler = DatetimeProfiler(config=pc.datetime_, epoch_units=pc.datetime_epoch_units)
238
+ profiler = DatetimeProfiler(
239
+ config=pc.datetime_,
240
+ epoch_units=pc.datetime_epoch_units,
241
+ formats=pc.datetime_formats,
242
+ )
230
243
  else:
231
244
  profiler_cls = _COLUMN_PROFILER_REGISTRY.get(sem_type) # type: ignore[arg-type]
232
245
  if profiler_cls is None:
@@ -237,10 +250,12 @@ class StructuralProfiler:
237
250
  c for c in cols
238
251
  if result.columns.get(c) and TypeFlag.UserOverride in result.columns[c].type_flags
239
252
  }
240
- batch = profiler.profile(data, columns=cols, user_overrides=user_overrides)
253
+ batch = profiler.profile(profiling_frame, columns=cols, user_overrides=user_overrides)
241
254
  for col_name in batch.analysed_columns:
242
255
  if col_name in result.columns:
243
256
  result.columns[col_name].stats = batch.columns.get(col_name)
257
+ except OverrideCoercionError:
258
+ raise
244
259
  except Exception:
245
260
  pass
246
261
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dataforge-ml
3
- Version: 2.0.5
3
+ Version: 2.0.7
4
4
  Summary: A automated feature engineering and designing pipeline library
5
5
  License: MIT
6
6
  Classifier: License :: OSI Approved :: MIT License
File without changes
File without changes
File without changes