dataforge-ml 2.0.3__tar.gz → 2.0.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. {dataforge_ml-2.0.3/src/dataforge_ml.egg-info → dataforge_ml-2.0.5}/PKG-INFO +1 -1
  2. {dataforge_ml-2.0.3 → dataforge_ml-2.0.5}/pyproject.toml +1 -1
  3. {dataforge_ml-2.0.3 → dataforge_ml-2.0.5}/src/dataforge_ml/config.py +3 -14
  4. {dataforge_ml-2.0.3 → dataforge_ml-2.0.5}/src/dataforge_ml/profiling/_config.py +17 -21
  5. {dataforge_ml-2.0.3 → dataforge_ml-2.0.5/src/dataforge_ml.egg-info}/PKG-INFO +1 -1
  6. {dataforge_ml-2.0.3 → dataforge_ml-2.0.5}/LICENSE +0 -0
  7. {dataforge_ml-2.0.3 → dataforge_ml-2.0.5}/README.md +0 -0
  8. {dataforge_ml-2.0.3 → dataforge_ml-2.0.5}/setup.cfg +0 -0
  9. {dataforge_ml-2.0.3 → dataforge_ml-2.0.5}/src/dataforge_ml/__init__.py +0 -0
  10. {dataforge_ml-2.0.3 → dataforge_ml-2.0.5}/src/dataforge_ml/imputation/__init__.py +0 -0
  11. {dataforge_ml-2.0.3 → dataforge_ml-2.0.5}/src/dataforge_ml/imputation/_config.py +0 -0
  12. {dataforge_ml-2.0.3 → dataforge_ml-2.0.5}/src/dataforge_ml/imputation/_fitted_imputer.py +0 -0
  13. {dataforge_ml-2.0.3 → dataforge_ml-2.0.5}/src/dataforge_ml/imputation/_numeric_imputer.py +0 -0
  14. {dataforge_ml-2.0.3 → dataforge_ml-2.0.5}/src/dataforge_ml/imputation/_regression_estimator_factory.py +0 -0
  15. {dataforge_ml-2.0.3 → dataforge_ml-2.0.5}/src/dataforge_ml/imputation/_strategy_router.py +0 -0
  16. {dataforge_ml-2.0.3 → dataforge_ml-2.0.5}/src/dataforge_ml/imputation/_utils.py +0 -0
  17. {dataforge_ml-2.0.3 → dataforge_ml-2.0.5}/src/dataforge_ml/imputation/orchestrator.py +0 -0
  18. {dataforge_ml-2.0.3 → dataforge_ml-2.0.5}/src/dataforge_ml/models/__init__.py +0 -0
  19. {dataforge_ml-2.0.3 → dataforge_ml-2.0.5}/src/dataforge_ml/models/_data_structure.py +0 -0
  20. {dataforge_ml-2.0.3 → dataforge_ml-2.0.5}/src/dataforge_ml/models/_data_types.py +0 -0
  21. {dataforge_ml-2.0.3 → dataforge_ml-2.0.5}/src/dataforge_ml/profiling/__init__.py +0 -0
  22. {dataforge_ml-2.0.3 → dataforge_ml-2.0.5}/src/dataforge_ml/profiling/_base.py +0 -0
  23. {dataforge_ml-2.0.3 → dataforge_ml-2.0.5}/src/dataforge_ml/profiling/_boolean_config.py +0 -0
  24. {dataforge_ml-2.0.3 → dataforge_ml-2.0.5}/src/dataforge_ml/profiling/_boolean_profiler.py +0 -0
  25. {dataforge_ml-2.0.3 → dataforge_ml-2.0.5}/src/dataforge_ml/profiling/_categorical.py +0 -0
  26. {dataforge_ml-2.0.3 → dataforge_ml-2.0.5}/src/dataforge_ml/profiling/_categorical_config.py +0 -0
  27. {dataforge_ml-2.0.3 → dataforge_ml-2.0.5}/src/dataforge_ml/profiling/_correlation_config.py +0 -0
  28. {dataforge_ml-2.0.3 → dataforge_ml-2.0.5}/src/dataforge_ml/profiling/_correlation_profiler.py +0 -0
  29. {dataforge_ml-2.0.3 → dataforge_ml-2.0.5}/src/dataforge_ml/profiling/_datetime_config.py +0 -0
  30. {dataforge_ml-2.0.3 → dataforge_ml-2.0.5}/src/dataforge_ml/profiling/_datetime_profiler.py +0 -0
  31. {dataforge_ml-2.0.3 → dataforge_ml-2.0.5}/src/dataforge_ml/profiling/_missingness_config.py +0 -0
  32. {dataforge_ml-2.0.3 → dataforge_ml-2.0.5}/src/dataforge_ml/profiling/_missingness_profiler.py +0 -0
  33. {dataforge_ml-2.0.3 → dataforge_ml-2.0.5}/src/dataforge_ml/profiling/_nonlinearity_profiler.py +0 -0
  34. {dataforge_ml-2.0.3 → dataforge_ml-2.0.5}/src/dataforge_ml/profiling/_numeric_config.py +0 -0
  35. {dataforge_ml-2.0.3 → dataforge_ml-2.0.5}/src/dataforge_ml/profiling/_numeric_profiler.py +0 -0
  36. {dataforge_ml-2.0.3 → dataforge_ml-2.0.5}/src/dataforge_ml/profiling/_tabular.py +0 -0
  37. {dataforge_ml-2.0.3 → dataforge_ml-2.0.5}/src/dataforge_ml/profiling/_target_config.py +0 -0
  38. {dataforge_ml-2.0.3 → dataforge_ml-2.0.5}/src/dataforge_ml/profiling/_target_profiler.py +0 -0
  39. {dataforge_ml-2.0.3 → dataforge_ml-2.0.5}/src/dataforge_ml/profiling/_text_config.py +0 -0
  40. {dataforge_ml-2.0.3 → dataforge_ml-2.0.5}/src/dataforge_ml/profiling/_text_profiler.py +0 -0
  41. {dataforge_ml-2.0.3 → dataforge_ml-2.0.5}/src/dataforge_ml/profiling/_type_detection_config.py +0 -0
  42. {dataforge_ml-2.0.3 → dataforge_ml-2.0.5}/src/dataforge_ml/profiling/_type_detector.py +0 -0
  43. {dataforge_ml-2.0.3 → dataforge_ml-2.0.5}/src/dataforge_ml/profiling/orchestrator.py +0 -0
  44. {dataforge_ml-2.0.3 → dataforge_ml-2.0.5}/src/dataforge_ml/splitting/__init__.py +0 -0
  45. {dataforge_ml-2.0.3 → dataforge_ml-2.0.5}/src/dataforge_ml/splitting/_config.py +0 -0
  46. {dataforge_ml-2.0.3 → dataforge_ml-2.0.5}/src/dataforge_ml/splitting/_profile_signals.py +0 -0
  47. {dataforge_ml-2.0.3 → dataforge_ml-2.0.5}/src/dataforge_ml/splitting/_splitter.py +0 -0
  48. {dataforge_ml-2.0.3 → dataforge_ml-2.0.5}/src/dataforge_ml/utils/__init__.py +0 -0
  49. {dataforge_ml-2.0.3 → dataforge_ml-2.0.5}/src/dataforge_ml/utils/_null_detection.py +0 -0
  50. {dataforge_ml-2.0.3 → dataforge_ml-2.0.5}/src/dataforge_ml/utils/_null_normalization.py +0 -0
  51. {dataforge_ml-2.0.3 → dataforge_ml-2.0.5}/src/dataforge_ml/utils/data_loader.py +0 -0
  52. {dataforge_ml-2.0.3 → dataforge_ml-2.0.5}/src/dataforge_ml.egg-info/SOURCES.txt +0 -0
  53. {dataforge_ml-2.0.3 → dataforge_ml-2.0.5}/src/dataforge_ml.egg-info/dependency_links.txt +0 -0
  54. {dataforge_ml-2.0.3 → dataforge_ml-2.0.5}/src/dataforge_ml.egg-info/requires.txt +0 -0
  55. {dataforge_ml-2.0.3 → dataforge_ml-2.0.5}/src/dataforge_ml.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dataforge-ml
3
- Version: 2.0.3
3
+ Version: 2.0.5
4
4
  Summary: An automated feature engineering and designing pipeline library
5
5
  License: MIT
6
6
  Classifier: License :: OSI Approved :: MIT License
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "dataforge-ml"
7
- version = "2.0.3"
7
+ version = "2.0.5"
8
8
  description = "An automated feature engineering and designing pipeline library"
9
9
  readme = "README.md"
10
10
  requires-python = ">3.10"
@@ -86,20 +86,6 @@ class PipelineConfig:
86
86
  Single seed for all stochastic pipeline operations, including GMM
87
87
  sampling during bimodal imputation. None produces non-deterministic
88
88
  output.
89
-
90
- Attributes
91
- ----------
92
- exclude_columns : tuple[str, ...]
93
- Hard exclusions — columns dropped globally from every phase.
94
- phase_exclusions : MappingProxyType[PipelinePhase, tuple[str, ...]]
95
- Soft exclusions — columns bypassed for a specific phase but retained
96
- in the dataset.
97
- column_overrides : MappingProxyType[str, SemanticType]
98
- Explicit semantic type assignments respected by all downstream phases.
99
- numeric_kind_overrides : MappingProxyType[str, NumericKind]
100
- Explicit ``NumericKind`` assignments for individual columns, applied
101
- after auto-detection in Phase 1. Only valid for columns whose final
102
- ``SemanticType`` is ``Numeric``; raises at orchestrator time otherwise.
103
89
  """
104
90
 
105
91
  _exclude_columns: list[str] = field(default_factory=list, init=False)
@@ -150,6 +136,9 @@ class PipelineConfig:
150
136
  def numeric_kind_overrides(self) -> "MappingProxyType[str, NumericKind]":
151
137
  """Explicit NumericKind assignments for individual columns, applied after auto-detection in Phase 1.
152
138
 
139
+ Only valid for columns whose final ``SemanticType`` is ``Numeric``;
140
+ raises at orchestrator time otherwise.
141
+
153
142
  Returns
154
143
  -------
155
144
  MappingProxyType[str, NumericKind]
@@ -628,27 +628,6 @@ class ProfileConfig:
628
628
  Default ``False``.
629
629
  nonlinearity : NonlinearityProfileConfig
630
630
  Threshold configuration for the nonlinearity sub-processor.
631
-
632
- Attributes
633
- ----------
634
- numeric_sentinels : MappingProxyType[str, list[float]]
635
- Per-column numeric sentinel declarations. Keys are column names;
636
- values are lists of float-compatible sentinel values that should be
637
- treated as effective nulls (e.g. ``{"age": [-999.0, 9999.0]}``).
638
- Applies to any column whose dtype passes ``_numeric_sentinel_eligible``
639
- (all integer and float Polars dtypes). Defaults to an empty dict —
640
- columns with no declaration are completely unaffected.
641
- string_sentinels : MappingProxyType[str, list[str]]
642
- Per-column user-declared string sentinel declarations. Keys are column
643
- names; values are lists of string values that should be treated as
644
- effective nulls for that column (e.g.
645
- ``{"status": ["N/A", "missing"]}``). Uses **replace semantics**: when
646
- a declaration exists for a column, only the declared values are matched
647
- (case-insensitive); the hardcoded defaults (``"NA"``, ``"NAN"``,
648
- ``"NULL"``, ``"NONE"``, ``"?"``) are not applied for that column.
649
- Empty/whitespace-only strings are always effective null regardless of
650
- any declaration. Defaults to an empty dict — columns with no
651
- declaration continue to use the hardcoded defaults unchanged.
652
631
  """
653
632
 
654
633
  modality: Modality = Modality.Tabular
@@ -704,6 +683,13 @@ class ProfileConfig:
704
683
  """
705
684
  Get the per-column numeric sentinel declarations.
706
685
 
686
+ Keys are column names; values are lists of float-compatible sentinel
687
+ values that should be treated as effective nulls (e.g.
688
+ ``{"age": [-999.0, 9999.0]}``). Applies to any column whose dtype
689
+ passes ``_numeric_sentinel_eligible`` (all integer and float Polars
690
+ dtypes). Defaults to an empty dict — columns with no declaration are
691
+ completely unaffected.
692
+
707
693
  Returns
708
694
  -------
709
695
  MappingProxyType[str, list[float]]
@@ -716,6 +702,16 @@ class ProfileConfig:
716
702
  """
717
703
  Get the per-column user-declared string sentinel declarations.
718
704
 
705
+ Keys are column names; values are lists of string values that should
706
+ be treated as effective nulls for that column (e.g.
707
+ ``{"status": ["N/A", "missing"]}``). Uses **replace semantics**: when
708
+ a declaration exists for a column, only the declared values are
709
+ matched (case-insensitive); the hardcoded defaults (``"NA"``,
710
+ ``"NAN"``, ``"NULL"``, ``"NONE"``, ``"?"``) are not applied for that
711
+ column. Empty/whitespace-only strings are always effective null
712
+ regardless of any declaration. Defaults to an empty dict — columns
713
+ with no declaration continue to use the hardcoded defaults unchanged.
714
+
719
715
  Returns
720
716
  -------
721
717
  MappingProxyType[str, list[str]]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dataforge-ml
3
- Version: 2.0.3
3
+ Version: 2.0.5
4
4
  Summary: An automated feature engineering and designing pipeline library
5
5
  License: MIT
6
6
  Classifier: License :: OSI Approved :: MIT License
File without changes
File without changes
File without changes