dataforge-ml 2.0.4__tar.gz → 2.0.5__tar.gz
This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- {dataforge_ml-2.0.4/src/dataforge_ml.egg-info → dataforge_ml-2.0.5}/PKG-INFO +1 -1
- {dataforge_ml-2.0.4 → dataforge_ml-2.0.5}/pyproject.toml +1 -1
- {dataforge_ml-2.0.4 → dataforge_ml-2.0.5}/src/dataforge_ml/config.py +3 -14
- {dataforge_ml-2.0.4 → dataforge_ml-2.0.5}/src/dataforge_ml/profiling/_config.py +17 -21
- {dataforge_ml-2.0.4 → dataforge_ml-2.0.5/src/dataforge_ml.egg-info}/PKG-INFO +1 -1
- {dataforge_ml-2.0.4 → dataforge_ml-2.0.5}/LICENSE +0 -0
- {dataforge_ml-2.0.4 → dataforge_ml-2.0.5}/README.md +0 -0
- {dataforge_ml-2.0.4 → dataforge_ml-2.0.5}/setup.cfg +0 -0
- {dataforge_ml-2.0.4 → dataforge_ml-2.0.5}/src/dataforge_ml/__init__.py +0 -0
- {dataforge_ml-2.0.4 → dataforge_ml-2.0.5}/src/dataforge_ml/imputation/__init__.py +0 -0
- {dataforge_ml-2.0.4 → dataforge_ml-2.0.5}/src/dataforge_ml/imputation/_config.py +0 -0
- {dataforge_ml-2.0.4 → dataforge_ml-2.0.5}/src/dataforge_ml/imputation/_fitted_imputer.py +0 -0
- {dataforge_ml-2.0.4 → dataforge_ml-2.0.5}/src/dataforge_ml/imputation/_numeric_imputer.py +0 -0
- {dataforge_ml-2.0.4 → dataforge_ml-2.0.5}/src/dataforge_ml/imputation/_regression_estimator_factory.py +0 -0
- {dataforge_ml-2.0.4 → dataforge_ml-2.0.5}/src/dataforge_ml/imputation/_strategy_router.py +0 -0
- {dataforge_ml-2.0.4 → dataforge_ml-2.0.5}/src/dataforge_ml/imputation/_utils.py +0 -0
- {dataforge_ml-2.0.4 → dataforge_ml-2.0.5}/src/dataforge_ml/imputation/orchestrator.py +0 -0
- {dataforge_ml-2.0.4 → dataforge_ml-2.0.5}/src/dataforge_ml/models/__init__.py +0 -0
- {dataforge_ml-2.0.4 → dataforge_ml-2.0.5}/src/dataforge_ml/models/_data_structure.py +0 -0
- {dataforge_ml-2.0.4 → dataforge_ml-2.0.5}/src/dataforge_ml/models/_data_types.py +0 -0
- {dataforge_ml-2.0.4 → dataforge_ml-2.0.5}/src/dataforge_ml/profiling/__init__.py +0 -0
- {dataforge_ml-2.0.4 → dataforge_ml-2.0.5}/src/dataforge_ml/profiling/_base.py +0 -0
- {dataforge_ml-2.0.4 → dataforge_ml-2.0.5}/src/dataforge_ml/profiling/_boolean_config.py +0 -0
- {dataforge_ml-2.0.4 → dataforge_ml-2.0.5}/src/dataforge_ml/profiling/_boolean_profiler.py +0 -0
- {dataforge_ml-2.0.4 → dataforge_ml-2.0.5}/src/dataforge_ml/profiling/_categorical.py +0 -0
- {dataforge_ml-2.0.4 → dataforge_ml-2.0.5}/src/dataforge_ml/profiling/_categorical_config.py +0 -0
- {dataforge_ml-2.0.4 → dataforge_ml-2.0.5}/src/dataforge_ml/profiling/_correlation_config.py +0 -0
- {dataforge_ml-2.0.4 → dataforge_ml-2.0.5}/src/dataforge_ml/profiling/_correlation_profiler.py +0 -0
- {dataforge_ml-2.0.4 → dataforge_ml-2.0.5}/src/dataforge_ml/profiling/_datetime_config.py +0 -0
- {dataforge_ml-2.0.4 → dataforge_ml-2.0.5}/src/dataforge_ml/profiling/_datetime_profiler.py +0 -0
- {dataforge_ml-2.0.4 → dataforge_ml-2.0.5}/src/dataforge_ml/profiling/_missingness_config.py +0 -0
- {dataforge_ml-2.0.4 → dataforge_ml-2.0.5}/src/dataforge_ml/profiling/_missingness_profiler.py +0 -0
- {dataforge_ml-2.0.4 → dataforge_ml-2.0.5}/src/dataforge_ml/profiling/_nonlinearity_profiler.py +0 -0
- {dataforge_ml-2.0.4 → dataforge_ml-2.0.5}/src/dataforge_ml/profiling/_numeric_config.py +0 -0
- {dataforge_ml-2.0.4 → dataforge_ml-2.0.5}/src/dataforge_ml/profiling/_numeric_profiler.py +0 -0
- {dataforge_ml-2.0.4 → dataforge_ml-2.0.5}/src/dataforge_ml/profiling/_tabular.py +0 -0
- {dataforge_ml-2.0.4 → dataforge_ml-2.0.5}/src/dataforge_ml/profiling/_target_config.py +0 -0
- {dataforge_ml-2.0.4 → dataforge_ml-2.0.5}/src/dataforge_ml/profiling/_target_profiler.py +0 -0
- {dataforge_ml-2.0.4 → dataforge_ml-2.0.5}/src/dataforge_ml/profiling/_text_config.py +0 -0
- {dataforge_ml-2.0.4 → dataforge_ml-2.0.5}/src/dataforge_ml/profiling/_text_profiler.py +0 -0
- {dataforge_ml-2.0.4 → dataforge_ml-2.0.5}/src/dataforge_ml/profiling/_type_detection_config.py +0 -0
- {dataforge_ml-2.0.4 → dataforge_ml-2.0.5}/src/dataforge_ml/profiling/_type_detector.py +0 -0
- {dataforge_ml-2.0.4 → dataforge_ml-2.0.5}/src/dataforge_ml/profiling/orchestrator.py +0 -0
- {dataforge_ml-2.0.4 → dataforge_ml-2.0.5}/src/dataforge_ml/splitting/__init__.py +0 -0
- {dataforge_ml-2.0.4 → dataforge_ml-2.0.5}/src/dataforge_ml/splitting/_config.py +0 -0
- {dataforge_ml-2.0.4 → dataforge_ml-2.0.5}/src/dataforge_ml/splitting/_profile_signals.py +0 -0
- {dataforge_ml-2.0.4 → dataforge_ml-2.0.5}/src/dataforge_ml/splitting/_splitter.py +0 -0
- {dataforge_ml-2.0.4 → dataforge_ml-2.0.5}/src/dataforge_ml/utils/__init__.py +0 -0
- {dataforge_ml-2.0.4 → dataforge_ml-2.0.5}/src/dataforge_ml/utils/_null_detection.py +0 -0
- {dataforge_ml-2.0.4 → dataforge_ml-2.0.5}/src/dataforge_ml/utils/_null_normalization.py +0 -0
- {dataforge_ml-2.0.4 → dataforge_ml-2.0.5}/src/dataforge_ml/utils/data_loader.py +0 -0
- {dataforge_ml-2.0.4 → dataforge_ml-2.0.5}/src/dataforge_ml.egg-info/SOURCES.txt +0 -0
- {dataforge_ml-2.0.4 → dataforge_ml-2.0.5}/src/dataforge_ml.egg-info/dependency_links.txt +0 -0
- {dataforge_ml-2.0.4 → dataforge_ml-2.0.5}/src/dataforge_ml.egg-info/requires.txt +0 -0
- {dataforge_ml-2.0.4 → dataforge_ml-2.0.5}/src/dataforge_ml.egg-info/top_level.txt +0 -0
|
@@ -86,20 +86,6 @@ class PipelineConfig:
|
|
|
86
86
|
Single seed for all stochastic pipeline operations, including GMM
|
|
87
87
|
Sampling during bimodal imputation. None produces non-deterministic
|
|
88
88
|
output.
|
|
89
|
-
|
|
90
|
-
Attributes
|
|
91
|
-
----------
|
|
92
|
-
exclude_columns : tuple[str, ...]
|
|
93
|
-
Hard exclusions — columns dropped globally from every phase.
|
|
94
|
-
phase_exclusions : MappingProxyType[PipelinePhase, tuple[str, ...]]
|
|
95
|
-
Soft exclusions — columns bypassed for a specific phase but retained
|
|
96
|
-
in the dataset.
|
|
97
|
-
column_overrides : MappingProxyType[str, SemanticType]
|
|
98
|
-
Explicit semantic type assignments respected by all downstream phases.
|
|
99
|
-
numeric_kind_overrides : MappingProxyType[str, NumericKind]
|
|
100
|
-
Explicit ``NumericKind`` assignments for individual columns, applied
|
|
101
|
-
after auto-detection in Phase 1. Only valid for columns whose final
|
|
102
|
-
``SemanticType`` is ``Numeric``; raises at orchestrator time otherwise.
|
|
103
89
|
"""
|
|
104
90
|
|
|
105
91
|
_exclude_columns: list[str] = field(default_factory=list, init=False)
|
|
@@ -150,6 +136,9 @@ class PipelineConfig:
|
|
|
150
136
|
def numeric_kind_overrides(self) -> "MappingProxyType[str, NumericKind]":
|
|
151
137
|
"""Explicit NumericKind assignments for individual columns, applied after auto-detection in Phase 1.
|
|
152
138
|
|
|
139
|
+
Only valid for columns whose final ``SemanticType`` is ``Numeric``;
|
|
140
|
+
raises at orchestrator time otherwise.
|
|
141
|
+
|
|
153
142
|
Returns
|
|
154
143
|
-------
|
|
155
144
|
MappingProxyType[str, NumericKind]
|
|
@@ -628,27 +628,6 @@ class ProfileConfig:
|
|
|
628
628
|
Default ``False``.
|
|
629
629
|
nonlinearity : NonlinearityProfileConfig
|
|
630
630
|
Threshold configuration for the nonlinearity sub-processor.
|
|
631
|
-
|
|
632
|
-
Attributes
|
|
633
|
-
----------
|
|
634
|
-
numeric_sentinels : MappingProxyType[str, list[float]]
|
|
635
|
-
Per-column numeric sentinel declarations. Keys are column names;
|
|
636
|
-
values are lists of float-compatible sentinel values that should be
|
|
637
|
-
treated as effective nulls (e.g. ``{"age": [-999.0, 9999.0]}``).
|
|
638
|
-
Applies to any column whose dtype passes ``_numeric_sentinel_eligible``
|
|
639
|
-
(all integer and float Polars dtypes). Defaults to an empty dict —
|
|
640
|
-
columns with no declaration are completely unaffected.
|
|
641
|
-
string_sentinels : MappingProxyType[str, list[str]]
|
|
642
|
-
Per-column user-declared string sentinel declarations. Keys are column
|
|
643
|
-
names; values are lists of string values that should be treated as
|
|
644
|
-
effective nulls for that column (e.g.
|
|
645
|
-
``{"status": ["N/A", "missing"]}``). Uses **replace semantics**: when
|
|
646
|
-
a declaration exists for a column, only the declared values are matched
|
|
647
|
-
(case-insensitive); the hardcoded defaults (``"NA"``, ``"NAN"``,
|
|
648
|
-
``"NULL"``, ``"NONE"``, ``"?"``) are not applied for that column.
|
|
649
|
-
Empty/whitespace-only strings are always effective null regardless of
|
|
650
|
-
any declaration. Defaults to an empty dict — columns with no
|
|
651
|
-
declaration continue to use the hardcoded defaults unchanged.
|
|
652
631
|
"""
|
|
653
632
|
|
|
654
633
|
modality: Modality = Modality.Tabular
|
|
@@ -704,6 +683,13 @@ class ProfileConfig:
|
|
|
704
683
|
"""
|
|
705
684
|
Get the per-column numeric sentinel declarations.
|
|
706
685
|
|
|
686
|
+
Keys are column names; values are lists of float-compatible sentinel
|
|
687
|
+
values that should be treated as effective nulls (e.g.
|
|
688
|
+
``{"age": [-999.0, 9999.0]}``). Applies to any column whose dtype
|
|
689
|
+
passes ``_numeric_sentinel_eligible`` (all integer and float Polars
|
|
690
|
+
dtypes). Defaults to an empty dict — columns with no declaration are
|
|
691
|
+
completely unaffected.
|
|
692
|
+
|
|
707
693
|
Returns
|
|
708
694
|
-------
|
|
709
695
|
MappingProxyType[str, list[float]]
|
|
@@ -716,6 +702,16 @@ class ProfileConfig:
|
|
|
716
702
|
"""
|
|
717
703
|
Get the per-column user-declared string sentinel declarations.
|
|
718
704
|
|
|
705
|
+
Keys are column names; values are lists of string values that should
|
|
706
|
+
be treated as effective nulls for that column (e.g.
|
|
707
|
+
``{"status": ["N/A", "missing"]}``). Uses **replace semantics**: when
|
|
708
|
+
a declaration exists for a column, only the declared values are
|
|
709
|
+
matched (case-insensitive); the hardcoded defaults (``"NA"``,
|
|
710
|
+
``"NAN"``, ``"NULL"``, ``"NONE"``, ``"?"``) are not applied for that
|
|
711
|
+
column. Empty/whitespace-only strings are always effective null
|
|
712
|
+
regardless of any declaration. Defaults to an empty dict — columns
|
|
713
|
+
with no declaration continue to use the hardcoded defaults unchanged.
|
|
714
|
+
|
|
719
715
|
Returns
|
|
720
716
|
-------
|
|
721
717
|
MappingProxyType[str, list[str]]
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{dataforge_ml-2.0.4 → dataforge_ml-2.0.5}/src/dataforge_ml/profiling/_correlation_profiler.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{dataforge_ml-2.0.4 → dataforge_ml-2.0.5}/src/dataforge_ml/profiling/_missingness_profiler.py
RENAMED
|
File without changes
|
{dataforge_ml-2.0.4 → dataforge_ml-2.0.5}/src/dataforge_ml/profiling/_nonlinearity_profiler.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{dataforge_ml-2.0.4 → dataforge_ml-2.0.5}/src/dataforge_ml/profiling/_type_detection_config.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|