dataforge-ml 0.9.0__tar.gz → 0.10.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. {dataforge_ml-0.9.0 → dataforge_ml-0.10.0}/PKG-INFO +3 -3
  2. {dataforge_ml-0.9.0 → dataforge_ml-0.10.0}/README.md +2 -2
  3. {dataforge_ml-0.9.0 → dataforge_ml-0.10.0}/pyproject.toml +1 -1
  4. {dataforge_ml-0.9.0 → dataforge_ml-0.10.0}/src/dataforge_ml/profiling/config.py +0 -39
  5. {dataforge_ml-0.9.0 → dataforge_ml-0.10.0}/src/dataforge_ml/profiling/structural.py +0 -2
  6. {dataforge_ml-0.9.0 → dataforge_ml-0.10.0}/src/dataforge_ml.egg-info/PKG-INFO +3 -3
  7. {dataforge_ml-0.9.0 → dataforge_ml-0.10.0}/LICENSE +0 -0
  8. {dataforge_ml-0.9.0 → dataforge_ml-0.10.0}/setup.cfg +0 -0
  9. {dataforge_ml-0.9.0 → dataforge_ml-0.10.0}/src/dataforge_ml/__init__.py +0 -0
  10. {dataforge_ml-0.9.0 → dataforge_ml-0.10.0}/src/dataforge_ml/models/__init__.py +0 -0
  11. {dataforge_ml-0.9.0 → dataforge_ml-0.10.0}/src/dataforge_ml/models/_data_structure.py +0 -0
  12. {dataforge_ml-0.9.0 → dataforge_ml-0.10.0}/src/dataforge_ml/models/_data_types.py +0 -0
  13. {dataforge_ml-0.9.0 → dataforge_ml-0.10.0}/src/dataforge_ml/profiling/__init__.py +0 -0
  14. {dataforge_ml-0.9.0 → dataforge_ml-0.10.0}/src/dataforge_ml/profiling/_base.py +0 -0
  15. {dataforge_ml-0.9.0 → dataforge_ml-0.10.0}/src/dataforge_ml/profiling/_boolean_config.py +0 -0
  16. {dataforge_ml-0.9.0 → dataforge_ml-0.10.0}/src/dataforge_ml/profiling/_boolean_profiler.py +0 -0
  17. {dataforge_ml-0.9.0 → dataforge_ml-0.10.0}/src/dataforge_ml/profiling/_categorical.py +0 -0
  18. {dataforge_ml-0.9.0 → dataforge_ml-0.10.0}/src/dataforge_ml/profiling/_categorical_config.py +0 -0
  19. {dataforge_ml-0.9.0 → dataforge_ml-0.10.0}/src/dataforge_ml/profiling/_correlation_config.py +0 -0
  20. {dataforge_ml-0.9.0 → dataforge_ml-0.10.0}/src/dataforge_ml/profiling/_correlation_profiler.py +0 -0
  21. {dataforge_ml-0.9.0 → dataforge_ml-0.10.0}/src/dataforge_ml/profiling/_datetime_config.py +0 -0
  22. {dataforge_ml-0.9.0 → dataforge_ml-0.10.0}/src/dataforge_ml/profiling/_datetime_profiler.py +0 -0
  23. {dataforge_ml-0.9.0 → dataforge_ml-0.10.0}/src/dataforge_ml/profiling/_missingness_config.py +0 -0
  24. {dataforge_ml-0.9.0 → dataforge_ml-0.10.0}/src/dataforge_ml/profiling/_missingness_profiler.py +0 -0
  25. {dataforge_ml-0.9.0 → dataforge_ml-0.10.0}/src/dataforge_ml/profiling/_null_detection.py +0 -0
  26. {dataforge_ml-0.9.0 → dataforge_ml-0.10.0}/src/dataforge_ml/profiling/_numeric_config.py +0 -0
  27. {dataforge_ml-0.9.0 → dataforge_ml-0.10.0}/src/dataforge_ml/profiling/_numeric_profiler.py +0 -0
  28. {dataforge_ml-0.9.0 → dataforge_ml-0.10.0}/src/dataforge_ml/profiling/_tabular.py +0 -0
  29. {dataforge_ml-0.9.0 → dataforge_ml-0.10.0}/src/dataforge_ml/profiling/_target_config.py +0 -0
  30. {dataforge_ml-0.9.0 → dataforge_ml-0.10.0}/src/dataforge_ml/profiling/_target_profiler.py +0 -0
  31. {dataforge_ml-0.9.0 → dataforge_ml-0.10.0}/src/dataforge_ml/profiling/_text_config.py +0 -0
  32. {dataforge_ml-0.9.0 → dataforge_ml-0.10.0}/src/dataforge_ml/profiling/_text_profiler.py +0 -0
  33. {dataforge_ml-0.9.0 → dataforge_ml-0.10.0}/src/dataforge_ml/profiling/_type_detector.py +0 -0
  34. {dataforge_ml-0.9.0 → dataforge_ml-0.10.0}/src/dataforge_ml/splitting/__init__.py +0 -0
  35. {dataforge_ml-0.9.0 → dataforge_ml-0.10.0}/src/dataforge_ml/splitting/_config.py +0 -0
  36. {dataforge_ml-0.9.0 → dataforge_ml-0.10.0}/src/dataforge_ml/splitting/_splitter.py +0 -0
  37. {dataforge_ml-0.9.0 → dataforge_ml-0.10.0}/src/dataforge_ml/utils/__init__.py +0 -0
  38. {dataforge_ml-0.9.0 → dataforge_ml-0.10.0}/src/dataforge_ml/utils/data_loader.py +0 -0
  39. {dataforge_ml-0.9.0 → dataforge_ml-0.10.0}/src/dataforge_ml.egg-info/SOURCES.txt +0 -0
  40. {dataforge_ml-0.9.0 → dataforge_ml-0.10.0}/src/dataforge_ml.egg-info/dependency_links.txt +0 -0
  41. {dataforge_ml-0.9.0 → dataforge_ml-0.10.0}/src/dataforge_ml.egg-info/requires.txt +0 -0
  42. {dataforge_ml-0.9.0 → dataforge_ml-0.10.0}/src/dataforge_ml.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dataforge-ml
3
- Version: 0.9.0
3
+ Version: 0.10.0
4
4
  Summary: An automated feature engineering and designing pipeline library
5
5
  License: MIT
6
6
  Classifier: License :: OSI Approved :: MIT License
@@ -21,9 +21,9 @@ Provides-Extra: dev
21
21
  Requires-Dist: pytest>=8.0; extra == "dev"
22
22
  Dynamic: license-file
23
23
 
24
- # FeatureForge
24
+ # DataForgeML
25
25
 
26
- Automated feature engineering and data profiling pipeline library for tabular datasets.
26
+ Automated feature engineering and data profiling pipeline library for datasets.
27
27
 
28
28
  ## Installation
29
29
 
@@ -1,6 +1,6 @@
1
- # FeatureForge
1
+ # DataForgeML
2
2
 
3
- Automated feature engineering and data profiling pipeline library for tabular datasets.
3
+ Automated feature engineering and data profiling pipeline library for datasets.
4
4
 
5
5
  ## Installation
6
6
 
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "dataforge-ml"
7
- version = "0.9.0"
7
+ version = "0.10.0"
8
8
  description = "An automated feature engineering and designing pipeline library"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.10"
@@ -227,10 +227,6 @@ class ProfileConfig:
227
227
  Data modality. Currently only Tabular is implemented.
228
228
  target_columns : list[str]
229
229
  Names of the label/target columns, if any.
230
- column_overrides : dict[str, SemanticType]
231
- Explicit semantic type assignments that override auto-detection.
232
- exclude_columns : list[str]
233
- Columns to skip entirely during profiling.
234
230
  compute_correlation : bool
235
231
  Whether to compute the feature-feature correlation matrix.
236
232
  correlation_target_column : Optional[str]
@@ -243,47 +239,16 @@ class ProfileConfig:
243
239
 
244
240
  modality: Modality = Modality.Tabular
245
241
  target_columns: list[str] = field(default_factory=list)
246
- column_overrides: dict[str, SemanticType] = field(default_factory=dict)
247
- exclude_columns: list[str] = field(default_factory=list)
248
242
  compute_correlation: bool = False
249
243
  correlation_target_column: Optional[str] = None
250
244
  memory_threshold_mb: float = 500.0
251
245
  chunk_size: int = 100_000
252
246
 
253
- def set_column_type(self, column: str, semantic_type: Union[str, "SemanticType"]) -> None:
254
- """
255
- Explicitly set the semantic type for a column, overriding auto-detection.
256
-
257
- The override is the sole source of truth for that column's type — the
258
- type detector's verdict is ignored during profiling. Calling this method
259
- multiple times on the same column is valid; the last call wins.
260
-
261
- Parameters
262
- ----------
263
- column : str
264
- Name of the column to override.
265
- semantic_type : str | SemanticType
266
- Target semantic type. Accepts a plain string (e.g. ``"numeric"``,
267
- ``"categorical"``) or a ``SemanticType`` enum value. Invalid strings
268
- raise ``ValueError``.
269
- """
270
- if isinstance(semantic_type, str):
271
- try:
272
- semantic_type = SemanticType(semantic_type)
273
- except ValueError:
274
- valid = [e.value for e in SemanticType]
275
- raise ValueError(
276
- f"Unknown semantic type {semantic_type!r}. "
277
- f"Valid values: {valid}"
278
- )
279
- self.column_overrides[column] = semantic_type
280
247
 
281
248
  def to_dict(self) -> dict:
282
249
  return {
283
250
  "modality": str(self.modality),
284
251
  "target_columns": list(self.target_columns),
285
- "column_overrides": {k: str(v) for k, v in self.column_overrides.items()},
286
- "exclude_columns": list(self.exclude_columns),
287
252
  "compute_correlation": self.compute_correlation,
288
253
  "correlation_target_column": self.correlation_target_column,
289
254
  "memory_threshold_mb": self.memory_threshold_mb,
@@ -295,10 +260,6 @@ class ProfileConfig:
295
260
  return cls(
296
261
  modality=Modality(data.get("modality", Modality.Tabular)),
297
262
  target_columns=list(data.get("target_columns", [])),
298
- column_overrides={
299
- k: SemanticType(v) for k, v in data.get("column_overrides", {}).items()
300
- },
301
- exclude_columns=list(data.get("exclude_columns", [])),
302
263
  compute_correlation=bool(data.get("compute_correlation", False)),
303
264
  correlation_target_column=data.get("correlation_target_column"),
304
265
  memory_threshold_mb=float(data.get("memory_threshold_mb", 500.0)),
@@ -67,8 +67,6 @@ class StructuralProfiler:
67
67
 
68
68
  def __init__(self, config: PipelineConfig | None = None) -> None:
69
69
  self.config: PipelineConfig = config or PipelineConfig()
70
- # Keep sub-profilers aligned with the master column_overrides.
71
- self.config.profiling.column_overrides = self.config.column_overrides
72
70
 
73
71
  if self.config.profiling.modality == Modality.Tabular:
74
72
  self.modality_profiler: ModalityProfiler = TabularProfiler()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dataforge-ml
3
- Version: 0.9.0
3
+ Version: 0.10.0
4
4
  Summary: An automated feature engineering and designing pipeline library
5
5
  License: MIT
6
6
  Classifier: License :: OSI Approved :: MIT License
@@ -21,9 +21,9 @@ Provides-Extra: dev
21
21
  Requires-Dist: pytest>=8.0; extra == "dev"
22
22
  Dynamic: license-file
23
23
 
24
- # FeatureForge
24
+ # DataForgeML
25
25
 
26
- Automated feature engineering and data profiling pipeline library for tabular datasets.
26
+ Automated feature engineering and data profiling pipeline library for datasets.
27
27
 
28
28
  ## Installation
29
29
 
File without changes
File without changes