dataforge-ml 2.0.7__tar.gz → 2.0.8__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. {dataforge_ml-2.0.7/src/dataforge_ml.egg-info → dataforge_ml-2.0.8}/PKG-INFO +1 -1
  2. {dataforge_ml-2.0.7 → dataforge_ml-2.0.8}/pyproject.toml +1 -1
  3. {dataforge_ml-2.0.7 → dataforge_ml-2.0.8}/src/dataforge_ml/profiling/_datetime_config.py +2 -70
  4. {dataforge_ml-2.0.7 → dataforge_ml-2.0.8}/src/dataforge_ml/profiling/_datetime_profiler.py +4 -55
  5. {dataforge_ml-2.0.7 → dataforge_ml-2.0.8/src/dataforge_ml.egg-info}/PKG-INFO +1 -1
  6. {dataforge_ml-2.0.7 → dataforge_ml-2.0.8}/LICENSE +0 -0
  7. {dataforge_ml-2.0.7 → dataforge_ml-2.0.8}/README.md +0 -0
  8. {dataforge_ml-2.0.7 → dataforge_ml-2.0.8}/setup.cfg +0 -0
  9. {dataforge_ml-2.0.7 → dataforge_ml-2.0.8}/src/dataforge_ml/__init__.py +0 -0
  10. {dataforge_ml-2.0.7 → dataforge_ml-2.0.8}/src/dataforge_ml/config.py +0 -0
  11. {dataforge_ml-2.0.7 → dataforge_ml-2.0.8}/src/dataforge_ml/imputation/__init__.py +0 -0
  12. {dataforge_ml-2.0.7 → dataforge_ml-2.0.8}/src/dataforge_ml/imputation/_config.py +0 -0
  13. {dataforge_ml-2.0.7 → dataforge_ml-2.0.8}/src/dataforge_ml/imputation/_fitted_imputer.py +0 -0
  14. {dataforge_ml-2.0.7 → dataforge_ml-2.0.8}/src/dataforge_ml/imputation/_numeric_imputer.py +0 -0
  15. {dataforge_ml-2.0.7 → dataforge_ml-2.0.8}/src/dataforge_ml/imputation/_regression_estimator_factory.py +0 -0
  16. {dataforge_ml-2.0.7 → dataforge_ml-2.0.8}/src/dataforge_ml/imputation/_strategy_router.py +0 -0
  17. {dataforge_ml-2.0.7 → dataforge_ml-2.0.8}/src/dataforge_ml/imputation/_utils.py +0 -0
  18. {dataforge_ml-2.0.7 → dataforge_ml-2.0.8}/src/dataforge_ml/imputation/orchestrator.py +0 -0
  19. {dataforge_ml-2.0.7 → dataforge_ml-2.0.8}/src/dataforge_ml/models/__init__.py +0 -0
  20. {dataforge_ml-2.0.7 → dataforge_ml-2.0.8}/src/dataforge_ml/models/_data_structure.py +0 -0
  21. {dataforge_ml-2.0.7 → dataforge_ml-2.0.8}/src/dataforge_ml/models/_data_types.py +0 -0
  22. {dataforge_ml-2.0.7 → dataforge_ml-2.0.8}/src/dataforge_ml/profiling/__init__.py +0 -0
  23. {dataforge_ml-2.0.7 → dataforge_ml-2.0.8}/src/dataforge_ml/profiling/_base.py +0 -0
  24. {dataforge_ml-2.0.7 → dataforge_ml-2.0.8}/src/dataforge_ml/profiling/_boolean_config.py +0 -0
  25. {dataforge_ml-2.0.7 → dataforge_ml-2.0.8}/src/dataforge_ml/profiling/_boolean_profiler.py +0 -0
  26. {dataforge_ml-2.0.7 → dataforge_ml-2.0.8}/src/dataforge_ml/profiling/_categorical.py +0 -0
  27. {dataforge_ml-2.0.7 → dataforge_ml-2.0.8}/src/dataforge_ml/profiling/_categorical_config.py +0 -0
  28. {dataforge_ml-2.0.7 → dataforge_ml-2.0.8}/src/dataforge_ml/profiling/_config.py +0 -0
  29. {dataforge_ml-2.0.7 → dataforge_ml-2.0.8}/src/dataforge_ml/profiling/_correlation_config.py +0 -0
  30. {dataforge_ml-2.0.7 → dataforge_ml-2.0.8}/src/dataforge_ml/profiling/_correlation_profiler.py +0 -0
  31. {dataforge_ml-2.0.7 → dataforge_ml-2.0.8}/src/dataforge_ml/profiling/_missingness_config.py +0 -0
  32. {dataforge_ml-2.0.7 → dataforge_ml-2.0.8}/src/dataforge_ml/profiling/_missingness_profiler.py +0 -0
  33. {dataforge_ml-2.0.7 → dataforge_ml-2.0.8}/src/dataforge_ml/profiling/_nonlinearity_profiler.py +0 -0
  34. {dataforge_ml-2.0.7 → dataforge_ml-2.0.8}/src/dataforge_ml/profiling/_numeric_config.py +0 -0
  35. {dataforge_ml-2.0.7 → dataforge_ml-2.0.8}/src/dataforge_ml/profiling/_numeric_profiler.py +0 -0
  36. {dataforge_ml-2.0.7 → dataforge_ml-2.0.8}/src/dataforge_ml/profiling/_tabular.py +0 -0
  37. {dataforge_ml-2.0.7 → dataforge_ml-2.0.8}/src/dataforge_ml/profiling/_target_config.py +0 -0
  38. {dataforge_ml-2.0.7 → dataforge_ml-2.0.8}/src/dataforge_ml/profiling/_target_profiler.py +0 -0
  39. {dataforge_ml-2.0.7 → dataforge_ml-2.0.8}/src/dataforge_ml/profiling/_text_config.py +0 -0
  40. {dataforge_ml-2.0.7 → dataforge_ml-2.0.8}/src/dataforge_ml/profiling/_text_profiler.py +0 -0
  41. {dataforge_ml-2.0.7 → dataforge_ml-2.0.8}/src/dataforge_ml/profiling/_type_detection_config.py +0 -0
  42. {dataforge_ml-2.0.7 → dataforge_ml-2.0.8}/src/dataforge_ml/profiling/_type_detector.py +0 -0
  43. {dataforge_ml-2.0.7 → dataforge_ml-2.0.8}/src/dataforge_ml/profiling/orchestrator.py +0 -0
  44. {dataforge_ml-2.0.7 → dataforge_ml-2.0.8}/src/dataforge_ml/splitting/__init__.py +0 -0
  45. {dataforge_ml-2.0.7 → dataforge_ml-2.0.8}/src/dataforge_ml/splitting/_config.py +0 -0
  46. {dataforge_ml-2.0.7 → dataforge_ml-2.0.8}/src/dataforge_ml/splitting/_profile_signals.py +0 -0
  47. {dataforge_ml-2.0.7 → dataforge_ml-2.0.8}/src/dataforge_ml/splitting/_splitter.py +0 -0
  48. {dataforge_ml-2.0.7 → dataforge_ml-2.0.8}/src/dataforge_ml/utils/__init__.py +0 -0
  49. {dataforge_ml-2.0.7 → dataforge_ml-2.0.8}/src/dataforge_ml/utils/_null_detection.py +0 -0
  50. {dataforge_ml-2.0.7 → dataforge_ml-2.0.8}/src/dataforge_ml/utils/_null_normalization.py +0 -0
  51. {dataforge_ml-2.0.7 → dataforge_ml-2.0.8}/src/dataforge_ml/utils/data_loader.py +0 -0
  52. {dataforge_ml-2.0.7 → dataforge_ml-2.0.8}/src/dataforge_ml.egg-info/SOURCES.txt +0 -0
  53. {dataforge_ml-2.0.7 → dataforge_ml-2.0.8}/src/dataforge_ml.egg-info/dependency_links.txt +0 -0
  54. {dataforge_ml-2.0.7 → dataforge_ml-2.0.8}/src/dataforge_ml.egg-info/requires.txt +0 -0
  55. {dataforge_ml-2.0.7 → dataforge_ml-2.0.8}/src/dataforge_ml.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dataforge-ml
3
- Version: 2.0.7
3
+ Version: 2.0.8
4
4
  Summary: A automated feature engineering and designing pipeline library
5
5
  License: MIT
6
6
  Classifier: License :: OSI Approved :: MIT License
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "dataforge-ml"
7
- version = "2.0.7"
7
+ version = "2.0.8"
8
8
  description = "A automated feature engineering and designing pipeline library"
9
9
  readme = "README.md"
10
10
  requires-python = ">3.10"
@@ -31,77 +31,12 @@ class DatetimeFlag(StrEnum):
31
31
  FormatMismatch = "format_mismatch"
32
32
 
33
33
 
34
- @dataclass
35
- class TemporalSignals:
36
- """Which time-component features are present in a Datetime column.
37
-
38
- Each boolean field indicates that the corresponding granularity was
39
- detected as non-constant, making it a candidate for feature extraction
40
- in Phase 5 Encoding.
41
- """
42
-
43
- has_year: bool = False
44
- has_month: bool = False
45
- has_day: bool = False
46
- has_day_of_week: bool = False
47
- has_hour: bool = False
48
- has_is_weekend: bool = False
49
- has_is_month_end: bool = False
50
-
51
- def extractable_features(self) -> list[str]:
52
- """Return the names of all time-component features that can be extracted.
53
-
54
- Returns
55
- -------
56
- list[str]
57
- Feature names corresponding to every ``has_*`` field that is
58
- ``True``. An empty list means no temporal variation was detected.
59
- """
60
- features = []
61
- if self.has_year:
62
- features.append("year")
63
- if self.has_month:
64
- features.append("month")
65
- if self.has_day:
66
- features.append("day_of_month")
67
- if self.has_day_of_week:
68
- features.append("day_of_week")
69
- if self.has_hour:
70
- features.append("hour")
71
- if self.has_is_weekend:
72
- features.append("is_weekend")
73
- if self.has_is_month_end:
74
- features.append("is_month_end")
75
- return features
76
-
77
- def to_dict(self) -> dict:
78
- """Serialise the temporal signals to a plain dictionary.
79
-
80
- Returns
81
- -------
82
- dict
83
- All ``has_*`` flags plus an ``extractable_features`` key
84
- containing the result of :meth:`extractable_features`.
85
- """
86
- return {
87
- "has_year": self.has_year,
88
- "has_month": self.has_month,
89
- "has_day": self.has_day,
90
- "has_day_of_week": self.has_day_of_week,
91
- "has_hour": self.has_hour,
92
- "has_is_weekend": self.has_is_weekend,
93
- "has_is_month_end": self.has_is_month_end,
94
- "extractable_features": self.extractable_features(),
95
- }
96
-
97
-
98
34
  @dataclass
99
35
  class DatetimeStats:
100
36
  """Statistical summary of a single Datetime column.
101
37
 
102
38
  Produced by ``DatetimeProfiler`` for each opted-in column. Stores
103
- range, gap regularity, inferred granularity, and ``TemporalSignals``
104
- indicating which time components are available for feature extraction.
39
+ range, gap regularity, and inferred granularity.
105
40
  """
106
41
 
107
42
  min_date: Optional[str] = None
@@ -111,7 +46,6 @@ class DatetimeStats:
111
46
  inferred_granularity: Optional[InferredGranularity] = None
112
47
  median_gap_seconds: Optional[float] = None
113
48
  gap_cv: Optional[float] = None
114
- signals: TemporalSignals = field(default_factory=TemporalSignals)
115
49
  flags: list[DatetimeFlag] = field(default_factory=list)
116
50
 
117
51
  def has_flag(self, flag: DatetimeFlag) -> bool:
@@ -137,8 +71,7 @@ class DatetimeStats:
137
71
  -------
138
72
  dict
139
73
  All fields keyed by field name. ``inferred_granularity`` is
140
- serialised as its string value; ``signals`` is expanded via
141
- :meth:`TemporalSignals.to_dict`; ``flags`` are serialised as
74
+ serialised as its string value; ``flags`` are serialised as
142
75
  their string values.
143
76
  """
144
77
  return {
@@ -149,7 +82,6 @@ class DatetimeStats:
149
82
  "inferred_granularity": str(self.inferred_granularity) if self.inferred_granularity else None,
150
83
  "median_gap_seconds": self.median_gap_seconds,
151
84
  "gap_cv": self.gap_cv,
152
- "signals": self.signals.to_dict(),
153
85
  "flags": [str(f) for f in self.flags],
154
86
  }
155
87
 
@@ -7,9 +7,10 @@ Per-column metrics (opt-in via ProfileConfig.datetime_columns):
7
7
  3. Future dates – count of values > now, with context note
8
8
  4. Granularity – inferred periodicity from median consecutive gap;
9
9
  high gap-CV flagged as irregular
10
- 5. Temporal signals – audit which of {year, month, day, day-of-week,
11
- hour, is-weekend, is-month-end} vary in the data,
12
- to guide downstream feature engineering
10
+
11
+ Temporal-component variance (whether year, month, day-of-week, etc. vary) is
12
+ intentionally not profiled here; Phase 5 Encoding derives it on demand from
13
+ the column.
13
14
 
14
15
  Granularity inference bands (median gap in seconds):
15
16
  < 90 s → secondly
@@ -34,7 +35,6 @@ from ._datetime_config import (
34
35
  DatetimeStats,
35
36
  InferredGranularity,
36
37
  DatetimeFlag,
37
- TemporalSignals,
38
38
  )
39
39
 
40
40
  # Granularity bands — upper bound (exclusive) in seconds for each label.
@@ -226,9 +226,6 @@ class DatetimeProfiler(ColumnBatchProfiler[DatetimeProfileResult]):
226
226
  # 6. Granularity
227
227
  self._infer_granularity(clean, profile)
228
228
 
229
- # 7. Temporal signals
230
- self._audit_temporal_signals(clean, profile)
231
-
232
229
  return profile
233
230
 
234
231
  # ------------------------------------------------------------------
@@ -377,51 +374,3 @@ class DatetimeProfiler(ColumnBatchProfiler[DatetimeProfileResult]):
377
374
  break
378
375
 
379
376
  profile.inferred_granularity = granularity
380
-
381
- # ------------------------------------------------------------------
382
- # Step 7: Temporal signal audit
383
- # ------------------------------------------------------------------
384
-
385
- @staticmethod
386
- def _audit_temporal_signals(
387
- clean: pl.Series,
388
- profile: DatetimeStats,
389
- ) -> None:
390
- """
391
- Check which temporal features vary across rows.
392
-
393
- All checks are done via Polars expressions on the full clean series,
394
- so no Python-level loops are required.
395
- """
396
- signals = TemporalSignals()
397
-
398
- years = clean.dt.year()
399
- months = clean.dt.month()
400
- days = clean.dt.day()
401
- dow = clean.dt.weekday() # 0=Monday … 6=Sunday
402
- hours = clean.dt.hour()
403
-
404
- signals.has_year = years.n_unique() > 1
405
- signals.has_month = months.n_unique() > 1
406
- signals.has_day = days.n_unique() > 1
407
- signals.has_day_of_week = dow.n_unique() > 1
408
- signals.has_hour = int(hours.max()) > 0 # type: ignore[arg-type]
409
-
410
- # Weekend signal is only meaningful when day-of-week varies
411
- if signals.has_day_of_week:
412
- weekend_mask = dow >= 5 # Saturday=5, Sunday=6
413
- signals.has_is_weekend = bool(weekend_mask.any())
414
-
415
- # Month-end: day == last day of the respective month
416
- try:
417
- month_end_ts = clean.dt.month_end()
418
- is_month_end_mask = (
419
- (clean.dt.year() == month_end_ts.dt.year())
420
- & (clean.dt.month() == month_end_ts.dt.month())
421
- & (clean.dt.day() == month_end_ts.dt.day())
422
- )
423
- signals.has_is_month_end = bool(is_month_end_mask.any())
424
- except Exception:
425
- signals.has_is_month_end = False
426
-
427
- profile.signals = signals
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dataforge-ml
3
- Version: 2.0.7
3
+ Version: 2.0.8
4
4
  Summary: A automated feature engineering and designing pipeline library
5
5
  License: MIT
6
6
  Classifier: License :: OSI Approved :: MIT License
File without changes
File without changes
File without changes