featkit 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- featkit/__init__.py +1 -0
- featkit/builders/.gitkeep +0 -0
- featkit/builders/__init__.py +0 -0
- featkit/builders/distributional_space.py +77 -0
- featkit/builders/pivot_space.py +102 -0
- featkit/builders/temporal_space.py +86 -0
- featkit/config.py +38 -0
- featkit/contracts/__init__.py +1 -0
- featkit/contracts/measurement/.gitkeep +0 -0
- featkit/contracts/measurement/__init__.py +27 -0
- featkit/contracts/measurement/base.py +47 -0
- featkit/contracts/measurement/defaults.py +117 -0
- featkit/contracts/output/.gitkeep +0 -0
- featkit/contracts/output/__init__.py +19 -0
- featkit/contracts/output/base.py +36 -0
- featkit/contracts/output/defaults.py +80 -0
- featkit/dataset/.gitkeep +0 -0
- featkit/dataset/__init__.py +0 -0
- featkit/dataset/base.py +120 -0
- featkit/enums.py +110 -0
- featkit/fields/.gitkeep +0 -0
- featkit/fields/__init__.py +9 -0
- featkit/fields/base.py +48 -0
- featkit/fields/categorical_field.py +55 -0
- featkit/fields/id_field.py +14 -0
- featkit/fields/measurement_field.py +42 -0
- featkit/fields/time_field.py +43 -0
- featkit/generators/__init__.py +0 -0
- featkit/generators/base.py +171 -0
- featkit/generators/output.py +118 -0
- featkit/generators/pyspark/.gitkeep +0 -0
- featkit/generators/pyspark/__init__.py +0 -0
- featkit/generators/pyspark/databricks.py +448 -0
- featkit/generators/sql/.gitkeep +0 -0
- featkit/generators/sql/__init__.py +0 -0
- featkit/generators/sql/base.py +496 -0
- featkit/generators/sql/databricks.py +19 -0
- featkit/generators/sql/snowflake.py +19 -0
- featkit/generators/sql/spark_sql.py +19 -0
- featkit/layer2/.gitkeep +0 -0
- featkit/layer2/__init__.py +0 -0
- featkit/layer2/base.py +86 -0
- featkit/layer2/distributional.py +51 -0
- featkit/layer2/pivoted.py +63 -0
- featkit/layer3/.gitkeep +0 -0
- featkit/layer3/__init__.py +0 -0
- featkit/layer3/temporal_feature.py +87 -0
- featkit/pipeline.py +63 -0
- featkit-0.1.0.dist-info/METADATA +140 -0
- featkit-0.1.0.dist-info/RECORD +52 -0
- featkit-0.1.0.dist-info/WHEEL +4 -0
- featkit-0.1.0.dist-info/licenses/LICENSE +21 -0
featkit/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""featkit — automated feature store generation from relational facts tables."""
|
|
File without changes
|
|
File without changes
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
"""DistributionalSpaceBuilder — generates all DistributionalColumn objects from a dataset."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import cast
|
|
6
|
+
|
|
7
|
+
from featkit.contracts.measurement.defaults import get_default_contract
|
|
8
|
+
from featkit.dataset.base import AbstractDataset
|
|
9
|
+
from featkit.enums import CategoricalTreatment
|
|
10
|
+
from featkit.fields.categorical_field import CategoricalField
|
|
11
|
+
from featkit.fields.measurement_field import MeasurementField
|
|
12
|
+
from featkit.layer2.distributional import DistributionalColumn
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class DistributionalSpaceBuilder:
    """Enumerate every ``DistributionalColumn`` derivable from a dataset.

    One column is emitted per combination of:

    * a categorical field whose treatment is DISTRIBUTIONAL or BOTH,
    * a measurement field,
    * an aggregator permitted by that measurement's contract, and
    * a distributional metric declared on the categorical.

    When two combinations yield the same ``column_name`` only the first one
    is kept.

    Args:
        dataset: The source facts-table schema.
        value_measurements: Optional whitelist of measurement fields to use as
            value sources. ``None`` selects every measurement field of the
            dataset; an empty list yields no columns. Each entry must compare
            equal (``==``) to one of the dataset's measurement fields,
            otherwise ``build`` raises ``ValueError``.
    """

    def __init__(
        self,
        dataset: AbstractDataset,
        value_measurements: list[MeasurementField] | None = None,
    ) -> None:
        self.dataset = dataset
        self.value_measurements = value_measurements

    def build(self) -> list[DistributionalColumn]:
        """Return the de-duplicated list of distributional columns.

        Raises:
            ValueError: If a requested value measurement is not part of the
                dataset.
        """
        wanted_treatments = {
            CategoricalTreatment.DISTRIBUTIONAL,
            CategoricalTreatment.BOTH,
        }
        categoricals = [
            field
            for field in (cast(CategoricalField, f) for f in self.dataset.categorical_fields)
            if field.treatment in wanted_treatments
        ]
        value_sources = self._select_measurements()

        columns: list[DistributionalColumn] = []
        emitted_names: set[str] = set()

        for categorical in categoricals:
            for measurement in value_sources:
                # A field-level contract wins; otherwise use the type default.
                contract = measurement.contract or get_default_contract(
                    measurement.measurement_type
                )
                # Sort by enum value so the output order is deterministic.
                ordered_aggs = sorted(
                    contract.valid_layer2_aggregators, key=lambda agg: agg.value
                )
                for aggregator in ordered_aggs:
                    for metric in categorical.distributional_metrics:
                        candidate = DistributionalColumn(
                            measurement, aggregator, categorical, metric
                        )
                        name = candidate.column_name
                        if name in emitted_names:
                            continue
                        emitted_names.add(name)
                        columns.append(candidate)

        return columns

    def _select_measurements(self) -> list[MeasurementField]:
        """Resolve ``value_measurements`` against the dataset's own fields.

        Returns the dataset's instances (not the caller-supplied ones), in the
        order they were requested.
        """
        available = [cast(MeasurementField, f) for f in self.dataset.measurement_fields]
        if self.value_measurements is None:
            return available

        selected: list[MeasurementField] = []
        for requested in self.value_measurements:
            for candidate in available:
                if candidate == requested:
                    selected.append(candidate)
                    break
            else:
                raise ValueError(
                    f"MeasurementField {requested.name!r} "
                    f"(type {requested.measurement_type.name}) is not present in the dataset"
                )
        return selected
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
"""PivotSpaceBuilder — generates all PivotedColumn objects from a dataset."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from collections.abc import Callable
|
|
6
|
+
from itertools import product
|
|
7
|
+
from typing import cast
|
|
8
|
+
|
|
9
|
+
from featkit.contracts.measurement.defaults import get_default_contract
|
|
10
|
+
from featkit.dataset.base import AbstractDataset
|
|
11
|
+
from featkit.enums import CategoricalTreatment, Layer2Aggregator, MeasurementType
|
|
12
|
+
from featkit.fields.categorical_field import CategoricalField
|
|
13
|
+
from featkit.fields.measurement_field import MeasurementField
|
|
14
|
+
from featkit.layer2.pivoted import PivotedColumn
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class PivotSpaceBuilder:
    """Generates the full set of PivotedColumn objects for a dataset.

    Only categoricals whose treatment is PIVOT or BOTH take part. The
    Cartesian product of their domains is combined with every measurement
    field and every aggregator valid for that measurement.

    Args:
        dataset: The source facts-table schema.
        include_marginals: When True, each categorical domain is augmented with
            None (the ∅ marginal), producing one column per ∅-substituted
            combination in addition to the fully-specified ones.
        aggregators_override: Per-measurement-type override list. Only aggregators
            that are also contract-valid for the measurement type are used.
            The override order is preserved and repeated entries are ignored.
        domain_resolver: Callable invoked to resolve the domain of a categorical
            whose ``allowed_values`` is None. Must return a list of string values.
            Raises ``ValueError`` at build time if not provided for such a field.
    """

    def __init__(
        self,
        dataset: AbstractDataset,
        include_marginals: bool = True,
        aggregators_override: dict[MeasurementType, list[Layer2Aggregator]] | None = None,
        domain_resolver: Callable[[CategoricalField], list[str]] | None = None,
    ) -> None:
        self.dataset = dataset
        self.include_marginals = include_marginals
        self.aggregators_override = aggregators_override
        self.domain_resolver = domain_resolver

    def build(self) -> list[PivotedColumn]:
        """Build and return all PivotedColumn objects.

        Raises:
            ValueError: If a pivot categorical has no resolvable domain, if a
                resolved domain contains ``None``, or if two generated columns
                share the same ``column_name``.
        """
        pivot_cats = [
            c
            for c in (cast(CategoricalField, f) for f in self.dataset.categorical_fields)
            if c.treatment in {CategoricalTreatment.PIVOT, CategoricalTreatment.BOTH}
        ]

        cat_domains: dict[CategoricalField, list[str | None]] = {
            cat: self._resolve_domain(cat) for cat in pivot_cats
        }
        cats = list(cat_domains)

        # The aggregator list depends only on the measurement, so compute it
        # once per field instead of once per categorical combination.
        measurement_aggs = [
            (mf, self._valid_aggregators(mf))
            for mf in (cast(MeasurementField, f) for f in self.dataset.measurement_fields)
        ]

        results: list[PivotedColumn] = []
        seen: dict[str, PivotedColumn] = {}

        # product() over zero iterables yields exactly one empty tuple, so a
        # dataset without pivot categoricals still gets one (empty) combination.
        for combo in product(*cat_domains.values()):
            cat_combination = dict(zip(cats, combo))
            for mf, aggs in measurement_aggs:
                for agg in aggs:
                    col = PivotedColumn(mf, agg, cat_combination)
                    if col.column_name in seen:
                        raise ValueError(
                            f"Duplicate pivot column name generated: {col.column_name!r}. "
                            f"Conflicting columns: {seen[col.column_name]!r} and {col!r}"
                        )
                    seen[col.column_name] = col
                    results.append(col)

        return results

    def _resolve_domain(self, cat: CategoricalField) -> list[str | None]:
        """Return the value domain of *cat*, plus ``None`` when marginals are enabled."""
        if cat.allowed_values is not None:
            raw: list[str] = list(cat.allowed_values)
        elif self.domain_resolver is not None:
            raw = list(self.domain_resolver(cat))
        else:
            raise ValueError(
                f"CategoricalField {cat.name!r} has no allowed_values and no "
                f"domain_resolver was provided"
            )
        if any(v is None for v in raw):
            raise ValueError(
                f"CategoricalField {cat.name!r}: resolved domain contains None; "
                f"None is reserved as the \u2205 marginal sentinel"
            )
        domain: list[str | None] = list(raw)
        if self.include_marginals:
            domain.append(None)
        return domain

    def _valid_aggregators(self, mf: MeasurementField) -> list[Layer2Aggregator]:
        """Return the aggregators to apply to *mf*.

        With an override for the field's measurement type: the override
        entries that the contract allows, in override order, each at most
        once. Otherwise: every contract-valid aggregator sorted by enum value.
        """
        contract = mf.contract or get_default_contract(mf.measurement_type)
        valid = contract.valid_layer2_aggregators
        override = self.aggregators_override
        if override and mf.measurement_type in override:
            # dict.fromkeys drops repeats while keeping first-seen order; a
            # repeated aggregator would otherwise trip the duplicate-name check.
            return [a for a in dict.fromkeys(override[mf.measurement_type]) if a in valid]
        return sorted(valid, key=lambda a: a.value)
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
"""TemporalSpaceBuilder — generates all TemporalFeature objects from Layer 2 columns."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from collections.abc import Sequence
|
|
6
|
+
|
|
7
|
+
from featkit.enums import Layer2OutputType, TemporalOperator, TimeWindowDirection
|
|
8
|
+
from featkit.layer2.base import AbstractLayer2Column
|
|
9
|
+
from featkit.layer3.temporal_feature import _POINT_IN_TIME_OPERATORS, TemporalFeature
|
|
10
|
+
|
|
11
|
+
#: Temporal operators whose window sizes come from ``composed_windows``
#: (``MEDIA_ABS`` and ``RATIO``) rather than from ``time_windows``.
_COMPOSED_OPERATORS: frozenset[TemporalOperator] = frozenset(
    (TemporalOperator.MEDIA_ABS, TemporalOperator.RATIO)
)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class TemporalSpaceBuilder:
    """Expand Layer 2 columns into the complete list of ``TemporalFeature`` objects.

    For every Layer 2 column the candidate operators are read from the
    column's output contract, or from ``operators_override`` (filtered by that
    contract) when an override exists for the column's output type. Each
    operator is then combined with window sizes chosen as follows:

    - operators in ``_COMPOSED_OPERATORS`` (``MEDIA_ABS`` / ``RATIO``): the
      ``composed_windows``; skipped altogether when that is ``None``
    - operators in ``_POINT_IN_TIME_OPERATORS``: a single ``window_size=None``
    - any other operator: the ``time_windows``

    Features whose ``column_name`` was already produced are dropped, keeping
    the first occurrence.

    Args:
        layer2_columns: All Layer 2 columns to build features from.
        time_windows: Window sizes for standard windowed operators.
        composed_windows: Window sizes for ``MEDIA_ABS`` and ``RATIO``. When
            ``None``, those operators are omitted entirely.
        direction: Sliding-window direction applied to every feature.
        operators_override: Per-output-type override. Only operators that are
            also contract-valid for the column's output type are used.
    """

    def __init__(
        self,
        layer2_columns: list[AbstractLayer2Column],
        time_windows: list[int],
        composed_windows: list[int] | None = None,
        direction: TimeWindowDirection = TimeWindowDirection.BACKWARD,
        operators_override: dict[Layer2OutputType, list[TemporalOperator]] | None = None,
    ) -> None:
        self.layer2_columns = layer2_columns
        self.time_windows = time_windows
        self.composed_windows = composed_windows
        self.direction = direction
        self.operators_override = operators_override

    def build(self) -> list[TemporalFeature]:
        """Return the de-duplicated list of temporal features."""
        features: list[TemporalFeature] = []
        emitted_names: set[str] = set()

        for column in self.layer2_columns:
            for operator in self._operators_for(column):
                for window in self._windows_for(operator):
                    feature = TemporalFeature(
                        column, operator, self.direction, window_size=window
                    )
                    name = feature.column_name
                    if name in emitted_names:
                        continue
                    emitted_names.add(name)
                    features.append(feature)

        return features

    def _operators_for(self, col: AbstractLayer2Column) -> list[TemporalOperator]:
        """Pick the operators for *col*: filtered override, else contract set sorted by value."""
        overrides = self.operators_override
        if not overrides or col.output_type not in overrides:
            return sorted(
                col.output_contract.valid_temporal_operators, key=lambda o: o.value
            )
        # Override order is kept; entries the contract rejects are dropped.
        return [
            op for op in overrides[col.output_type] if col.output_contract.is_valid(op)
        ]

    def _windows_for(self, op: TemporalOperator) -> Sequence[int | None]:
        """Return the window sizes to pair with *op* (empty means "skip this operator")."""
        if op in _COMPOSED_OPERATORS:
            if self.composed_windows is None:
                return []
            return list(self.composed_windows)
        if op in _POINT_IN_TIME_OPERATORS:
            return [None]
        return list(self.time_windows)
|
featkit/config.py
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
"""FeatureStoreConfig — top-level configuration object for a feature store pipeline."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass, field
|
|
6
|
+
|
|
7
|
+
from featkit.dataset.base import AbstractDataset
|
|
8
|
+
from featkit.enums import Layer2Aggregator, Layer2OutputType, MeasurementType, TemporalOperator
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@dataclass
class FeatureStoreConfig:
    """Settings bundle consumed by a FeatureStorePipeline run.

    Attributes:
        dataset: The source facts-table schema.
        output_schema: Destination schema (database/schema) for generated tables.
        output_table_prefix: Prefix applied to every output table name.
        time_windows: Window sizes (in granularity units) for standard windowed
            temporal operators.
        composed_windows: Window sizes for the ``MEDIA_ABS`` and ``RATIO``
            operators. ``None`` (the default) omits those operators entirely.
        include_marginals: When ``True`` (the default), ``PivotSpaceBuilder``
            includes the ∅ marginal combination for each categorical.
        aggregators_override: Per-measurement-type override for Layer 2
            aggregators. Only contract-valid aggregators are used.
        operators_override: Per-output-type override for temporal operators.
            Only contract-valid operators are used.
    """

    # Required settings (no defaults).
    dataset: AbstractDataset
    output_schema: str
    output_table_prefix: str
    time_windows: list[int]

    # Optional settings.
    composed_windows: list[int] | None = None
    include_marginals: bool = True
    aggregators_override: dict[MeasurementType, list[Layer2Aggregator]] | None = None
    operators_override: dict[Layer2OutputType, list[TemporalOperator]] | None = field(default=None)
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Type boundary contracts for featkit layers."""
|
|
File without changes
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
"""Layer 1 → Layer 2 measurement type contracts."""
|
|
2
|
+
|
|
3
|
+
from featkit.contracts.measurement.base import AbstractMeasurementTypeContract
|
|
4
|
+
from featkit.contracts.measurement.defaults import (
|
|
5
|
+
BalanceContract,
|
|
6
|
+
CantidadContract,
|
|
7
|
+
EstadisticoContract,
|
|
8
|
+
FechaContract,
|
|
9
|
+
FlagContract,
|
|
10
|
+
MontoContract,
|
|
11
|
+
TicketContract,
|
|
12
|
+
TimeDiffContract,
|
|
13
|
+
get_default_contract,
|
|
14
|
+
)
|
|
15
|
+
|
|
16
|
+
# Public API of this package: the abstract contract base, the concrete
# default contracts, and the lookup helper imported above.
__all__ = [
    "AbstractMeasurementTypeContract",
    "BalanceContract",
    "CantidadContract",
    "EstadisticoContract",
    "FechaContract",
    "FlagContract",
    "MontoContract",
    "TicketContract",
    "TimeDiffContract",
    "get_default_contract",
]
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
"""Abstract base for Layer 1 → Layer 2 type boundary contracts."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from abc import ABC, abstractmethod
|
|
6
|
+
|
|
7
|
+
from featkit.enums import Layer2Aggregator, MeasurementType
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class AbstractMeasurementTypeContract(ABC):
    """Layer 1 → Layer 2 boundary: which aggregators a measurement type accepts.

    Subclasses supply ``valid_layer2_aggregators``. Two contracts are equal
    only when they are instances of the exact same class and share the same
    measurement type and aggregator set; hashing follows the same rule.
    """

    def __init__(self, measurement_type: MeasurementType) -> None:
        self._measurement_type = measurement_type

    @property
    def measurement_type(self) -> MeasurementType:
        """Read-only access to the governed MeasurementType."""
        return self._measurement_type

    @property
    @abstractmethod
    def valid_layer2_aggregators(self) -> frozenset[Layer2Aggregator]:
        """Aggregators this contract allows, as a frozen set."""
        ...

    def is_valid(self, aggregator: Layer2Aggregator) -> bool:
        """Tell whether *aggregator* belongs to ``valid_layer2_aggregators``."""
        permitted = self.valid_layer2_aggregators
        return aggregator in permitted

    def _key(self) -> tuple[object, ...]:
        # Value identity shared by __eq__ and __hash__.
        return (self.measurement_type, self.valid_layer2_aggregators)

    def __eq__(self, other: object) -> bool:
        if isinstance(other, AbstractMeasurementTypeContract):
            # Exact class match: a subclass never equals its parent class.
            if type(self) is not type(other):
                return False
            return self._key() == other._key()
        return NotImplemented

    def __hash__(self) -> int:
        cls = type(self)
        return hash((cls, self._key()))

    def __repr__(self) -> str:
        class_name = type(self).__name__
        return f"{class_name}(measurement_type={self.measurement_type.name})"
|
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
"""Default concrete contracts — one per MeasurementType."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from featkit.contracts.measurement.base import AbstractMeasurementTypeContract
|
|
6
|
+
from featkit.enums import Layer2Aggregator, MeasurementType
|
|
7
|
+
|
|
8
|
+
# Short private aliases for the Layer2Aggregator members, used to keep the
# aggregator sets of the contract classes in this module compact.
_S = Layer2Aggregator.SUM
_C = Layer2Aggregator.COUNT
_MX = Layer2Aggregator.MAX
_MN = Layer2Aggregator.MIN
_AV = Layer2Aggregator.AVG
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class MontoContract(AbstractMeasurementTypeContract):
    """Default contract for ``MeasurementType.MONTO``: SUM, MAX, MIN and AVG."""

    _AGGREGATORS: frozenset[Layer2Aggregator] = frozenset((_S, _MX, _MN, _AV))

    def __init__(self) -> None:
        super().__init__(MeasurementType.MONTO)

    @property
    def valid_layer2_aggregators(self) -> frozenset[Layer2Aggregator]:
        """The fixed aggregator set declared on the class."""
        return self._AGGREGATORS
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class CantidadContract(AbstractMeasurementTypeContract):
    """Default contract for ``MeasurementType.CANTIDAD``: SUM only."""

    _AGGREGATORS: frozenset[Layer2Aggregator] = frozenset((_S,))

    def __init__(self) -> None:
        super().__init__(MeasurementType.CANTIDAD)

    @property
    def valid_layer2_aggregators(self) -> frozenset[Layer2Aggregator]:
        """The fixed aggregator set declared on the class."""
        return self._AGGREGATORS
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class TicketContract(AbstractMeasurementTypeContract):
    """Default contract for ``MeasurementType.TICKET``: AVG only."""

    _AGGREGATORS: frozenset[Layer2Aggregator] = frozenset((_AV,))

    def __init__(self) -> None:
        super().__init__(MeasurementType.TICKET)

    @property
    def valid_layer2_aggregators(self) -> frozenset[Layer2Aggregator]:
        """The fixed aggregator set declared on the class."""
        return self._AGGREGATORS
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
class FlagContract(AbstractMeasurementTypeContract):
    """Default contract for ``MeasurementType.FLAG``: MAX only."""

    _AGGREGATORS: frozenset[Layer2Aggregator] = frozenset((_MX,))

    def __init__(self) -> None:
        super().__init__(MeasurementType.FLAG)

    @property
    def valid_layer2_aggregators(self) -> frozenset[Layer2Aggregator]:
        """The fixed aggregator set declared on the class."""
        return self._AGGREGATORS
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
class FechaContract(AbstractMeasurementTypeContract):
    """Default contract for ``MeasurementType.FECHA``: MAX and MIN."""

    _AGGREGATORS: frozenset[Layer2Aggregator] = frozenset((_MX, _MN))

    def __init__(self) -> None:
        super().__init__(MeasurementType.FECHA)

    @property
    def valid_layer2_aggregators(self) -> frozenset[Layer2Aggregator]:
        """The fixed aggregator set declared on the class."""
        return self._AGGREGATORS
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
class BalanceContract(AbstractMeasurementTypeContract):
    """Default contract for ``MeasurementType.BALANCE``: MAX, MIN and AVG."""

    _AGGREGATORS: frozenset[Layer2Aggregator] = frozenset((_MX, _MN, _AV))

    def __init__(self) -> None:
        super().__init__(MeasurementType.BALANCE)

    @property
    def valid_layer2_aggregators(self) -> frozenset[Layer2Aggregator]:
        """The fixed aggregator set declared on the class."""
        return self._AGGREGATORS
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
class TimeDiffContract(AbstractMeasurementTypeContract):
    """Default contract for ``MeasurementType.TIME_DIFF``: SUM, AVG, MAX and MIN."""

    _AGGREGATORS: frozenset[Layer2Aggregator] = frozenset((_S, _AV, _MX, _MN))

    def __init__(self) -> None:
        super().__init__(MeasurementType.TIME_DIFF)

    @property
    def valid_layer2_aggregators(self) -> frozenset[Layer2Aggregator]:
        """The fixed aggregator set declared on the class."""
        return self._AGGREGATORS
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
class EstadisticoContract(AbstractMeasurementTypeContract):
    """Default contract for ``MeasurementType.ESTADISTICO``: SUM, AVG, MAX, MIN and COUNT."""

    _AGGREGATORS: frozenset[Layer2Aggregator] = frozenset((_S, _AV, _MX, _MN, _C))

    def __init__(self) -> None:
        super().__init__(MeasurementType.ESTADISTICO)

    @property
    def valid_layer2_aggregators(self) -> frozenset[Layer2Aggregator]:
        """The fixed aggregator set declared on the class."""
        return self._AGGREGATORS
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
# One shared default contract instance per MeasurementType. Each contract
# reports its own measurement type, which is used as the registry key.
_DEFAULTS: dict[MeasurementType, AbstractMeasurementTypeContract] = {
    contract.measurement_type: contract
    for contract in (
        MontoContract(),
        CantidadContract(),
        TicketContract(),
        FlagContract(),
        FechaContract(),
        BalanceContract(),
        TimeDiffContract(),
        EstadisticoContract(),
    )
}
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def get_default_contract(mt: MeasurementType) -> AbstractMeasurementTypeContract:
    """Look up the shared default contract registered for *mt*.

    Raises:
        KeyError: If no default contract is registered for *mt*.
    """
    contract = _DEFAULTS[mt]
    return contract
|
|
File without changes
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
"""Layer 2 → Layer 3 output type contracts."""
|
|
2
|
+
|
|
3
|
+
from featkit.contracts.output.base import AbstractLayer2OutputContract
|
|
4
|
+
from featkit.contracts.output.defaults import (
|
|
5
|
+
CategoricalOutputContract,
|
|
6
|
+
FlagOutputContract,
|
|
7
|
+
NumericOutputContract,
|
|
8
|
+
TemporalOutputContract,
|
|
9
|
+
get_default_output_contract,
|
|
10
|
+
)
|
|
11
|
+
|
|
12
|
+
# Public API of this package: the abstract output contract base, the concrete
# default output contracts, and the lookup helper imported above.
__all__ = [
    "AbstractLayer2OutputContract",
    "CategoricalOutputContract",
    "FlagOutputContract",
    "NumericOutputContract",
    "TemporalOutputContract",
    "get_default_output_contract",
]
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
"""Abstract base for Layer 2 → Layer 3 type boundary contracts."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from abc import ABC, abstractmethod
|
|
6
|
+
|
|
7
|
+
from featkit.enums import Layer2OutputType, TemporalOperator
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class AbstractLayer2OutputContract(ABC):
    """Layer 2 → Layer 3 boundary: which temporal operators an output type accepts.

    Subclasses supply ``valid_temporal_operators``; everything else is derived
    from that set and from the output type given at construction.
    """

    def __init__(self, output_type: Layer2OutputType) -> None:
        self._output_type = output_type

    @property
    def output_type(self) -> Layer2OutputType:
        """Read-only access to the governed Layer2OutputType."""
        return self._output_type

    @property
    @abstractmethod
    def valid_temporal_operators(self) -> frozenset[TemporalOperator]:
        """Temporal operators this contract allows, as a frozen set."""
        ...

    def is_valid(self, operator: TemporalOperator) -> bool:
        """Tell whether *operator* belongs to ``valid_temporal_operators``."""
        permitted = self.valid_temporal_operators
        return operator in permitted

    def __repr__(self) -> str:
        class_name = type(self).__name__
        return f"{class_name}(output_type={self.output_type.name})"
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
"""Default concrete output contracts — one per Layer2OutputType."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from featkit.contracts.output.base import AbstractLayer2OutputContract
|
|
6
|
+
from featkit.enums import Layer2OutputType, TemporalOperator
|
|
7
|
+
|
|
8
|
+
# Short private aliases for the TemporalOperator members, used to keep the
# operator sets of the contract classes in this module compact.
_PU = TemporalOperator.PROM_U
_PP = TemporalOperator.PROM_P
_SU = TemporalOperator.SUM_U
_SP = TemporalOperator.SUM_P
_UM = TemporalOperator.ULT_MES
_PM = TemporalOperator.PREV_MES
_CR = TemporalOperator.CREC
_FR = TemporalOperator.FREQ
_MNU = TemporalOperator.MIN_U
_MXU = TemporalOperator.MAX_U
_RC = TemporalOperator.REC
_XM = TemporalOperator.XM
_MA = TemporalOperator.MEDIA_ABS
_RA = TemporalOperator.RATIO
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class NumericOutputContract(AbstractLayer2OutputContract):
    """Default contract for ``Layer2OutputType.NUMERIC``.

    Allows PROM_U, PROM_P, SUM_U, SUM_P, CREC, MIN_U, MAX_U, ULT_MES,
    PREV_MES, FREQ, XM, MEDIA_ABS and RATIO (REC is not included).
    """

    _OPERATORS: frozenset[TemporalOperator] = frozenset(
        (_PU, _PP, _SU, _SP, _CR, _MNU, _MXU, _UM, _PM, _FR, _XM, _MA, _RA)
    )

    def __init__(self) -> None:
        super().__init__(Layer2OutputType.NUMERIC)

    @property
    def valid_temporal_operators(self) -> frozenset[TemporalOperator]:
        """The fixed operator set declared on the class."""
        return self._OPERATORS
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class FlagOutputContract(AbstractLayer2OutputContract):
    """Default contract for ``Layer2OutputType.FLAG``: ULT_MES, PREV_MES, FREQ, XM and REC."""

    _OPERATORS: frozenset[TemporalOperator] = frozenset((_UM, _PM, _FR, _XM, _RC))

    def __init__(self) -> None:
        super().__init__(Layer2OutputType.FLAG)

    @property
    def valid_temporal_operators(self) -> frozenset[TemporalOperator]:
        """The fixed operator set declared on the class."""
        return self._OPERATORS
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
class CategoricalOutputContract(AbstractLayer2OutputContract):
    """Default contract for ``Layer2OutputType.CATEGORICAL``: ULT_MES, PREV_MES and REC."""

    _OPERATORS: frozenset[TemporalOperator] = frozenset((_UM, _PM, _RC))

    def __init__(self) -> None:
        super().__init__(Layer2OutputType.CATEGORICAL)

    @property
    def valid_temporal_operators(self) -> frozenset[TemporalOperator]:
        """The fixed operator set declared on the class."""
        return self._OPERATORS
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
class TemporalOutputContract(AbstractLayer2OutputContract):
    """Default contract for ``Layer2OutputType.TEMPORAL``.

    Allows ULT_MES, PREV_MES, REC, MIN_U, MAX_U and CREC.
    """

    _OPERATORS: frozenset[TemporalOperator] = frozenset((_UM, _PM, _RC, _MNU, _MXU, _CR))

    def __init__(self) -> None:
        super().__init__(Layer2OutputType.TEMPORAL)

    @property
    def valid_temporal_operators(self) -> frozenset[TemporalOperator]:
        """The fixed operator set declared on the class."""
        return self._OPERATORS
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
# One shared default contract instance per Layer2OutputType. Each contract
# reports its own output type, which is used as the registry key.
_DEFAULTS: dict[Layer2OutputType, AbstractLayer2OutputContract] = {
    contract.output_type: contract
    for contract in (
        NumericOutputContract(),
        FlagOutputContract(),
        CategoricalOutputContract(),
        TemporalOutputContract(),
    )
}
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def get_default_output_contract(ot: Layer2OutputType) -> AbstractLayer2OutputContract:
    """Look up the shared default output contract registered for *ot*.

    Raises:
        KeyError: If no default contract is registered for *ot*.
    """
    contract = _DEFAULTS[ot]
    return contract
|
featkit/dataset/.gitkeep
ADDED
|
File without changes
|
|
File without changes
|