featkit 0.4.2__tar.gz → 0.4.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {featkit-0.4.2 → featkit-0.4.3}/CHANGELOG.md +6 -0
- {featkit-0.4.2 → featkit-0.4.3}/PKG-INFO +16 -9
- {featkit-0.4.2 → featkit-0.4.3}/README.md +15 -8
- {featkit-0.4.2 → featkit-0.4.3}/pyproject.toml +1 -1
- {featkit-0.4.2 → featkit-0.4.3}/src/featkit/builders/ratio_space.py +24 -6
- {featkit-0.4.2 → featkit-0.4.3}/src/featkit/config.py +15 -1
- {featkit-0.4.2 → featkit-0.4.3}/src/featkit/enums.py +16 -0
- {featkit-0.4.2 → featkit-0.4.3}/src/featkit/pipeline.py +1 -1
- {featkit-0.4.2 → featkit-0.4.3}/tests/test_ratio.py +119 -0
- {featkit-0.4.2 → featkit-0.4.3}/.github/workflows/auto-tag.yml +0 -0
- {featkit-0.4.2 → featkit-0.4.3}/.github/workflows/ci.yml +0 -0
- {featkit-0.4.2 → featkit-0.4.3}/.github/workflows/docs.yml +0 -0
- {featkit-0.4.2 → featkit-0.4.3}/.github/workflows/publish.yml +0 -0
- {featkit-0.4.2 → featkit-0.4.3}/.gitignore +0 -0
- {featkit-0.4.2 → featkit-0.4.3}/LICENSE +0 -0
- {featkit-0.4.2 → featkit-0.4.3}/docs/.gitkeep +0 -0
- {featkit-0.4.2 → featkit-0.4.3}/docs/example_databricks_notebook.md +0 -0
- {featkit-0.4.2 → featkit-0.4.3}/docs/examples.md +0 -0
- {featkit-0.4.2 → featkit-0.4.3}/docs/general_plan.md +0 -0
- {featkit-0.4.2 → featkit-0.4.3}/docs/index.md +0 -0
- {featkit-0.4.2 → featkit-0.4.3}/docs/quickstart.md +0 -0
- {featkit-0.4.2 → featkit-0.4.3}/mkdocs.yml +0 -0
- {featkit-0.4.2 → featkit-0.4.3}/src/featkit/__init__.py +0 -0
- {featkit-0.4.2 → featkit-0.4.3}/src/featkit/builders/.gitkeep +0 -0
- {featkit-0.4.2 → featkit-0.4.3}/src/featkit/builders/__init__.py +0 -0
- {featkit-0.4.2 → featkit-0.4.3}/src/featkit/builders/distributional_space.py +0 -0
- {featkit-0.4.2 → featkit-0.4.3}/src/featkit/builders/pivot_space.py +0 -0
- {featkit-0.4.2 → featkit-0.4.3}/src/featkit/builders/temporal_space.py +0 -0
- {featkit-0.4.2 → featkit-0.4.3}/src/featkit/contracts/__init__.py +0 -0
- {featkit-0.4.2 → featkit-0.4.3}/src/featkit/contracts/measurement/.gitkeep +0 -0
- {featkit-0.4.2 → featkit-0.4.3}/src/featkit/contracts/measurement/__init__.py +0 -0
- {featkit-0.4.2 → featkit-0.4.3}/src/featkit/contracts/measurement/base.py +0 -0
- {featkit-0.4.2 → featkit-0.4.3}/src/featkit/contracts/measurement/defaults.py +0 -0
- {featkit-0.4.2 → featkit-0.4.3}/src/featkit/contracts/output/.gitkeep +0 -0
- {featkit-0.4.2 → featkit-0.4.3}/src/featkit/contracts/output/__init__.py +0 -0
- {featkit-0.4.2 → featkit-0.4.3}/src/featkit/contracts/output/base.py +0 -0
- {featkit-0.4.2 → featkit-0.4.3}/src/featkit/contracts/output/defaults.py +0 -0
- {featkit-0.4.2 → featkit-0.4.3}/src/featkit/dataset/.gitkeep +0 -0
- {featkit-0.4.2 → featkit-0.4.3}/src/featkit/dataset/__init__.py +0 -0
- {featkit-0.4.2 → featkit-0.4.3}/src/featkit/dataset/base.py +0 -0
- {featkit-0.4.2 → featkit-0.4.3}/src/featkit/execution/__init__.py +0 -0
- {featkit-0.4.2 → featkit-0.4.3}/src/featkit/execution/adapters/__init__.py +0 -0
- {featkit-0.4.2 → featkit-0.4.3}/src/featkit/execution/adapters/base.py +0 -0
- {featkit-0.4.2 → featkit-0.4.3}/src/featkit/execution/adapters/databricks_adapter.py +0 -0
- {featkit-0.4.2 → featkit-0.4.3}/src/featkit/execution/adapters/databricks_notebook_adapter.py +0 -0
- {featkit-0.4.2 → featkit-0.4.3}/src/featkit/execution/adapters/mock_adapter.py +0 -0
- {featkit-0.4.2 → featkit-0.4.3}/src/featkit/execution/adapters/spark_adapter.py +0 -0
- {featkit-0.4.2 → featkit-0.4.3}/src/featkit/execution/adapters/sqlalchemy_adapter.py +0 -0
- {featkit-0.4.2 → featkit-0.4.3}/src/featkit/execution/domain_resolver.py +0 -0
- {featkit-0.4.2 → featkit-0.4.3}/src/featkit/fields/.gitkeep +0 -0
- {featkit-0.4.2 → featkit-0.4.3}/src/featkit/fields/__init__.py +0 -0
- {featkit-0.4.2 → featkit-0.4.3}/src/featkit/fields/base.py +0 -0
- {featkit-0.4.2 → featkit-0.4.3}/src/featkit/fields/categorical_field.py +0 -0
- {featkit-0.4.2 → featkit-0.4.3}/src/featkit/fields/id_field.py +0 -0
- {featkit-0.4.2 → featkit-0.4.3}/src/featkit/fields/measurement_field.py +0 -0
- {featkit-0.4.2 → featkit-0.4.3}/src/featkit/fields/time_field.py +0 -0
- {featkit-0.4.2 → featkit-0.4.3}/src/featkit/generators/__init__.py +0 -0
- {featkit-0.4.2 → featkit-0.4.3}/src/featkit/generators/base.py +0 -0
- {featkit-0.4.2 → featkit-0.4.3}/src/featkit/generators/output.py +0 -0
- {featkit-0.4.2 → featkit-0.4.3}/src/featkit/generators/pyspark/.gitkeep +0 -0
- {featkit-0.4.2 → featkit-0.4.3}/src/featkit/generators/pyspark/__init__.py +0 -0
- {featkit-0.4.2 → featkit-0.4.3}/src/featkit/generators/pyspark/databricks.py +0 -0
- {featkit-0.4.2 → featkit-0.4.3}/src/featkit/generators/sql/.gitkeep +0 -0
- {featkit-0.4.2 → featkit-0.4.3}/src/featkit/generators/sql/__init__.py +0 -0
- {featkit-0.4.2 → featkit-0.4.3}/src/featkit/generators/sql/base.py +0 -0
- {featkit-0.4.2 → featkit-0.4.3}/src/featkit/generators/sql/databricks.py +0 -0
- {featkit-0.4.2 → featkit-0.4.3}/src/featkit/generators/sql/snowflake.py +0 -0
- {featkit-0.4.2 → featkit-0.4.3}/src/featkit/generators/sql/spark_sql.py +0 -0
- {featkit-0.4.2 → featkit-0.4.3}/src/featkit/layer2/.gitkeep +0 -0
- {featkit-0.4.2 → featkit-0.4.3}/src/featkit/layer2/__init__.py +0 -0
- {featkit-0.4.2 → featkit-0.4.3}/src/featkit/layer2/base.py +0 -0
- {featkit-0.4.2 → featkit-0.4.3}/src/featkit/layer2/distributional.py +0 -0
- {featkit-0.4.2 → featkit-0.4.3}/src/featkit/layer2/pivoted.py +0 -0
- {featkit-0.4.2 → featkit-0.4.3}/src/featkit/layer2/ratio.py +0 -0
- {featkit-0.4.2 → featkit-0.4.3}/src/featkit/layer3/.gitkeep +0 -0
- {featkit-0.4.2 → featkit-0.4.3}/src/featkit/layer3/__init__.py +0 -0
- {featkit-0.4.2 → featkit-0.4.3}/src/featkit/layer3/temporal_feature.py +0 -0
- {featkit-0.4.2 → featkit-0.4.3}/tests/__init__.py +0 -0
- {featkit-0.4.2 → featkit-0.4.3}/tests/test_builders.py +0 -0
- {featkit-0.4.2 → featkit-0.4.3}/tests/test_contracts.py +0 -0
- {featkit-0.4.2 → featkit-0.4.3}/tests/test_enums.py +0 -0
- {featkit-0.4.2 → featkit-0.4.3}/tests/test_execution/__init__.py +0 -0
- {featkit-0.4.2 → featkit-0.4.3}/tests/test_execution/test_adapters.py +0 -0
- {featkit-0.4.2 → featkit-0.4.3}/tests/test_execution/test_domain_resolver.py +0 -0
- {featkit-0.4.2 → featkit-0.4.3}/tests/test_fields.py +0 -0
- {featkit-0.4.2 → featkit-0.4.3}/tests/test_generators/.gitkeep +0 -0
- {featkit-0.4.2 → featkit-0.4.3}/tests/test_generators/__init__.py +0 -0
- {featkit-0.4.2 → featkit-0.4.3}/tests/test_generators/test_base.py +0 -0
- {featkit-0.4.2 → featkit-0.4.3}/tests/test_generators/test_pyspark.py +0 -0
- {featkit-0.4.2 → featkit-0.4.3}/tests/test_generators/test_sql_databricks.py +0 -0
- {featkit-0.4.2 → featkit-0.4.3}/tests/test_generators/test_sql_snowflake.py +0 -0
- {featkit-0.4.2 → featkit-0.4.3}/tests/test_integration.py +0 -0
- {featkit-0.4.2 → featkit-0.4.3}/tests/test_layer2.py +0 -0
- {featkit-0.4.2 → featkit-0.4.3}/tests/test_layer3.py +0 -0
- {featkit-0.4.2 → featkit-0.4.3}/tests/test_output_contracts.py +0 -0
- {featkit-0.4.2 → featkit-0.4.3}/tests/test_pipeline.py +0 -0
|
@@ -7,6 +7,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
|
7
7
|
|
|
8
8
|
## [Unreleased]
|
|
9
9
|
|
|
10
|
+
## [0.4.3] - 2026-06-30
|
|
11
|
+
|
|
12
|
+
### Added
|
|
13
|
+
- `RatioMode` enum with two values: `ALL_PROJECTIONS` (default, existing behaviour) and `GLOBAL_TOTAL` (restricts Layer 2C denominators to the single all-∅ grand-total column, producing one ratio per numerator representing its share of the portfolio total).
|
|
14
|
+
- `FeatureStoreConfig.ratio_mode` parameter (default `RatioMode.ALL_PROJECTIONS`) to select the denominator strategy for `RatioSpaceBuilder`.
|
|
15
|
+
|
|
10
16
|
## [0.4.2] - 2026-06-30
|
|
11
17
|
|
|
12
18
|
### Fixed
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: featkit
|
|
3
|
-
Version: 0.4.2
|
|
3
|
+
Version: 0.4.3
|
|
4
4
|
Summary: featkit — automated feature store generation from relational facts tables
|
|
5
5
|
Project-URL: Repository, https://github.com/Mirkiux/featkit
|
|
6
6
|
Project-URL: Documentation, https://mirkiux.github.io/featkit
|
|
@@ -214,16 +214,23 @@ where `NUMERATOR` and `DENOMINATOR` are full Layer 2A pivot feature names. The d
|
|
|
214
214
|
|
|
215
215
|
The underlying value is `numerator / NULLIF(denominator, 0)` computed per entity per period.
|
|
216
216
|
|
|
217
|
-
|
|
218
|
-
# Numerator: DIGITAL channel + RETAIL sector
|
|
219
|
-
# Denominator: RETAIL sector only (CANAL marginalized → share of DIGITAL within RETAIL)
|
|
220
|
-
SUM__MTO__CANAL_DIGITAL__SECTOR_RETAIL__over__SUM__MTO__SECTOR_RETAIL
|
|
217
|
+
Enabled by setting `include_ratios=True` (requires `include_marginals=True`). The `ratio_mode` parameter controls which denominators are paired with each numerator:
|
|
221
218
|
|
|
222
|
-
|
|
223
|
-
|
|
219
|
+
| `ratio_mode` | Denominators considered | Ratios produced per numerator |
|
|
220
|
+
|---|---|---|
|
|
221
|
+
| `RatioMode.ALL_PROJECTIONS` *(default)* | Every proper marginal projection (partial or fully marginalised) | One per valid denominator |
|
|
222
|
+
| `RatioMode.GLOBAL_TOTAL` | Only the fully-marginalised column (all fields ∅) | Exactly one — the share of the grand total |
|
|
224
223
|
|
|
225
|
-
|
|
226
|
-
|
|
224
|
+
```
|
|
225
|
+
# With RatioMode.ALL_PROJECTIONS (default):
|
|
226
|
+
# Numerator: DIGITAL channel + RETAIL sector → three denominators
|
|
227
|
+
SUM__MTO__CANAL_DIGITAL__SECTOR_RETAIL__over__SUM__MTO__SECTOR_RETAIL # share within RETAIL
|
|
228
|
+
SUM__MTO__CANAL_DIGITAL__SECTOR_RETAIL__over__SUM__MTO__CANAL_DIGITAL # share within DIGITAL
|
|
229
|
+
SUM__MTO__CANAL_DIGITAL__SECTOR_RETAIL__over__SUM__MTO # share of total
|
|
230
|
+
|
|
231
|
+
# With RatioMode.GLOBAL_TOTAL:
|
|
232
|
+
# Same numerator → only the grand-total denominator
|
|
233
|
+
SUM__MTO__CANAL_DIGITAL__SECTOR_RETAIL__over__SUM__MTO # share of total only
|
|
227
234
|
```
|
|
228
235
|
|
|
229
236
|
---
|
|
@@ -146,16 +146,23 @@ where `NUMERATOR` and `DENOMINATOR` are full Layer 2A pivot feature names. The d
|
|
|
146
146
|
|
|
147
147
|
The underlying value is `numerator / NULLIF(denominator, 0)` computed per entity per period.
|
|
148
148
|
|
|
149
|
-
|
|
150
|
-
# Numerator: DIGITAL channel + RETAIL sector
|
|
151
|
-
# Denominator: RETAIL sector only (CANAL marginalized → share of DIGITAL within RETAIL)
|
|
152
|
-
SUM__MTO__CANAL_DIGITAL__SECTOR_RETAIL__over__SUM__MTO__SECTOR_RETAIL
|
|
149
|
+
Enabled by setting `include_ratios=True` (requires `include_marginals=True`). The `ratio_mode` parameter controls which denominators are paired with each numerator:
|
|
153
150
|
|
|
154
|
-
|
|
155
|
-
|
|
151
|
+
| `ratio_mode` | Denominators considered | Ratios produced per numerator |
|
|
152
|
+
|---|---|---|
|
|
153
|
+
| `RatioMode.ALL_PROJECTIONS` *(default)* | Every proper marginal projection (partial or fully marginalised) | One per valid denominator |
|
|
154
|
+
| `RatioMode.GLOBAL_TOTAL` | Only the fully-marginalised column (all fields ∅) | Exactly one — the share of the grand total |
|
|
156
155
|
|
|
157
|
-
|
|
158
|
-
|
|
156
|
+
```
|
|
157
|
+
# With RatioMode.ALL_PROJECTIONS (default):
|
|
158
|
+
# Numerator: DIGITAL channel + RETAIL sector → three denominators
|
|
159
|
+
SUM__MTO__CANAL_DIGITAL__SECTOR_RETAIL__over__SUM__MTO__SECTOR_RETAIL # share within RETAIL
|
|
160
|
+
SUM__MTO__CANAL_DIGITAL__SECTOR_RETAIL__over__SUM__MTO__CANAL_DIGITAL # share within DIGITAL
|
|
161
|
+
SUM__MTO__CANAL_DIGITAL__SECTOR_RETAIL__over__SUM__MTO # share of total
|
|
162
|
+
|
|
163
|
+
# With RatioMode.GLOBAL_TOTAL:
|
|
164
|
+
# Same numerator → only the grand-total denominator
|
|
165
|
+
SUM__MTO__CANAL_DIGITAL__SECTOR_RETAIL__over__SUM__MTO # share of total only
|
|
159
166
|
```
|
|
160
167
|
|
|
161
168
|
---
|
|
@@ -4,6 +4,7 @@ from __future__ import annotations
|
|
|
4
4
|
|
|
5
5
|
import logging
|
|
6
6
|
|
|
7
|
+
from featkit.enums import RatioMode
|
|
7
8
|
from featkit.layer2.pivoted import PivotedColumn
|
|
8
9
|
from featkit.layer2.ratio import RatioPivotedColumn
|
|
9
10
|
|
|
@@ -31,6 +32,11 @@ class RatioSpaceBuilder:
|
|
|
31
32
|
Args:
|
|
32
33
|
pivot_columns: The full set of Layer 2A pivot columns, typically
|
|
33
34
|
``FeatureStorePipeline.layer2a``.
|
|
35
|
+
ratio_mode: Controls which columns are eligible as denominators.
|
|
36
|
+
``RatioMode.ALL_PROJECTIONS`` (default) considers every column
|
|
37
|
+
with at least one ∅ (``None``) field. ``RatioMode.GLOBAL_TOTAL``
|
|
38
|
+
restricts to columns where *all* fields are ∅ — the grand-total
|
|
39
|
+
column — producing one ratio per numerator.
|
|
34
40
|
verbose: When ``True``, emits ``DEBUG``-level log messages listing
|
|
35
41
|
each generated ratio column name.
|
|
36
42
|
"""
|
|
@@ -38,9 +44,11 @@ class RatioSpaceBuilder:
|
|
|
38
44
|
def __init__(
|
|
39
45
|
self,
|
|
40
46
|
pivot_columns: list[PivotedColumn],
|
|
47
|
+
ratio_mode: RatioMode = RatioMode.ALL_PROJECTIONS,
|
|
41
48
|
verbose: bool = False,
|
|
42
49
|
) -> None:
|
|
43
50
|
self.pivot_columns = pivot_columns
|
|
51
|
+
self.ratio_mode = ratio_mode
|
|
44
52
|
self.verbose = verbose
|
|
45
53
|
|
|
46
54
|
def build(self) -> list[RatioPivotedColumn]:
|
|
@@ -56,12 +64,22 @@ class RatioSpaceBuilder:
|
|
|
56
64
|
if c.categorical_combination
|
|
57
65
|
and any(v is not None for v in c.categorical_combination.values())
|
|
58
66
|
]
|
|
59
|
-
# Potential denominators: any column with at least one ∅ field (partial or full marginal).
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
67
|
+
# Potential denominators: depends on mode.
|
|
68
|
+
# ALL_PROJECTIONS: any column with at least one ∅ field (partial or full marginal).
|
|
69
|
+
# GLOBAL_TOTAL: only the column where every field is ∅ (grand total).
|
|
70
|
+
if self.ratio_mode == RatioMode.GLOBAL_TOTAL:
|
|
71
|
+
denominators = [
|
|
72
|
+
c
|
|
73
|
+
for c in self.pivot_columns
|
|
74
|
+
if c.categorical_combination
|
|
75
|
+
and all(v is None for v in c.categorical_combination.values())
|
|
76
|
+
]
|
|
77
|
+
else:
|
|
78
|
+
denominators = [
|
|
79
|
+
c
|
|
80
|
+
for c in self.pivot_columns
|
|
81
|
+
if any(v is None for v in c.categorical_combination.values())
|
|
82
|
+
]
|
|
65
83
|
|
|
66
84
|
results: list[RatioPivotedColumn] = []
|
|
67
85
|
seen: set[str] = set()
|
|
@@ -6,7 +6,13 @@ from dataclasses import dataclass, field
|
|
|
6
6
|
from typing import TYPE_CHECKING
|
|
7
7
|
|
|
8
8
|
from featkit.dataset.base import AbstractDataset
|
|
9
|
-
from featkit.enums import Layer2Aggregator, Layer2OutputType, MeasurementType, TemporalOperator
|
|
9
|
+
from featkit.enums import (
|
|
10
|
+
Layer2Aggregator,
|
|
11
|
+
Layer2OutputType,
|
|
12
|
+
MeasurementType,
|
|
13
|
+
RatioMode,
|
|
14
|
+
TemporalOperator,
|
|
15
|
+
)
|
|
10
16
|
|
|
11
17
|
if TYPE_CHECKING:
|
|
12
18
|
from featkit.execution.adapters.base import DataSourceAdapter
|
|
@@ -32,6 +38,13 @@ class FeatureStoreConfig:
|
|
|
32
38
|
combination over each of its proper marginal projections. Has no
|
|
33
39
|
effect when ``include_marginals`` is ``False`` (no marginal
|
|
34
40
|
denominators exist).
|
|
41
|
+
ratio_mode: Controls which denominators are considered when
|
|
42
|
+
``include_ratios`` is ``True``. ``RatioMode.ALL_PROJECTIONS``
|
|
43
|
+
(default) pairs each numerator with every valid proper marginal
|
|
44
|
+
projection. ``RatioMode.GLOBAL_TOTAL`` restricts denominators to
|
|
45
|
+
the single fully-marginalised column (all categorical fields set to
|
|
46
|
+
∅), producing one ratio per numerator representing its share of the
|
|
47
|
+
grand total.
|
|
35
48
|
aggregators_override: Per-measurement-type override for Layer 2
|
|
36
49
|
aggregators. Only contract-valid aggregators are used.
|
|
37
50
|
operators_override: Per-output-type override for temporal operators.
|
|
@@ -53,6 +66,7 @@ class FeatureStoreConfig:
|
|
|
53
66
|
composed_windows: list[int] | None = None
|
|
54
67
|
include_marginals: bool = True
|
|
55
68
|
include_ratios: bool = True
|
|
69
|
+
ratio_mode: RatioMode = RatioMode.ALL_PROJECTIONS
|
|
56
70
|
aggregators_override: dict[MeasurementType, list[Layer2Aggregator]] | None = None
|
|
57
71
|
operators_override: dict[Layer2OutputType, list[TemporalOperator]] | None = field(default=None)
|
|
58
72
|
adapter: DataSourceAdapter | None = None
|
|
@@ -108,3 +108,19 @@ class TimeWindowDirection(Enum):
|
|
|
108
108
|
|
|
109
109
|
BACKWARD = "BACKWARD"
|
|
110
110
|
FORWARD = "FORWARD"
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
class RatioMode(Enum):
|
|
114
|
+
"""Controls which denominators are paired with each numerator in Layer 2C.
|
|
115
|
+
|
|
116
|
+
``ALL_PROJECTIONS`` (default): every proper marginal projection of the
|
|
117
|
+
numerator is used as a denominator — partial marginals (some fields set to
|
|
118
|
+
∅) as well as the global total (all fields ∅).
|
|
119
|
+
|
|
120
|
+
``GLOBAL_TOTAL``: only the fully-marginalised column (all categorical fields
|
|
121
|
+
set to ∅) is used as a denominator, producing a single ratio per numerator
|
|
122
|
+
that represents its share of the grand total.
|
|
123
|
+
"""
|
|
124
|
+
|
|
125
|
+
ALL_PROJECTIONS = "ALL_PROJECTIONS"
|
|
126
|
+
GLOBAL_TOTAL = "GLOBAL_TOTAL"
|
|
@@ -66,7 +66,7 @@ class FeatureStorePipeline:
|
|
|
66
66
|
verbose=cfg.verbose,
|
|
67
67
|
).build()
|
|
68
68
|
self.layer2c = (
|
|
69
|
-
RatioSpaceBuilder(self.layer2a, verbose=cfg.verbose).build()
|
|
69
|
+
RatioSpaceBuilder(self.layer2a, ratio_mode=cfg.ratio_mode, verbose=cfg.verbose).build()
|
|
70
70
|
if cfg.include_ratios and cfg.include_marginals
|
|
71
71
|
else []
|
|
72
72
|
)
|
|
@@ -12,6 +12,7 @@ from featkit.enums import (
|
|
|
12
12
|
Layer2Aggregator,
|
|
13
13
|
Layer2OutputType,
|
|
14
14
|
MeasurementType,
|
|
15
|
+
RatioMode,
|
|
15
16
|
TimeGranularity,
|
|
16
17
|
)
|
|
17
18
|
from featkit.fields.categorical_field import CategoricalField
|
|
@@ -387,3 +388,121 @@ class TestSQLRatioGeneration:
|
|
|
387
388
|
sql = DatabricksSQLCodeGenerator().build_final_join(pipeline).sql
|
|
388
389
|
for col in pipeline.layer2c:
|
|
389
390
|
assert col.column_name in sql
|
|
391
|
+
|
|
392
|
+
|
|
393
|
+
# ---------------------------------------------------------------------------
|
|
394
|
+
# RatioMode.GLOBAL_TOTAL
|
|
395
|
+
# ---------------------------------------------------------------------------
|
|
396
|
+
|
|
397
|
+
|
|
398
|
+
class TestRatioModeGlobalTotal:
|
|
399
|
+
"""RatioMode.GLOBAL_TOTAL restricts denominators to the all-None column only."""
|
|
400
|
+
|
|
401
|
+
def test_global_total_produces_fewer_ratios_than_all_projections(
|
|
402
|
+
self, full_combo, marginal_channel, marginal_region, marginal_all
|
|
403
|
+
):
|
|
404
|
+
cols = [full_combo, marginal_channel, marginal_region, marginal_all]
|
|
405
|
+
all_proj = RatioSpaceBuilder(cols, ratio_mode=RatioMode.ALL_PROJECTIONS).build()
|
|
406
|
+
global_only = RatioSpaceBuilder(cols, ratio_mode=RatioMode.GLOBAL_TOTAL).build()
|
|
407
|
+
assert len(global_only) < len(all_proj)
|
|
408
|
+
|
|
409
|
+
def test_global_total_denominators_are_all_none(
|
|
410
|
+
self, full_combo, marginal_channel, marginal_region, marginal_all
|
|
411
|
+
):
|
|
412
|
+
cols = [full_combo, marginal_channel, marginal_region, marginal_all]
|
|
413
|
+
ratios = RatioSpaceBuilder(cols, ratio_mode=RatioMode.GLOBAL_TOTAL).build()
|
|
414
|
+
for ratio in ratios:
|
|
415
|
+
denom_vals = ratio.denominator.categorical_combination.values()
|
|
416
|
+
assert all(v is None for v in denom_vals), (
|
|
417
|
+
f"Expected all-None denominator, got {ratio.denominator.column_name}"
|
|
418
|
+
)
|
|
419
|
+
|
|
420
|
+
def test_global_total_full_combo_produces_one_ratio(
|
|
421
|
+
self, full_combo, marginal_channel, marginal_region, marginal_all
|
|
422
|
+
):
|
|
423
|
+
# full_combo (ch=retail, r=north) should only pair with marginal_all (ch=None, r=None)
|
|
424
|
+
cols = [full_combo, marginal_channel, marginal_region, marginal_all]
|
|
425
|
+
ratios = RatioSpaceBuilder(cols, ratio_mode=RatioMode.GLOBAL_TOTAL).build()
|
|
426
|
+
full_combo_ratios = [r for r in ratios if r.numerator is full_combo]
|
|
427
|
+
assert len(full_combo_ratios) == 1
|
|
428
|
+
assert full_combo_ratios[0].denominator is marginal_all
|
|
429
|
+
|
|
430
|
+
def test_global_total_partial_marginals_also_pair_with_global(
|
|
431
|
+
self, full_combo, marginal_channel, marginal_region, marginal_all
|
|
432
|
+
):
|
|
433
|
+
# marginal_channel (ch=None, r=north) should also pair with marginal_all
|
|
434
|
+
cols = [full_combo, marginal_channel, marginal_region, marginal_all]
|
|
435
|
+
ratios = RatioSpaceBuilder(cols, ratio_mode=RatioMode.GLOBAL_TOTAL).build()
|
|
436
|
+
partial_ratios = [r for r in ratios if r.numerator is marginal_channel]
|
|
437
|
+
assert len(partial_ratios) == 1
|
|
438
|
+
assert partial_ratios[0].denominator is marginal_all
|
|
439
|
+
|
|
440
|
+
def test_global_total_no_global_marginal_returns_empty(
|
|
441
|
+
self, full_combo, marginal_channel, marginal_region
|
|
442
|
+
):
|
|
443
|
+
# Without the all-None column there is no valid denominator
|
|
444
|
+
cols = [full_combo, marginal_channel, marginal_region]
|
|
445
|
+
ratios = RatioSpaceBuilder(cols, ratio_mode=RatioMode.GLOBAL_TOTAL).build()
|
|
446
|
+
assert ratios == []
|
|
447
|
+
|
|
448
|
+
def test_global_total_default_mode_is_all_projections(
|
|
449
|
+
self, full_combo, marginal_channel, marginal_region, marginal_all
|
|
450
|
+
):
|
|
451
|
+
cols = [full_combo, marginal_channel, marginal_region, marginal_all]
|
|
452
|
+
default = RatioSpaceBuilder(cols).build()
|
|
453
|
+
explicit = RatioSpaceBuilder(cols, ratio_mode=RatioMode.ALL_PROJECTIONS).build()
|
|
454
|
+
assert [r.column_name for r in default] == [r.column_name for r in explicit]
|
|
455
|
+
|
|
456
|
+
|
|
457
|
+
class TestPipelineRatioModeIntegration:
|
|
458
|
+
def test_global_total_via_config(self):
|
|
459
|
+
cfg = FeatureStoreConfig(
|
|
460
|
+
dataset=_two_cat_dataset(),
|
|
461
|
+
output_schema="s",
|
|
462
|
+
output_table_prefix="p_",
|
|
463
|
+
time_windows=[3],
|
|
464
|
+
include_marginals=True,
|
|
465
|
+
include_ratios=True,
|
|
466
|
+
ratio_mode=RatioMode.GLOBAL_TOTAL,
|
|
467
|
+
)
|
|
468
|
+
pipeline = FeatureStorePipeline(cfg).build()
|
|
469
|
+
assert len(pipeline.layer2c) > 0
|
|
470
|
+
for col in pipeline.layer2c:
|
|
471
|
+
denom_vals = col.denominator.categorical_combination.values()
|
|
472
|
+
assert all(v is None for v in denom_vals)
|
|
473
|
+
|
|
474
|
+
def test_global_total_fewer_ratios_than_all_projections(self):
|
|
475
|
+
ds = _two_cat_dataset()
|
|
476
|
+
all_proj = FeatureStorePipeline(
|
|
477
|
+
FeatureStoreConfig(
|
|
478
|
+
dataset=ds,
|
|
479
|
+
output_schema="s",
|
|
480
|
+
output_table_prefix="p_",
|
|
481
|
+
time_windows=[3],
|
|
482
|
+
include_marginals=True,
|
|
483
|
+
include_ratios=True,
|
|
484
|
+
)
|
|
485
|
+
).build()
|
|
486
|
+
global_only = FeatureStorePipeline(
|
|
487
|
+
FeatureStoreConfig(
|
|
488
|
+
dataset=ds,
|
|
489
|
+
output_schema="s",
|
|
490
|
+
output_table_prefix="p_",
|
|
491
|
+
time_windows=[3],
|
|
492
|
+
include_marginals=True,
|
|
493
|
+
include_ratios=True,
|
|
494
|
+
ratio_mode=RatioMode.GLOBAL_TOTAL,
|
|
495
|
+
)
|
|
496
|
+
).build()
|
|
497
|
+
assert len(global_only.layer2c) < len(all_proj.layer2c)
|
|
498
|
+
|
|
499
|
+
def test_default_ratio_mode_is_all_projections(self):
|
|
500
|
+
cfg = FeatureStoreConfig(
|
|
501
|
+
dataset=_two_cat_dataset(),
|
|
502
|
+
output_schema="s",
|
|
503
|
+
output_table_prefix="p_",
|
|
504
|
+
time_windows=[3],
|
|
505
|
+
include_marginals=True,
|
|
506
|
+
include_ratios=True,
|
|
507
|
+
)
|
|
508
|
+
assert cfg.ratio_mode == RatioMode.ALL_PROJECTIONS
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{featkit-0.4.2 → featkit-0.4.3}/src/featkit/execution/adapters/databricks_notebook_adapter.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|