featkit 0.3.0__tar.gz → 0.4.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96)
  1. featkit-0.4.1/.github/workflows/auto-tag.yml +54 -0
  2. {featkit-0.3.0 → featkit-0.4.1}/CHANGELOG.md +11 -0
  3. {featkit-0.3.0 → featkit-0.4.1}/PKG-INFO +1 -1
  4. {featkit-0.3.0 → featkit-0.4.1}/pyproject.toml +2 -1
  5. featkit-0.4.1/src/featkit/builders/ratio_space.py +102 -0
  6. {featkit-0.3.0 → featkit-0.4.1}/src/featkit/builders/temporal_space.py +3 -3
  7. {featkit-0.3.0 → featkit-0.4.1}/src/featkit/config.py +7 -0
  8. {featkit-0.3.0 → featkit-0.4.1}/src/featkit/execution/domain_resolver.py +19 -2
  9. {featkit-0.3.0 → featkit-0.4.1}/src/featkit/generators/sql/base.py +30 -18
  10. {featkit-0.3.0 → featkit-0.4.1}/src/featkit/layer2/base.py +36 -26
  11. featkit-0.4.1/src/featkit/layer2/ratio.py +101 -0
  12. {featkit-0.3.0 → featkit-0.4.1}/src/featkit/layer3/temporal_feature.py +3 -3
  13. {featkit-0.3.0 → featkit-0.4.1}/src/featkit/pipeline.py +10 -2
  14. {featkit-0.3.0 → featkit-0.4.1}/tests/test_contracts.py +1 -1
  15. {featkit-0.3.0 → featkit-0.4.1}/tests/test_execution/test_domain_resolver.py +56 -0
  16. featkit-0.4.1/tests/test_ratio.py +389 -0
  17. {featkit-0.3.0 → featkit-0.4.1}/.github/workflows/ci.yml +0 -0
  18. {featkit-0.3.0 → featkit-0.4.1}/.github/workflows/docs.yml +0 -0
  19. {featkit-0.3.0 → featkit-0.4.1}/.github/workflows/publish.yml +0 -0
  20. {featkit-0.3.0 → featkit-0.4.1}/.gitignore +0 -0
  21. {featkit-0.3.0 → featkit-0.4.1}/LICENSE +0 -0
  22. {featkit-0.3.0 → featkit-0.4.1}/README.md +0 -0
  23. {featkit-0.3.0 → featkit-0.4.1}/docs/.gitkeep +0 -0
  24. {featkit-0.3.0 → featkit-0.4.1}/docs/example_databricks_notebook.md +0 -0
  25. {featkit-0.3.0 → featkit-0.4.1}/docs/examples.md +0 -0
  26. {featkit-0.3.0 → featkit-0.4.1}/docs/general_plan.md +0 -0
  27. {featkit-0.3.0 → featkit-0.4.1}/docs/index.md +0 -0
  28. {featkit-0.3.0 → featkit-0.4.1}/docs/quickstart.md +0 -0
  29. {featkit-0.3.0 → featkit-0.4.1}/mkdocs.yml +0 -0
  30. {featkit-0.3.0 → featkit-0.4.1}/src/featkit/__init__.py +0 -0
  31. {featkit-0.3.0 → featkit-0.4.1}/src/featkit/builders/.gitkeep +0 -0
  32. {featkit-0.3.0 → featkit-0.4.1}/src/featkit/builders/__init__.py +0 -0
  33. {featkit-0.3.0 → featkit-0.4.1}/src/featkit/builders/distributional_space.py +0 -0
  34. {featkit-0.3.0 → featkit-0.4.1}/src/featkit/builders/pivot_space.py +0 -0
  35. {featkit-0.3.0 → featkit-0.4.1}/src/featkit/contracts/__init__.py +0 -0
  36. {featkit-0.3.0 → featkit-0.4.1}/src/featkit/contracts/measurement/.gitkeep +0 -0
  37. {featkit-0.3.0 → featkit-0.4.1}/src/featkit/contracts/measurement/__init__.py +0 -0
  38. {featkit-0.3.0 → featkit-0.4.1}/src/featkit/contracts/measurement/base.py +0 -0
  39. {featkit-0.3.0 → featkit-0.4.1}/src/featkit/contracts/measurement/defaults.py +0 -0
  40. {featkit-0.3.0 → featkit-0.4.1}/src/featkit/contracts/output/.gitkeep +0 -0
  41. {featkit-0.3.0 → featkit-0.4.1}/src/featkit/contracts/output/__init__.py +0 -0
  42. {featkit-0.3.0 → featkit-0.4.1}/src/featkit/contracts/output/base.py +0 -0
  43. {featkit-0.3.0 → featkit-0.4.1}/src/featkit/contracts/output/defaults.py +0 -0
  44. {featkit-0.3.0 → featkit-0.4.1}/src/featkit/dataset/.gitkeep +0 -0
  45. {featkit-0.3.0 → featkit-0.4.1}/src/featkit/dataset/__init__.py +0 -0
  46. {featkit-0.3.0 → featkit-0.4.1}/src/featkit/dataset/base.py +0 -0
  47. {featkit-0.3.0 → featkit-0.4.1}/src/featkit/enums.py +0 -0
  48. {featkit-0.3.0 → featkit-0.4.1}/src/featkit/execution/__init__.py +0 -0
  49. {featkit-0.3.0 → featkit-0.4.1}/src/featkit/execution/adapters/__init__.py +0 -0
  50. {featkit-0.3.0 → featkit-0.4.1}/src/featkit/execution/adapters/base.py +0 -0
  51. {featkit-0.3.0 → featkit-0.4.1}/src/featkit/execution/adapters/databricks_adapter.py +0 -0
  52. {featkit-0.3.0 → featkit-0.4.1}/src/featkit/execution/adapters/databricks_notebook_adapter.py +0 -0
  53. {featkit-0.3.0 → featkit-0.4.1}/src/featkit/execution/adapters/mock_adapter.py +0 -0
  54. {featkit-0.3.0 → featkit-0.4.1}/src/featkit/execution/adapters/spark_adapter.py +0 -0
  55. {featkit-0.3.0 → featkit-0.4.1}/src/featkit/execution/adapters/sqlalchemy_adapter.py +0 -0
  56. {featkit-0.3.0 → featkit-0.4.1}/src/featkit/fields/.gitkeep +0 -0
  57. {featkit-0.3.0 → featkit-0.4.1}/src/featkit/fields/__init__.py +0 -0
  58. {featkit-0.3.0 → featkit-0.4.1}/src/featkit/fields/base.py +0 -0
  59. {featkit-0.3.0 → featkit-0.4.1}/src/featkit/fields/categorical_field.py +0 -0
  60. {featkit-0.3.0 → featkit-0.4.1}/src/featkit/fields/id_field.py +0 -0
  61. {featkit-0.3.0 → featkit-0.4.1}/src/featkit/fields/measurement_field.py +0 -0
  62. {featkit-0.3.0 → featkit-0.4.1}/src/featkit/fields/time_field.py +0 -0
  63. {featkit-0.3.0 → featkit-0.4.1}/src/featkit/generators/__init__.py +0 -0
  64. {featkit-0.3.0 → featkit-0.4.1}/src/featkit/generators/base.py +0 -0
  65. {featkit-0.3.0 → featkit-0.4.1}/src/featkit/generators/output.py +0 -0
  66. {featkit-0.3.0 → featkit-0.4.1}/src/featkit/generators/pyspark/.gitkeep +0 -0
  67. {featkit-0.3.0 → featkit-0.4.1}/src/featkit/generators/pyspark/__init__.py +0 -0
  68. {featkit-0.3.0 → featkit-0.4.1}/src/featkit/generators/pyspark/databricks.py +0 -0
  69. {featkit-0.3.0 → featkit-0.4.1}/src/featkit/generators/sql/.gitkeep +0 -0
  70. {featkit-0.3.0 → featkit-0.4.1}/src/featkit/generators/sql/__init__.py +0 -0
  71. {featkit-0.3.0 → featkit-0.4.1}/src/featkit/generators/sql/databricks.py +0 -0
  72. {featkit-0.3.0 → featkit-0.4.1}/src/featkit/generators/sql/snowflake.py +0 -0
  73. {featkit-0.3.0 → featkit-0.4.1}/src/featkit/generators/sql/spark_sql.py +0 -0
  74. {featkit-0.3.0 → featkit-0.4.1}/src/featkit/layer2/.gitkeep +0 -0
  75. {featkit-0.3.0 → featkit-0.4.1}/src/featkit/layer2/__init__.py +0 -0
  76. {featkit-0.3.0 → featkit-0.4.1}/src/featkit/layer2/distributional.py +0 -0
  77. {featkit-0.3.0 → featkit-0.4.1}/src/featkit/layer2/pivoted.py +0 -0
  78. {featkit-0.3.0 → featkit-0.4.1}/src/featkit/layer3/.gitkeep +0 -0
  79. {featkit-0.3.0 → featkit-0.4.1}/src/featkit/layer3/__init__.py +0 -0
  80. {featkit-0.3.0 → featkit-0.4.1}/tests/__init__.py +0 -0
  81. {featkit-0.3.0 → featkit-0.4.1}/tests/test_builders.py +0 -0
  82. {featkit-0.3.0 → featkit-0.4.1}/tests/test_enums.py +0 -0
  83. {featkit-0.3.0 → featkit-0.4.1}/tests/test_execution/__init__.py +0 -0
  84. {featkit-0.3.0 → featkit-0.4.1}/tests/test_execution/test_adapters.py +0 -0
  85. {featkit-0.3.0 → featkit-0.4.1}/tests/test_fields.py +0 -0
  86. {featkit-0.3.0 → featkit-0.4.1}/tests/test_generators/.gitkeep +0 -0
  87. {featkit-0.3.0 → featkit-0.4.1}/tests/test_generators/__init__.py +0 -0
  88. {featkit-0.3.0 → featkit-0.4.1}/tests/test_generators/test_base.py +0 -0
  89. {featkit-0.3.0 → featkit-0.4.1}/tests/test_generators/test_pyspark.py +0 -0
  90. {featkit-0.3.0 → featkit-0.4.1}/tests/test_generators/test_sql_databricks.py +0 -0
  91. {featkit-0.3.0 → featkit-0.4.1}/tests/test_generators/test_sql_snowflake.py +0 -0
  92. {featkit-0.3.0 → featkit-0.4.1}/tests/test_integration.py +0 -0
  93. {featkit-0.3.0 → featkit-0.4.1}/tests/test_layer2.py +0 -0
  94. {featkit-0.3.0 → featkit-0.4.1}/tests/test_layer3.py +0 -0
  95. {featkit-0.3.0 → featkit-0.4.1}/tests/test_output_contracts.py +0 -0
  96. {featkit-0.3.0 → featkit-0.4.1}/tests/test_pipeline.py +0 -0
@@ -0,0 +1,54 @@
1
+ name: Auto-tag on version bump
2
+
3
+ on:
4
+ push:
5
+ branches:
6
+ - main
7
+ paths:
8
+ - "pyproject.toml"
9
+
10
+ jobs:
11
+ tag:
12
+ name: Create version tag
13
+ runs-on: ubuntu-latest
14
+
15
+ steps:
16
+ - name: Ensure RELEASE_TOKEN is configured
17
+ env:
18
+ RELEASE_TOKEN: ${{ secrets.RELEASE_TOKEN }}
19
+ run: |
20
+ if [ -z "$RELEASE_TOKEN" ]; then
21
+ echo "RELEASE_TOKEN secret is not set. Add it (PAT with contents:read/write) so tag pushes can trigger publish.yml." >&2
22
+ exit 1
23
+ fi
24
+
25
+ - uses: actions/checkout@v4
26
+ with:
27
+ fetch-depth: 0
28
+ # A PAT is required so the tag push triggers downstream workflows
29
+ # (pushes made with GITHUB_TOKEN are intentionally excluded from
30
+ # workflow triggers by GitHub to prevent infinite loops).
31
+ token: ${{ secrets.RELEASE_TOKEN }}
32
+
33
+ - name: Read version from pyproject.toml
34
+ id: version
35
+ run: |
36
+ VERSION=$(grep '^version = ' pyproject.toml | head -1 | sed 's/version = "\(.*\)"/\1/')
37
+ echo "version=$VERSION" >> $GITHUB_OUTPUT
38
+
39
+ - name: Check if tag exists
40
+ id: tag_check
41
+ run: |
42
+ if git rev-parse "v${{ steps.version.outputs.version }}" >/dev/null 2>&1; then
43
+ echo "exists=true" >> $GITHUB_OUTPUT
44
+ else
45
+ echo "exists=false" >> $GITHUB_OUTPUT
46
+ fi
47
+
48
+ - name: Create and push tag
49
+ if: steps.tag_check.outputs.exists == 'false'
50
+ run: |
51
+ git config user.name "github-actions[bot]"
52
+ git config user.email "github-actions[bot]@users.noreply.github.com"
53
+ git tag "v${{ steps.version.outputs.version }}"
54
+ git push origin "v${{ steps.version.outputs.version }}"
@@ -7,6 +7,17 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
7
7
 
8
8
  ## [Unreleased]
9
9
 
10
+ ## [0.4.1] - 2026-06-09
11
+
12
+ ### Fixed
13
+ - CI: auto-tag workflow now uses a PAT (`RELEASE_TOKEN`) to push tags so that `publish.yml` is triggered correctly (`fix(ci)`)
14
+
15
+ ## [0.4.0] - 2026-06-09
16
+
17
+ ### Added
18
+ - Ratio/percentage features (`RatioPivotedColumn`, `RatioSpaceBuilder`): for every pivot combination with at least one non-`None` categorical value, a `numerator / NULLIF(denominator, 0)` column is generated for each proper marginal projection of that combination. Controlled by `FeatureStoreConfig.include_ratios` (default `True`, requires `include_marginals=True`). (`feat(ratio)`)
19
+ - `verbose` parameter on `AdapterDomainResolver` and `AdapterCombinationResolver`: when `True`, the generated `SELECT DISTINCT` SQL is emitted at `DEBUG` level before execution. `FeatureStorePipeline` forwards `cfg.verbose` to the combination resolver automatically. (`feat(domain-resolver)`)
20
+
10
21
  ## [0.3.0] - 2026-06-08
11
22
 
12
23
  ### Added
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: featkit
3
- Version: 0.3.0
3
+ Version: 0.4.1
4
4
  Summary: featkit — automated feature store generation from relational facts tables
5
5
  Project-URL: Repository, https://github.com/Mirkiux/featkit
6
6
  Project-URL: Documentation, https://mirkiux.github.io/featkit
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "featkit"
7
- version = "0.3.0"
7
+ version = "0.4.1"
8
8
  description = "featkit — automated feature store generation from relational facts tables"
9
9
  readme = "README.md"
10
10
  license = { file = "LICENSE" }
@@ -78,6 +78,7 @@ module = ["tests.*"]
78
78
  disallow_untyped_defs = false
79
79
  disallow_untyped_calls = false
80
80
  disallow_any_generics = false
81
+ disallow_incomplete_defs = false
81
82
 
82
83
  [tool.pytest.ini_options]
83
84
  testpaths = ["tests"]
@@ -0,0 +1,102 @@
1
+ """RatioSpaceBuilder — generates RatioPivotedColumn objects from pivot columns."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import logging
6
+
7
+ from featkit.layer2.pivoted import PivotedColumn
8
+ from featkit.layer2.ratio import RatioPivotedColumn
9
+
10
+ _log = logging.getLogger(__name__)
11
+
12
+
13
+ class RatioSpaceBuilder:
14
+ """Generates all valid ratio columns from a list of pivot columns.
15
+
16
+ For each pivot column that has at least one non-``None`` categorical value
17
+ (potential numerator), the builder finds every other column in the list
18
+ that is a proper marginal projection of it and creates a
19
+ :class:`~featkit.layer2.ratio.RatioPivotedColumn` for each valid
20
+ (numerator, denominator) pair.
21
+
22
+ A proper marginal projection (denominator) satisfies:
23
+
24
+ * same aggregator and source measurement instance,
25
+ * same set of categorical fields,
26
+ * every non-``None`` denominator value equals the numerator's value for
27
+ that field, and
28
+ * at least one field that is ``None`` in the denominator but non-``None``
29
+ in the numerator (the denominator sums over that dimension).
30
+
31
+ Args:
32
+ pivot_columns: The full set of Layer 2A pivot columns, typically
33
+ ``FeatureStorePipeline.layer2a``.
34
+ verbose: When ``True``, emits ``DEBUG``-level log messages listing
35
+ each generated ratio column name.
36
+ """
37
+
38
+ def __init__(
39
+ self,
40
+ pivot_columns: list[PivotedColumn],
41
+ verbose: bool = False,
42
+ ) -> None:
43
+ self.pivot_columns = pivot_columns
44
+ self.verbose = verbose
45
+
46
+ def build(self) -> list[RatioPivotedColumn]:
47
+ """Build and return all RatioPivotedColumn objects."""
48
+ if self.verbose:
49
+ _log.debug("RatioSpaceBuilder.build() started")
50
+
51
+ # Potential numerators: must have at least one categorical field (non-empty combination)
52
+ # and at least one non-None value (explicitly excludes all-None/global-marginal columns).
53
+ numerators = [
54
+ c
55
+ for c in self.pivot_columns
56
+ if c.categorical_combination
57
+ and any(v is not None for v in c.categorical_combination.values())
58
+ ]
59
+ # Potential denominators: any column with at least one None categorical value
60
+ denominators = [
61
+ c
62
+ for c in self.pivot_columns
63
+ if any(v is None for v in c.categorical_combination.values())
64
+ ]
65
+
66
+ results: list[RatioPivotedColumn] = []
67
+ seen: set[str] = set()
68
+
69
+ for num in numerators:
70
+ num_fields = set(num.categorical_combination.keys())
71
+ for denom in denominators:
72
+ if (
73
+ denom.layer2_aggregator != num.layer2_aggregator
74
+ or denom.source_measurement is not num.source_measurement
75
+ or set(denom.categorical_combination.keys()) != num_fields
76
+ ):
77
+ continue
78
+ # Denom must not contradict num, and must marginalize at least one
79
+ # field that num has a non-None value for.
80
+ valid = True
81
+ is_proper = False
82
+ for f, dv in denom.categorical_combination.items():
83
+ nv = num.categorical_combination[f]
84
+ if dv is not None and dv != nv:
85
+ valid = False
86
+ break
87
+ if dv is None and nv is not None:
88
+ is_proper = True
89
+ if not valid or not is_proper:
90
+ continue
91
+ col = RatioPivotedColumn(num, denom)
92
+ if col.column_name not in seen:
93
+ seen.add(col.column_name)
94
+ if self.verbose:
95
+ _log.debug("column_name: %r", col.column_name)
96
+ results.append(col)
97
+
98
+ if self.verbose:
99
+ _log.debug(
100
+ "RatioSpaceBuilder.build() done — %d ratio column(s) generated", len(results)
101
+ )
102
+ return results
@@ -6,7 +6,7 @@ import logging
6
6
  from collections.abc import Sequence
7
7
 
8
8
  from featkit.enums import Layer2OutputType, TemporalOperator, TimeWindowDirection
9
- from featkit.layer2.base import AbstractLayer2Column
9
+ from featkit.layer2.base import AbstractL2Column
10
10
  from featkit.layer3.temporal_feature import _POINT_IN_TIME_OPERATORS, TemporalFeature
11
11
 
12
12
  _log = logging.getLogger(__name__)
@@ -44,14 +44,14 @@ class TemporalSpaceBuilder:
44
44
 
45
45
  def __init__(
46
46
  self,
47
- layer2_columns: list[AbstractLayer2Column],
47
+ layer2_columns: list[AbstractL2Column],
48
48
  time_windows: list[int],
49
49
  composed_windows: list[int] | None = None,
50
50
  direction: TimeWindowDirection = TimeWindowDirection.BACKWARD,
51
51
  operators_override: dict[Layer2OutputType, list[TemporalOperator]] | None = None,
52
52
  verbose: bool = False,
53
53
  ) -> None:
54
- self.layer2_columns = layer2_columns
54
+ self.layer2_columns: list[AbstractL2Column] = layer2_columns
55
55
  self.time_windows = time_windows
56
56
  self.composed_windows = composed_windows
57
57
  self.direction = direction
@@ -26,6 +26,12 @@ class FeatureStoreConfig:
26
26
  When ``None`` those operators are omitted entirely.
27
27
  include_marginals: When ``True``, ``PivotSpaceBuilder`` includes the ∅
28
28
  marginal combination for each categorical.
29
+ include_ratios: When ``True`` (and ``include_marginals`` is also
30
+ ``True``), ``RatioSpaceBuilder`` derives a ratio column
31
+ (``numerator / NULLIF(denominator, 0)``) for every pivot
32
+ combination over each of its proper marginal projections. Has no
33
+ effect when ``include_marginals`` is ``False`` (no marginal
34
+ denominators exist).
29
35
  aggregators_override: Per-measurement-type override for Layer 2
30
36
  aggregators. Only contract-valid aggregators are used.
31
37
  operators_override: Per-output-type override for temporal operators.
@@ -46,6 +52,7 @@ class FeatureStoreConfig:
46
52
  time_windows: list[int]
47
53
  composed_windows: list[int] | None = None
48
54
  include_marginals: bool = True
55
+ include_ratios: bool = True
49
56
  aggregators_override: dict[MeasurementType, list[Layer2Aggregator]] | None = None
50
57
  operators_override: dict[Layer2OutputType, list[TemporalOperator]] | None = field(default=None)
51
58
  adapter: DataSourceAdapter | None = None
@@ -2,11 +2,14 @@
2
2
 
3
3
  from __future__ import annotations
4
4
 
5
+ import logging
5
6
  import re
6
7
 
7
8
  from featkit.execution.adapters.base import DataSourceAdapter
8
9
  from featkit.fields.categorical_field import CategoricalField
9
10
 
11
+ _log = logging.getLogger(__name__)
12
+
10
13
  # Matches a simple SQL identifier: letters, digits, underscores; must start
11
14
  # with a letter or underscore. Dollar signs are excluded deliberately —
12
15
  # they are technically valid in some dialects but uncommon and easy to abuse.
@@ -69,6 +72,8 @@ class AdapterDomainResolver:
69
72
  source_reference: Fully-qualified table name (e.g.
70
73
  ``"mydb.myschema.silver_transactions"``). Validated against a
71
74
  safe identifier pattern at construction time.
75
+ verbose: When ``True``, emits a ``DEBUG``-level log message with the
76
+ generated SQL before each query is executed.
72
77
 
73
78
  Raises:
74
79
  ValueError: At construction time if *source_reference* contains
@@ -76,10 +81,13 @@ class AdapterDomainResolver:
76
81
  if the resolved field name does the same.
77
82
  """
78
83
 
79
- def __init__(self, adapter: DataSourceAdapter, source_reference: str) -> None:
84
+ def __init__(
85
+ self, adapter: DataSourceAdapter, source_reference: str, verbose: bool = False
86
+ ) -> None:
80
87
  _require_safe_reference(source_reference, "source_reference")
81
88
  self._adapter = adapter
82
89
  self._source_reference = source_reference
90
+ self._verbose = verbose
83
91
 
84
92
  def __call__(self, field: CategoricalField) -> list[str]:
85
93
  """Return distinct non-null values for *field* from the facts table.
@@ -94,6 +102,8 @@ class AdapterDomainResolver:
94
102
  f"WHERE {field.name} IS NOT NULL "
95
103
  f"ORDER BY 1"
96
104
  )
105
+ if self._verbose:
106
+ _log.debug("AdapterDomainResolver SQL: %s", sql)
97
107
  df = self._adapter.execute(sql)
98
108
  return list(df.iloc[:, 0].astype(str))
99
109
 
@@ -121,16 +131,21 @@ class AdapterCombinationResolver:
121
131
  instance used to execute the query.
122
132
  source_reference: Fully-qualified table name. Validated at
123
133
  construction time.
134
+ verbose: When ``True``, emits a ``DEBUG``-level log message with the
135
+ generated SQL before each query is executed.
124
136
 
125
137
  Raises:
126
138
  ValueError: At construction time if *source_reference* is unsafe,
127
139
  or at call time if any field name is unsafe.
128
140
  """
129
141
 
130
- def __init__(self, adapter: DataSourceAdapter, source_reference: str) -> None:
142
+ def __init__(
143
+ self, adapter: DataSourceAdapter, source_reference: str, verbose: bool = False
144
+ ) -> None:
131
145
  _require_safe_reference(source_reference, "source_reference")
132
146
  self._adapter = adapter
133
147
  self._source_reference = source_reference
148
+ self._verbose = verbose
134
149
 
135
150
  def __call__(self, fields: list[CategoricalField]) -> list[dict[CategoricalField, str]]:
136
151
  """Return observed non-null combinations for *fields* from the facts table.
@@ -169,6 +184,8 @@ class AdapterCombinationResolver:
169
184
  f"WHERE {' AND '.join(where_parts)} "
170
185
  f"ORDER BY {order_list}"
171
186
  )
187
+ if self._verbose:
188
+ _log.debug("AdapterCombinationResolver SQL: %s", sql)
172
189
  df = self._adapter.execute(sql)
173
190
  if df.empty:
174
191
  return []
@@ -17,6 +17,7 @@ from featkit.generators.output import SQLOutput
17
17
 
18
18
  if TYPE_CHECKING:
19
19
  from featkit.layer2.distributional import DistributionalColumn
20
+ from featkit.layer2.pivoted import PivotedColumn
20
21
  from featkit.layer3.temporal_feature import TemporalFeature
21
22
  from featkit.pipeline import FeatureStorePipeline
22
23
 
@@ -160,6 +161,26 @@ class AbstractSQLCodeGenerator(AbstractCodeGenerator):
160
161
  # build_layer2a
161
162
  # ------------------------------------------------------------------
162
163
 
164
+ def _pivoted_agg_expr(self, col: PivotedColumn) -> str:
165
+ """Return the bare aggregate SQL expression for *col* (without alias).
166
+
167
+ Used both for regular pivot columns and as numerator/denominator
168
+ sub-expressions when building ratio columns.
169
+ """
170
+ meas = col.source_measurement.name
171
+ agg = col.layer2_aggregator.value
172
+ conditions = [
173
+ f"{self._quoted_id(cat_field.name)} = {self._str_literal(cat_val)}"
174
+ for cat_field, cat_val in sorted(
175
+ col.categorical_combination.items(), key=lambda kv: kv[0].name
176
+ )
177
+ if cat_val is not None
178
+ ]
179
+ if conditions:
180
+ predicate = " AND ".join(conditions)
181
+ return f"{agg}(CASE WHEN {predicate} THEN {meas} END)"
182
+ return f"{agg}({meas})"
183
+
163
184
  def build_layer2a(self, pipeline: FeatureStorePipeline) -> SQLOutput:
164
185
  """Generate the Layer 2A pivot aggregation table.
165
186
 
@@ -167,6 +188,9 @@ class AbstractSQLCodeGenerator(AbstractCodeGenerator):
167
188
  :meth:`_str_literal` to escape special characters and prevent
168
189
  SQL injection in the generated script. Column identifiers are
169
190
  double-quoted via :meth:`_quoted_id`.
191
+
192
+ Ratio columns (``layer2c``) are computed in the same SELECT as the
193
+ base pivot columns: ``numerator_expr / NULLIF(denominator_expr, 0)``.
170
194
  """
171
195
  verbose = pipeline.config.verbose
172
196
  if verbose:
@@ -181,25 +205,12 @@ class AbstractSQLCodeGenerator(AbstractCodeGenerator):
181
205
  select_parts: list[str] = list(id_cols) + [time_col]
182
206
 
183
207
  for col in pipeline.layer2a:
184
- meas = col.source_measurement.name
185
- agg = col.layer2_aggregator.value
186
- alias = col.column_name
187
-
188
- conditions = [
189
- f"{self._quoted_id(cat_field.name)} = {self._str_literal(cat_val)}"
190
- for cat_field, cat_val in sorted(
191
- col.categorical_combination.items(), key=lambda kv: kv[0].name
192
- )
193
- if cat_val is not None
194
- ]
195
-
196
- if conditions:
197
- predicate = " AND ".join(conditions)
198
- agg_expr = f"{agg}(CASE WHEN {predicate} THEN {meas} END)"
199
- else:
200
- agg_expr = f"{agg}({meas})"
208
+ select_parts.append(f"{self._pivoted_agg_expr(col)} AS {col.column_name}")
201
209
 
202
- select_parts.append(f"{agg_expr} AS {alias}")
210
+ for ratio_col in pipeline.layer2c:
211
+ num_expr = self._pivoted_agg_expr(ratio_col.numerator)
212
+ denom_expr = self._pivoted_agg_expr(ratio_col.denominator)
213
+ select_parts.append(f"{num_expr} / NULLIF({denom_expr}, 0) AS {ratio_col.column_name}")
203
214
 
204
215
  group_cols = ", ".join(id_cols + [time_col])
205
216
  select_list = ",\n ".join(select_parts)
@@ -516,6 +527,7 @@ class AbstractSQLCodeGenerator(AbstractCodeGenerator):
516
527
  [f"l2a.{c}" for c in id_cols]
517
528
  + [f"l2a.{time_col}"]
518
529
  + [f"l2a.{col.column_name}" for col in pipeline.layer2a]
530
+ + [f"l2a.{col.column_name}" for col in pipeline.layer2c]
519
531
  )
520
532
  if pipeline.layer2b:
521
533
  select_parts += [f"l2b.{col.column_name}" for col in pipeline.layer2b]
@@ -18,16 +18,12 @@ if TYPE_CHECKING:
18
18
  COLUMN_NAME_SEP = "__"
19
19
 
20
20
 
21
- class AbstractLayer2Column(ABC):
22
- """Common base for every column in the Layer 2 horizontal concept table.
21
+ class AbstractL2Column(ABC):
22
+ """Minimal interface for any Layer 2 column.
23
23
 
24
- Subclasses supply the concrete ``output_type`` and ``column_name``; this
25
- class derives ``output_contract`` from ``output_type`` automatically.
26
-
27
- Raises:
28
- ValueError: If ``layer2_aggregator`` is not permitted by the
29
- measurement's contract, or if ``source_measurement.name``
30
- contains the column name separator.
24
+ All Layer 2 columns — whether pivot aggregations, distributional metrics,
25
+ or derived ratio columns — satisfy this interface. :class:`TemporalFeature`
26
+ and :class:`TemporalSpaceBuilder` accept any subclass of this base.
31
27
  """
32
28
 
33
29
  @staticmethod
@@ -39,6 +35,37 @@ class AbstractLayer2Column(ABC):
39
35
  f"{COLUMN_NAME_SEP!r}"
40
36
  )
41
37
 
38
+ @property
39
+ @abstractmethod
40
+ def output_type(self) -> Layer2OutputType:
41
+ """Layer 2 output type that governs valid Layer 3 temporal operators."""
42
+ ...
43
+
44
+ @property
45
+ def output_contract(self) -> AbstractLayer2OutputContract:
46
+ """Contract for the Layer 2 → Layer 3 boundary, derived from ``output_type``."""
47
+ return get_default_output_contract(self.output_type)
48
+
49
+ @property
50
+ @abstractmethod
51
+ def column_name(self) -> str:
52
+ """Deterministic name for this column in the Layer 2 output table."""
53
+ ...
54
+
55
+
56
+ class AbstractLayer2Column(AbstractL2Column):
57
+ """Base for Layer 2 columns derived from a single measurement + aggregator.
58
+
59
+ Subclasses supply the concrete ``output_type`` and ``column_name``;
60
+ ``output_contract`` is inherited from :class:`AbstractL2Column`, and this
61
+ class validates that the aggregator is permitted by the measurement's contract.
62
+
63
+ Raises:
64
+ ValueError: If ``layer2_aggregator`` is not permitted by the
65
+ measurement's contract, or if ``source_measurement.name``
66
+ contains the column name separator.
67
+ """
68
+
42
69
  def __init__(
43
70
  self,
44
71
  source_measurement: MeasurementField,
@@ -60,23 +87,6 @@ class AbstractLayer2Column(ABC):
60
87
  self.source_measurement = source_measurement
61
88
  self.layer2_aggregator = layer2_aggregator
62
89
 
63
- @property
64
- @abstractmethod
65
- def output_type(self) -> Layer2OutputType:
66
- """Layer 2 output type that governs valid Layer 3 temporal operators."""
67
- ...
68
-
69
- @property
70
- def output_contract(self) -> AbstractLayer2OutputContract:
71
- """Contract for the Layer 2 → Layer 3 boundary, derived from ``output_type``."""
72
- return get_default_output_contract(self.output_type)
73
-
74
- @property
75
- @abstractmethod
76
- def column_name(self) -> str:
77
- """Deterministic name for this column in the Layer 2 output table."""
78
- ...
79
-
80
90
  def __repr__(self) -> str:
81
91
  return (
82
92
  f"{type(self).__name__}("
@@ -0,0 +1,101 @@
1
+ """RatioPivotedColumn — a Layer 2 column that is the ratio of two PivotedColumns."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from featkit.enums import Layer2OutputType
6
+ from featkit.layer2.base import AbstractL2Column
7
+ from featkit.layer2.pivoted import PivotedColumn
8
+
9
+
10
+ class RatioPivotedColumn(AbstractL2Column):
11
+ """A Layer 2 column representing the ratio of a pivot cell over one of its
12
+ marginal projections.
13
+
14
+ The ratio is computed per entity-period in the Layer 2A table as::
15
+
16
+ numerator_agg_expr / NULLIF(denominator_agg_expr, 0)
17
+
18
+ Temporal operators are then applied to the pre-computed per-period ratio
19
+ exactly as they are for any other numeric Layer 2 column.
20
+
21
+ The denominator must be a *proper* marginal projection of the numerator:
22
+ every non-``None`` denominator value must match the corresponding numerator
23
+ value, and at least one field that is non-``None`` in the numerator must be
24
+ ``None`` in the denominator (i.e. the denominator sums over that dimension).
25
+ The numerator itself may contain ``None`` fields — those dimensions are
26
+ already marginalised in both columns and are left unchanged.
27
+
28
+ Args:
29
+ numerator: A :class:`~featkit.layer2.pivoted.PivotedColumn` with at
30
+ least one non-``None`` categorical value.
31
+ denominator: A :class:`~featkit.layer2.pivoted.PivotedColumn` that is a
32
+ proper marginal projection of *numerator* — same aggregator, same
33
+ measurement instance, same categorical fields, every non-``None``
34
+ denominator value equal to the corresponding numerator value, and
35
+ at least one field that is ``None`` in the denominator but
36
+ non-``None`` in the numerator.
37
+
38
+ Raises:
39
+ ValueError: If the numerator/denominator pair violates any of the
40
+ constraints above.
41
+ """
42
+
43
+ def __init__(self, numerator: PivotedColumn, denominator: PivotedColumn) -> None:
44
+ if numerator.layer2_aggregator != denominator.layer2_aggregator:
45
+ raise ValueError(
46
+ f"numerator and denominator must share the same aggregator; "
47
+ f"got {numerator.layer2_aggregator.name!r} vs "
48
+ f"{denominator.layer2_aggregator.name!r}"
49
+ )
50
+ if numerator.source_measurement is not denominator.source_measurement:
51
+ raise ValueError(
52
+ f"numerator and denominator must share the same source_measurement; "
53
+ f"got {numerator.source_measurement.name!r} vs "
54
+ f"{denominator.source_measurement.name!r}"
55
+ )
56
+ if numerator.categorical_combination.keys() != denominator.categorical_combination.keys():
57
+ raise ValueError("numerator and denominator must have the same categorical fields")
58
+ # Denominator must be a proper projection: it cannot contradict the numerator,
59
+ # and must marginalize at least one field that numerator has a non-None value for.
60
+ has_proper_marginal = False
61
+ for field, dval in denominator.categorical_combination.items():
62
+ nval = numerator.categorical_combination[field]
63
+ if dval is not None and dval != nval:
64
+ raise ValueError(
65
+ f"denominator value for field {field.name!r} is {dval!r} but "
66
+ f"numerator has {nval!r}; "
67
+ "denominator must be a proper marginal projection of the numerator"
68
+ )
69
+ if dval is None and nval is not None:
70
+ has_proper_marginal = True
71
+ if not has_proper_marginal:
72
+ raise ValueError(
73
+ "denominator must marginalize at least one field that has a non-None value "
74
+ "in the numerator (denominator must be a proper marginal projection)"
75
+ )
76
+
77
+ self._numerator = numerator
78
+ self._denominator = denominator
79
+
80
+ @property
81
+ def numerator(self) -> PivotedColumn:
82
+ return self._numerator
83
+
84
+ @property
85
+ def denominator(self) -> PivotedColumn:
86
+ return self._denominator
87
+
88
+ @property
89
+ def output_type(self) -> Layer2OutputType:
90
+ return Layer2OutputType.NUMERIC
91
+
92
+ @property
93
+ def column_name(self) -> str:
94
+ return f"{self._numerator.column_name}__over__{self._denominator.column_name}"
95
+
96
+ def __repr__(self) -> str:
97
+ return (
98
+ f"RatioPivotedColumn("
99
+ f"numerator={self._numerator.column_name!r}, "
100
+ f"denominator={self._denominator.column_name!r})"
101
+ )
@@ -3,7 +3,7 @@
3
3
  from __future__ import annotations
4
4
 
5
5
  from featkit.enums import TemporalOperator, TimeWindowDirection
6
- from featkit.layer2.base import AbstractLayer2Column
6
+ from featkit.layer2.base import AbstractL2Column
7
7
 
8
8
  #: Operators that operate on a single point in time and do not require a window.
9
9
  _POINT_IN_TIME_OPERATORS: frozenset[TemporalOperator] = frozenset(
@@ -31,7 +31,7 @@ class TemporalFeature:
31
31
 
32
32
  def __init__(
33
33
  self,
34
- source: AbstractLayer2Column,
34
+ source: AbstractL2Column,
35
35
  operator: TemporalOperator,
36
36
  direction: TimeWindowDirection,
37
37
  window_size: int | None = None,
@@ -64,7 +64,7 @@ class TemporalFeature:
64
64
  ):
65
65
  raise ValueError(f"window_size must be a positive integer, got {window_size!r}")
66
66
 
67
- self.source = source
67
+ self.source: AbstractL2Column = source
68
68
  self.operator = operator
69
69
  self.direction = direction
70
70
  self.window_size = window_size
@@ -6,10 +6,12 @@ from typing import TYPE_CHECKING
6
6
 
7
7
  from featkit.builders.distributional_space import DistributionalSpaceBuilder
8
8
  from featkit.builders.pivot_space import PivotSpaceBuilder
9
+ from featkit.builders.ratio_space import RatioSpaceBuilder
9
10
  from featkit.builders.temporal_space import TemporalSpaceBuilder
10
11
  from featkit.config import FeatureStoreConfig
11
12
  from featkit.layer2.distributional import DistributionalColumn
12
13
  from featkit.layer2.pivoted import PivotedColumn
14
+ from featkit.layer2.ratio import RatioPivotedColumn
13
15
  from featkit.layer3.temporal_feature import TemporalFeature
14
16
 
15
17
  if TYPE_CHECKING:
@@ -33,6 +35,7 @@ class FeatureStorePipeline:
33
35
  self.config = config
34
36
  self.layer2a: list[PivotedColumn] = []
35
37
  self.layer2b: list[DistributionalColumn] = []
38
+ self.layer2c: list[RatioPivotedColumn] = []
36
39
  self.layer3: list[TemporalFeature] = []
37
40
 
38
41
  def build(self) -> FeatureStorePipeline:
@@ -48,7 +51,7 @@ class FeatureStorePipeline:
48
51
  from featkit.execution.domain_resolver import AdapterCombinationResolver
49
52
 
50
53
  combination_resolver = AdapterCombinationResolver(
51
- cfg.adapter, cfg.dataset.source_reference
54
+ cfg.adapter, cfg.dataset.source_reference, verbose=cfg.verbose
52
55
  )
53
56
 
54
57
  self.layer2a = PivotSpaceBuilder(
@@ -62,8 +65,13 @@ class FeatureStorePipeline:
62
65
  dataset=cfg.dataset,
63
66
  verbose=cfg.verbose,
64
67
  ).build()
68
+ self.layer2c = (
69
+ RatioSpaceBuilder(self.layer2a, verbose=cfg.verbose).build()
70
+ if cfg.include_ratios and cfg.include_marginals
71
+ else []
72
+ )
65
73
  self.layer3 = TemporalSpaceBuilder(
66
- layer2_columns=[*self.layer2a, *self.layer2b],
74
+ layer2_columns=[*self.layer2a, *self.layer2b, *self.layer2c],
67
75
  time_windows=cfg.time_windows,
68
76
  composed_windows=cfg.composed_windows,
69
77
  operators_override=cfg.operators_override,