dbt-features 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dbt_features/__init__.py +16 -0
- dbt_features/__main__.py +4 -0
- dbt_features/catalog.py +237 -0
- dbt_features/cli.py +560 -0
- dbt_features/demo/__init__.py +15 -0
- dbt_features/demo/manifest.json +319 -0
- dbt_features/enrichment/__init__.py +25 -0
- dbt_features/enrichment/adapters/__init__.py +72 -0
- dbt_features/enrichment/adapters/_dbapi.py +116 -0
- dbt_features/enrichment/adapters/bigquery.py +212 -0
- dbt_features/enrichment/adapters/duckdb.py +141 -0
- dbt_features/enrichment/adapters/postgres.py +115 -0
- dbt_features/enrichment/adapters/redshift.py +143 -0
- dbt_features/enrichment/adapters/snowflake.py +192 -0
- dbt_features/enrichment/cache.py +126 -0
- dbt_features/enrichment/engine.py +66 -0
- dbt_features/enrichment/exceptions.py +12 -0
- dbt_features/enrichment/format.py +116 -0
- dbt_features/enrichment/models.py +54 -0
- dbt_features/enrichment/profiles.py +159 -0
- dbt_features/inference.py +118 -0
- dbt_features/parser.py +322 -0
- dbt_features/py.typed +0 -0
- dbt_features/renderer.py +699 -0
- dbt_features/schema.py +178 -0
- dbt_features/static/favicon.svg +6 -0
- dbt_features/static/filter.js +128 -0
- dbt_features/static/lineage.js +179 -0
- dbt_features/static/mermaid.min.js +2029 -0
- dbt_features/static/search.js +295 -0
- dbt_features/static/sort.js +142 -0
- dbt_features/static/style.css +922 -0
- dbt_features/static/tabs.js +36 -0
- dbt_features/static/theme.js +37 -0
- dbt_features/templates/base.html +104 -0
- dbt_features/templates/feature.html +143 -0
- dbt_features/templates/feature_group.html +213 -0
- dbt_features/templates/index.html +200 -0
- dbt_features/templates/lineage.html +49 -0
- dbt_features/templates/model.html +92 -0
- dbt_features/templates/models_index.html +35 -0
- dbt_features-0.3.0.dist-info/METADATA +332 -0
- dbt_features-0.3.0.dist-info/RECORD +46 -0
- dbt_features-0.3.0.dist-info/WHEEL +4 -0
- dbt_features-0.3.0.dist-info/entry_points.txt +2 -0
- dbt_features-0.3.0.dist-info/licenses/LICENSE +190 -0
dbt_features/__init__.py
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
"""dbt-features — a feature catalog for ML teams whose features live as dbt models."""
|
|
2
|
+
|
|
3
|
+
from dbt_features.catalog import Catalog, Feature, FeatureGroup
|
|
4
|
+
from dbt_features.parser import parse_project
|
|
5
|
+
from dbt_features.renderer import render_catalog
|
|
6
|
+
|
|
7
|
+
__version__ = "0.2.0"
|
|
8
|
+
|
|
9
|
+
__all__ = [
|
|
10
|
+
"Catalog",
|
|
11
|
+
"Feature",
|
|
12
|
+
"FeatureGroup",
|
|
13
|
+
"__version__",
|
|
14
|
+
"parse_project",
|
|
15
|
+
"render_catalog",
|
|
16
|
+
]
|
dbt_features/__main__.py
ADDED
dbt_features/catalog.py
ADDED
|
@@ -0,0 +1,237 @@
|
|
|
1
|
+
"""Internal catalog data model — what the renderer consumes.
|
|
2
|
+
|
|
3
|
+
These are deliberately kept separate from the user-facing schema in
|
|
4
|
+
``schema.py``: the user's declared metadata gets normalized, joined with
|
|
5
|
+
manifest/catalog data, and resolved into these objects before rendering.
|
|
6
|
+
That separation keeps validation errors close to the user's input and keeps
|
|
7
|
+
the rendering layer simple.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
from dataclasses import dataclass, field
|
|
13
|
+
from datetime import datetime, timezone
|
|
14
|
+
|
|
15
|
+
from dbt_features.schema import (
|
|
16
|
+
FeatureTableMeta,
|
|
17
|
+
FeatureType,
|
|
18
|
+
Freshness,
|
|
19
|
+
Lifecycle,
|
|
20
|
+
NullBehavior,
|
|
21
|
+
)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@dataclass(frozen=True, slots=True)
class Feature:
    """One feature column of a feature group, as consumed by the renderer.

    Instances are immutable (frozen) and slot-based. The first seven fields
    are required; the versioning / lifecycle fields carry defaults.
    """

    # Identity and free-text documentation of the column.
    name: str
    description: str

    # Typing information; each may be ``None`` when it is not available.
    column_type: str | None
    feature_type: FeatureType | None
    null_behavior: NullBehavior | None

    # Names of the consumers that declared they use this feature.
    used_by: tuple[str, ...]
    tags: tuple[str, ...]

    # Versioning / lifecycle state. ``replacement`` presumably names the
    # successor of a retired feature -- confirm against the schema module.
    definition_version: int = 1
    lifecycle: Lifecycle = Lifecycle.ACTIVE
    replacement: str | None = None
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
@dataclass(frozen=True, slots=True)
|
|
39
|
+
class LineageRef:
|
|
40
|
+
"""A reference to another node in the dbt graph.
|
|
41
|
+
|
|
42
|
+
``unique_id`` is dbt's identifier (e.g. ``model.jaffle.foo``). ``name``
|
|
43
|
+
is the short name used in the UI. ``is_feature_table`` lets the renderer
|
|
44
|
+
link to a catalog page if the dependency is itself a feature table.
|
|
45
|
+
"""
|
|
46
|
+
|
|
47
|
+
unique_id: str
|
|
48
|
+
name: str
|
|
49
|
+
resource_type: str
|
|
50
|
+
is_feature_table: bool
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
@dataclass(frozen=True, slots=True)
|
|
54
|
+
class FeatureGroup:
|
|
55
|
+
"""A dbt model marked as a feature table.
|
|
56
|
+
|
|
57
|
+
Composed of: the user-declared ``FeatureTableMeta`` (validated upstream),
|
|
58
|
+
plus model facts pulled from ``manifest.json`` (description, schema,
|
|
59
|
+
materialization, lineage), plus the columns the user marked as features.
|
|
60
|
+
Non-feature columns (keys, timestamps) are intentionally excluded.
|
|
61
|
+
"""
|
|
62
|
+
|
|
63
|
+
name: str
|
|
64
|
+
unique_id: str
|
|
65
|
+
description: str
|
|
66
|
+
schema_name: str
|
|
67
|
+
database: str | None
|
|
68
|
+
materialization: str
|
|
69
|
+
package_name: str
|
|
70
|
+
file_path: str
|
|
71
|
+
meta: FeatureTableMeta
|
|
72
|
+
features: tuple[Feature, ...]
|
|
73
|
+
upstream: tuple[LineageRef, ...]
|
|
74
|
+
downstream: tuple[LineageRef, ...]
|
|
75
|
+
|
|
76
|
+
@property
|
|
77
|
+
def entity_columns(self) -> list[str]:
|
|
78
|
+
return self.meta.entity_columns
|
|
79
|
+
|
|
80
|
+
@property
|
|
81
|
+
def grain(self) -> list[str]:
|
|
82
|
+
return list(self.meta.grain)
|
|
83
|
+
|
|
84
|
+
@property
|
|
85
|
+
def timestamp_column(self) -> str | None:
|
|
86
|
+
return self.meta.timestamp_column
|
|
87
|
+
|
|
88
|
+
@property
|
|
89
|
+
def freshness(self) -> Freshness | None:
|
|
90
|
+
return self.meta.freshness
|
|
91
|
+
|
|
92
|
+
@property
|
|
93
|
+
def owner(self) -> str | None:
|
|
94
|
+
return self.meta.owner
|
|
95
|
+
|
|
96
|
+
@property
|
|
97
|
+
def tags(self) -> list[str]:
|
|
98
|
+
return list(self.meta.tags)
|
|
99
|
+
|
|
100
|
+
@property
|
|
101
|
+
def definition_version(self) -> int:
|
|
102
|
+
return self.meta.definition_version
|
|
103
|
+
|
|
104
|
+
@property
|
|
105
|
+
def lifecycle(self) -> Lifecycle:
|
|
106
|
+
return self.meta.lifecycle
|
|
107
|
+
|
|
108
|
+
@property
|
|
109
|
+
def replacement(self) -> str | None:
|
|
110
|
+
return self.meta.replacement
|
|
111
|
+
|
|
112
|
+
@property
|
|
113
|
+
def fully_qualified_name(self) -> str:
|
|
114
|
+
parts = [p for p in (self.database, self.schema_name, self.name) if p]
|
|
115
|
+
return ".".join(parts)
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
@dataclass(frozen=True, slots=True)
|
|
119
|
+
class Catalog:
|
|
120
|
+
project_name: str
|
|
121
|
+
feature_groups: tuple[FeatureGroup, ...]
|
|
122
|
+
generated_at: datetime = field(default_factory=lambda: datetime.now(timezone.utc))
|
|
123
|
+
|
|
124
|
+
@property
|
|
125
|
+
def feature_count(self) -> int:
|
|
126
|
+
return sum(len(g.features) for g in self.feature_groups)
|
|
127
|
+
|
|
128
|
+
@property
|
|
129
|
+
def all_tags(self) -> list[str]:
|
|
130
|
+
seen: dict[str, None] = {}
|
|
131
|
+
for g in self.feature_groups:
|
|
132
|
+
for tag in g.tags:
|
|
133
|
+
seen[tag] = None
|
|
134
|
+
return sorted(seen)
|
|
135
|
+
|
|
136
|
+
def by_unique_id(self, unique_id: str) -> FeatureGroup | None:
|
|
137
|
+
for g in self.feature_groups:
|
|
138
|
+
if g.unique_id == unique_id:
|
|
139
|
+
return g
|
|
140
|
+
return None
|
|
141
|
+
|
|
142
|
+
def feature_groups_by_tag(self) -> dict[str, list[FeatureGroup]]:
|
|
143
|
+
"""Group feature groups by tag.
|
|
144
|
+
|
|
145
|
+
Groups with no tags fall under ``"untagged"``. A group with multiple
|
|
146
|
+
tags appears under each one. Kept for backward compatibility and
|
|
147
|
+
for callers (e.g. exporters) that want a tag-faceted view; the
|
|
148
|
+
index page itself groups by entity now to avoid card duplication.
|
|
149
|
+
"""
|
|
150
|
+
|
|
151
|
+
out: dict[str, list[FeatureGroup]] = {}
|
|
152
|
+
for g in self.feature_groups:
|
|
153
|
+
tags = g.tags or ["untagged"]
|
|
154
|
+
for tag in tags:
|
|
155
|
+
out.setdefault(tag, []).append(g)
|
|
156
|
+
for groups in out.values():
|
|
157
|
+
groups.sort(key=lambda g: g.name)
|
|
158
|
+
return dict(sorted(out.items()))
|
|
159
|
+
|
|
160
|
+
def feature_groups_by_entity(self) -> dict[str, list[FeatureGroup]]:
|
|
161
|
+
"""Group feature groups by their primary entity, no duplication.
|
|
162
|
+
|
|
163
|
+
Entity is the join key — the question every feature consumer asks
|
|
164
|
+
first ("what features can I join to a customer?"). We use the
|
|
165
|
+
first declared entity column as the section. Multi-entity groups
|
|
166
|
+
land in ``"Cross-entity"`` so they're discoverable but not
|
|
167
|
+
duplicated. Groups without an entity fall under ``"Other"``.
|
|
168
|
+
"""
|
|
169
|
+
|
|
170
|
+
out: dict[str, list[FeatureGroup]] = {}
|
|
171
|
+
for g in self.feature_groups:
|
|
172
|
+
entities = g.entity_columns
|
|
173
|
+
if not entities:
|
|
174
|
+
key = "Other"
|
|
175
|
+
elif len(entities) > 1:
|
|
176
|
+
key = "Cross-entity"
|
|
177
|
+
else:
|
|
178
|
+
key = entities[0]
|
|
179
|
+
out.setdefault(key, []).append(g)
|
|
180
|
+
for groups in out.values():
|
|
181
|
+
groups.sort(key=lambda g: g.name)
|
|
182
|
+
# Surface "Cross-entity" and "Other" last; everything else alpha.
|
|
183
|
+
def _order(k: str) -> tuple[int, str]:
|
|
184
|
+
if k == "Cross-entity":
|
|
185
|
+
return (1, k)
|
|
186
|
+
if k == "Other":
|
|
187
|
+
return (2, k)
|
|
188
|
+
return (0, k)
|
|
189
|
+
|
|
190
|
+
return {k: out[k] for k in sorted(out.keys(), key=_order)}
|
|
191
|
+
|
|
192
|
+
@property
|
|
193
|
+
def all_entities(self) -> list[str]:
|
|
194
|
+
seen: dict[str, None] = {}
|
|
195
|
+
for g in self.feature_groups:
|
|
196
|
+
for e in g.entity_columns:
|
|
197
|
+
seen[e] = None
|
|
198
|
+
return sorted(seen)
|
|
199
|
+
|
|
200
|
+
@property
|
|
201
|
+
def all_owners(self) -> list[str]:
|
|
202
|
+
seen: dict[str, None] = {}
|
|
203
|
+
for g in self.feature_groups:
|
|
204
|
+
if g.owner:
|
|
205
|
+
seen[g.owner] = None
|
|
206
|
+
return sorted(seen)
|
|
207
|
+
|
|
208
|
+
@property
|
|
209
|
+
def all_models(self) -> list[str]:
|
|
210
|
+
"""Distinct model names declared via column-level ``used_by``.
|
|
211
|
+
|
|
212
|
+
These are typically ML/analytics consumers that don't appear in
|
|
213
|
+
the dbt graph. Sorted, deduped.
|
|
214
|
+
"""
|
|
215
|
+
|
|
216
|
+
seen: dict[str, None] = {}
|
|
217
|
+
for g in self.feature_groups:
|
|
218
|
+
for f in g.features:
|
|
219
|
+
for m in f.used_by:
|
|
220
|
+
seen[m] = None
|
|
221
|
+
return sorted(seen)
|
|
222
|
+
|
|
223
|
+
def features_by_model(self) -> dict[str, list[tuple[FeatureGroup, Feature]]]:
|
|
224
|
+
"""Inverted index: model name -> list of (group, feature) pairs.
|
|
225
|
+
|
|
226
|
+
Powers the ``/models/<name>/`` pages — the consumer-centric view
|
|
227
|
+
that was missing from v0.2.
|
|
228
|
+
"""
|
|
229
|
+
|
|
230
|
+
out: dict[str, list[tuple[FeatureGroup, Feature]]] = {}
|
|
231
|
+
for g in self.feature_groups:
|
|
232
|
+
for f in g.features:
|
|
233
|
+
for m in f.used_by:
|
|
234
|
+
out.setdefault(m, []).append((g, f))
|
|
235
|
+
for entries in out.values():
|
|
236
|
+
entries.sort(key=lambda gf: (gf[0].name, gf[1].name))
|
|
237
|
+
return dict(sorted(out.items()))
|