dbt_features-0.3.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46):
  1. dbt_features/__init__.py +16 -0
  2. dbt_features/__main__.py +4 -0
  3. dbt_features/catalog.py +237 -0
  4. dbt_features/cli.py +560 -0
  5. dbt_features/demo/__init__.py +15 -0
  6. dbt_features/demo/manifest.json +319 -0
  7. dbt_features/enrichment/__init__.py +25 -0
  8. dbt_features/enrichment/adapters/__init__.py +72 -0
  9. dbt_features/enrichment/adapters/_dbapi.py +116 -0
  10. dbt_features/enrichment/adapters/bigquery.py +212 -0
  11. dbt_features/enrichment/adapters/duckdb.py +141 -0
  12. dbt_features/enrichment/adapters/postgres.py +115 -0
  13. dbt_features/enrichment/adapters/redshift.py +143 -0
  14. dbt_features/enrichment/adapters/snowflake.py +192 -0
  15. dbt_features/enrichment/cache.py +126 -0
  16. dbt_features/enrichment/engine.py +66 -0
  17. dbt_features/enrichment/exceptions.py +12 -0
  18. dbt_features/enrichment/format.py +116 -0
  19. dbt_features/enrichment/models.py +54 -0
  20. dbt_features/enrichment/profiles.py +159 -0
  21. dbt_features/inference.py +118 -0
  22. dbt_features/parser.py +322 -0
  23. dbt_features/py.typed +0 -0
  24. dbt_features/renderer.py +699 -0
  25. dbt_features/schema.py +178 -0
  26. dbt_features/static/favicon.svg +6 -0
  27. dbt_features/static/filter.js +128 -0
  28. dbt_features/static/lineage.js +179 -0
  29. dbt_features/static/mermaid.min.js +2029 -0
  30. dbt_features/static/search.js +295 -0
  31. dbt_features/static/sort.js +142 -0
  32. dbt_features/static/style.css +922 -0
  33. dbt_features/static/tabs.js +36 -0
  34. dbt_features/static/theme.js +37 -0
  35. dbt_features/templates/base.html +104 -0
  36. dbt_features/templates/feature.html +143 -0
  37. dbt_features/templates/feature_group.html +213 -0
  38. dbt_features/templates/index.html +200 -0
  39. dbt_features/templates/lineage.html +49 -0
  40. dbt_features/templates/model.html +92 -0
  41. dbt_features/templates/models_index.html +35 -0
  42. dbt_features-0.3.0.dist-info/METADATA +332 -0
  43. dbt_features-0.3.0.dist-info/RECORD +46 -0
  44. dbt_features-0.3.0.dist-info/WHEEL +4 -0
  45. dbt_features-0.3.0.dist-info/entry_points.txt +2 -0
  46. dbt_features-0.3.0.dist-info/licenses/LICENSE +190 -0
@@ -0,0 +1,16 @@
"""dbt-features — a feature catalog for ML teams whose features live as dbt models."""

from dbt_features.catalog import Catalog, Feature, FeatureGroup
from dbt_features.parser import parse_project
from dbt_features.renderer import render_catalog

# Must match the version of the distribution this module ships in.
# The 0.3.0 wheel previously still reported "0.2.0" here, so
# ``dbt_features.__version__`` disagreed with the installed package metadata.
__version__ = "0.3.0"

# Public API re-exported at package level.
__all__ = [
    "Catalog",
    "Feature",
    "FeatureGroup",
    "__version__",
    "parse_project",
    "render_catalog",
]
@@ -0,0 +1,4 @@
1
# Executed for ``python -m dbt_features``: delegate straight to the CLI entry
# point. The guard keeps a plain import of this module free of side effects.
from dbt_features.cli import main

if __name__ == "__main__":
    main()
@@ -0,0 +1,237 @@
1
+ """Internal catalog data model — what the renderer consumes.
2
+
3
+ These are deliberately kept separate from the user-facing schema in
4
+ ``schema.py``: the user's declared metadata gets normalized, joined with
5
+ manifest/catalog data, and resolved into these objects before rendering.
6
+ That separation keeps validation errors close to the user's input and keeps
7
+ the rendering layer simple.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ from dataclasses import dataclass, field
13
+ from datetime import datetime, timezone
14
+
15
+ from dbt_features.schema import (
16
+ FeatureTableMeta,
17
+ FeatureType,
18
+ Freshness,
19
+ Lifecycle,
20
+ NullBehavior,
21
+ )
22
+
23
+
24
@dataclass(frozen=True, slots=True)
class Feature:
    """One column of a feature table that the user marked as a feature.

    Instances are immutable (``frozen=True``) and carry no ``__dict__``
    (``slots=True``). The first seven fields are required; the remaining
    three have defaults.
    """

    # --- required -----------------------------------------------------
    name: str
    description: str
    # ``None`` when no column type is available.
    column_type: str | None
    # Optional classifications; the allowed values are defined in ``schema.py``.
    feature_type: FeatureType | None
    null_behavior: NullBehavior | None
    # Names of the models that consume this feature (the catalog builds its
    # per-model index from this field).
    used_by: tuple[str, ...]
    tags: tuple[str, ...]

    # --- optional, with defaults ----------------------------------------
    definition_version: int = 1
    lifecycle: Lifecycle = Lifecycle.ACTIVE
    # NOTE(review): presumably the name of the feature that supersedes this
    # one when it is no longer active — confirm against the schema module.
    replacement: str | None = None
37
+
38
+ @dataclass(frozen=True, slots=True)
39
+ class LineageRef:
40
+ """A reference to another node in the dbt graph.
41
+
42
+ ``unique_id`` is dbt's identifier (e.g. ``model.jaffle.foo``). ``name``
43
+ is the short name used in the UI. ``is_feature_table`` lets the renderer
44
+ link to a catalog page if the dependency is itself a feature table.
45
+ """
46
+
47
+ unique_id: str
48
+ name: str
49
+ resource_type: str
50
+ is_feature_table: bool
51
+
52
+
53
+ @dataclass(frozen=True, slots=True)
54
+ class FeatureGroup:
55
+ """A dbt model marked as a feature table.
56
+
57
+ Composed of: the user-declared ``FeatureTableMeta`` (validated upstream),
58
+ plus model facts pulled from ``manifest.json`` (description, schema,
59
+ materialization, lineage), plus the columns the user marked as features.
60
+ Non-feature columns (keys, timestamps) are intentionally excluded.
61
+ """
62
+
63
+ name: str
64
+ unique_id: str
65
+ description: str
66
+ schema_name: str
67
+ database: str | None
68
+ materialization: str
69
+ package_name: str
70
+ file_path: str
71
+ meta: FeatureTableMeta
72
+ features: tuple[Feature, ...]
73
+ upstream: tuple[LineageRef, ...]
74
+ downstream: tuple[LineageRef, ...]
75
+
76
+ @property
77
+ def entity_columns(self) -> list[str]:
78
+ return self.meta.entity_columns
79
+
80
+ @property
81
+ def grain(self) -> list[str]:
82
+ return list(self.meta.grain)
83
+
84
+ @property
85
+ def timestamp_column(self) -> str | None:
86
+ return self.meta.timestamp_column
87
+
88
+ @property
89
+ def freshness(self) -> Freshness | None:
90
+ return self.meta.freshness
91
+
92
+ @property
93
+ def owner(self) -> str | None:
94
+ return self.meta.owner
95
+
96
+ @property
97
+ def tags(self) -> list[str]:
98
+ return list(self.meta.tags)
99
+
100
+ @property
101
+ def definition_version(self) -> int:
102
+ return self.meta.definition_version
103
+
104
+ @property
105
+ def lifecycle(self) -> Lifecycle:
106
+ return self.meta.lifecycle
107
+
108
+ @property
109
+ def replacement(self) -> str | None:
110
+ return self.meta.replacement
111
+
112
+ @property
113
+ def fully_qualified_name(self) -> str:
114
+ parts = [p for p in (self.database, self.schema_name, self.name) if p]
115
+ return ".".join(parts)
116
+
117
+
118
+ @dataclass(frozen=True, slots=True)
119
+ class Catalog:
120
+ project_name: str
121
+ feature_groups: tuple[FeatureGroup, ...]
122
+ generated_at: datetime = field(default_factory=lambda: datetime.now(timezone.utc))
123
+
124
+ @property
125
+ def feature_count(self) -> int:
126
+ return sum(len(g.features) for g in self.feature_groups)
127
+
128
+ @property
129
+ def all_tags(self) -> list[str]:
130
+ seen: dict[str, None] = {}
131
+ for g in self.feature_groups:
132
+ for tag in g.tags:
133
+ seen[tag] = None
134
+ return sorted(seen)
135
+
136
+ def by_unique_id(self, unique_id: str) -> FeatureGroup | None:
137
+ for g in self.feature_groups:
138
+ if g.unique_id == unique_id:
139
+ return g
140
+ return None
141
+
142
+ def feature_groups_by_tag(self) -> dict[str, list[FeatureGroup]]:
143
+ """Group feature groups by tag.
144
+
145
+ Groups with no tags fall under ``"untagged"``. A group with multiple
146
+ tags appears under each one. Kept for backward compatibility and
147
+ for callers (e.g. exporters) that want a tag-faceted view; the
148
+ index page itself groups by entity now to avoid card duplication.
149
+ """
150
+
151
+ out: dict[str, list[FeatureGroup]] = {}
152
+ for g in self.feature_groups:
153
+ tags = g.tags or ["untagged"]
154
+ for tag in tags:
155
+ out.setdefault(tag, []).append(g)
156
+ for groups in out.values():
157
+ groups.sort(key=lambda g: g.name)
158
+ return dict(sorted(out.items()))
159
+
160
+ def feature_groups_by_entity(self) -> dict[str, list[FeatureGroup]]:
161
+ """Group feature groups by their primary entity, no duplication.
162
+
163
+ Entity is the join key — the question every feature consumer asks
164
+ first ("what features can I join to a customer?"). We use the
165
+ first declared entity column as the section. Multi-entity groups
166
+ land in ``"Cross-entity"`` so they're discoverable but not
167
+ duplicated. Groups without an entity fall under ``"Other"``.
168
+ """
169
+
170
+ out: dict[str, list[FeatureGroup]] = {}
171
+ for g in self.feature_groups:
172
+ entities = g.entity_columns
173
+ if not entities:
174
+ key = "Other"
175
+ elif len(entities) > 1:
176
+ key = "Cross-entity"
177
+ else:
178
+ key = entities[0]
179
+ out.setdefault(key, []).append(g)
180
+ for groups in out.values():
181
+ groups.sort(key=lambda g: g.name)
182
+ # Surface "Cross-entity" and "Other" last; everything else alpha.
183
+ def _order(k: str) -> tuple[int, str]:
184
+ if k == "Cross-entity":
185
+ return (1, k)
186
+ if k == "Other":
187
+ return (2, k)
188
+ return (0, k)
189
+
190
+ return {k: out[k] for k in sorted(out.keys(), key=_order)}
191
+
192
+ @property
193
+ def all_entities(self) -> list[str]:
194
+ seen: dict[str, None] = {}
195
+ for g in self.feature_groups:
196
+ for e in g.entity_columns:
197
+ seen[e] = None
198
+ return sorted(seen)
199
+
200
+ @property
201
+ def all_owners(self) -> list[str]:
202
+ seen: dict[str, None] = {}
203
+ for g in self.feature_groups:
204
+ if g.owner:
205
+ seen[g.owner] = None
206
+ return sorted(seen)
207
+
208
+ @property
209
+ def all_models(self) -> list[str]:
210
+ """Distinct model names declared via column-level ``used_by``.
211
+
212
+ These are typically ML/analytics consumers that don't appear in
213
+ the dbt graph. Sorted, deduped.
214
+ """
215
+
216
+ seen: dict[str, None] = {}
217
+ for g in self.feature_groups:
218
+ for f in g.features:
219
+ for m in f.used_by:
220
+ seen[m] = None
221
+ return sorted(seen)
222
+
223
+ def features_by_model(self) -> dict[str, list[tuple[FeatureGroup, Feature]]]:
224
+ """Inverted index: model name -> list of (group, feature) pairs.
225
+
226
+ Powers the ``/models/<name>/`` pages — the consumer-centric view
227
+ that was missing from v0.2.
228
+ """
229
+
230
+ out: dict[str, list[tuple[FeatureGroup, Feature]]] = {}
231
+ for g in self.feature_groups:
232
+ for f in g.features:
233
+ for m in f.used_by:
234
+ out.setdefault(m, []).append((g, f))
235
+ for entries in out.values():
236
+ entries.sort(key=lambda gf: (gf[0].name, gf[1].name))
237
+ return dict(sorted(out.items()))