recce-nightly: recce_nightly-1.2.0.20250506-py3-none-any.whl → recce_nightly-1.26.0.20251124-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of recce-nightly might be problematic.
- recce/VERSION +1 -1
- recce/__init__.py +27 -22
- recce/adapter/base.py +11 -14
- recce/adapter/dbt_adapter/__init__.py +810 -480
- recce/adapter/dbt_adapter/dbt_version.py +3 -0
- recce/adapter/sqlmesh_adapter.py +24 -35
- recce/apis/check_api.py +39 -28
- recce/apis/check_func.py +33 -27
- recce/apis/run_api.py +25 -19
- recce/apis/run_func.py +29 -23
- recce/artifact.py +119 -51
- recce/cli.py +1299 -323
- recce/config.py +42 -33
- recce/connect_to_cloud.py +138 -0
- recce/core.py +55 -47
- recce/data/404.html +1 -1
- recce/data/__next.__PAGE__.txt +10 -0
- recce/data/__next._full.txt +23 -0
- recce/data/__next._head.txt +8 -0
- recce/data/__next._index.txt +8 -0
- recce/data/__next._tree.txt +5 -0
- recce/data/_next/static/52aV_JrNUZU6dMFgvTQEO/_buildManifest.js +11 -0
- recce/data/_next/static/52aV_JrNUZU6dMFgvTQEO/_clientMiddlewareManifest.json +1 -0
- recce/data/_next/static/chunks/02b996c7f6a29a06.js +4 -0
- recce/data/_next/static/chunks/19c10d219a6a21ff.js +1 -0
- recce/data/_next/static/chunks/2df9ec28a061971d.js +11 -0
- recce/data/_next/static/chunks/3098c987393bda15.js +1 -0
- recce/data/_next/static/chunks/393dc43e483f717a.css +2 -0
- recce/data/_next/static/chunks/399e8d91a7e45073.js +2 -0
- recce/data/_next/static/chunks/4d0186f631230245.js +1 -0
- recce/data/_next/static/chunks/5794ba9e10a9c060.js +11 -0
- recce/data/_next/static/chunks/715761c929a3f28b.js +110 -0
- recce/data/_next/static/chunks/71f88fcc615bf282.js +1 -0
- recce/data/_next/static/chunks/80d2a95eaf1201ea.js +1 -0
- recce/data/_next/static/chunks/9979c6109bbbee35.js +1 -0
- recce/data/_next/static/chunks/99d638224186c118.js +1 -0
- recce/data/_next/static/chunks/d003eb36240e92f3.js +1 -0
- recce/data/_next/static/chunks/d3167cdfec4fc351.js +1 -0
- recce/data/_next/static/chunks/e124bccf574a3361.css +1 -0
- recce/data/_next/static/chunks/f40141db1bdb46f0.css +6 -0
- recce/data/_next/static/chunks/fcc53a88741a52f9.js +1 -0
- recce/data/_next/static/chunks/turbopack-b1920d28cfb1f28d.js +3 -0
- recce/data/_next/static/media/favicon.a8d38d84.ico +0 -0
- recce/data/_next/static/media/montserrat-cyrillic-800-normal.d80d830d.woff2 +0 -0
- recce/data/_next/static/media/montserrat-cyrillic-800-normal.f9d58125.woff +0 -0
- recce/data/_next/static/media/montserrat-cyrillic-ext-800-normal.076c2a93.woff2 +0 -0
- recce/data/_next/static/media/montserrat-cyrillic-ext-800-normal.a4fa76b5.woff +0 -0
- recce/data/_next/static/media/montserrat-latin-800-normal.cde454cc.woff2 +0 -0
- recce/data/_next/static/media/montserrat-latin-800-normal.d5761935.woff +0 -0
- recce/data/_next/static/media/montserrat-latin-ext-800-normal.40ec0659.woff2 +0 -0
- recce/data/_next/static/media/montserrat-latin-ext-800-normal.b671449b.woff +0 -0
- recce/data/_next/static/media/montserrat-vietnamese-800-normal.9f7b8541.woff +0 -0
- recce/data/_next/static/media/montserrat-vietnamese-800-normal.f9eb854e.woff2 +0 -0
- recce/data/_next/static/media/reload-image.7aa931c7.svg +4 -0
- recce/data/_not-found/__next._full.txt +17 -0
- recce/data/_not-found/__next._head.txt +8 -0
- recce/data/_not-found/__next._index.txt +8 -0
- recce/data/_not-found/__next._not-found.__PAGE__.txt +5 -0
- recce/data/_not-found/__next._not-found.txt +4 -0
- recce/data/_not-found/__next._tree.txt +3 -0
- recce/data/_not-found.html +1 -0
- recce/data/_not-found.txt +17 -0
- recce/data/auth_callback.html +68 -0
- recce/data/imgs/reload-image.svg +4 -0
- recce/data/index.html +1 -27
- recce/data/index.txt +23 -7
- recce/diff.py +6 -12
- recce/event/__init__.py +86 -74
- recce/event/collector.py +33 -22
- recce/event/track.py +49 -27
- recce/exceptions.py +1 -1
- recce/git.py +7 -7
- recce/github.py +57 -53
- recce/mcp_server.py +716 -0
- recce/models/__init__.py +4 -1
- recce/models/check.py +6 -7
- recce/models/run.py +1 -0
- recce/models/types.py +131 -28
- recce/pull_request.py +27 -25
- recce/run.py +165 -121
- recce/server.py +303 -111
- recce/state/__init__.py +31 -0
- recce/state/cloud.py +632 -0
- recce/state/const.py +26 -0
- recce/state/local.py +56 -0
- recce/state/state.py +119 -0
- recce/state/state_loader.py +174 -0
- recce/summary.py +188 -143
- recce/tasks/__init__.py +19 -3
- recce/tasks/core.py +11 -13
- recce/tasks/dataframe.py +82 -18
- recce/tasks/histogram.py +69 -34
- recce/tasks/lineage.py +2 -2
- recce/tasks/profile.py +152 -86
- recce/tasks/query.py +139 -87
- recce/tasks/rowcount.py +37 -31
- recce/tasks/schema.py +18 -15
- recce/tasks/top_k.py +35 -35
- recce/tasks/valuediff.py +216 -152
- recce/util/__init__.py +3 -0
- recce/util/api_token.py +80 -0
- recce/util/breaking.py +87 -85
- recce/util/cll.py +274 -219
- recce/util/io.py +22 -17
- recce/util/lineage.py +65 -16
- recce/util/logger.py +1 -1
- recce/util/onboarding_state.py +45 -0
- recce/util/perf_tracking.py +85 -0
- recce/util/recce_cloud.py +322 -72
- recce/util/singleton.py +4 -4
- recce/yaml/__init__.py +7 -10
- recce_cloud/__init__.py +24 -0
- recce_cloud/api/__init__.py +17 -0
- recce_cloud/api/base.py +111 -0
- recce_cloud/api/client.py +150 -0
- recce_cloud/api/exceptions.py +26 -0
- recce_cloud/api/factory.py +63 -0
- recce_cloud/api/github.py +76 -0
- recce_cloud/api/gitlab.py +82 -0
- recce_cloud/artifact.py +57 -0
- recce_cloud/ci_providers/__init__.py +9 -0
- recce_cloud/ci_providers/base.py +82 -0
- recce_cloud/ci_providers/detector.py +147 -0
- recce_cloud/ci_providers/github_actions.py +136 -0
- recce_cloud/ci_providers/gitlab_ci.py +130 -0
- recce_cloud/cli.py +245 -0
- recce_cloud/upload.py +214 -0
- {recce_nightly-1.2.0.20250506.dist-info → recce_nightly-1.26.0.20251124.dist-info}/METADATA +68 -37
- recce_nightly-1.26.0.20251124.dist-info/RECORD +180 -0
- {recce_nightly-1.2.0.20250506.dist-info → recce_nightly-1.26.0.20251124.dist-info}/WHEEL +1 -1
- {recce_nightly-1.2.0.20250506.dist-info → recce_nightly-1.26.0.20251124.dist-info}/top_level.txt +1 -0
- tests/adapter/dbt_adapter/conftest.py +9 -5
- tests/adapter/dbt_adapter/dbt_test_helper.py +37 -22
- tests/adapter/dbt_adapter/test_dbt_adapter.py +0 -15
- tests/adapter/dbt_adapter/test_dbt_cll.py +656 -41
- tests/adapter/dbt_adapter/test_selector.py +22 -21
- tests/recce_cloud/__init__.py +0 -0
- tests/recce_cloud/test_ci_providers.py +351 -0
- tests/recce_cloud/test_cli.py +372 -0
- tests/recce_cloud/test_client.py +273 -0
- tests/recce_cloud/test_platform_clients.py +333 -0
- tests/tasks/conftest.py +1 -1
- tests/tasks/test_histogram.py +58 -66
- tests/tasks/test_lineage.py +36 -23
- tests/tasks/test_preset_checks.py +45 -31
- tests/tasks/test_profile.py +339 -15
- tests/tasks/test_query.py +46 -46
- tests/tasks/test_row_count.py +65 -46
- tests/tasks/test_schema.py +65 -42
- tests/tasks/test_top_k.py +22 -18
- tests/tasks/test_valuediff.py +43 -32
- tests/test_cli.py +174 -60
- tests/test_cli_mcp_optional.py +45 -0
- tests/test_cloud_listing_cli.py +324 -0
- tests/test_config.py +7 -9
- tests/test_connect_to_cloud.py +82 -0
- tests/test_core.py +151 -4
- tests/test_dbt.py +7 -7
- tests/test_mcp_server.py +332 -0
- tests/test_pull_request.py +1 -1
- tests/test_server.py +25 -19
- tests/test_summary.py +29 -17
- recce/data/_next/static/Kcbs3GEIyH2LxgLYat0es/_buildManifest.js +0 -1
- recce/data/_next/static/chunks/1f229bf6-d9fe92e56db8d93b.js +0 -1
- recce/data/_next/static/chunks/29e3cc0d-8c150e37dff9631b.js +0 -1
- recce/data/_next/static/chunks/368-7587b306577df275.js +0 -65
- recce/data/_next/static/chunks/36e1c10d-bb0210cbd6573a8d.js +0 -1
- recce/data/_next/static/chunks/3998a672-eaad84bdd88cc73e.js +0 -1
- recce/data/_next/static/chunks/3a92ee20-3b5d922d4157af5e.js +0 -1
- recce/data/_next/static/chunks/450c323b-1bb5db526e54435a.js +0 -1
- recce/data/_next/static/chunks/47d8844f-79a1b53c66a7d7ec.js +0 -1
- recce/data/_next/static/chunks/6dc81886-c94b9b91bc2c3caf.js +0 -1
- recce/data/_next/static/chunks/6ef81909-694dc38134099299.js +0 -1
- recce/data/_next/static/chunks/700-3b65fc3666820d00.js +0 -2
- recce/data/_next/static/chunks/7a8a3e83-d7fa409d97b38b2b.js +0 -1
- recce/data/_next/static/chunks/7f27ae6c-413f6b869a04183a.js +0 -1
- recce/data/_next/static/chunks/8d700b6a-f0b1f6b9e0d97ce2.js +0 -1
- recce/data/_next/static/chunks/9746af58-d74bef4d03eea6ab.js +0 -1
- recce/data/_next/static/chunks/a30376cd-7d806e1602f2dc3a.js +0 -1
- recce/data/_next/static/chunks/app/_not-found/page-8a886fa0855c3105.js +0 -1
- recce/data/_next/static/chunks/app/layout-9102e22cb73f74d6.js +0 -1
- recce/data/_next/static/chunks/app/page-cee661090afbd6aa.js +0 -1
- recce/data/_next/static/chunks/b63b1b3f-7395c74e11a14e95.js +0 -1
- recce/data/_next/static/chunks/c132bf7d-8102037f9ccf372a.js +0 -1
- recce/data/_next/static/chunks/c1ceaa8b-a1e442154d23515e.js +0 -1
- recce/data/_next/static/chunks/cd9f8d63-cf0d5a7b0f7a92e8.js +0 -54
- recce/data/_next/static/chunks/ce84277d-f42c2c58049cea2d.js +0 -1
- recce/data/_next/static/chunks/e24bf851-0f8cbc99656833e7.js +0 -1
- recce/data/_next/static/chunks/fee69bc6-f17d36c080742e74.js +0 -1
- recce/data/_next/static/chunks/framework-ded83d71b51ce901.js +0 -1
- recce/data/_next/static/chunks/main-a0859f1f36d0aa6c.js +0 -1
- recce/data/_next/static/chunks/main-app-0225a2255968e566.js +0 -1
- recce/data/_next/static/chunks/pages/_app-d5672bf3d8b6371b.js +0 -1
- recce/data/_next/static/chunks/pages/_error-ed75be3f25588548.js +0 -1
- recce/data/_next/static/chunks/webpack-567d72f0bc0820d5.js +0 -1
- recce/data/_next/static/css/c9ecb46a4b21c126.css +0 -14
- recce/data/_next/static/media/montserrat-cyrillic-800-normal.22628180.woff2 +0 -0
- recce/data/_next/static/media/montserrat-cyrillic-800-normal.31d693bb.woff +0 -0
- recce/data/_next/static/media/montserrat-cyrillic-ext-800-normal.7e2c1e62.woff +0 -0
- recce/data/_next/static/media/montserrat-cyrillic-ext-800-normal.94a63aea.woff2 +0 -0
- recce/data/_next/static/media/montserrat-latin-800-normal.6f8fa298.woff2 +0 -0
- recce/data/_next/static/media/montserrat-latin-800-normal.97e20d5e.woff +0 -0
- recce/data/_next/static/media/montserrat-latin-ext-800-normal.013b84f9.woff2 +0 -0
- recce/data/_next/static/media/montserrat-latin-ext-800-normal.aff52ab0.woff +0 -0
- recce/data/_next/static/media/montserrat-vietnamese-800-normal.5f21869b.woff +0 -0
- recce/data/_next/static/media/montserrat-vietnamese-800-normal.c0035377.woff2 +0 -0
- recce/state.py +0 -753
- recce_nightly-1.2.0.20250506.dist-info/RECORD +0 -142
- tests/test_state.py +0 -123
- /recce/data/_next/static/{Kcbs3GEIyH2LxgLYat0es → 52aV_JrNUZU6dMFgvTQEO}/_ssgManifest.js +0 -0
- /recce/data/_next/static/chunks/{polyfills-42372ed130431b0a.js → a6dad97d9634a72d.js} +0 -0
- {recce_nightly-1.2.0.20250506.dist-info → recce_nightly-1.26.0.20251124.dist-info}/entry_points.txt +0 -0
- {recce_nightly-1.2.0.20250506.dist-info → recce_nightly-1.26.0.20251124.dist-info}/licenses/LICENSE +0 -0
recce/adapter/dbt_adapter/__init__.py

@@ -8,14 +8,33 @@ from dataclasses import dataclass, fields
 from errno import ENOENT
 from functools import lru_cache
 from pathlib import Path
-from typing import
+from typing import (
+    Any,
+    Callable,
+    Dict,
+    Iterator,
+    List,
+    Literal,
+    Optional,
+    Set,
+    Tuple,
+    Type,
+    Union,
+)

 from recce.event import log_performance
 from recce.exceptions import RecceException
-from recce.util.cll import
-from recce.util.lineage import
+from recce.util.cll import CLLPerformanceTracking, cll
+from recce.util.lineage import (
+    build_column_key,
+    filter_dependency_maps,
+    find_downstream,
+    find_upstream,
+)
+from recce.util.perf_tracking import LineagePerfTracker
+
 from ...tasks.profile import ProfileTask
-from ...util.breaking import
+from ...util.breaking import BreakingPerformanceTracking, parse_change_category

 try:
     import agate
@@ -30,11 +49,30 @@ from watchdog.observers import Observer

 from recce.adapter.base import BaseAdapter
 from recce.state import ArtifactsRoot
-
+
 from ...models import RunType
-from ...models.types import
-
-
+from ...models.types import (
+    CllColumn,
+    CllData,
+    CllNode,
+    LineageDiff,
+    NodeChange,
+    NodeDiff,
+)
+from ...tasks import (
+    HistogramDiffTask,
+    ProfileDiffTask,
+    QueryBaseTask,
+    QueryDiffTask,
+    QueryTask,
+    RowCountDiffTask,
+    RowCountTask,
+    Task,
+    TopKDiffTask,
+    ValueDiffDetailTask,
+    ValueDiffTask,
+)
+from .dbt_version import DbtVersion

 dbt_supported_registry: Dict[RunType, Type[Task]] = {
     RunType.QUERY: QueryTask,
@@ -56,7 +94,7 @@ get_adapter_orig = dbt.adapters.factory.get_adapter


 def get_adapter(config):
-    if hasattr(config,
+    if hasattr(config, "adapter"):
         return config.adapter
     else:
         return get_adapter_orig(config)
@@ -69,7 +107,11 @@ from dbt.adapters.base import Column # noqa: E402
 from dbt.adapters.factory import get_adapter_class_by_name # noqa: E402
 from dbt.adapters.sql import SQLAdapter # noqa: E402
 from dbt.config.runtime import RuntimeConfig # noqa: E402
-from dbt.contracts.graph.manifest import
+from dbt.contracts.graph.manifest import ( # noqa: E402
+    MacroManifest,
+    Manifest,
+    WritableManifest,
+)
 from dbt.contracts.graph.nodes import ManifestNode # noqa: E402
 from dbt.contracts.results import CatalogArtifact # noqa: E402
 from dbt.flags import set_from_args # noqa: E402
@@ -78,7 +120,7 @@ from dbt.parser.sql import SqlBlockParser # noqa: E402

 dbt_version = DbtVersion()

-if dbt_version <
+if dbt_version < "v1.8":
     from dbt.contracts.connection import Connection
 else:
     from dbt.adapters.contracts.connection import Connection
@@ -86,19 +128,22 @@ else:

 @contextmanager
 def silence_no_nodes_warning():
-    if dbt_version >=
+    if dbt_version >= "v1.8":
         from dbt.events.types import NoNodesForSelectionCriteria
         from dbt_common.events.functions import WARN_ERROR_OPTIONS
+
         WARN_ERROR_OPTIONS.silence.append(NoNodesForSelectionCriteria.__name__)
     try:
         yield
     finally:
-        if dbt_version >=
+        if dbt_version >= "v1.8":
             from dbt_common.events.functions import WARN_ERROR_OPTIONS
+
             WARN_ERROR_OPTIONS.silence.pop()


-logger = logging.getLogger(
+logger = logging.getLogger("uvicorn")
+MIN_DBT_NODE_COMPOSITION = 3


 class ArtifactsEventHandler(FileSystemEventHandler):
@@ -147,16 +192,18 @@ class EnvironmentEventHandler(FileSystemEventHandler):


 def merge_tables(tables: List[agate.Table]) -> agate.Table:
-    if dbt_version <
+    if dbt_version < "v1.8":
         from dbt.clients.agate_helper import merge_tables
+
         return merge_tables(tables)
     else:
         from dbt_common.clients.agate_helper import merge_tables
+
         return merge_tables(tables)


 def as_manifest(m: WritableManifest) -> Manifest:
-    if dbt_version <
+    if dbt_version < "v1.8":
         data = m.__dict__
         all_fields = set([x.name for x in fields(Manifest)])
         new_data = {k: v for k, v in data.items() if k in all_fields}
@@ -184,12 +231,13 @@ def load_catalog(path: str = None, data: dict = None):


 def previous_state(state_path: Path, target_path: Path, project_root: Path) -> PreviousState:
-    if dbt_version <
+    if dbt_version < "v1.5.2":
         return PreviousState(state_path, target_path)
     else:
         try:
             # Overwrite the level_tag method temporarily to avoid the warning message
-            from dbt.events.types import
+            from dbt.events.types import EventLevel, WarnStateTargetEqual
+
             original_level_tag_func = WarnStateTargetEqual.level_tag
             WarnStateTargetEqual.level_tag = lambda x: EventLevel.DEBUG
         except ImportError:
@@ -209,12 +257,12 @@ def previous_state(state_path: Path, target_path: Path, project_root: Path) -> P
 def default_profiles_dir():
     # Precedence: DBT_PROFILES_DIR > current working directory > ~/.dbt/
     # https://docs.getdbt.com/docs/core/connect-data-platform/connection-profiles#advanced-customizing-a-profile-directory
-    if os.getenv(
-        return os.getenv(
-    elif os.path.exists(os.path.join(os.getcwd(),
+    if os.getenv("DBT_PROFILES_DIR"):
+        return os.getenv("DBT_PROFILES_DIR")
+    elif os.path.exists(os.path.join(os.getcwd(), "profiles.yml")):
         return os.getcwd()
     else:
-        return os.path.expanduser(
+        return os.path.expanduser("~/.dbt/")


 @dataclass()
@@ -222,15 +270,16 @@ class DbtArgs:
     """
     Used for RuntimeConfig.from_args
     """
-
-
-
-
-
-
+
+    threads: Optional[int] = (1,)
+    target: Optional[str] = (None,)
+    profiles_dir: Optional[str] = (None,)
+    project_dir: Optional[str] = (None,)
+    profile: Optional[str] = (None,)
+    target_path: Optional[str] = (None,)
     project_only_flags: Optional[Dict[str, Any]] = None
     which: Optional[str] = None
-    state_modified_compare_more_unrendered_values: Optional[bool] =
+    state_modified_compare_more_unrendered_values: Optional[bool] = True # new flag added since dbt v1.9


 @dataclass
@@ -258,32 +307,18 @@ class DbtAdapter(BaseAdapter):

     def support_tasks(self):
         support_map = {run_type.value: True for run_type in dbt_supported_registry}
-        supported_dbt_packages = set([package.package_name for package in self.manifest.macros.values()])
-
-        if 'dbt_profiler' not in supported_dbt_packages:
-            support_map[RunType.PROFILE_DIFF.value] = False
-            support_map[RunType.PROFILE.value] = False
-
-        if 'audit_helper' not in supported_dbt_packages:
-            support_map[RunType.VALUE_DIFF.value] = False
-            support_map[RunType.VALUE_DIFF_DETAIL.value] = False
-            support_map['query_diff_with_primary_key'] = False

         return support_map

     @classmethod
-    def load(cls,
-
-
-
-
-        target = kwargs.get('target')
-        target_path = kwargs.get('target_path', 'target')
-        target_base_path = kwargs.get('target_base_path', 'target-base')
+    def load(cls, no_artifacts=False, review=False, **kwargs):
+        target = kwargs.get("target")
+        target_path = kwargs.get("target_path", "target")
+        target_base_path = kwargs.get("target_base_path", "target-base")

-        profile_name = kwargs.get(
-        project_dir = kwargs.get(
-        profiles_dir = kwargs.get(
+        profile_name = kwargs.get("profile")
+        project_dir = kwargs.get("project_dir")
+        profiles_dir = kwargs.get("profiles_dir")

         if profiles_dir is None:
             profiles_dir = default_profiles_dir()
@@ -297,21 +332,25 @@ class DbtAdapter(BaseAdapter):
             profiles_dir=profiles_dir,
             profile=profile_name,
             project_only_flags={},
-            which=
+            which="list",
         )
         set_from_args(args, args)

         from dbt.exceptions import DbtProjectError
+
         try:
             # adapter
-            if dbt_version <
+            if dbt_version < "v1.8":
                 runtime_config = RuntimeConfig.from_args(args)
                 adapter_name = runtime_config.credentials.type
                 adapter_cls = get_adapter_class_by_name(adapter_name)
                 adapter: SQLAdapter = adapter_cls(runtime_config)
             else:
-                from dbt_common.context import set_invocation_context, get_invocation_context
                 from dbt.mp_context import get_mp_context
+                from dbt_common.context import (
+                    get_invocation_context,
+                    set_invocation_context,
+                )

                 set_invocation_context({})
                 get_invocation_context()._env = dict(os.environ)
@@ -320,6 +359,7 @@ class DbtAdapter(BaseAdapter):
                 adapter_cls = get_adapter_class_by_name(adapter_name)
                 adapter: SQLAdapter = adapter_cls(runtime_config, get_mp_context())
                 from dbt.adapters.factory import FACTORY
+
                 FACTORY.adapters[adapter_name] = adapter

             adapter.connections.set_connection_name()
@@ -329,7 +369,7 @@ class DbtAdapter(BaseAdapter):
                 runtime_config=runtime_config,
                 adapter=adapter,
                 review_mode=review,
-                base_path=target_base_path
+                base_path=target_base_path,
             )
         except DbtProjectError as e:
             raise e
@@ -350,27 +390,26 @@ class DbtAdapter(BaseAdapter):

     def get_columns(self, model: str, base=False) -> List[Column]:
         relation = self.create_relation(model, base)
-        get_columns_macro =
-        if self.adapter.connections.TYPE ==
-            get_columns_macro =
+        get_columns_macro = "get_columns_in_relation"
+        if self.adapter.connections.TYPE == "databricks":
+            get_columns_macro = "get_columns_comments"

-        if dbt_version <
+        if dbt_version < "v1.8":
             columns = self.adapter.execute_macro(
-                get_columns_macro,
-
-                manifest=self.manifest)
+                get_columns_macro, kwargs={"relation": relation}, manifest=self.manifest
+            )
         else:
             from dbt.context.providers import generate_runtime_macro_context
+
             macro_manifest = MacroManifest(self.manifest.macros)
             self.adapter.set_macro_resolver(macro_manifest)
             self.adapter.set_macro_context_generator(generate_runtime_macro_context)
-            columns = self.adapter.execute_macro(
-                get_columns_macro,
-                kwargs={"relation": relation})
+            columns = self.adapter.execute_macro(get_columns_macro, kwargs={"relation": relation})

-        if self.adapter.connections.TYPE ==
+        if self.adapter.connections.TYPE == "databricks":
             # reference: get_columns_in_relation (dbt/adapters/databricks/impl.py)
-            from dbt.adapters.databricks import DatabricksColumn
+            from dbt.adapters.databricks.column import DatabricksColumn
+
             rows = columns
             columns = []
             for row in rows:
@@ -378,7 +417,9 @@ class DbtAdapter(BaseAdapter):
                     break
                 columns.append(
                     DatabricksColumn(
-                        column=row["col_name"],
+                        column=row["col_name"],
+                        dtype=row["data_type"],
+                        comment=row["comment"],
                     )
                 )
         return columns
@@ -389,29 +430,29 @@ class DbtAdapter(BaseAdapter):
         manifest = self.curr_manifest if base is False else self.base_manifest
         manifest_dict = manifest.to_dict()

-        node = manifest_dict[
+        node = manifest_dict["nodes"].get(model_id)
         if node is None:
             return {}

-        node_name = node[
-        with self.adapter.connection_named(
+        node_name = node["name"]
+        with self.adapter.connection_named("model"):
             columns = [column for column in self.get_columns(node_name, base=base)]

-        child_map: List[str] = manifest_dict[
+        child_map: List[str] = manifest_dict["child_map"][model_id]
         cols_not_null = []
         cols_unique = []

         for child in child_map:
-            comps = child.split(
+            comps = child.split(".")
             child_type = comps[0]
             child_name = comps[2]

-            not_null_prefix = f
-            if child_type ==
-                cols_not_null.append(child_name[len(not_null_prefix):])
-            unique_prefix = f
-            if child_type ==
-                cols_unique.append(child_name[len(unique_prefix):])
+            not_null_prefix = f"not_null_{node_name}_"
+            if child_type == "test" and child_name.startswith(not_null_prefix):
+                cols_not_null.append(child_name[len(not_null_prefix) :])
+            unique_prefix = f"unique_{node_name}_"
+            if child_type == "test" and child_name.startswith(unique_prefix):
+                cols_unique.append(child_name[len(unique_prefix) :])

         columns_info = {}
         primary_key = None
@@ -419,16 +460,16 @@ class DbtAdapter(BaseAdapter):
             col_name = c.column
             col = dict(name=col_name, type=c.dtype)
             if col_name in cols_not_null:
-                col[
+                col["not_null"] = True
             if col_name in cols_unique:
-                col[
+                col["unique"] = True
             if not primary_key:
                 primary_key = col_name
             columns_info[col_name] = col

         result = dict(columns=columns_info)
         if primary_key:
-            result[
+            result["primary_key"] = primary_key

         return result

@@ -437,7 +478,7 @@ class DbtAdapter(BaseAdapter):
         Load the artifacts from the 'target' and 'target-base' directory
         """
         if self.runtime_config is None:
-            raise Exception(
+            raise Exception("Cannot find the dbt project configuration")

         project_root = self.runtime_config.project_root
         target_path = self.runtime_config.target_path
@@ -446,17 +487,17 @@ class DbtAdapter(BaseAdapter):
         self.base_path = os.path.join(project_root, target_base_path)

         # load the artifacts
-        path = os.path.join(project_root, target_path,
+        path = os.path.join(project_root, target_path, "manifest.json")
         curr_manifest = load_manifest(path=path)
         if curr_manifest is None:
             raise FileNotFoundError(ENOENT, os.strerror(ENOENT), path)
-        path = os.path.join(project_root, target_base_path,
+        path = os.path.join(project_root, target_base_path, "manifest.json")
         base_manifest = load_manifest(path=path)
         if base_manifest is None:
             raise FileNotFoundError(ENOENT, os.strerror(ENOENT), path)

-        curr_catalog = load_catalog(path=os.path.join(project_root, target_path,
-        base_catalog = load_catalog(path=os.path.join(project_root, target_base_path,
+        curr_catalog = load_catalog(path=os.path.join(project_root, target_path, "catalog.json"))
+        base_catalog = load_catalog(path=os.path.join(project_root, target_base_path, "catalog.json"))

         # set the value if all the artifacts are loaded successfully
         self.curr_manifest = curr_manifest
@@ -474,22 +515,21 @@ class DbtAdapter(BaseAdapter):

         # set the file paths to watch
         self.artifacts_files = [
-            os.path.join(project_root, target_path,
-            os.path.join(project_root, target_path,
-            os.path.join(project_root, target_base_path,
-            os.path.join(project_root, target_base_path,
+            os.path.join(project_root, target_path, "manifest.json"),
+            os.path.join(project_root, target_path, "catalog.json"),
+            os.path.join(project_root, target_base_path, "manifest.json"),
+            os.path.join(project_root, target_base_path, "catalog.json"),
         ]

     def is_python_model(self, node_id: str, base: Optional[bool] = False):
         manifest = self.curr_manifest if base is False else self.base_manifest
         model = manifest.nodes.get(node_id)
-        if hasattr(model,
-            return model.language ==
+        if hasattr(model, "language"):
+            return model.language == "python"

         return False

     def find_node_by_name(self, node_name, base=False) -> Optional[ManifestNode]:
-
         manifest = self.curr_manifest if base is False else self.base_manifest

         for key, node in manifest.nodes.items():
@@ -499,22 +539,22 @@ class DbtAdapter(BaseAdapter):
         return None

     def get_node_name_by_id(self, unique_id):
-        if unique_id.startswith(
+        if unique_id.startswith("source."):
             if unique_id in self.curr_manifest.sources:
                 return self.curr_manifest.sources[unique_id].name
             elif unique_id in self.base_manifest.sources:
                 return self.base_manifest.sources[unique_id].name
-        elif unique_id.startswith(
+        elif unique_id.startswith("metric."):
             if unique_id in self.curr_manifest.metrics:
                 return self.curr_manifest.metrics[unique_id].name
             elif unique_id in self.base_manifest.metrics:
                 return self.base_manifest.metrics[unique_id].name
-        elif unique_id.startswith(
+        elif unique_id.startswith("exposure."):
             if unique_id in self.curr_manifest.exposures:
                 return self.curr_manifest.exposures[unique_id].name
             elif unique_id in self.base_manifest.exposures:
                 return self.base_manifest.exposures[unique_id].name
-        elif unique_id.startswith(
+        elif unique_id.startswith("semantic_model."):
             if unique_id in self.curr_manifest.semantic_models:
                 return self.curr_manifest.semantic_models[unique_id].name
             elif unique_id in self.base_manifest.semantic_models:
@@ -529,14 +569,24 @@ class DbtAdapter(BaseAdapter):
     def get_manifest(self, base: bool):
         return self.curr_manifest if base is False else self.base_manifest

-    def generate_sql(
+    def generate_sql(
+        self,
+        sql_template: str,
+        base: bool = False,
+        context=None,
+        provided_manifest=None,
+    ):
         if context is None:
             context = {}
         manifest = provided_manifest if provided_manifest is not None else as_manifest(self.get_manifest(base))
         parser = SqlBlockParser(self.runtime_config, manifest, self.runtime_config)

-        if dbt_version >= dbt_version.parse(
-            from dbt_common.context import
+        if dbt_version >= dbt_version.parse("v1.8"):
+            from dbt_common.context import (
+                get_invocation_context,
+                set_invocation_context,
+            )
+
             set_invocation_context({})
             get_invocation_context()._env = dict(os.environ)

@@ -544,21 +594,35 @@ class DbtAdapter(BaseAdapter):
         node = parser.parse_remote(sql_template, node_id)
         process_node(self.runtime_config, manifest, node)

-        if dbt_version < dbt_version.parse(
+        if dbt_version < dbt_version.parse("v1.8"):
             compiler = self.adapter.get_compiler()
             compiler.compile_node(node, manifest, context)
             return node.compiled_code
         else:
-            from dbt.context.providers import generate_runtime_model_context
             from dbt.clients import jinja
+            from dbt.context.providers import (
+                generate_runtime_macro_context,
+                generate_runtime_model_context,
+            )
+
+            # Set up macro resolver for dbt >= 1.8
+            macro_manifest = MacroManifest(manifest.macros)
+            self.adapter.set_macro_resolver(macro_manifest)
+            self.adapter.set_macro_context_generator(generate_runtime_macro_context)
+
             jinja_ctx = generate_runtime_model_context(node, self.runtime_config, manifest)
             jinja_ctx.update(context)
             compiled_code = jinja.get_rendered(sql_template, jinja_ctx, node)
             return compiled_code

-    def execute(
-
-
+    def execute(
+        self,
+        sql: str,
+        auto_begin: bool = False,
+        fetch: bool = False,
+        limit: Optional[int] = None,
+    ) -> Tuple[any, agate.Table]:
+        if dbt_version < dbt_version.parse("v1.6"):
             return self.adapter.execute(sql, auto_begin=auto_begin, fetch=fetch)

         return self.adapter.execute(sql, auto_begin=auto_begin, fetch=fetch, limit=limit)
@@ -569,7 +633,7 @@ class DbtAdapter(BaseAdapter):

         node_ids = nodes.keys()
         parent_map = {}
-        for k, parents in manifest_dict[
+        for k, parents in manifest_dict["parent_map"].items():
             if k not in node_ids:
                 continue
             parent_map[k] = [parent for parent in parents if parent in node_ids]
@@ -580,8 +644,8 @@ class DbtAdapter(BaseAdapter):
         manifest = self.curr_manifest if base is False else self.base_manifest
         manifest_dict = manifest.to_dict()

-        if node_id in manifest_dict[
-            return manifest_dict[
+        if node_id in manifest_dict["parent_map"]:
+            return manifest_dict["parent_map"][node_id]

     def get_lineage(self, base: Optional[bool] = False):
         manifest = self.curr_manifest if base is False else self.base_manifest
@@ -590,19 +654,21 @@ class DbtAdapter(BaseAdapter):
         return self.get_lineage_cached(base, cache_key)

     def get_lineage_diff(self) -> LineageDiff:
-        cache_key = hash(
-
-
-
-
-
+        cache_key = hash(
+            (
+                id(self.base_manifest),
+                id(self.base_catalog),
+                id(self.curr_manifest),
+                id(self.curr_catalog),
+            )
+        )
         return self._get_lineage_diff_cached(cache_key)

     @lru_cache(maxsize=2)
     def get_lineage_cached(self, base: Optional[bool] = False, cache_key=0):
         if base is False:
-
-
+            perf_tracker = LineagePerfTracker()
+            perf_tracker.start_lineage()

         manifest = self.curr_manifest if base is False else self.base_manifest
         catalog = self.curr_catalog if base is False else self.base_catalog
@@ -614,48 +680,48 @@ class DbtAdapter(BaseAdapter):

         nodes = {}

-        for node in manifest_dict[
-            unique_id = node[
-            resource_type = node[
+        for node in manifest_dict["nodes"].values():
+            unique_id = node["unique_id"]
+            resource_type = node["resource_type"]

-            if resource_type not in [
+            if resource_type not in ["model", "seed", "exposure", "snapshot"]:
                 continue

             nodes[unique_id] = {
-
-
-
-
-
-
-
-
+                "id": node["unique_id"],
+                "name": node["name"],
+                "resource_type": node["resource_type"],
+                "package_name": node["package_name"],
+                "schema": node["schema"],
+                "config": node["config"],
+                "checksum": node["checksum"],
+                "raw_code": node["raw_code"],
             }

             # List of <type>.<package_name>.<node_name>.<hash>
             # model.jaffle_shop.customer_segments
             # test.jaffle_shop.not_null_customers_customer_id.5c9bf9911d
             # test.jaffle_shop.unique_customers_customer_id.c5af1ff4b1
-            child_map: List[str] = manifest_dict[
+            child_map: List[str] = manifest_dict["child_map"][unique_id]
             cols_not_null = []
             cols_unique = []

             for child in child_map:
-                node_name = node[
-                comps = child.split(
-                if len(comps) <
+                node_name = node["name"]
+                comps = child.split(".")
+                if len(comps) < MIN_DBT_NODE_COMPOSITION:
                     # only happens in unittest
                     continue

                 child_type = comps[0]
                 child_name = comps[2]

-                not_null_prefix = f
-                if child_type ==
-                    cols_not_null.append(child_name[len(not_null_prefix):])
-                unique_prefix = f
-                if child_type ==
-                    cols_unique.append(child_name[len(unique_prefix):])
+                not_null_prefix = f"not_null_{node_name}_"
+                if child_type == "test" and child_name.startswith(not_null_prefix):
+                    cols_not_null.append(child_name[len(not_null_prefix) :])
+                unique_prefix = f"unique_{node_name}_"
+                if child_type == "test" and child_name.startswith(unique_prefix):
+                    cols_unique.append(child_name[len(unique_prefix) :])

             if catalog is not None and unique_id in catalog.nodes:
                 columns = {}
@@ -663,70 +729,68 @@ class DbtAdapter(BaseAdapter):
                 for col_name, col_metadata in catalog.nodes[unique_id].columns.items():
                     col = dict(name=col_name, type=col_metadata.type)
                     if col_name in cols_not_null:
-                        col[
+                        col["not_null"] = True
                     if col_name in cols_unique:
-                        col[
+                        col["unique"] = True
                     if not primary_key:
                         primary_key = col_name
                     columns[col_name] = col
-                nodes[unique_id][
+                nodes[unique_id]["columns"] = columns
                 if primary_key:
-                    nodes[unique_id][
+                    nodes[unique_id]["primary_key"] = primary_key

-        for source in manifest_dict[
-            unique_id = source[
+        for source in manifest_dict["sources"].values():
+            unique_id = source["unique_id"]

             nodes[unique_id] = {
-
-
-
-
-
+                "id": source["unique_id"],
+                "name": source["name"],
+                "source_name": source["source_name"],
+                "resource_type": source["resource_type"],
+                "package_name": source["package_name"],
+                "config": source["config"],
             }

             if catalog is not None and unique_id in catalog.sources:
-                nodes[unique_id][
-                    col_name: {
-                        'name': col_name,
-                        'type': col_metadata.type
-                    }
+                nodes[unique_id]["columns"] = {
+                    col_name: {"name": col_name, "type": col_metadata.type}
                     for col_name, col_metadata in catalog.sources[unique_id].columns.items()
                 }

-        for exposure in manifest_dict[
-            nodes[exposure[
-
-
-
-
-
+        for exposure in manifest_dict["exposures"].values():
+            nodes[exposure["unique_id"]] = {
+                "id": exposure["unique_id"],
+                "name": exposure["name"],
+                "resource_type": exposure["resource_type"],
+                "package_name": exposure["package_name"],
+                "config": exposure["config"],
             }
-        for metric in manifest_dict[
-            nodes[metric[
-
-
-
-
-
+        for metric in manifest_dict["metrics"].values():
+            nodes[metric["unique_id"]] = {
+                "id": metric["unique_id"],
+                "name": metric["name"],
+                "resource_type": metric["resource_type"],
+                "package_name": metric["package_name"],
+                "config": metric["config"],
             }

-        if
-            for semantic_models in manifest_dict[
-                nodes[semantic_models[
-
-
-
-
-
+        if "semantic_models" in manifest_dict:
+            for semantic_models in manifest_dict["semantic_models"].values():
+                nodes[semantic_models["unique_id"]] = {
+                    "id": semantic_models["unique_id"],
+                    "name": semantic_models["name"],
+                    "resource_type": semantic_models["resource_type"],
+                    "package_name": semantic_models["package_name"],
+                    "config": semantic_models["config"],
                 }

         parent_map = self.build_parent_map(nodes, base)

         if base is False:
-
-
-            log_performance(
-
+            perf_tracker.end_lineage()
+            perf_tracker.set_total_nodes(len(nodes))
+            log_performance("model lineage", perf_tracker.to_dict())
+            perf_tracker.reset()

         return dict(
             parent_map=parent_map,
@@ -739,18 +803,43 @@ class DbtAdapter(BaseAdapter):
     def _get_lineage_diff_cached(self, cache_key) -> LineageDiff:
         base = self.get_lineage(base=True)
         current = self.get_lineage(base=False)
-        keys = {
-            *base.get('nodes', {}).keys(),
-            *current.get('nodes', {}).keys()
-        }

-
-
-
+        modified_nodes = self.select_nodes(select="state:modified")
+        diff = {}
+        for node_id in modified_nodes:
+            base_node = base.get("nodes", {}).get(node_id)
+            curr_node = current.get("nodes", {}).get(node_id)
+            if base_node and curr_node:
+                diff[node_id] = NodeDiff(change_status="modified")
+            elif base_node:
+                diff[node_id] = NodeDiff(change_status="removed")
+            elif curr_node:
+                diff[node_id] = NodeDiff(change_status="added")
+
+        return LineageDiff(
+            base=base,
+            current=current,
+            diff=diff,
+        )
+
+    @lru_cache(maxsize=128)
+    def get_change_analysis_cached(self, node_id: str):
+        breaking_perf_tracker = BreakingPerformanceTracking()
+        lineage_diff = self.get_lineage_diff()
+        diff = lineage_diff.diff
+
+        if node_id not in diff or diff[node_id].change_status != "modified":
+            return diff.get(node_id)
+
+        breaking_perf_tracker.increment_modified_nodes()
+        breaking_perf_tracker.start_lineage_diff()
+
+        base = lineage_diff.base
+        current = lineage_diff.current

         base_manifest = as_manifest(self.get_manifest(True))
         curr_manifest = as_manifest(self.get_manifest(False))
-
+        breaking_perf_tracker.record_checkpoint("manifest")

         def ref_func(*args):
             if len(args) == 1:
@@ -762,7 +851,7 @@ class DbtAdapter(BaseAdapter):
             return node

         def source_func(source_name, table_name):
-            source_name = source_name.replace(
+            source_name = source_name.replace("-", "_")
             return f"__{source_name}__{table_name}"

         jinja_context = dict(
@@ -770,305 +859,534 @@ class DbtAdapter(BaseAdapter):
             source=source_func,
         )

-
-
-
-
-            curr_node
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-                    change = parse_change_category(
-                        base_sql,
-                        curr_sql,
-                        old_schema=base_schema,
-                        new_schema=curr_schema,
-                        dialect=dialect,
-                        perf_tracking=perf_tracking,
-                    )
-
-                    # Make sure that the case of the column names are the same
-                    changed_columns = {
-                        column.lower(): change_status
-                        for column, change_status in (change.columns or {}).items()
-                    }
-                    changed_columns_names = set(changed_columns)
-                    changed_columns_final = {}
-
-                    base_columns = base_node.get('columns') or {}
-                    curr_columns = curr_node.get('columns') or {}
-                    columns_names = set(base_columns) | set(curr_columns)
-
-                    for column_name in columns_names:
-                        if column_name.lower() in changed_columns_names:
-                            changed_columns_final[column_name] = changed_columns[column_name.lower()]
-
-                    change.columns = changed_columns_final
-                except Exception:
-                    change = NodeChange(category='unknown')
-
-                diff[key] = NodeDiff(change_status='modified', change=change)
-            elif base_node:
-                diff[key] = NodeDiff(change_status='removed')
-            elif curr_node:
-                diff[key] = NodeDiff(change_status='added')
-
-        perf_tracking.end_lineage_diff()
-        log_performance('model lineage diff', perf_tracking.to_dict())
+        base_node = base.get("nodes", {}).get(node_id)
+        curr_node = current.get("nodes", {}).get(node_id)
+        change = NodeChange(category="unknown")
+        if (
+            curr_node.get("resource_type") in ["model", "snapshot"]
+            and curr_node.get("raw_code") is not None
+            and base_node.get("raw_code") is not None
+        ):
+            try:
+
+                def _get_schema(lineage):
+                    schema = {}
+                    nodes = lineage["nodes"]
+                    parent_list = lineage["parent_map"].get(node_id, [])
+                    for parent_id in parent_list:
+                        parent_node = nodes.get(parent_id)
+                        if parent_node is None:
+                            continue
+                        columns = parent_node.get("columns") or {}
+                        name = parent_node.get("name")
+                        if parent_node.get("resource_type") == "source":
+                            parts = parent_id.split(".")
+                            source = parts[2]
+                            table = parts[3]
+                            source = source.replace("-", "_")
+                            name = f"__{source}__{table}"
+                        schema[name] = {name: column.get("type") for name, column in columns.items()}
+                    return schema
+
+                base_sql = self.generate_sql(
+                    base_node.get("raw_code"),
+                    context=jinja_context,
+                    provided_manifest=base_manifest,
+                )
+                curr_sql = self.generate_sql(
+                    curr_node.get("raw_code"),
+                    context=jinja_context,
+                    provided_manifest=curr_manifest,
+                )
+                base_schema = _get_schema(base)
+                curr_schema = _get_schema(current)
+                dialect = self.adapter.connections.TYPE
+                if curr_manifest.metadata.adapter_type is not None:
+                    dialect = curr_manifest.metadata.adapter_type
+
+                change = parse_change_category(
+                    base_sql,
+                    curr_sql,
+                    old_schema=base_schema,
+                    new_schema=curr_schema,
+                    dialect=dialect,
+                    perf_tracking=breaking_perf_tracker,
+                )

-
-
-
-
+                # Make sure that the case of the column names are the same
+                changed_columns = {
+                    column.lower(): change_status for column, change_status in (change.columns or {}).items()
+                }
+                changed_columns_names = set(changed_columns)
+                changed_columns_final = {}
+
+                base_columns = base_node.get("columns") or {}
+                curr_columns = curr_node.get("columns") or {}
+                columns_names = set(base_columns) | set(curr_columns)
+
+                for column_name in columns_names:
+                    if column_name.lower() in changed_columns_names:
+                        changed_columns_final[column_name] = changed_columns[column_name.lower()]
+
+                change.columns = changed_columns_final
+            except Exception:
+                # TODO: telemetry
+                pass
+
+        breaking_perf_tracker.end_lineage_diff()
+        log_performance("change analysis per node", breaking_perf_tracker.to_dict())
+        breaking_perf_tracker.reset()
+        node_diff = diff.get(node_id)
+        node_diff.change = change
+        return node_diff
+
+    def get_cll(
+        self,
+        node_id: Optional[str] = None,
+        column: Optional[str] = None,
+        change_analysis: Optional[bool] = False,
+        no_cll: Optional[bool] = False,
+        no_upstream: Optional[bool] = False,
+        no_downstream: Optional[bool] = False,
+        no_filter: Optional[bool] = False,
+    ) -> CllData:
+        cll_tracker = LineagePerfTracker()
+        cll_tracker.set_params(
+            has_node=node_id is not None,
+            has_column=column is not None,
+            change_analysis=change_analysis,
+            no_cll=no_cll,
+            no_upstream=no_upstream,
+            no_downstream=no_downstream,
         )
-
-    def get_cll_by_node_id(self, node_id: str, base: Optional[bool] = False):
-        cll_tracker = CLLPerformanceTracking()
         cll_tracker.start_column_lineage()

-        manifest = self.curr_manifest
+        manifest = self.curr_manifest
         manifest_dict = manifest.to_dict()

-
-
-
-
+        # Find related model nodes
+        if node_id is not None:
+            cll_node_ids = {node_id}
+        else:
+            lineage_diff = self.get_lineage_diff()
+            cll_node_ids = set(lineage_diff.diff.keys())
+
+        cll_tracker.set_init_nodes(len(cll_node_ids))

-        node_manifest = self.get_lineage_nodes_metadata(base=base)
         nodes = {}
-
-
-
-
+        columns = {}
+        parent_map = {}
+        child_map = {}
+
+        if not no_upstream:
+            cll_node_ids = cll_node_ids.union(find_upstream(cll_node_ids, manifest_dict.get("parent_map")))
+        if not no_downstream:
+            cll_node_ids = cll_node_ids.union(find_downstream(cll_node_ids, manifest_dict.get("child_map")))
+
+        if not no_cll:
+            allowed_related_nodes = set()
+            for key in ["sources", "nodes", "exposures", "metrics"]:
+                attr = getattr(manifest, key)
+                allowed_related_nodes.update(set(attr.keys()))
+            if hasattr(manifest, "semantic_models"):
+                attr = getattr(manifest, "semantic_models")
+                allowed_related_nodes.update(set(attr.keys()))
+            for cll_node_id in cll_node_ids:
+                if cll_node_id not in allowed_related_nodes:
+                    continue
+                cll_data_one = deepcopy(self.get_cll_cached(cll_node_id, base=False))
+                cll_tracker.increment_cll_nodes()
+                if cll_data_one is None:
+                    continue
+
+                nodes[cll_node_id] = cll_data_one.nodes.get(cll_node_id)
+                node_diff = None
+                if change_analysis:
+                    node_diff = self.get_change_analysis_cached(cll_node_id)
+                    cll_tracker.increment_change_analysis_nodes()
+                if node_diff is not None:
+                    nodes[cll_node_id].change_status = node_diff.change_status
+                    if node_diff.change is not None:
+                        nodes[cll_node_id].change_category = node_diff.change.category
+                for c_id, c in cll_data_one.columns.items():
+                    columns[c_id] = c
+                    if node_diff is not None:
+                        if node_diff.change_status == "added":
+                            c.change_status = "added"
+                        elif node_diff.change_status == "removed":
+                            c.change_status = "removed"
+                        elif node_diff.change is not None and node_diff.change.columns is not None:
+                            column_diff = node_diff.change.columns.get(c.name)
+                            if column_diff:
+                                c.change_status = column_diff
+
+                for p_id, parents in cll_data_one.parent_map.items():
+                    parent_map[p_id] = parents
+        else:
+            for cll_node_id in cll_node_ids:
+                cll_node = None
+                cll_node_columns: Dict[str, CllColumn] = {}
+
+                if cll_node_id in manifest.sources:
+                    cll_node = CllNode.build_cll_node(manifest, "sources", cll_node_id)
+                    if self.curr_catalog and cll_node_id in self.curr_catalog.sources:
+                        cll_node_columns = {
+                            column.name: CllColumn(
+                                id=f"{cll_node_id}_{column.name}",
+                                table_id=cll_node_id,
+                                name=column.name,
+                                type=column.type,
+                            )
+                            for column in self.curr_catalog.sources[cll_node_id].columns.values()
+                        }
+                elif cll_node_id in manifest.nodes:
+                    cll_node = CllNode.build_cll_node(manifest, "nodes", cll_node_id)
+                    if self.curr_catalog and cll_node_id in self.curr_catalog.nodes:
+                        cll_node_columns = {
+                            column.name: CllColumn(
+                                id=f"{cll_node_id}_{column.name}",
+                                table_id=cll_node_id,
+                                name=column.name,
+                                type=column.type,
+                            )
+                            for column in self.curr_catalog.nodes[cll_node_id].columns.values()
+                        }
+                elif cll_node_id in manifest.exposures:
+                    cll_node = CllNode.build_cll_node(manifest, "exposures", cll_node_id)
+                elif hasattr(manifest, "semantic_models") and cll_node_id in manifest.semantic_models:
+                    cll_node = CllNode.build_cll_node(manifest, "semantic_models", cll_node_id)
+                elif cll_node_id in manifest.metrics:
+                    cll_node = CllNode.build_cll_node(manifest, "metrics", cll_node_id)
+
+                if not cll_node:
+                    continue
+                nodes[cll_node_id] = cll_node
+
+                node_diff = None
+                if change_analysis:
+                    node_diff = self.get_change_analysis_cached(cll_node_id)
+                    cll_tracker.increment_change_analysis_nodes()
+                if node_diff is not None:
+                    cll_node.change_status = node_diff.change_status
+                    if node_diff.change is not None:
+                        cll_node.change_category = node_diff.change.category
+                for c, cll_column in cll_node_columns.items():
+                    cll_node.columns[c] = cll_column
+                    columns[cll_column.id] = cll_column
+                    if node_diff.change.columns and c in node_diff.change.columns:
+                        cll_column.change_status = node_diff.change.columns[c]
+
+                parent_map[cll_node_id] = manifest.parent_map.get(cll_node_id, [])
+
+        # build the child map
+        for parent_id, parents in parent_map.items():
+            for parent in parents:
+                if parent not in child_map:
+                    child_map[parent] = set()
+                child_map[parent].add(parent_id)
+
+        # Find the anchor nodes
+        anchor_node_ids = set()
+        extra_node_ids = set()
+        if node_id is None and column is None:
+            if change_analysis:
+                # If change analysis is requested, we need to find the nodes that have changes
+                lineage_diff = self.get_lineage_diff()
+                for nid, nd in lineage_diff.diff.items():
+                    if nd.change_status == "added":
+                        anchor_node_ids.add(nid)
+                        n = lineage_diff.current["nodes"].get(nid)
+                        n_columns = n.get("columns", {})
+                        for c in n_columns:
+                            anchor_node_ids.add(build_column_key(nid, c))
+                        continue
+                    if nd.change_status == "removed":
+                        extra_node_ids.add(nid)
+                        continue
+
+                    node_diff = self.get_change_analysis_cached(nid)
+                    if node_diff is not None and node_diff.change is not None:
+                        extra_node_ids.add(nid)
+                        if no_cll:
+                            if node_diff.change.category in ["breaking", "partial_breaking", "unknown"]:
+                                anchor_node_ids.add(nid)
+                        else:
+                            if node_diff.change.category in ["breaking", "unknown"]:
+                                anchor_node_ids.add(nid)
+                            if node_diff.change.columns is not None:
+                                for column_name in node_diff.change.columns:
+                                    anchor_node_ids.add(f"{nid}_{column_name}")
+            else:
+                lineage_diff = self.get_lineage_diff()
+                anchor_node_ids = lineage_diff.diff.keys()
+        elif node_id is not None and column is None:
+            if change_analysis:
+                # If change analysis is requested, we need to find the nodes that have changes
+                node_diff = self.get_change_analysis_cached(node_id)
+                if node_diff is not None and node_diff.change is not None:
+                    extra_node_ids.add(node_id)
+                    if no_cll:
+                        if node_diff.change.category in ["breaking", "partial_breaking", "unknown"]:
+                            anchor_node_ids.add(node_id)
+                    else:
+                        if node_diff.change.category in ["breaking", "unknown"]:
+                            anchor_node_ids.add(node_id)
+                        if node_diff.change.columns is not None:
+                            for column_name in node_diff.change.columns:
+                                anchor_node_ids.add(f"{node_id}_{column_name}")
+            else:
|
+
anchor_node_ids.add(node_id)
|
|
1139
|
+
else:
|
|
1140
|
+
anchor_node_ids.add(node_id)
|
|
1141
|
+
if not no_cll:
|
|
1142
|
+
node = nodes.get(node_id)
|
|
1143
|
+
if node:
|
|
1144
|
+
for column_name in node.columns:
|
|
1145
|
+
column_key = build_column_key(node_id, column_name)
|
|
1146
|
+
anchor_node_ids.add(column_key)
|
|
1147
|
+
else:
|
|
1148
|
+
anchor_node_ids.add(f"{node_id}_{column}")
|
|
1149
|
+
|
|
1150
|
+
cll_tracker.set_anchor_nodes(len(anchor_node_ids))
|
|
1151
|
+
result_node_ids = set(anchor_node_ids)
|
|
1152
|
+
if not no_upstream:
|
|
1153
|
+
result_node_ids = result_node_ids.union(find_upstream(anchor_node_ids, parent_map))
|
|
1154
|
+
if not no_downstream:
|
|
1155
|
+
result_node_ids = result_node_ids.union(find_downstream(anchor_node_ids, child_map))
|
|
1156
|
+
|
|
1157
|
+
# Filter the nodes and columns based on the anchor nodes
|
|
1158
|
+
if not no_filter:
|
|
1159
|
+
nodes = {k: v for k, v in nodes.items() if k in result_node_ids or k in extra_node_ids}
|
|
1160
|
+
columns = {k: v for k, v in columns.items() if k in result_node_ids or k in extra_node_ids}
|
|
1161
|
+
|
|
1162
|
+
for node in nodes.values():
|
|
1163
|
+
node.columns = {
|
|
1164
|
+
k: v for k, v in node.columns.items() if v.id in result_node_ids or v.id in extra_node_ids
|
|
1165
|
+
}
|
|
1166
|
+
|
|
1167
|
+
if change_analysis:
|
|
1168
|
+
node.impacted = node.id in result_node_ids
|
|
1169
|
+
|
|
1170
|
+
parent_map, child_map = filter_dependency_maps(parent_map, child_map, result_node_ids)
|
|
888
1171
|
|
|
889
1172
|
cll_tracker.end_column_lineage()
|
|
890
|
-
cll_tracker.set_total_nodes(len(nodes))
|
|
891
|
-
log_performance(
|
|
1173
|
+
cll_tracker.set_total_nodes(len(nodes) + len(columns))
|
|
1174
|
+
log_performance("column level lineage", cll_tracker.to_dict())
|
|
892
1175
|
cll_tracker.reset()
|
|
893
1176
|
|
|
894
|
-
return
|
|
1177
|
+
return CllData(
|
|
1178
|
+
nodes=nodes,
|
|
1179
|
+
columns=columns,
|
|
1180
|
+
parent_map=parent_map,
|
|
1181
|
+
child_map=child_map,
|
|
1182
|
+
)
|
|
895
1183
|
|
|
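Note: the block above merges each node's cached CLL result into shared nodes/columns/parent_map structures, derives child_map by inverting parent_map, and then grows the anchor set with find_upstream/find_downstream before filtering. A minimal sketch of that inversion and downstream walk, using plain dicts and sets rather than the package's own helpers (the find_downstream name and signature here are assumptions for illustration only):

from collections import deque
from typing import Dict, Iterable, Set


def invert_parent_map(parent_map: Dict[str, Iterable[str]]) -> Dict[str, Set[str]]:
    # child_map[parent] = set of ids that list `parent` among their parents
    child_map: Dict[str, Set[str]] = {}
    for child, parents in parent_map.items():
        for parent in parents:
            child_map.setdefault(parent, set()).add(child)
    return child_map


def find_downstream(anchors: Iterable[str], child_map: Dict[str, Set[str]]) -> Set[str]:
    # breadth-first walk from the anchor ids through child edges
    seen: Set[str] = set()
    queue = deque(anchors)
    while queue:
        current = queue.popleft()
        for child in child_map.get(current, set()):
            if child not in seen:
                seen.add(child)
                queue.append(child)
    return seen


parent_map = {"model.b": ["model.a"], "model.c": ["model.b"]}
child_map = invert_parent_map(parent_map)
assert find_downstream(["model.a"], child_map) == {"model.b", "model.c"}
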
     @lru_cache(maxsize=128)
-    def get_cll_cached(self, node_id: str, base: Optional[bool] = False):
-
+    def get_cll_cached(self, node_id: str, base: Optional[bool] = False) -> Optional[CllData]:
+        cll_tracker = CLLPerformanceTracking()
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-            col
-
-
-
-        for cl in column_lineage.values():
-            for depend_on in cl.depends_on:
-                if depend_on.node.startswith('__'):
-                    for n in nodes.values():
-                        if n.get('resource_type') != 'source':
-                            continue
-                        # __source__table -> source.table
-                        source_table = depend_on.node.lstrip("_").replace("__", ".", 1).lower()
-                        if source_table in n.get('id'):
-                            depend_on.node = n.get('id')
-                            break
-                else:
-                    for n in nodes.values():
-                        if n.get('name') == depend_on.node.lower():
-                            depend_on.node = n.get('id')
-                            break
+        node, parent_list = self.get_cll_node(node_id, base=base)
+        if node is None:
+            return None
+
+        cll_tracker.set_total_nodes(1)
+        cll_tracker.start_column_lineage()
+
+        def _apply_all_columns(node: CllNode, transformation_type):
+            cll_data = CllData()
+            cll_data.nodes[node.id] = node
+            cll_data.parent_map[node.id] = set(parent_list)
+            for col in node.columns.values():
+                column_id = f"{node.id}_{col.name}"
+                col.transformation_type = transformation_type
+                cll_data.columns[column_id] = col
+                cll_data.parent_map[column_id] = set()
+            return cll_data
 
-        cll_tracker = CLLPerformanceTracking()
-        nodes = self.get_lineage_nodes_metadata(base=base)
         manifest = as_manifest(self.get_manifest(base))
-
-
-
+        catalog = self.curr_catalog if base is False else self.base_catalog
+        resource_type = node.resource_type
+        if resource_type not in {"model", "seed", "source", "snapshot"}:
+            return _apply_all_columns(node, "unknown")
 
-        if resource_type ==
-            _apply_all_columns(node,
-            return
+        if resource_type == "source" or resource_type == "seed":
+            return _apply_all_columns(node, "source")
 
-        if node.
-            _apply_all_columns(node,
-            return
+        if node.raw_code is None or self.is_python_model(node.id, base=base):
+            return _apply_all_columns(node, "unknown")
 
-
-
-            _apply_all_columns(node, 'source', [])
-            return
+        if node.name == "metricflow_time_spine":
+            return _apply_all_columns(node, "source")
 
-        if not node.
-
-
+        if not node.columns:
+            return _apply_all_columns(node, "unknown")
+
+        table_id_map = {}
 
         def ref_func(*args):
+            node_name: str = None
+            project_or_package: str = None
+
             if len(args) == 1:
-
-            elif len(args) > 1:
-                node = args[1]
+                node_name = args[0]
             else:
-
-
+                project_or_package = args[0]
+                node_name = args[1]
 
-
-
+            for key, n in manifest.nodes.items():
+                if n.name != node_name:
+                    continue
+                if project_or_package is not None and n.package_name != project_or_package:
+                    continue
+
+                # replace id "." to "_"
+                unique_id = n.unique_id
+                table_name = unique_id.replace(".", "_")
+                table_id_map[table_name.lower()] = unique_id
+                return table_name
+
+            raise ValueError(f"Cannot find node {node_name} in the manifest")
+
+        def source_func(source_name, name):
+            for key, n in manifest.sources.items():
+                if n.source_name != source_name:
+                    continue
+                if n.name != name:
+                    continue
 
-
+                # replace id "." to "_"
+                unique_id = n.unique_id
+                table_name = unique_id.replace(".", "_")
+                table_id_map[table_name.lower()] = unique_id
+                return table_name
+
+            raise ValueError(f"Cannot find source {source_name}.{name} in the manifest")
+
+        raw_code = node.raw_code
         jinja_context = dict(
             ref=ref_func,
             source=source_func,
         )
 
         schema = {}
-
-
-
-
-
-
-
-
-
-
-
-        schema[name] = {
-            name: column.get('type') for name, column in columns.items()
-        }
+        if catalog is not None:
+            for parent_id in parent_list:
+                table_name = parent_id.replace(".", "_")
+                columns = {}
+                if parent_id in catalog.nodes:
+                    for col_name, col_metadata in catalog.nodes[parent_id].columns.items():
+                        columns[col_name] = col_metadata.type
+                if parent_id in catalog.sources:
+                    for col_name, col_metadata in catalog.sources[parent_id].columns.items():
+                        columns[col_name] = col_metadata.type
+                schema[table_name] = columns
 
         try:
-            # provide a manifest to speedup and not pollute the manifest
             compiled_sql = self.generate_sql(raw_code, base=base, context=jinja_context, provided_manifest=manifest)
             dialect = self.adapter.type()
-            # find adapter type from the manifest, otherwise we use the adapter type from the adapter
             if self.get_manifest(base).metadata.adapter_type is not None:
                 dialect = self.get_manifest(base).metadata.adapter_type
-
+            m2c, c2c_map = cll(compiled_sql, schema=schema, dialect=dialect)
         except RecceException:
-            # TODO: provide parsing error message if needed
-            _apply_all_columns(node, 'unknown', [])
             cll_tracker.increment_sqlglot_error_nodes()
-            return
+            return _apply_all_columns(node, "unknown")
         except Exception:
-            _apply_all_columns(node, 'unknown', [])
             cll_tracker.increment_other_error_nodes()
-            return
-
-
+            return _apply_all_columns(node, "unknown")
+
+        # Add cll dependency to the node.
+        cll_data = CllData()
+        cll_data.nodes[node.id] = node
+        cll_data.columns = {f"{node.id}_{col.name}": col for col in node.columns.values()}
+
+        # parent map for node
+        depends_on = set(parent_list)
+        for d in m2c:
+            parent_key = f"{table_id_map[d.node.lower()]}_{d.column}"
+            depends_on.add(parent_key)
+        cll_data.parent_map[node_id] = depends_on
+
+        # parent map for columns
+        for name, column in node.columns.items():
+            depends_on = set()
+            column_id = f"{node.id}_{name}"
+            if name in c2c_map:
+                for d in c2c_map[name].depends_on:
+                    parent_key = f"{table_id_map[d.node.lower()]}_{d.column}"
+                    depends_on.add(parent_key)
+                column.transformation_type = c2c_map[name].transformation_type
+            cll_data.parent_map[column_id] = set(depends_on)
 
-
-
-
-
+        cll_tracker.end_column_lineage()
+        log_performance("column level lineage per node", cll_tracker.to_dict())
+        cll_tracker.reset()
+        return cll_data
 
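Note: in the rewritten get_cll_cached above, ref() and source() are replaced by stubs that return a flattened unique_id (dots turned into underscores) and record that name in table_id_map, so the table names appearing in the compiled SQL can be mapped back to manifest ids after the lineage pass. A rough standalone sketch of that idea, rendering a template with jinja2 directly and using made-up unique_ids rather than a real dbt manifest:

from jinja2 import Template

table_id_map = {}


def ref(name):
    # assume a manifest lookup resolved `name` to this unique_id
    unique_id = f"model.jaffle_shop.{name}"
    table_name = unique_id.replace(".", "_")
    table_id_map[table_name.lower()] = unique_id
    return table_name


def source(source_name, name):
    unique_id = f"source.jaffle_shop.{source_name}.{name}"
    table_name = unique_id.replace(".", "_")
    table_id_map[table_name.lower()] = unique_id
    return table_name


raw_code = "select o.id from {{ ref('orders') }} o join {{ source('raw', 'payments') }} p on o.id = p.order_id"
compiled = Template(raw_code).render(ref=ref, source=source)
# compiled now references model_jaffle_shop_orders / source_jaffle_shop_raw_payments,
# and table_id_map translates those names back to manifest unique_ids.
print(compiled)
print(table_id_map)
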
-
-    def get_lineage_nodes_metadata(self, base: Optional[bool] = False):
+    def get_cll_node(self, node_id: str, base: Optional[bool] = False) -> Tuple[Optional[CllNode], list[str]]:
         manifest = self.curr_manifest if base is False else self.base_manifest
         catalog = self.curr_catalog if base is False else self.base_catalog
-
+        parent_list = []
+        node = None
 
-
-
-
-
+        # model, seed, snapshot
+        if node_id in manifest.nodes:
+            found = manifest.nodes[node_id]
+            unique_id = found.unique_id
+            node = CllNode.build_cll_node(manifest, "nodes", node_id)
+            if hasattr(found.depends_on, "nodes"):
+                parent_list = found.depends_on.nodes
 
-            if
-
+            if catalog is not None and node is not None and unique_id in catalog.nodes:
+                columns = {}
+                for col_name, col_metadata in catalog.nodes[unique_id].columns.items():
+                    column_id = f"{unique_id}_{col_name}"
+                    col = CllColumn(id=column_id, name=col_name, table_id=unique_id, type=col_metadata.type)
+                    columns[col_name] = col
+                node.columns = columns
 
-
-
-
-
-
-
+        # source
+        if node_id in manifest.sources:
+            found = manifest.sources[node_id]
+            unique_id = found.unique_id
+            node = CllNode.build_cll_node(manifest, "sources", node_id)
+            parent_list = []
 
-            if catalog is not None and unique_id in catalog.
+            if catalog is not None and node is not None and unique_id in catalog.sources:
                 columns = {}
-                for col_name, col_metadata in catalog.
-
+                for col_name, col_metadata in catalog.sources[unique_id].columns.items():
+                    column_id = f"{unique_id}_{col_name}"
+                    col = CllColumn(id=column_id, name=col_name, table_id=unique_id, type=col_metadata.type)
                     columns[col_name] = col
-
+                node.columns = columns
 
-
-
+        # exposure
+        if node_id in manifest.exposures:
+            found = manifest.exposures[node_id]
+            node = CllNode.build_cll_node(manifest, "exposures", node_id)
+            if hasattr(found.depends_on, "nodes"):
+                parent_list = found.depends_on.nodes
 
-
-
-
-
-
+        if hasattr(manifest, "semantic_models") and node_id in manifest.semantic_models:
+            found = manifest.semantic_models[node_id]
+            node = CllNode.build_cll_node(manifest, "semantic_models", node_id)
+            if hasattr(found.depends_on, "nodes"):
+                parent_list = found.depends_on.nodes
 
-
-
-
-
-
-            }
-            for col_name, col_metadata in catalog.sources[unique_id].columns.items()
-            }
+        if node_id in manifest.metrics:
+            found = manifest.metrics[node_id]
+            node = CllNode.build_cll_node(manifest, "metrics", node_id)
+            if hasattr(found.depends_on, "nodes"):
+                parent_list = found.depends_on.nodes
 
-        return
+        return node, parent_list
 
     def get_manifests_by_id(self, unique_id: str):
         curr_manifest = self.get_manifest(base=False)
         base_manifest = self.get_manifest(base=True)
         if unique_id in curr_manifest.nodes.keys() or unique_id in base_manifest.nodes.keys():
             return {
-
-
+                "current": curr_manifest.nodes.get(unique_id),
+                "base": base_manifest.nodes.get(unique_id),
             }
         return None
 
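Note: get_cll_node above pulls column names and types from the dbt catalog (catalog.nodes / catalog.sources keyed by unique_id) and builds CllColumn entries whose ids follow the f"{unique_id}_{column_name}" convention. A small sketch of that keying scheme against a plain catalog.json-style dict (the data here is illustrative only, not taken from a real project):

import json

# shape mirrors dbt's catalog.json: nodes/sources keyed by unique_id, each with a columns map
catalog = {
    "nodes": {
        "model.jaffle_shop.orders": {
            "columns": {
                "order_id": {"type": "INTEGER"},
                "status": {"type": "TEXT"},
            }
        }
    },
    "sources": {},
}

unique_id = "model.jaffle_shop.orders"
columns = {}
for col_name, col_metadata in catalog["nodes"][unique_id]["columns"].items():
    column_id = f"{unique_id}_{col_name}"  # same key convention as the CllColumn ids above
    columns[col_name] = {"id": column_id, "type": col_metadata["type"]}

print(json.dumps(columns, indent=2))
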
@@ -1091,39 +1409,40 @@ class DbtAdapter(BaseAdapter):
         if self.base_path:
             self.artifacts_observer.schedule(event_handler, self.base_path, recursive=False)
         self.artifacts_observer.start()
-        logger.info(
+        logger.info("Start monitoring dbt artifacts")
 
     def stop_monitor_artifacts(self):
         if self.artifacts_files:
             self.artifacts_observer.stop()
             self.artifacts_observer.join()
-            logger.info(
+            logger.info("Stop monitoring artifacts")
 
     def start_monitor_base_env(self, callback: Callable = None):
-        target_base_dir = os.path.join(self.runtime_config.project_root,
+        target_base_dir = os.path.join(self.runtime_config.project_root, "target-base")
         base_env_files = {
-            os.path.join(target_base_dir,
-            os.path.join(target_base_dir,
+            os.path.join(target_base_dir, "manifest.json"),
+            os.path.join(target_base_dir, "catalog.json"),
         }
         event_handler = EnvironmentEventHandler(self.base_env_observer, base_env_files, callback=callback)
         self.base_env_observer.schedule(event_handler, self.runtime_config.project_root, recursive=True)
         self.base_env_observer.start()
-        logger.info(
+        logger.info("Start monitoring base environment")
 
     def stop_monitor_base_env(self):
        if self.base_env_observer.is_alive():
             self.base_env_observer.stop()
             self.base_env_observer.join()
-            logger.info(
-
-    def set_artifacts(
-
-
-
-
-
-
-
+            logger.info("Stop monitoring base environment")
+
+    def set_artifacts(
+        self,
+        base_manifest: WritableManifest,
+        curr_manifest: WritableManifest,
+        manifest: Manifest,
+        previous_manifest: Manifest,
+        base_catalog: CatalogArtifact,
+        curr_catalog: CatalogArtifact,
+    ):
         self.curr_manifest = curr_manifest
         self.base_manifest = base_manifest
         self.manifest = manifest
@@ -1132,7 +1451,7 @@ class DbtAdapter(BaseAdapter):
         self.previous_state = previous_state(
             Path(self.base_path),
             Path(self.runtime_config.target_path),
-            Path(self.runtime_config.project_root)
+            Path(self.runtime_config.project_root),
         )
         self.previous_state.manifest = previous_manifest
 
@@ -1154,19 +1473,22 @@ class DbtAdapter(BaseAdapter):
         # we capture the original manifest as base and only update the current
         target_type = os.path.basename(os.path.dirname(refresh_file_path))
         if self.target_path and target_type == os.path.basename(self.target_path):
-            if refresh_file_path.endswith(
+            if refresh_file_path.endswith("manifest.json"):
                 self.curr_manifest = load_manifest(path=refresh_file_path)
                 self.manifest = as_manifest(self.curr_manifest)
                 self.get_cll_cached.cache_clear()
-                self.
-            elif refresh_file_path.endswith(
+                self.get_change_analysis_cached.cache_clear()
+            elif refresh_file_path.endswith("catalog.json"):
                 self.curr_catalog = load_catalog(path=refresh_file_path)
-                self.
+                self.get_cll_cached.cache_clear()
+                self.get_change_analysis_cached.cache_clear()
         elif self.base_path and target_type == os.path.basename(self.base_path):
-            if refresh_file_path.endswith(
+            if refresh_file_path.endswith("manifest.json"):
                 self.base_manifest = load_manifest(path=refresh_file_path)
-
+                self.get_change_analysis_cached.cache_clear()
+            elif refresh_file_path.endswith("catalog.json"):
                 self.base_catalog = load_catalog(path=refresh_file_path)
+                self.get_change_analysis_cached.cache_clear()
 
     def create_relation(self, model, base=False):
         node = self.find_node_by_name(model, base)
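Note: the hunk above pairs the lru_cache-backed get_cll_cached / get_change_analysis_cached methods with explicit cache_clear() calls whenever a manifest.json or catalog.json is reloaded, so stale lineage is not served after an artifact refresh. A minimal standalone sketch of that invalidation pattern with functools.lru_cache (a free function here, not the adapter's methods):

from functools import lru_cache

artifacts = {"manifest_version": 1}


@lru_cache(maxsize=128)
def get_cll(node_id: str) -> str:
    # stand-in for an expensive column-level-lineage computation
    return f"{node_id}@manifest-v{artifacts['manifest_version']}"


print(get_cll("model.a"))  # computed: model.a@manifest-v1
print(get_cll("model.a"))  # served from the LRU cache

# simulate a manifest.json refresh: new artifacts must invalidate cached results
artifacts["manifest_version"] = 2
get_cll.cache_clear()
print(get_cll("model.a"))  # recomputed: model.a@manifest-v2
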
@@ -1180,18 +1502,22 @@ class DbtAdapter(BaseAdapter):
         select: Optional[str] = None,
         exclude: Optional[str] = None,
         packages: Optional[list[str]] = None,
-        view_mode: Optional[Literal[
+        view_mode: Optional[Literal["all", "changed_models"]] = None,
     ) -> Set[str]:
-        from dbt.graph import NodeSelector
-        from dbt.compilation import Compiler
-        from dbt.graph import parse_difference, SelectionIntersection, SelectionUnion
         import dbt.compilation
+        from dbt.compilation import Compiler
+        from dbt.graph import (
+            NodeSelector,
+            SelectionIntersection,
+            SelectionUnion,
+            parse_difference,
+        )
 
         select_list = [select] if select else None
         exclude_list = [exclude] if exclude else None
 
         def _parse_difference(include, exclude):
-            if dbt_version <
+            if dbt_version < "v1.8":
                 return parse_difference(include, exclude, "eager")
             else:
                 return parse_difference(include, exclude)
@@ -1199,10 +1525,10 @@ class DbtAdapter(BaseAdapter):
         specs = [_parse_difference(select_list, exclude_list)]
 
         if packages is not None:
-            package_spec = SelectionUnion([_parse_difference([f
+            package_spec = SelectionUnion([_parse_difference([f"package:{p}"], None) for p in packages])
             specs.append(package_spec)
-        if view_mode and view_mode ==
-            specs.append(_parse_difference([
+        if view_mode and view_mode == "changed_models":
+            specs.append(_parse_difference(["1+state:modified+"], None))
         spec = SelectionIntersection(specs)
 
         manifest = Manifest()
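Note: select_nodes above combines the user's select/exclude spec, an optional union of package: selectors, and (for the changed-models view) a "1+state:modified+" spec into a SelectionIntersection. Conceptually each spec yields a set of node ids and the intersection of those sets is what gets selected; a plain-set sketch of that composition (the node ids and per-spec results are made up, and this bypasses dbt's graph machinery entirely):

# nodes matched by each criterion (illustrative ids only)
select_spec = {"model.a", "model.b", "model.c"}        # --select
package_specs = [{"model.a", "model.b"}, {"model.d"}]  # package:pkg1, package:pkg2
changed_models_spec = {"model.b", "model.d"}           # "1+state:modified+"

# SelectionUnion over the package specs
package_union = set().union(*package_specs)

# SelectionIntersection over all active specs
selected = select_spec & package_union & changed_models_spec
print(selected)  # {'model.b'}
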
@@ -1215,8 +1541,8 @@ class DbtAdapter(BaseAdapter):
         for node_id, node in manifest_prev.nodes.items():
             if node_id not in manifest.nodes:
                 node_dict = node.to_dict()
-                if
-                    node_dict[
+                if "raw_code" in node_dict:
+                    node_dict["raw_code"] = "__removed__"
                 node_class = type(node)
                 removed_node = node_class.from_dict(node_dict)
                 manifest.nodes[node_id] = removed_node
@@ -1225,8 +1551,11 @@ class DbtAdapter(BaseAdapter):
         manifest.sources = {**manifest_prev.sources, **manifest_curr.sources}
         manifest.exposures = {**manifest_prev.exposures, **manifest_curr.exposures}
         manifest.metrics = {**manifest_prev.metrics, **manifest_curr.metrics}
-        if hasattr(manifest_prev,
-            manifest.semantic_models = {
+        if hasattr(manifest_prev, "semantic_models"):
+            manifest.semantic_models = {
+                **manifest_prev.semantic_models,
+                **manifest_curr.semantic_models,
+            }
 
         compiler = Compiler(self.runtime_config)
         # disable to print compile states
@@ -1241,28 +1570,28 @@ class DbtAdapter(BaseAdapter):
         return selector.get_selected(spec)
 
     def export_artifacts(self) -> ArtifactsRoot:
-
+        """
         Export the artifacts from the current state
-
+        """
         artifacts = ArtifactsRoot()
 
         def _load_artifact(artifact):
             return artifact.to_dict() if artifact else None
 
         artifacts.base = {
-
-
+            "manifest": _load_artifact(self.base_manifest),
+            "catalog": _load_artifact(self.base_catalog),
         }
         artifacts.current = {
-
-
+            "manifest": _load_artifact(self.curr_manifest),
+            "catalog": _load_artifact(self.curr_catalog),
         }
         return artifacts
 
     def export_artifacts_from_file(self) -> ArtifactsRoot:
-
+        """
         Export the artifacts from the state file. This is the old implementation
-
+        """
         artifacts = ArtifactsRoot()
         target_path = self.runtime_config.target_path
         target_base_path = self.base_path
@@ -1271,18 +1600,18 @@ class DbtAdapter(BaseAdapter):
             if not os.path.isfile(path):
                 return None
 
-            with open(path,
+            with open(path, "r", encoding="utf-8") as f:
                 json_content = f.read()
                 return json.loads(json_content)
 
         project_root = self.runtime_config.project_root
         artifacts.base = {
-
-
+            "manifest": _load_artifact(os.path.join(project_root, target_base_path, "manifest.json")),
+            "catalog": _load_artifact(os.path.join(project_root, target_base_path, "catalog.json")),
        }
         artifacts.current = {
-
-
+            "manifest": _load_artifact(os.path.join(project_root, target_path, "manifest.json")),
+            "catalog": _load_artifact(os.path.join(project_root, target_path, "catalog.json")),
        }
         return artifacts
 
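Note: export_artifacts_from_file above simply reads manifest.json and catalog.json from the current target directory and the base (target-base style) directory and packs them into the base/current slots of an ArtifactsRoot. A bare-bones sketch of that file-layout handling with only the standard library (the directory names and the plain-dict result shape are assumptions for illustration, not the package's ArtifactsRoot type):

import json
import os
from typing import Optional


def load_artifact(path: str) -> Optional[dict]:
    # returns None when the artifact has not been generated yet
    if not os.path.isfile(path):
        return None
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)


def export_artifacts_from_dirs(project_root: str, target_path: str = "target", base_path: str = "target-base") -> dict:
    return {
        "base": {
            "manifest": load_artifact(os.path.join(project_root, base_path, "manifest.json")),
            "catalog": load_artifact(os.path.join(project_root, base_path, "catalog.json")),
        },
        "current": {
            "manifest": load_artifact(os.path.join(project_root, target_path, "manifest.json")),
            "catalog": load_artifact(os.path.join(project_root, target_path, "catalog.json")),
        },
    }


artifacts = export_artifacts_from_dirs(".")
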
@@ -1290,7 +1619,7 @@ class DbtAdapter(BaseAdapter):
         # Merge the artifacts from the state file or cloud
         def _select_artifact(
             original: Union[WritableManifest, CatalogArtifact],
-            new: Union[WritableManifest, CatalogArtifact]
+            new: Union[WritableManifest, CatalogArtifact],
         ):
             if merge:
                 if not original:
@@ -1301,16 +1630,16 @@ class DbtAdapter(BaseAdapter):
             else:
                 return new
 
-        self.base_manifest = _select_artifact(self.base_manifest, load_manifest(data=artifacts.base.get(
-        self.curr_manifest = _select_artifact(self.curr_manifest, load_manifest(data=artifacts.current.get(
-        self.base_catalog = _select_artifact(self.base_catalog, load_catalog(data=artifacts.base.get(
-        self.curr_catalog = _select_artifact(self.curr_catalog, load_catalog(data=artifacts.current.get(
+        self.base_manifest = _select_artifact(self.base_manifest, load_manifest(data=artifacts.base.get("manifest")))
+        self.curr_manifest = _select_artifact(self.curr_manifest, load_manifest(data=artifacts.current.get("manifest")))
+        self.base_catalog = _select_artifact(self.base_catalog, load_catalog(data=artifacts.base.get("catalog")))
+        self.curr_catalog = _select_artifact(self.curr_catalog, load_catalog(data=artifacts.current.get("catalog")))
 
         self.manifest = as_manifest(self.curr_manifest)
         self.previous_state = previous_state(
             Path(self.base_path),
             Path(self.runtime_config.target_path),
-            Path(self.runtime_config.project_root)
+            Path(self.runtime_config.project_root),
         )
         self.previous_state.manifest = as_manifest(self.base_manifest)
 
@@ -1326,7 +1655,8 @@ class DbtAdapter(BaseAdapter):
 
         if not self.curr_manifest or not self.base_manifest:
             raise Exception(
-
+                "No enough dbt artifacts in the state file. Please use the latest recce to generate the recce state"
+            )
 
     @contextmanager
     def connection_named(self, name: str) -> Iterator[None]: