recce-nightly 0.62.0.20250417__py3-none-any.whl → 1.30.0.20251221__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of recce-nightly might be problematic. Click here for more details.
- recce/VERSION +1 -1
- recce/__init__.py +27 -22
- recce/adapter/base.py +11 -14
- recce/adapter/dbt_adapter/__init__.py +845 -461
- recce/adapter/dbt_adapter/dbt_version.py +3 -0
- recce/adapter/sqlmesh_adapter.py +24 -35
- recce/apis/check_api.py +59 -42
- recce/apis/check_events_api.py +353 -0
- recce/apis/check_func.py +41 -35
- recce/apis/run_api.py +25 -19
- recce/apis/run_func.py +64 -25
- recce/artifact.py +119 -51
- recce/cli.py +1301 -324
- recce/config.py +43 -34
- recce/connect_to_cloud.py +138 -0
- recce/core.py +55 -47
- recce/data/404/index.html +2 -0
- recce/data/404.html +2 -1
- recce/data/__next.@lineage.!KHNsb3Qp.__PAGE__.txt +7 -0
- recce/data/__next.@lineage.!KHNsb3Qp.txt +4 -0
- recce/data/__next.__PAGE__.txt +6 -0
- recce/data/__next._full.txt +32 -0
- recce/data/__next._head.txt +8 -0
- recce/data/__next._index.txt +14 -0
- recce/data/__next._tree.txt +8 -0
- recce/data/_next/static/chunks/025a7e3e3f9f40ae.js +1 -0
- recce/data/_next/static/chunks/0ce56d67ef5779ca.js +4 -0
- recce/data/_next/static/chunks/1a6a78780155dac7.js +48 -0
- recce/data/_next/static/chunks/1de8485918b9182a.css +2 -0
- recce/data/_next/static/chunks/1e4b1b50d1e34993.js +1 -0
- recce/data/_next/static/chunks/206d5d181e4c738e.js +1 -0
- recce/data/_next/static/chunks/2c357efc34c5b859.js +25 -0
- recce/data/_next/static/chunks/2e9d95d2d48c479c.js +1 -0
- recce/data/_next/static/chunks/2f016dc4a3edad2e.js +2 -0
- recce/data/_next/static/chunks/313251962d698f7c.js +1 -0
- recce/data/_next/static/chunks/3a9f021f38eb5574.css +1 -0
- recce/data/_next/static/chunks/40079da8d2b8f651.js +1 -0
- recce/data/_next/static/chunks/4599182bffb64661.js +38 -0
- recce/data/_next/static/chunks/4e62f6e184173580.js +1 -0
- recce/data/_next/static/chunks/5c4dfb0d09eaa401.js +1 -0
- recce/data/_next/static/chunks/69e4f06ccfdfc3ac.js +1 -0
- recce/data/_next/static/chunks/6b206cb4707d6bee.js +1 -0
- recce/data/_next/static/chunks/6d8557f062aa4386.css +1 -0
- recce/data/_next/static/chunks/7fbe3650bd83b6b5.js +1 -0
- recce/data/_next/static/chunks/83fa823a825674f6.js +1 -0
- recce/data/_next/static/chunks/848a6c9b5f55f7ed.js +1 -0
- recce/data/_next/static/chunks/859462b0858aef88.css +2 -0
- recce/data/_next/static/chunks/923964f18c87d0f1.css +1 -0
- recce/data/_next/static/chunks/939390f911895d7c.js +48 -0
- recce/data/_next/static/chunks/99a9817237a07f43.js +1 -0
- recce/data/_next/static/chunks/9fed8b4b2b924054.js +5 -0
- recce/data/_next/static/chunks/b6949f6c5892110c.js +1 -0
- recce/data/_next/static/chunks/b851a1d3f8149828.js +1 -0
- recce/data/_next/static/chunks/c734f9ad957de0b4.js +1 -0
- recce/data/_next/static/chunks/cdde321b0ec75717.js +2 -0
- recce/data/_next/static/chunks/d0f91117d77ff844.css +1 -0
- recce/data/_next/static/chunks/d6c8667911c2500f.js +1 -0
- recce/data/_next/static/chunks/da8dab68c02752cf.js +74 -0
- recce/data/_next/static/chunks/dc074049c9d12d97.js +109 -0
- recce/data/_next/static/chunks/ee7f1a8227342421.js +1 -0
- recce/data/_next/static/chunks/fa2f4e56c2fccc73.js +1 -0
- recce/data/_next/static/chunks/turbopack-1fad664f62979b93.js +3 -0
- recce/data/_next/static/media/favicon.a8d38d84.ico +0 -0
- recce/data/_next/static/media/montserrat-cyrillic-800-normal.d80d830d.woff2 +0 -0
- recce/data/_next/static/media/montserrat-cyrillic-800-normal.f9d58125.woff +0 -0
- recce/data/_next/static/media/montserrat-cyrillic-ext-800-normal.076c2a93.woff2 +0 -0
- recce/data/_next/static/media/montserrat-cyrillic-ext-800-normal.a4fa76b5.woff +0 -0
- recce/data/_next/static/media/montserrat-latin-800-normal.cde454cc.woff2 +0 -0
- recce/data/_next/static/media/montserrat-latin-800-normal.d5761935.woff +0 -0
- recce/data/_next/static/media/montserrat-latin-ext-800-normal.40ec0659.woff2 +0 -0
- recce/data/_next/static/media/montserrat-latin-ext-800-normal.b671449b.woff +0 -0
- recce/data/_next/static/media/montserrat-vietnamese-800-normal.9f7b8541.woff +0 -0
- recce/data/_next/static/media/montserrat-vietnamese-800-normal.f9eb854e.woff2 +0 -0
- recce/data/_next/static/nX-Uz0AH6Tc6hIQUFGqaB/_buildManifest.js +11 -0
- recce/data/_next/static/nX-Uz0AH6Tc6hIQUFGqaB/_clientMiddlewareManifest.json +1 -0
- recce/data/_not-found/__next._full.txt +24 -0
- recce/data/_not-found/__next._head.txt +8 -0
- recce/data/_not-found/__next._index.txt +13 -0
- recce/data/_not-found/__next._not-found.__PAGE__.txt +5 -0
- recce/data/_not-found/__next._not-found.txt +4 -0
- recce/data/_not-found/__next._tree.txt +6 -0
- recce/data/_not-found/index.html +2 -0
- recce/data/_not-found/index.txt +24 -0
- recce/data/auth_callback.html +68 -0
- recce/data/checks/__next.@lineage.__DEFAULT__.txt +7 -0
- recce/data/checks/__next._full.txt +39 -0
- recce/data/checks/__next._head.txt +8 -0
- recce/data/checks/__next._index.txt +14 -0
- recce/data/checks/__next._tree.txt +8 -0
- recce/data/checks/__next.checks.__PAGE__.txt +10 -0
- recce/data/checks/__next.checks.txt +4 -0
- recce/data/checks/index.html +2 -0
- recce/data/checks/index.txt +39 -0
- recce/data/imgs/reload-image.svg +4 -0
- recce/data/index.html +2 -27
- recce/data/index.txt +32 -7
- recce/data/lineage/__next.@lineage.__DEFAULT__.txt +7 -0
- recce/data/lineage/__next._full.txt +39 -0
- recce/data/lineage/__next._head.txt +8 -0
- recce/data/lineage/__next._index.txt +14 -0
- recce/data/lineage/__next._tree.txt +8 -0
- recce/data/lineage/__next.lineage.__PAGE__.txt +10 -0
- recce/data/lineage/__next.lineage.txt +4 -0
- recce/data/lineage/index.html +2 -0
- recce/data/lineage/index.txt +39 -0
- recce/data/query/__next.@lineage.__DEFAULT__.txt +7 -0
- recce/data/query/__next._full.txt +37 -0
- recce/data/query/__next._head.txt +8 -0
- recce/data/query/__next._index.txt +14 -0
- recce/data/query/__next._tree.txt +8 -0
- recce/data/query/__next.query.__PAGE__.txt +9 -0
- recce/data/query/__next.query.txt +4 -0
- recce/data/query/index.html +2 -0
- recce/data/query/index.txt +37 -0
- recce/diff.py +6 -12
- recce/event/CONFIG.bak +1 -0
- recce/event/__init__.py +86 -74
- recce/event/collector.py +33 -22
- recce/event/track.py +49 -27
- recce/exceptions.py +1 -1
- recce/git.py +7 -7
- recce/github.py +57 -53
- recce/mcp_server.py +725 -0
- recce/models/__init__.py +4 -1
- recce/models/check.py +438 -21
- recce/models/run.py +1 -0
- recce/models/types.py +134 -28
- recce/pull_request.py +27 -25
- recce/run.py +179 -122
- recce/server.py +394 -104
- recce/state/__init__.py +31 -0
- recce/state/cloud.py +644 -0
- recce/state/const.py +26 -0
- recce/state/local.py +56 -0
- recce/state/state.py +119 -0
- recce/state/state_loader.py +174 -0
- recce/summary.py +196 -149
- recce/tasks/__init__.py +19 -3
- recce/tasks/core.py +11 -13
- recce/tasks/dataframe.py +82 -18
- recce/tasks/histogram.py +69 -34
- recce/tasks/lineage.py +2 -2
- recce/tasks/profile.py +152 -86
- recce/tasks/query.py +180 -89
- recce/tasks/rowcount.py +37 -31
- recce/tasks/schema.py +18 -15
- recce/tasks/top_k.py +35 -35
- recce/tasks/utils.py +147 -0
- recce/tasks/valuediff.py +247 -155
- recce/util/__init__.py +3 -0
- recce/util/api_token.py +80 -0
- recce/util/breaking.py +105 -100
- recce/util/cll.py +274 -219
- recce/util/cloud/__init__.py +15 -0
- recce/util/cloud/base.py +115 -0
- recce/util/cloud/check_events.py +190 -0
- recce/util/cloud/checks.py +242 -0
- recce/util/io.py +22 -17
- recce/util/lineage.py +65 -16
- recce/util/logger.py +1 -1
- recce/util/onboarding_state.py +45 -0
- recce/util/perf_tracking.py +85 -0
- recce/util/recce_cloud.py +347 -72
- recce/util/singleton.py +4 -4
- recce/util/startup_perf.py +121 -0
- recce/yaml/__init__.py +7 -10
- recce_nightly-1.30.0.20251221.dist-info/METADATA +195 -0
- recce_nightly-1.30.0.20251221.dist-info/RECORD +183 -0
- {recce_nightly-0.62.0.20250417.dist-info → recce_nightly-1.30.0.20251221.dist-info}/WHEEL +1 -2
- recce/data/_next/static/chunks/1f229bf6-d9fe92e56db8d93b.js +0 -1
- recce/data/_next/static/chunks/29e3cc0d-8c150e37dff9631b.js +0 -1
- recce/data/_next/static/chunks/36e1c10d-bb0210cbd6573a8d.js +0 -1
- recce/data/_next/static/chunks/3998a672-eaad84bdd88cc73e.js +0 -1
- recce/data/_next/static/chunks/450c323b-1bb5db526e54435a.js +0 -1
- recce/data/_next/static/chunks/47d8844f-79a1b53c66a7d7ec.js +0 -1
- recce/data/_next/static/chunks/500-e51c92a025a51234.js +0 -65
- recce/data/_next/static/chunks/6dc81886-c94b9b91bc2c3caf.js +0 -1
- recce/data/_next/static/chunks/700-3b65fc3666820d00.js +0 -2
- recce/data/_next/static/chunks/7a8a3e83-d7fa409d97b38b2b.js +0 -1
- recce/data/_next/static/chunks/7f27ae6c-413f6b869a04183a.js +0 -1
- recce/data/_next/static/chunks/9746af58-d74bef4d03eea6ab.js +0 -1
- recce/data/_next/static/chunks/a30376cd-7d806e1602f2dc3a.js +0 -1
- recce/data/_next/static/chunks/app/_not-found/page-8a886fa0855c3105.js +0 -1
- recce/data/_next/static/chunks/app/layout-9102e22cb73f74d6.js +0 -1
- recce/data/_next/static/chunks/app/page-9adc25782272ed2e.js +0 -1
- recce/data/_next/static/chunks/b63b1b3f-7395c74e11a14e95.js +0 -1
- recce/data/_next/static/chunks/c132bf7d-8102037f9ccf372a.js +0 -1
- recce/data/_next/static/chunks/c1ceaa8b-a1e442154d23515e.js +0 -1
- recce/data/_next/static/chunks/cd9f8d63-cf0d5a7b0f7a92e8.js +0 -54
- recce/data/_next/static/chunks/ce84277d-f42c2c58049cea2d.js +0 -1
- recce/data/_next/static/chunks/e24bf851-0f8cbc99656833e7.js +0 -1
- recce/data/_next/static/chunks/fee69bc6-f17d36c080742e74.js +0 -1
- recce/data/_next/static/chunks/framework-ded83d71b51ce901.js +0 -1
- recce/data/_next/static/chunks/main-a0859f1f36d0aa6c.js +0 -1
- recce/data/_next/static/chunks/main-app-0225a2255968e566.js +0 -1
- recce/data/_next/static/chunks/pages/_app-d5672bf3d8b6371b.js +0 -1
- recce/data/_next/static/chunks/pages/_error-ed75be3f25588548.js +0 -1
- recce/data/_next/static/chunks/webpack-567d72f0bc0820d5.js +0 -1
- recce/data/_next/static/css/c9ecb46a4b21c126.css +0 -14
- recce/data/_next/static/media/montserrat-cyrillic-800-normal.22628180.woff2 +0 -0
- recce/data/_next/static/media/montserrat-cyrillic-800-normal.31d693bb.woff +0 -0
- recce/data/_next/static/media/montserrat-cyrillic-ext-800-normal.7e2c1e62.woff +0 -0
- recce/data/_next/static/media/montserrat-cyrillic-ext-800-normal.94a63aea.woff2 +0 -0
- recce/data/_next/static/media/montserrat-latin-800-normal.6f8fa298.woff2 +0 -0
- recce/data/_next/static/media/montserrat-latin-800-normal.97e20d5e.woff +0 -0
- recce/data/_next/static/media/montserrat-latin-ext-800-normal.013b84f9.woff2 +0 -0
- recce/data/_next/static/media/montserrat-latin-ext-800-normal.aff52ab0.woff +0 -0
- recce/data/_next/static/media/montserrat-vietnamese-800-normal.5f21869b.woff +0 -0
- recce/data/_next/static/media/montserrat-vietnamese-800-normal.c0035377.woff2 +0 -0
- recce/data/_next/static/qiyFlux77VkhxiceAJe_F/_buildManifest.js +0 -1
- recce/state.py +0 -753
- recce_nightly-0.62.0.20250417.dist-info/METADATA +0 -311
- recce_nightly-0.62.0.20250417.dist-info/RECORD +0 -139
- recce_nightly-0.62.0.20250417.dist-info/top_level.txt +0 -2
- tests/__init__.py +0 -0
- tests/adapter/__init__.py +0 -0
- tests/adapter/dbt_adapter/__init__.py +0 -0
- tests/adapter/dbt_adapter/conftest.py +0 -13
- tests/adapter/dbt_adapter/dbt_test_helper.py +0 -283
- tests/adapter/dbt_adapter/test_dbt_adapter.py +0 -40
- tests/adapter/dbt_adapter/test_dbt_cll.py +0 -102
- tests/adapter/dbt_adapter/test_selector.py +0 -177
- tests/tasks/__init__.py +0 -0
- tests/tasks/conftest.py +0 -4
- tests/tasks/test_histogram.py +0 -137
- tests/tasks/test_lineage.py +0 -42
- tests/tasks/test_preset_checks.py +0 -50
- tests/tasks/test_profile.py +0 -73
- tests/tasks/test_query.py +0 -151
- tests/tasks/test_row_count.py +0 -116
- tests/tasks/test_schema.py +0 -99
- tests/tasks/test_top_k.py +0 -73
- tests/tasks/test_valuediff.py +0 -74
- tests/test_cli.py +0 -122
- tests/test_config.py +0 -45
- tests/test_core.py +0 -27
- tests/test_dbt.py +0 -36
- tests/test_pull_request.py +0 -130
- tests/test_server.py +0 -98
- tests/test_state.py +0 -123
- tests/test_summary.py +0 -57
- /recce/data/_next/static/chunks/{polyfills-42372ed130431b0a.js → a6dad97d9634a72d.js} +0 -0
- /recce/data/_next/static/{qiyFlux77VkhxiceAJe_F → nX-Uz0AH6Tc6hIQUFGqaB}/_ssgManifest.js +0 -0
- {recce_nightly-0.62.0.20250417.dist-info → recce_nightly-1.30.0.20251221.dist-info}/entry_points.txt +0 -0
- {recce_nightly-0.62.0.20250417.dist-info → recce_nightly-1.30.0.20251221.dist-info}/licenses/LICENSE +0 -0
|
@@ -8,14 +8,34 @@ from dataclasses import dataclass, fields
|
|
|
8
8
|
from errno import ENOENT
|
|
9
9
|
from functools import lru_cache
|
|
10
10
|
from pathlib import Path
|
|
11
|
-
from typing import
|
|
11
|
+
from typing import (
|
|
12
|
+
Any,
|
|
13
|
+
Callable,
|
|
14
|
+
Dict,
|
|
15
|
+
Iterator,
|
|
16
|
+
List,
|
|
17
|
+
Literal,
|
|
18
|
+
Optional,
|
|
19
|
+
Set,
|
|
20
|
+
Tuple,
|
|
21
|
+
Type,
|
|
22
|
+
Union,
|
|
23
|
+
)
|
|
12
24
|
|
|
13
25
|
from recce.event import log_performance
|
|
14
26
|
from recce.exceptions import RecceException
|
|
15
|
-
from recce.util.cll import
|
|
16
|
-
from recce.util.lineage import
|
|
27
|
+
from recce.util.cll import CLLPerformanceTracking, cll
|
|
28
|
+
from recce.util.lineage import (
|
|
29
|
+
build_column_key,
|
|
30
|
+
filter_dependency_maps,
|
|
31
|
+
find_downstream,
|
|
32
|
+
find_upstream,
|
|
33
|
+
)
|
|
34
|
+
from recce.util.perf_tracking import LineagePerfTracker
|
|
35
|
+
from recce.util.startup_perf import track_timing
|
|
36
|
+
|
|
17
37
|
from ...tasks.profile import ProfileTask
|
|
18
|
-
from ...util.breaking import
|
|
38
|
+
from ...util.breaking import BreakingPerformanceTracking, parse_change_category
|
|
19
39
|
|
|
20
40
|
try:
|
|
21
41
|
import agate
|
|
@@ -30,11 +50,30 @@ from watchdog.observers import Observer
|
|
|
30
50
|
|
|
31
51
|
from recce.adapter.base import BaseAdapter
|
|
32
52
|
from recce.state import ArtifactsRoot
|
|
33
|
-
|
|
53
|
+
|
|
34
54
|
from ...models import RunType
|
|
35
|
-
from ...models.types import
|
|
36
|
-
|
|
37
|
-
|
|
55
|
+
from ...models.types import (
|
|
56
|
+
CllColumn,
|
|
57
|
+
CllData,
|
|
58
|
+
CllNode,
|
|
59
|
+
LineageDiff,
|
|
60
|
+
NodeChange,
|
|
61
|
+
NodeDiff,
|
|
62
|
+
)
|
|
63
|
+
from ...tasks import (
|
|
64
|
+
HistogramDiffTask,
|
|
65
|
+
ProfileDiffTask,
|
|
66
|
+
QueryBaseTask,
|
|
67
|
+
QueryDiffTask,
|
|
68
|
+
QueryTask,
|
|
69
|
+
RowCountDiffTask,
|
|
70
|
+
RowCountTask,
|
|
71
|
+
Task,
|
|
72
|
+
TopKDiffTask,
|
|
73
|
+
ValueDiffDetailTask,
|
|
74
|
+
ValueDiffTask,
|
|
75
|
+
)
|
|
76
|
+
from .dbt_version import DbtVersion
|
|
38
77
|
|
|
39
78
|
dbt_supported_registry: Dict[RunType, Type[Task]] = {
|
|
40
79
|
RunType.QUERY: QueryTask,
|
|
@@ -56,7 +95,7 @@ get_adapter_orig = dbt.adapters.factory.get_adapter
|
|
|
56
95
|
|
|
57
96
|
|
|
58
97
|
def get_adapter(config):
|
|
59
|
-
if hasattr(config,
|
|
98
|
+
if hasattr(config, "adapter"):
|
|
60
99
|
return config.adapter
|
|
61
100
|
else:
|
|
62
101
|
return get_adapter_orig(config)
|
|
@@ -69,7 +108,12 @@ from dbt.adapters.base import Column # noqa: E402
|
|
|
69
108
|
from dbt.adapters.factory import get_adapter_class_by_name # noqa: E402
|
|
70
109
|
from dbt.adapters.sql import SQLAdapter # noqa: E402
|
|
71
110
|
from dbt.config.runtime import RuntimeConfig # noqa: E402
|
|
72
|
-
from dbt.contracts.graph.manifest import
|
|
111
|
+
from dbt.contracts.graph.manifest import ( # noqa: E402
|
|
112
|
+
MacroManifest,
|
|
113
|
+
Manifest,
|
|
114
|
+
ManifestMetadata,
|
|
115
|
+
WritableManifest,
|
|
116
|
+
)
|
|
73
117
|
from dbt.contracts.graph.nodes import ManifestNode # noqa: E402
|
|
74
118
|
from dbt.contracts.results import CatalogArtifact # noqa: E402
|
|
75
119
|
from dbt.flags import set_from_args # noqa: E402
|
|
@@ -78,7 +122,7 @@ from dbt.parser.sql import SqlBlockParser # noqa: E402
|
|
|
78
122
|
|
|
79
123
|
dbt_version = DbtVersion()
|
|
80
124
|
|
|
81
|
-
if dbt_version <
|
|
125
|
+
if dbt_version < "v1.8":
|
|
82
126
|
from dbt.contracts.connection import Connection
|
|
83
127
|
else:
|
|
84
128
|
from dbt.adapters.contracts.connection import Connection
|
|
@@ -86,19 +130,22 @@ else:
|
|
|
86
130
|
|
|
87
131
|
@contextmanager
|
|
88
132
|
def silence_no_nodes_warning():
|
|
89
|
-
if dbt_version >=
|
|
133
|
+
if dbt_version >= "v1.8":
|
|
90
134
|
from dbt.events.types import NoNodesForSelectionCriteria
|
|
91
135
|
from dbt_common.events.functions import WARN_ERROR_OPTIONS
|
|
136
|
+
|
|
92
137
|
WARN_ERROR_OPTIONS.silence.append(NoNodesForSelectionCriteria.__name__)
|
|
93
138
|
try:
|
|
94
139
|
yield
|
|
95
140
|
finally:
|
|
96
|
-
if dbt_version >=
|
|
141
|
+
if dbt_version >= "v1.8":
|
|
97
142
|
from dbt_common.events.functions import WARN_ERROR_OPTIONS
|
|
143
|
+
|
|
98
144
|
WARN_ERROR_OPTIONS.silence.pop()
|
|
99
145
|
|
|
100
146
|
|
|
101
|
-
logger = logging.getLogger(
|
|
147
|
+
logger = logging.getLogger("uvicorn")
|
|
148
|
+
MIN_DBT_NODE_COMPOSITION = 3
|
|
102
149
|
|
|
103
150
|
|
|
104
151
|
class ArtifactsEventHandler(FileSystemEventHandler):
|
|
@@ -147,24 +194,29 @@ class EnvironmentEventHandler(FileSystemEventHandler):
|
|
|
147
194
|
|
|
148
195
|
|
|
149
196
|
def merge_tables(tables: List[agate.Table]) -> agate.Table:
|
|
150
|
-
if dbt_version <
|
|
197
|
+
if dbt_version < "v1.8":
|
|
151
198
|
from dbt.clients.agate_helper import merge_tables
|
|
199
|
+
|
|
152
200
|
return merge_tables(tables)
|
|
153
201
|
else:
|
|
154
202
|
from dbt_common.clients.agate_helper import merge_tables
|
|
203
|
+
|
|
155
204
|
return merge_tables(tables)
|
|
156
205
|
|
|
157
206
|
|
|
158
207
|
def as_manifest(m: WritableManifest) -> Manifest:
|
|
159
|
-
if dbt_version <
|
|
208
|
+
if dbt_version < "v1.8":
|
|
160
209
|
data = m.__dict__
|
|
161
210
|
all_fields = set([x.name for x in fields(Manifest)])
|
|
162
211
|
new_data = {k: v for k, v in data.items() if k in all_fields}
|
|
163
212
|
return Manifest(**new_data)
|
|
164
213
|
else:
|
|
165
|
-
|
|
214
|
+
result = Manifest.from_writable_manifest(m)
|
|
215
|
+
result.metadata = ManifestMetadata(**m.metadata.__dict__)
|
|
216
|
+
return result
|
|
166
217
|
|
|
167
218
|
|
|
219
|
+
@track_timing(record_size=True)
|
|
168
220
|
def load_manifest(path: str = None, data: dict = None):
|
|
169
221
|
if path is not None:
|
|
170
222
|
if not os.path.isfile(path):
|
|
@@ -174,6 +226,7 @@ def load_manifest(path: str = None, data: dict = None):
|
|
|
174
226
|
return WritableManifest.upgrade_schema_version(data)
|
|
175
227
|
|
|
176
228
|
|
|
229
|
+
@track_timing(record_size=True)
|
|
177
230
|
def load_catalog(path: str = None, data: dict = None):
|
|
178
231
|
if path is not None:
|
|
179
232
|
if not os.path.isfile(path):
|
|
@@ -184,12 +237,13 @@ def load_catalog(path: str = None, data: dict = None):
|
|
|
184
237
|
|
|
185
238
|
|
|
186
239
|
def previous_state(state_path: Path, target_path: Path, project_root: Path) -> PreviousState:
|
|
187
|
-
if dbt_version <
|
|
240
|
+
if dbt_version < "v1.5.2":
|
|
188
241
|
return PreviousState(state_path, target_path)
|
|
189
242
|
else:
|
|
190
243
|
try:
|
|
191
244
|
# Overwrite the level_tag method temporarily to avoid the warning message
|
|
192
|
-
from dbt.events.types import
|
|
245
|
+
from dbt.events.types import EventLevel, WarnStateTargetEqual
|
|
246
|
+
|
|
193
247
|
original_level_tag_func = WarnStateTargetEqual.level_tag
|
|
194
248
|
WarnStateTargetEqual.level_tag = lambda x: EventLevel.DEBUG
|
|
195
249
|
except ImportError:
|
|
@@ -209,12 +263,12 @@ def previous_state(state_path: Path, target_path: Path, project_root: Path) -> P
|
|
|
209
263
|
def default_profiles_dir():
|
|
210
264
|
# Precedence: DBT_PROFILES_DIR > current working directory > ~/.dbt/
|
|
211
265
|
# https://docs.getdbt.com/docs/core/connect-data-platform/connection-profiles#advanced-customizing-a-profile-directory
|
|
212
|
-
if os.getenv(
|
|
213
|
-
return os.getenv(
|
|
214
|
-
elif os.path.exists(os.path.join(os.getcwd(),
|
|
266
|
+
if os.getenv("DBT_PROFILES_DIR"):
|
|
267
|
+
return os.getenv("DBT_PROFILES_DIR")
|
|
268
|
+
elif os.path.exists(os.path.join(os.getcwd(), "profiles.yml")):
|
|
215
269
|
return os.getcwd()
|
|
216
270
|
else:
|
|
217
|
-
return os.path.expanduser(
|
|
271
|
+
return os.path.expanduser("~/.dbt/")
|
|
218
272
|
|
|
219
273
|
|
|
220
274
|
@dataclass()
|
|
@@ -222,15 +276,16 @@ class DbtArgs:
|
|
|
222
276
|
"""
|
|
223
277
|
Used for RuntimeConfig.from_args
|
|
224
278
|
"""
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
279
|
+
|
|
280
|
+
threads: Optional[int] = (1,)
|
|
281
|
+
target: Optional[str] = (None,)
|
|
282
|
+
profiles_dir: Optional[str] = (None,)
|
|
283
|
+
project_dir: Optional[str] = (None,)
|
|
284
|
+
profile: Optional[str] = (None,)
|
|
285
|
+
target_path: Optional[str] = (None,)
|
|
231
286
|
project_only_flags: Optional[Dict[str, Any]] = None
|
|
232
287
|
which: Optional[str] = None
|
|
233
|
-
state_modified_compare_more_unrendered_values: Optional[bool] =
|
|
288
|
+
state_modified_compare_more_unrendered_values: Optional[bool] = True # new flag added since dbt v1.9
|
|
234
289
|
|
|
235
290
|
|
|
236
291
|
@dataclass
|
|
@@ -258,32 +313,18 @@ class DbtAdapter(BaseAdapter):
|
|
|
258
313
|
|
|
259
314
|
def support_tasks(self):
|
|
260
315
|
support_map = {run_type.value: True for run_type in dbt_supported_registry}
|
|
261
|
-
supported_dbt_packages = set([package.package_name for package in self.manifest.macros.values()])
|
|
262
|
-
|
|
263
|
-
if 'dbt_profiler' not in supported_dbt_packages:
|
|
264
|
-
support_map[RunType.PROFILE_DIFF.value] = False
|
|
265
|
-
support_map[RunType.PROFILE.value] = False
|
|
266
|
-
|
|
267
|
-
if 'audit_helper' not in supported_dbt_packages:
|
|
268
|
-
support_map[RunType.VALUE_DIFF.value] = False
|
|
269
|
-
support_map[RunType.VALUE_DIFF_DETAIL.value] = False
|
|
270
|
-
support_map['query_diff_with_primary_key'] = False
|
|
271
316
|
|
|
272
317
|
return support_map
|
|
273
318
|
|
|
274
319
|
@classmethod
|
|
275
|
-
def load(cls,
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
target = kwargs.get('target')
|
|
281
|
-
target_path = kwargs.get('target_path', 'target')
|
|
282
|
-
target_base_path = kwargs.get('target_base_path', 'target-base')
|
|
320
|
+
def load(cls, no_artifacts=False, review=False, **kwargs):
|
|
321
|
+
target = kwargs.get("target")
|
|
322
|
+
target_path = kwargs.get("target_path", "target")
|
|
323
|
+
target_base_path = kwargs.get("target_base_path", "target-base")
|
|
283
324
|
|
|
284
|
-
profile_name = kwargs.get(
|
|
285
|
-
project_dir = kwargs.get(
|
|
286
|
-
profiles_dir = kwargs.get(
|
|
325
|
+
profile_name = kwargs.get("profile")
|
|
326
|
+
project_dir = kwargs.get("project_dir")
|
|
327
|
+
profiles_dir = kwargs.get("profiles_dir")
|
|
287
328
|
|
|
288
329
|
if profiles_dir is None:
|
|
289
330
|
profiles_dir = default_profiles_dir()
|
|
@@ -297,21 +338,25 @@ class DbtAdapter(BaseAdapter):
|
|
|
297
338
|
profiles_dir=profiles_dir,
|
|
298
339
|
profile=profile_name,
|
|
299
340
|
project_only_flags={},
|
|
300
|
-
which=
|
|
341
|
+
which="list",
|
|
301
342
|
)
|
|
302
343
|
set_from_args(args, args)
|
|
303
344
|
|
|
304
345
|
from dbt.exceptions import DbtProjectError
|
|
346
|
+
|
|
305
347
|
try:
|
|
306
348
|
# adapter
|
|
307
|
-
if dbt_version <
|
|
349
|
+
if dbt_version < "v1.8":
|
|
308
350
|
runtime_config = RuntimeConfig.from_args(args)
|
|
309
351
|
adapter_name = runtime_config.credentials.type
|
|
310
352
|
adapter_cls = get_adapter_class_by_name(adapter_name)
|
|
311
353
|
adapter: SQLAdapter = adapter_cls(runtime_config)
|
|
312
354
|
else:
|
|
313
|
-
from dbt_common.context import set_invocation_context, get_invocation_context
|
|
314
355
|
from dbt.mp_context import get_mp_context
|
|
356
|
+
from dbt_common.context import (
|
|
357
|
+
get_invocation_context,
|
|
358
|
+
set_invocation_context,
|
|
359
|
+
)
|
|
315
360
|
|
|
316
361
|
set_invocation_context({})
|
|
317
362
|
get_invocation_context()._env = dict(os.environ)
|
|
@@ -319,6 +364,9 @@ class DbtAdapter(BaseAdapter):
|
|
|
319
364
|
adapter_name = runtime_config.credentials.type
|
|
320
365
|
adapter_cls = get_adapter_class_by_name(adapter_name)
|
|
321
366
|
adapter: SQLAdapter = adapter_cls(runtime_config, get_mp_context())
|
|
367
|
+
from dbt.adapters.factory import FACTORY
|
|
368
|
+
|
|
369
|
+
FACTORY.adapters[adapter_name] = adapter
|
|
322
370
|
|
|
323
371
|
adapter.connections.set_connection_name()
|
|
324
372
|
runtime_config.adapter = adapter
|
|
@@ -327,7 +375,7 @@ class DbtAdapter(BaseAdapter):
|
|
|
327
375
|
runtime_config=runtime_config,
|
|
328
376
|
adapter=adapter,
|
|
329
377
|
review_mode=review,
|
|
330
|
-
base_path=target_base_path
|
|
378
|
+
base_path=target_base_path,
|
|
331
379
|
)
|
|
332
380
|
except DbtProjectError as e:
|
|
333
381
|
raise e
|
|
@@ -348,27 +396,26 @@ class DbtAdapter(BaseAdapter):
|
|
|
348
396
|
|
|
349
397
|
def get_columns(self, model: str, base=False) -> List[Column]:
|
|
350
398
|
relation = self.create_relation(model, base)
|
|
351
|
-
get_columns_macro =
|
|
352
|
-
if self.adapter.connections.TYPE ==
|
|
353
|
-
get_columns_macro =
|
|
399
|
+
get_columns_macro = "get_columns_in_relation"
|
|
400
|
+
if self.adapter.connections.TYPE == "databricks":
|
|
401
|
+
get_columns_macro = "get_columns_comments"
|
|
354
402
|
|
|
355
|
-
if dbt_version <
|
|
403
|
+
if dbt_version < "v1.8":
|
|
356
404
|
columns = self.adapter.execute_macro(
|
|
357
|
-
get_columns_macro,
|
|
358
|
-
|
|
359
|
-
manifest=self.manifest)
|
|
405
|
+
get_columns_macro, kwargs={"relation": relation}, manifest=self.manifest
|
|
406
|
+
)
|
|
360
407
|
else:
|
|
361
408
|
from dbt.context.providers import generate_runtime_macro_context
|
|
409
|
+
|
|
362
410
|
macro_manifest = MacroManifest(self.manifest.macros)
|
|
363
411
|
self.adapter.set_macro_resolver(macro_manifest)
|
|
364
412
|
self.adapter.set_macro_context_generator(generate_runtime_macro_context)
|
|
365
|
-
columns = self.adapter.execute_macro(
|
|
366
|
-
get_columns_macro,
|
|
367
|
-
kwargs={"relation": relation})
|
|
413
|
+
columns = self.adapter.execute_macro(get_columns_macro, kwargs={"relation": relation})
|
|
368
414
|
|
|
369
|
-
if self.adapter.connections.TYPE ==
|
|
415
|
+
if self.adapter.connections.TYPE == "databricks":
|
|
370
416
|
# reference: get_columns_in_relation (dbt/adapters/databricks/impl.py)
|
|
371
|
-
from dbt.adapters.databricks import DatabricksColumn
|
|
417
|
+
from dbt.adapters.databricks.column import DatabricksColumn
|
|
418
|
+
|
|
372
419
|
rows = columns
|
|
373
420
|
columns = []
|
|
374
421
|
for row in rows:
|
|
@@ -376,7 +423,9 @@ class DbtAdapter(BaseAdapter):
|
|
|
376
423
|
break
|
|
377
424
|
columns.append(
|
|
378
425
|
DatabricksColumn(
|
|
379
|
-
column=row["col_name"],
|
|
426
|
+
column=row["col_name"],
|
|
427
|
+
dtype=row["data_type"],
|
|
428
|
+
comment=row["comment"],
|
|
380
429
|
)
|
|
381
430
|
)
|
|
382
431
|
return columns
|
|
@@ -387,29 +436,29 @@ class DbtAdapter(BaseAdapter):
|
|
|
387
436
|
manifest = self.curr_manifest if base is False else self.base_manifest
|
|
388
437
|
manifest_dict = manifest.to_dict()
|
|
389
438
|
|
|
390
|
-
node = manifest_dict[
|
|
439
|
+
node = manifest_dict["nodes"].get(model_id)
|
|
391
440
|
if node is None:
|
|
392
441
|
return {}
|
|
393
442
|
|
|
394
|
-
node_name = node[
|
|
395
|
-
with self.adapter.connection_named(
|
|
443
|
+
node_name = node["name"]
|
|
444
|
+
with self.adapter.connection_named("model"):
|
|
396
445
|
columns = [column for column in self.get_columns(node_name, base=base)]
|
|
397
446
|
|
|
398
|
-
child_map: List[str] = manifest_dict[
|
|
447
|
+
child_map: List[str] = manifest_dict["child_map"][model_id]
|
|
399
448
|
cols_not_null = []
|
|
400
449
|
cols_unique = []
|
|
401
450
|
|
|
402
451
|
for child in child_map:
|
|
403
|
-
comps = child.split(
|
|
452
|
+
comps = child.split(".")
|
|
404
453
|
child_type = comps[0]
|
|
405
454
|
child_name = comps[2]
|
|
406
455
|
|
|
407
|
-
not_null_prefix = f
|
|
408
|
-
if child_type ==
|
|
409
|
-
cols_not_null.append(child_name[len(not_null_prefix):])
|
|
410
|
-
unique_prefix = f
|
|
411
|
-
if child_type ==
|
|
412
|
-
cols_unique.append(child_name[len(unique_prefix):])
|
|
456
|
+
not_null_prefix = f"not_null_{node_name}_"
|
|
457
|
+
if child_type == "test" and child_name.startswith(not_null_prefix):
|
|
458
|
+
cols_not_null.append(child_name[len(not_null_prefix) :])
|
|
459
|
+
unique_prefix = f"unique_{node_name}_"
|
|
460
|
+
if child_type == "test" and child_name.startswith(unique_prefix):
|
|
461
|
+
cols_unique.append(child_name[len(unique_prefix) :])
|
|
413
462
|
|
|
414
463
|
columns_info = {}
|
|
415
464
|
primary_key = None
|
|
@@ -417,25 +466,26 @@ class DbtAdapter(BaseAdapter):
|
|
|
417
466
|
col_name = c.column
|
|
418
467
|
col = dict(name=col_name, type=c.dtype)
|
|
419
468
|
if col_name in cols_not_null:
|
|
420
|
-
col[
|
|
469
|
+
col["not_null"] = True
|
|
421
470
|
if col_name in cols_unique:
|
|
422
|
-
col[
|
|
471
|
+
col["unique"] = True
|
|
423
472
|
if not primary_key:
|
|
424
473
|
primary_key = col_name
|
|
425
474
|
columns_info[col_name] = col
|
|
426
475
|
|
|
427
476
|
result = dict(columns=columns_info)
|
|
428
477
|
if primary_key:
|
|
429
|
-
result[
|
|
478
|
+
result["primary_key"] = primary_key
|
|
430
479
|
|
|
431
480
|
return result
|
|
432
481
|
|
|
482
|
+
@track_timing("artifact_load")
|
|
433
483
|
def load_artifacts(self):
|
|
434
484
|
"""
|
|
435
485
|
Load the artifacts from the 'target' and 'target-base' directory
|
|
436
486
|
"""
|
|
437
487
|
if self.runtime_config is None:
|
|
438
|
-
raise Exception(
|
|
488
|
+
raise Exception("Cannot find the dbt project configuration")
|
|
439
489
|
|
|
440
490
|
project_root = self.runtime_config.project_root
|
|
441
491
|
target_path = self.runtime_config.target_path
|
|
@@ -444,17 +494,21 @@ class DbtAdapter(BaseAdapter):
|
|
|
444
494
|
self.base_path = os.path.join(project_root, target_base_path)
|
|
445
495
|
|
|
446
496
|
# load the artifacts
|
|
447
|
-
path = os.path.join(project_root, target_path,
|
|
448
|
-
curr_manifest = load_manifest(path=path)
|
|
497
|
+
path = os.path.join(project_root, target_path, "manifest.json")
|
|
498
|
+
curr_manifest = load_manifest(path=path, timing_name="curr_manifest")
|
|
449
499
|
if curr_manifest is None:
|
|
450
500
|
raise FileNotFoundError(ENOENT, os.strerror(ENOENT), path)
|
|
451
|
-
path = os.path.join(project_root, target_base_path,
|
|
452
|
-
base_manifest = load_manifest(path=path)
|
|
501
|
+
path = os.path.join(project_root, target_base_path, "manifest.json")
|
|
502
|
+
base_manifest = load_manifest(path=path, timing_name="base_manifest")
|
|
453
503
|
if base_manifest is None:
|
|
454
504
|
raise FileNotFoundError(ENOENT, os.strerror(ENOENT), path)
|
|
455
505
|
|
|
456
|
-
curr_catalog = load_catalog(
|
|
457
|
-
|
|
506
|
+
curr_catalog = load_catalog(
|
|
507
|
+
path=os.path.join(project_root, target_path, "catalog.json"), timing_name="curr_catalog"
|
|
508
|
+
)
|
|
509
|
+
base_catalog = load_catalog(
|
|
510
|
+
path=os.path.join(project_root, target_base_path, "catalog.json"), timing_name="base_catalog"
|
|
511
|
+
)
|
|
458
512
|
|
|
459
513
|
# set the value if all the artifacts are loaded successfully
|
|
460
514
|
self.curr_manifest = curr_manifest
|
|
@@ -472,22 +526,21 @@ class DbtAdapter(BaseAdapter):
|
|
|
472
526
|
|
|
473
527
|
# set the file paths to watch
|
|
474
528
|
self.artifacts_files = [
|
|
475
|
-
os.path.join(project_root, target_path,
|
|
476
|
-
os.path.join(project_root, target_path,
|
|
477
|
-
os.path.join(project_root, target_base_path,
|
|
478
|
-
os.path.join(project_root, target_base_path,
|
|
529
|
+
os.path.join(project_root, target_path, "manifest.json"),
|
|
530
|
+
os.path.join(project_root, target_path, "catalog.json"),
|
|
531
|
+
os.path.join(project_root, target_base_path, "manifest.json"),
|
|
532
|
+
os.path.join(project_root, target_base_path, "catalog.json"),
|
|
479
533
|
]
|
|
480
534
|
|
|
481
535
|
def is_python_model(self, node_id: str, base: Optional[bool] = False):
|
|
482
536
|
manifest = self.curr_manifest if base is False else self.base_manifest
|
|
483
537
|
model = manifest.nodes.get(node_id)
|
|
484
|
-
if hasattr(model,
|
|
485
|
-
return model.language ==
|
|
538
|
+
if hasattr(model, "language"):
|
|
539
|
+
return model.language == "python"
|
|
486
540
|
|
|
487
541
|
return False
|
|
488
542
|
|
|
489
543
|
def find_node_by_name(self, node_name, base=False) -> Optional[ManifestNode]:
|
|
490
|
-
|
|
491
544
|
manifest = self.curr_manifest if base is False else self.base_manifest
|
|
492
545
|
|
|
493
546
|
for key, node in manifest.nodes.items():
|
|
@@ -497,22 +550,22 @@ class DbtAdapter(BaseAdapter):
|
|
|
497
550
|
return None
|
|
498
551
|
|
|
499
552
|
def get_node_name_by_id(self, unique_id):
|
|
500
|
-
if unique_id.startswith(
|
|
553
|
+
if unique_id.startswith("source."):
|
|
501
554
|
if unique_id in self.curr_manifest.sources:
|
|
502
555
|
return self.curr_manifest.sources[unique_id].name
|
|
503
556
|
elif unique_id in self.base_manifest.sources:
|
|
504
557
|
return self.base_manifest.sources[unique_id].name
|
|
505
|
-
elif unique_id.startswith(
|
|
558
|
+
elif unique_id.startswith("metric."):
|
|
506
559
|
if unique_id in self.curr_manifest.metrics:
|
|
507
560
|
return self.curr_manifest.metrics[unique_id].name
|
|
508
561
|
elif unique_id in self.base_manifest.metrics:
|
|
509
562
|
return self.base_manifest.metrics[unique_id].name
|
|
510
|
-
elif unique_id.startswith(
|
|
563
|
+
elif unique_id.startswith("exposure."):
|
|
511
564
|
if unique_id in self.curr_manifest.exposures:
|
|
512
565
|
return self.curr_manifest.exposures[unique_id].name
|
|
513
566
|
elif unique_id in self.base_manifest.exposures:
|
|
514
567
|
return self.base_manifest.exposures[unique_id].name
|
|
515
|
-
elif unique_id.startswith(
|
|
568
|
+
elif unique_id.startswith("semantic_model."):
|
|
516
569
|
if unique_id in self.curr_manifest.semantic_models:
|
|
517
570
|
return self.curr_manifest.semantic_models[unique_id].name
|
|
518
571
|
elif unique_id in self.base_manifest.semantic_models:
|
|
@@ -527,14 +580,24 @@ class DbtAdapter(BaseAdapter):
|
|
|
527
580
|
def get_manifest(self, base: bool):
|
|
528
581
|
return self.curr_manifest if base is False else self.base_manifest
|
|
529
582
|
|
|
530
|
-
def generate_sql(
|
|
583
|
+
def generate_sql(
|
|
584
|
+
self,
|
|
585
|
+
sql_template: str,
|
|
586
|
+
base: bool = False,
|
|
587
|
+
context=None,
|
|
588
|
+
provided_manifest=None,
|
|
589
|
+
):
|
|
531
590
|
if context is None:
|
|
532
591
|
context = {}
|
|
533
592
|
manifest = provided_manifest if provided_manifest is not None else as_manifest(self.get_manifest(base))
|
|
534
593
|
parser = SqlBlockParser(self.runtime_config, manifest, self.runtime_config)
|
|
535
594
|
|
|
536
|
-
if dbt_version >= dbt_version.parse(
|
|
537
|
-
from dbt_common.context import
|
|
595
|
+
if dbt_version >= dbt_version.parse("v1.8"):
|
|
596
|
+
from dbt_common.context import (
|
|
597
|
+
get_invocation_context,
|
|
598
|
+
set_invocation_context,
|
|
599
|
+
)
|
|
600
|
+
|
|
538
601
|
set_invocation_context({})
|
|
539
602
|
get_invocation_context()._env = dict(os.environ)
|
|
540
603
|
|
|
@@ -542,21 +605,35 @@ class DbtAdapter(BaseAdapter):
|
|
|
542
605
|
node = parser.parse_remote(sql_template, node_id)
|
|
543
606
|
process_node(self.runtime_config, manifest, node)
|
|
544
607
|
|
|
545
|
-
if dbt_version < dbt_version.parse(
|
|
608
|
+
if dbt_version < dbt_version.parse("v1.8"):
|
|
546
609
|
compiler = self.adapter.get_compiler()
|
|
547
610
|
compiler.compile_node(node, manifest, context)
|
|
548
611
|
return node.compiled_code
|
|
549
612
|
else:
|
|
550
|
-
from dbt.context.providers import generate_runtime_model_context
|
|
551
613
|
from dbt.clients import jinja
|
|
614
|
+
from dbt.context.providers import (
|
|
615
|
+
generate_runtime_macro_context,
|
|
616
|
+
generate_runtime_model_context,
|
|
617
|
+
)
|
|
618
|
+
|
|
619
|
+
# Set up macro resolver for dbt >= 1.8
|
|
620
|
+
macro_manifest = MacroManifest(manifest.macros)
|
|
621
|
+
self.adapter.set_macro_resolver(macro_manifest)
|
|
622
|
+
self.adapter.set_macro_context_generator(generate_runtime_macro_context)
|
|
623
|
+
|
|
552
624
|
jinja_ctx = generate_runtime_model_context(node, self.runtime_config, manifest)
|
|
553
625
|
jinja_ctx.update(context)
|
|
554
626
|
compiled_code = jinja.get_rendered(sql_template, jinja_ctx, node)
|
|
555
627
|
return compiled_code
|
|
556
628
|
|
|
557
|
-
def execute(
|
|
558
|
-
|
|
559
|
-
|
|
629
|
+
def execute(
|
|
630
|
+
self,
|
|
631
|
+
sql: str,
|
|
632
|
+
auto_begin: bool = False,
|
|
633
|
+
fetch: bool = False,
|
|
634
|
+
limit: Optional[int] = None,
|
|
635
|
+
) -> Tuple[any, agate.Table]:
|
|
636
|
+
if dbt_version < dbt_version.parse("v1.6"):
|
|
560
637
|
return self.adapter.execute(sql, auto_begin=auto_begin, fetch=fetch)
|
|
561
638
|
|
|
562
639
|
return self.adapter.execute(sql, auto_begin=auto_begin, fetch=fetch, limit=limit)
|
|
@@ -567,7 +644,7 @@ class DbtAdapter(BaseAdapter):
|
|
|
567
644
|
|
|
568
645
|
node_ids = nodes.keys()
|
|
569
646
|
parent_map = {}
|
|
570
|
-
for k, parents in manifest_dict[
|
|
647
|
+
for k, parents in manifest_dict["parent_map"].items():
|
|
571
648
|
if k not in node_ids:
|
|
572
649
|
continue
|
|
573
650
|
parent_map[k] = [parent for parent in parents if parent in node_ids]
|
|
@@ -578,8 +655,8 @@ class DbtAdapter(BaseAdapter):
|
|
|
578
655
|
manifest = self.curr_manifest if base is False else self.base_manifest
|
|
579
656
|
manifest_dict = manifest.to_dict()
|
|
580
657
|
|
|
581
|
-
if node_id in manifest_dict[
|
|
582
|
-
return manifest_dict[
|
|
658
|
+
if node_id in manifest_dict["parent_map"]:
|
|
659
|
+
return manifest_dict["parent_map"][node_id]
|
|
583
660
|
|
|
584
661
|
def get_lineage(self, base: Optional[bool] = False):
|
|
585
662
|
manifest = self.curr_manifest if base is False else self.base_manifest
|
|
@@ -588,19 +665,21 @@ class DbtAdapter(BaseAdapter):
|
|
|
588
665
|
return self.get_lineage_cached(base, cache_key)
|
|
589
666
|
|
|
590
667
|
def get_lineage_diff(self) -> LineageDiff:
|
|
591
|
-
cache_key = hash(
|
|
592
|
-
|
|
593
|
-
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
|
|
668
|
+
cache_key = hash(
|
|
669
|
+
(
|
|
670
|
+
id(self.base_manifest),
|
|
671
|
+
id(self.base_catalog),
|
|
672
|
+
id(self.curr_manifest),
|
|
673
|
+
id(self.curr_catalog),
|
|
674
|
+
)
|
|
675
|
+
)
|
|
597
676
|
return self._get_lineage_diff_cached(cache_key)
|
|
598
677
|
|
|
599
678
|
@lru_cache(maxsize=2)
|
|
600
679
|
def get_lineage_cached(self, base: Optional[bool] = False, cache_key=0):
|
|
601
680
|
if base is False:
|
|
602
|
-
|
|
603
|
-
|
|
681
|
+
perf_tracker = LineagePerfTracker()
|
|
682
|
+
perf_tracker.start_lineage()
|
|
604
683
|
|
|
605
684
|
manifest = self.curr_manifest if base is False else self.base_manifest
|
|
606
685
|
catalog = self.curr_catalog if base is False else self.base_catalog
|
|
@@ -612,48 +691,48 @@ class DbtAdapter(BaseAdapter):
|
|
|
612
691
|
|
|
613
692
|
nodes = {}
|
|
614
693
|
|
|
615
|
-
for node in manifest_dict[
|
|
616
|
-
unique_id = node[
|
|
617
|
-
resource_type = node[
|
|
694
|
+
for node in manifest_dict["nodes"].values():
|
|
695
|
+
unique_id = node["unique_id"]
|
|
696
|
+
resource_type = node["resource_type"]
|
|
618
697
|
|
|
619
|
-
if resource_type not in [
|
|
698
|
+
if resource_type not in ["model", "seed", "exposure", "snapshot"]:
|
|
620
699
|
continue
|
|
621
700
|
|
|
622
701
|
nodes[unique_id] = {
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
|
|
702
|
+
"id": node["unique_id"],
|
|
703
|
+
"name": node["name"],
|
|
704
|
+
"resource_type": node["resource_type"],
|
|
705
|
+
"package_name": node["package_name"],
|
|
706
|
+
"schema": node["schema"],
|
|
707
|
+
"config": node["config"],
|
|
708
|
+
"checksum": node["checksum"],
|
|
709
|
+
"raw_code": node["raw_code"],
|
|
631
710
|
}
|
|
632
711
|
|
|
633
712
|
# List of <type>.<package_name>.<node_name>.<hash>
|
|
634
713
|
# model.jaffle_shop.customer_segments
|
|
635
714
|
# test.jaffle_shop.not_null_customers_customer_id.5c9bf9911d
|
|
636
715
|
# test.jaffle_shop.unique_customers_customer_id.c5af1ff4b1
|
|
637
|
-
child_map: List[str] = manifest_dict[
|
|
716
|
+
child_map: List[str] = manifest_dict["child_map"][unique_id]
|
|
638
717
|
cols_not_null = []
|
|
639
718
|
cols_unique = []
|
|
640
719
|
|
|
641
720
|
for child in child_map:
|
|
642
|
-
node_name = node[
|
|
643
|
-
comps = child.split(
|
|
644
|
-
if len(comps) <
|
|
721
|
+
node_name = node["name"]
|
|
722
|
+
comps = child.split(".")
|
|
723
|
+
if len(comps) < MIN_DBT_NODE_COMPOSITION:
|
|
645
724
|
# only happens in unittest
|
|
646
725
|
continue
|
|
647
726
|
|
|
648
727
|
child_type = comps[0]
|
|
649
728
|
child_name = comps[2]
|
|
650
729
|
|
|
651
|
-
not_null_prefix = f
|
|
652
|
-
if child_type ==
|
|
653
|
-
cols_not_null.append(child_name[len(not_null_prefix):])
|
|
654
|
-
unique_prefix = f
|
|
655
|
-
if child_type ==
|
|
656
|
-
cols_unique.append(child_name[len(unique_prefix):])
|
|
730
|
+
not_null_prefix = f"not_null_{node_name}_"
|
|
731
|
+
if child_type == "test" and child_name.startswith(not_null_prefix):
|
|
732
|
+
cols_not_null.append(child_name[len(not_null_prefix) :])
|
|
733
|
+
unique_prefix = f"unique_{node_name}_"
|
|
734
|
+
if child_type == "test" and child_name.startswith(unique_prefix):
|
|
735
|
+
cols_unique.append(child_name[len(unique_prefix) :])
|
|
657
736
|
|
|
658
737
|
if catalog is not None and unique_id in catalog.nodes:
|
|
659
738
|
columns = {}
|
|
@@ -661,70 +740,68 @@ class DbtAdapter(BaseAdapter):
|
|
|
661
740
|
for col_name, col_metadata in catalog.nodes[unique_id].columns.items():
|
|
662
741
|
col = dict(name=col_name, type=col_metadata.type)
|
|
663
742
|
if col_name in cols_not_null:
|
|
664
|
-
col[
|
|
743
|
+
col["not_null"] = True
|
|
665
744
|
if col_name in cols_unique:
|
|
666
|
-
col[
|
|
745
|
+
col["unique"] = True
|
|
667
746
|
if not primary_key:
|
|
668
747
|
primary_key = col_name
|
|
669
748
|
columns[col_name] = col
|
|
670
|
-
nodes[unique_id][
|
|
749
|
+
nodes[unique_id]["columns"] = columns
|
|
671
750
|
if primary_key:
|
|
672
|
-
nodes[unique_id][
|
|
751
|
+
nodes[unique_id]["primary_key"] = primary_key
|
|
673
752
|
|
|
674
|
-
for source in manifest_dict[
|
|
675
|
-
unique_id = source[
|
|
753
|
+
for source in manifest_dict["sources"].values():
|
|
754
|
+
unique_id = source["unique_id"]
|
|
676
755
|
|
|
677
756
|
nodes[unique_id] = {
|
|
678
|
-
|
|
679
|
-
|
|
680
|
-
|
|
681
|
-
|
|
682
|
-
|
|
757
|
+
"id": source["unique_id"],
|
|
758
|
+
"name": source["name"],
|
|
759
|
+
"source_name": source["source_name"],
|
|
760
|
+
"resource_type": source["resource_type"],
|
|
761
|
+
"package_name": source["package_name"],
|
|
762
|
+
"config": source["config"],
|
|
683
763
|
}
|
|
684
764
|
|
|
685
765
|
if catalog is not None and unique_id in catalog.sources:
|
|
686
|
-
nodes[unique_id][
|
|
687
|
-
col_name: {
|
|
688
|
-
'name': col_name,
|
|
689
|
-
'type': col_metadata.type
|
|
690
|
-
}
|
|
766
|
+
nodes[unique_id]["columns"] = {
|
|
767
|
+
col_name: {"name": col_name, "type": col_metadata.type}
|
|
691
768
|
for col_name, col_metadata in catalog.sources[unique_id].columns.items()
|
|
692
769
|
}
|
|
693
770
|
|
|
694
|
-
for exposure in manifest_dict[
|
|
695
|
-
nodes[exposure[
|
|
696
|
-
|
|
697
|
-
|
|
698
|
-
|
|
699
|
-
|
|
700
|
-
|
|
771
|
+
for exposure in manifest_dict["exposures"].values():
|
|
772
|
+
nodes[exposure["unique_id"]] = {
|
|
773
|
+
"id": exposure["unique_id"],
|
|
774
|
+
"name": exposure["name"],
|
|
775
|
+
"resource_type": exposure["resource_type"],
|
|
776
|
+
"package_name": exposure["package_name"],
|
|
777
|
+
"config": exposure["config"],
|
|
701
778
|
}
|
|
702
|
-
for metric in manifest_dict[
|
|
703
|
-
nodes[metric[
|
|
704
|
-
|
|
705
|
-
|
|
706
|
-
|
|
707
|
-
|
|
708
|
-
|
|
779
|
+
for metric in manifest_dict["metrics"].values():
|
|
780
|
+
nodes[metric["unique_id"]] = {
|
|
781
|
+
"id": metric["unique_id"],
|
|
782
|
+
"name": metric["name"],
|
|
783
|
+
"resource_type": metric["resource_type"],
|
|
784
|
+
"package_name": metric["package_name"],
|
|
785
|
+
"config": metric["config"],
|
|
709
786
|
}
|
|
710
787
|
|
|
711
|
-
if
|
|
712
|
-
for semantic_models in manifest_dict[
|
|
713
|
-
nodes[semantic_models[
|
|
714
|
-
|
|
715
|
-
|
|
716
|
-
|
|
717
|
-
|
|
718
|
-
|
|
788
|
+
if "semantic_models" in manifest_dict:
|
|
789
|
+
for semantic_models in manifest_dict["semantic_models"].values():
|
|
790
|
+
nodes[semantic_models["unique_id"]] = {
|
|
791
|
+
"id": semantic_models["unique_id"],
|
|
792
|
+
"name": semantic_models["name"],
|
|
793
|
+
"resource_type": semantic_models["resource_type"],
|
|
794
|
+
"package_name": semantic_models["package_name"],
|
|
795
|
+
"config": semantic_models["config"],
|
|
719
796
|
}
|
|
720
797
|
|
|
721
798
|
parent_map = self.build_parent_map(nodes, base)
|
|
722
799
|
|
|
723
800
|
if base is False:
|
|
724
|
-
|
|
725
|
-
|
|
726
|
-
log_performance(
|
|
727
|
-
|
|
801
|
+
perf_tracker.end_lineage()
|
|
802
|
+
perf_tracker.set_total_nodes(len(nodes))
|
|
803
|
+
log_performance("model lineage", perf_tracker.to_dict())
|
|
804
|
+
perf_tracker.reset()
|
|
728
805
|
|
|
729
806
|
return dict(
|
|
730
807
|
parent_map=parent_map,
|
|
@@ -737,18 +814,43 @@ class DbtAdapter(BaseAdapter):
|
|
|
737
814
|
def _get_lineage_diff_cached(self, cache_key) -> LineageDiff:
|
|
738
815
|
base = self.get_lineage(base=True)
|
|
739
816
|
current = self.get_lineage(base=False)
|
|
740
|
-
keys = {
|
|
741
|
-
*base.get('nodes', {}).keys(),
|
|
742
|
-
*current.get('nodes', {}).keys()
|
|
743
|
-
}
|
|
744
817
|
|
|
745
|
-
|
|
746
|
-
|
|
747
|
-
|
|
818
|
+
modified_nodes = self.select_nodes(select="state:modified")
|
|
819
|
+
diff = {}
|
|
820
|
+
for node_id in modified_nodes:
|
|
821
|
+
base_node = base.get("nodes", {}).get(node_id)
|
|
822
|
+
curr_node = current.get("nodes", {}).get(node_id)
|
|
823
|
+
if base_node and curr_node:
|
|
824
|
+
diff[node_id] = NodeDiff(change_status="modified")
|
|
825
|
+
elif base_node:
|
|
826
|
+
diff[node_id] = NodeDiff(change_status="removed")
|
|
827
|
+
elif curr_node:
|
|
828
|
+
diff[node_id] = NodeDiff(change_status="added")
|
|
829
|
+
|
|
830
|
+
return LineageDiff(
|
|
831
|
+
base=base,
|
|
832
|
+
current=current,
|
|
833
|
+
diff=diff,
|
|
834
|
+
)
|
|
835
|
+
|
|
836
|
+
@lru_cache(maxsize=128)
|
|
837
|
+
def get_change_analysis_cached(self, node_id: str):
|
|
838
|
+
breaking_perf_tracker = BreakingPerformanceTracking()
|
|
839
|
+
lineage_diff = self.get_lineage_diff()
|
|
840
|
+
diff = lineage_diff.diff
|
|
841
|
+
|
|
842
|
+
if node_id not in diff or diff[node_id].change_status != "modified":
|
|
843
|
+
return diff.get(node_id)
|
|
844
|
+
|
|
845
|
+
breaking_perf_tracker.increment_modified_nodes()
|
|
846
|
+
breaking_perf_tracker.start_lineage_diff()
|
|
847
|
+
|
|
848
|
+
base = lineage_diff.base
|
|
849
|
+
current = lineage_diff.current
|
|
748
850
|
|
|
749
851
|
base_manifest = as_manifest(self.get_manifest(True))
|
|
750
852
|
curr_manifest = as_manifest(self.get_manifest(False))
|
|
751
|
-
|
|
853
|
+
breaking_perf_tracker.record_checkpoint("manifest")
|
|
752
854
|
|
|
753
855
|
def ref_func(*args):
|
|
754
856
|
if len(args) == 1:
|
|
@@ -760,7 +862,7 @@ class DbtAdapter(BaseAdapter):
|
|
|
760
862
|
return node
|
|
761
863
|
|
|
762
864
|
def source_func(source_name, table_name):
|
|
763
|
-
source_name = source_name.replace(
|
|
865
|
+
source_name = source_name.replace("-", "_")
|
|
764
866
|
return f"__{source_name}__{table_name}"
|
|
765
867
|
|
|
766
868
|
jinja_context = dict(
|
|
@@ -768,284 +870,534 @@ class DbtAdapter(BaseAdapter):
|
|
|
768
870
|
source=source_func,
|
|
769
871
|
)
|
|
770
872
|
|
|
771
|
-
|
|
772
|
-
|
|
773
|
-
|
|
774
|
-
|
|
775
|
-
curr_node
|
|
776
|
-
|
|
777
|
-
|
|
778
|
-
|
|
779
|
-
|
|
780
|
-
|
|
781
|
-
|
|
782
|
-
|
|
783
|
-
|
|
784
|
-
|
|
785
|
-
|
|
786
|
-
|
|
787
|
-
|
|
788
|
-
|
|
789
|
-
|
|
790
|
-
|
|
791
|
-
|
|
792
|
-
|
|
793
|
-
|
|
794
|
-
|
|
795
|
-
|
|
796
|
-
|
|
797
|
-
|
|
798
|
-
|
|
799
|
-
|
|
800
|
-
|
|
801
|
-
|
|
802
|
-
|
|
803
|
-
|
|
804
|
-
|
|
805
|
-
|
|
806
|
-
|
|
807
|
-
|
|
808
|
-
|
|
809
|
-
|
|
810
|
-
|
|
811
|
-
|
|
812
|
-
|
|
813
|
-
|
|
814
|
-
|
|
815
|
-
|
|
816
|
-
|
|
817
|
-
|
|
818
|
-
|
|
819
|
-
|
|
820
|
-
|
|
821
|
-
|
|
822
|
-
|
|
823
|
-
|
|
824
|
-
old_schema=base_schema,
|
|
825
|
-
new_schema=curr_schema,
|
|
826
|
-
dialect=dialect,
|
|
827
|
-
perf_tracking=perf_tracking,
|
|
828
|
-
)
|
|
829
|
-
except Exception:
|
|
830
|
-
change = NodeChange(category='unknown')
|
|
831
|
-
|
|
832
|
-
diff[key] = NodeDiff(change_status='modified', change=change)
|
|
833
|
-
elif base_node:
|
|
834
|
-
diff[key] = NodeDiff(change_status='removed')
|
|
835
|
-
elif curr_node:
|
|
836
|
-
diff[key] = NodeDiff(change_status='added')
|
|
837
|
-
|
|
838
|
-
perf_tracking.end_lineage_diff()
|
|
839
|
-
log_performance('model lineage diff', perf_tracking.to_dict())
|
|
873
|
+
base_node = base.get("nodes", {}).get(node_id)
|
|
874
|
+
curr_node = current.get("nodes", {}).get(node_id)
|
|
875
|
+
change = NodeChange(category="unknown")
|
|
876
|
+
if (
|
|
877
|
+
curr_node.get("resource_type") in ["model", "snapshot"]
|
|
878
|
+
and curr_node.get("raw_code") is not None
|
|
879
|
+
and base_node.get("raw_code") is not None
|
|
880
|
+
):
|
|
881
|
+
try:
|
|
882
|
+
|
|
883
|
+
def _get_schema(lineage):
|
|
884
|
+
schema = {}
|
|
885
|
+
nodes = lineage["nodes"]
|
|
886
|
+
parent_list = lineage["parent_map"].get(node_id, [])
|
|
887
|
+
for parent_id in parent_list:
|
|
888
|
+
parent_node = nodes.get(parent_id)
|
|
889
|
+
if parent_node is None:
|
|
890
|
+
continue
|
|
891
|
+
columns = parent_node.get("columns") or {}
|
|
892
|
+
name = parent_node.get("name")
|
|
893
|
+
if parent_node.get("resource_type") == "source":
|
|
894
|
+
parts = parent_id.split(".")
|
|
895
|
+
source = parts[2]
|
|
896
|
+
table = parts[3]
|
|
897
|
+
source = source.replace("-", "_")
|
|
898
|
+
name = f"__{source}__{table}"
|
|
899
|
+
schema[name] = {name: column.get("type") for name, column in columns.items()}
|
|
900
|
+
return schema
|
|
901
|
+
|
|
902
|
+
base_sql = self.generate_sql(
|
|
903
|
+
base_node.get("raw_code"),
|
|
904
|
+
context=jinja_context,
|
|
905
|
+
provided_manifest=base_manifest,
|
|
906
|
+
)
|
|
907
|
+
curr_sql = self.generate_sql(
|
|
908
|
+
curr_node.get("raw_code"),
|
|
909
|
+
context=jinja_context,
|
|
910
|
+
provided_manifest=curr_manifest,
|
|
911
|
+
)
|
|
912
|
+
base_schema = _get_schema(base)
|
|
913
|
+
curr_schema = _get_schema(current)
|
|
914
|
+
dialect = self.adapter.connections.TYPE
|
|
915
|
+
if curr_manifest.metadata.adapter_type is not None:
|
|
916
|
+
dialect = curr_manifest.metadata.adapter_type
|
|
917
|
+
|
|
918
|
+
change = parse_change_category(
|
|
919
|
+
base_sql,
|
|
920
|
+
curr_sql,
|
|
921
|
+
old_schema=base_schema,
|
|
922
|
+
new_schema=curr_schema,
|
|
923
|
+
dialect=dialect,
|
|
924
|
+
perf_tracking=breaking_perf_tracker,
|
|
925
|
+
)
|
|
840
926
|
|
|
841
|
-
|
|
842
|
-
|
|
843
|
-
|
|
844
|
-
|
|
927
|
+
# Make sure that the case of the column names are the same
|
|
928
|
+
changed_columns = {
|
|
929
|
+
column.lower(): change_status for column, change_status in (change.columns or {}).items()
|
|
930
|
+
}
|
|
931
|
+
changed_columns_names = set(changed_columns)
|
|
932
|
+
changed_columns_final = {}
|
|
933
|
+
|
|
934
|
+
base_columns = base_node.get("columns") or {}
|
|
935
|
+
curr_columns = curr_node.get("columns") or {}
|
|
936
|
+
columns_names = set(base_columns) | set(curr_columns)
|
|
937
|
+
|
|
938
|
+
for column_name in columns_names:
|
|
939
|
+
if column_name.lower() in changed_columns_names:
|
|
940
|
+
changed_columns_final[column_name] = changed_columns[column_name.lower()]
|
|
941
|
+
|
|
942
|
+
change.columns = changed_columns_final
|
|
943
|
+
except Exception:
|
|
944
|
+
# TODO: telemetry
|
|
945
|
+
pass
|
|
946
|
+
|
|
947
|
+
breaking_perf_tracker.end_lineage_diff()
|
|
948
|
+
log_performance("change analysis per node", breaking_perf_tracker.to_dict())
|
|
949
|
+
breaking_perf_tracker.reset()
|
|
950
|
+
node_diff = diff.get(node_id)
|
|
951
|
+
node_diff.change = change
|
|
952
|
+
return node_diff
|
|
953
|
+
|
|
954
|
+
def get_cll(
|
|
955
|
+
self,
|
|
956
|
+
node_id: Optional[str] = None,
|
|
957
|
+
column: Optional[str] = None,
|
|
958
|
+
change_analysis: Optional[bool] = False,
|
|
959
|
+
no_cll: Optional[bool] = False,
|
|
960
|
+
no_upstream: Optional[bool] = False,
|
|
961
|
+
no_downstream: Optional[bool] = False,
|
|
962
|
+
no_filter: Optional[bool] = False,
|
|
963
|
+
) -> CllData:
|
|
964
|
+
cll_tracker = LineagePerfTracker()
|
|
965
|
+
cll_tracker.set_params(
|
|
966
|
+
has_node=node_id is not None,
|
|
967
|
+
has_column=column is not None,
|
|
968
|
+
change_analysis=change_analysis,
|
|
969
|
+
no_cll=no_cll,
|
|
970
|
+
no_upstream=no_upstream,
|
|
971
|
+
no_downstream=no_downstream,
|
|
845
972
|
)
|
|
846
|
-
|
|
847
|
-
def get_cll_by_node_id(self, node_id: str, base: Optional[bool] = False):
|
|
848
|
-
cll_tracker = CLLPerformanceTracking()
|
|
849
973
|
cll_tracker.start_column_lineage()
|
|
850
974
|
|
|
851
|
-
manifest = self.curr_manifest
|
|
975
|
+
manifest = self.curr_manifest
|
|
852
976
|
manifest_dict = manifest.to_dict()
|
|
853
977
|
|
|
854
|
-
|
|
855
|
-
|
|
856
|
-
|
|
857
|
-
|
|
978
|
+
# Find related model nodes
|
|
979
|
+
if node_id is not None:
|
|
980
|
+
cll_node_ids = {node_id}
|
|
981
|
+
else:
|
|
982
|
+
lineage_diff = self.get_lineage_diff()
|
|
983
|
+
cll_node_ids = set(lineage_diff.diff.keys())
|
|
984
|
+
|
|
985
|
+
cll_tracker.set_init_nodes(len(cll_node_ids))
|
|
858
986
|
|
|
859
|
-
node_manifest = self.get_lineage_nodes_metadata(base=base)
|
|
860
987
|
nodes = {}
|
|
861
|
-
|
|
862
|
-
|
|
863
|
-
|
|
864
|
-
|
|
988
|
+
columns = {}
|
|
989
|
+
parent_map = {}
|
|
990
|
+
child_map = {}
|
|
991
|
+
|
|
992
|
+
if not no_upstream:
|
|
993
|
+
cll_node_ids = cll_node_ids.union(find_upstream(cll_node_ids, manifest_dict.get("parent_map")))
|
|
994
|
+
if not no_downstream:
|
|
995
|
+
cll_node_ids = cll_node_ids.union(find_downstream(cll_node_ids, manifest_dict.get("child_map")))
|
|
996
|
+
|
|
997
|
+
if not no_cll:
|
|
998
|
+
allowed_related_nodes = set()
|
|
999
|
+
for key in ["sources", "nodes", "exposures", "metrics"]:
|
|
1000
|
+
attr = getattr(manifest, key)
|
|
1001
|
+
allowed_related_nodes.update(set(attr.keys()))
|
|
1002
|
+
if hasattr(manifest, "semantic_models"):
|
|
1003
|
+
attr = getattr(manifest, "semantic_models")
|
|
1004
|
+
allowed_related_nodes.update(set(attr.keys()))
|
|
1005
|
+
for cll_node_id in cll_node_ids:
|
|
1006
|
+
if cll_node_id not in allowed_related_nodes:
|
|
1007
|
+
continue
|
|
1008
|
+
cll_data_one = deepcopy(self.get_cll_cached(cll_node_id, base=False))
|
|
1009
|
+
cll_tracker.increment_cll_nodes()
|
|
1010
|
+
if cll_data_one is None:
|
|
1011
|
+
continue
|
|
1012
|
+
|
|
1013
|
+
nodes[cll_node_id] = cll_data_one.nodes.get(cll_node_id)
|
|
1014
|
+
node_diff = None
|
|
1015
|
+
if change_analysis:
|
|
1016
|
+
node_diff = self.get_change_analysis_cached(cll_node_id)
|
|
1017
|
+
cll_tracker.increment_change_analysis_nodes()
|
|
1018
|
+
if node_diff is not None:
|
|
1019
|
+
nodes[cll_node_id].change_status = node_diff.change_status
|
|
1020
|
+
if node_diff.change is not None:
|
|
1021
|
+
nodes[cll_node_id].change_category = node_diff.change.category
|
|
1022
|
+
for c_id, c in cll_data_one.columns.items():
|
|
1023
|
+
columns[c_id] = c
|
|
1024
|
+
if node_diff is not None:
|
|
1025
|
+
if node_diff.change_status == "added":
|
|
1026
|
+
c.change_status = "added"
|
|
1027
|
+
elif node_diff.change_status == "removed":
|
|
1028
|
+
c.change_status = "removed"
|
|
1029
|
+
elif node_diff.change is not None and node_diff.change.columns is not None:
|
|
1030
|
+
column_diff = node_diff.change.columns.get(c.name)
|
|
1031
|
+
if column_diff:
|
|
1032
|
+
c.change_status = column_diff
|
|
1033
|
+
|
|
1034
|
+
for p_id, parents in cll_data_one.parent_map.items():
|
|
1035
|
+
parent_map[p_id] = parents
|
|
1036
|
+
else:
|
|
1037
|
+
for cll_node_id in cll_node_ids:
|
|
1038
|
+
cll_node = None
|
|
1039
|
+
cll_node_columns: Dict[str, CllColumn] = {}
|
|
1040
|
+
|
|
1041
|
+
if cll_node_id in manifest.sources:
|
|
1042
|
+
cll_node = CllNode.build_cll_node(manifest, "sources", cll_node_id)
|
|
1043
|
+
if self.curr_catalog and cll_node_id in self.curr_catalog.sources:
|
|
1044
|
+
cll_node_columns = {
|
|
1045
|
+
column.name: CllColumn(
|
|
1046
|
+
id=f"{cll_node_id}_{column.name}",
|
|
1047
|
+
table_id=cll_node_id,
|
|
1048
|
+
name=column.name,
|
|
1049
|
+
type=column.type,
|
|
1050
|
+
)
|
|
1051
|
+
for column in self.curr_catalog.sources[cll_node_id].columns.values()
|
|
1052
|
+
}
|
|
1053
|
+
elif cll_node_id in manifest.nodes:
|
|
1054
|
+
cll_node = CllNode.build_cll_node(manifest, "nodes", cll_node_id)
|
|
1055
|
+
if self.curr_catalog and cll_node_id in self.curr_catalog.nodes:
|
|
1056
|
+
cll_node_columns = {
|
|
1057
|
+
column.name: CllColumn(
|
|
1058
|
+
id=f"{cll_node_id}_{column.name}",
|
|
1059
|
+
table_id=cll_node_id,
|
|
1060
|
+
name=column.name,
|
|
1061
|
+
type=column.type,
|
|
1062
|
+
)
|
|
1063
|
+
for column in self.curr_catalog.nodes[cll_node_id].columns.values()
|
|
1064
|
+
}
|
|
1065
|
+
elif cll_node_id in manifest.exposures:
|
|
1066
|
+
cll_node = CllNode.build_cll_node(manifest, "exposures", cll_node_id)
|
|
1067
|
+
elif hasattr(manifest, "semantic_models") and cll_node_id in manifest.semantic_models:
|
|
1068
|
+
cll_node = CllNode.build_cll_node(manifest, "semantic_models", cll_node_id)
|
|
1069
|
+
elif cll_node_id in manifest.metrics:
|
|
1070
|
+
cll_node = CllNode.build_cll_node(manifest, "metrics", cll_node_id)
|
|
1071
|
+
|
|
1072
|
+
if not cll_node:
|
|
1073
|
+
continue
|
|
1074
|
+
nodes[cll_node_id] = cll_node
|
|
1075
|
+
|
|
1076
|
+
node_diff = None
|
|
1077
|
+
if change_analysis:
|
|
1078
|
+
node_diff = self.get_change_analysis_cached(cll_node_id)
|
|
1079
|
+
cll_tracker.increment_change_analysis_nodes()
|
|
1080
|
+
if node_diff is not None:
|
|
1081
|
+
cll_node.change_status = node_diff.change_status
|
|
1082
|
+
if node_diff.change is not None:
|
|
1083
|
+
cll_node.change_category = node_diff.change.category
|
|
1084
|
+
for c, cll_column in cll_node_columns.items():
|
|
1085
|
+
cll_node.columns[c] = cll_column
|
|
1086
|
+
columns[cll_column.id] = cll_column
|
|
1087
|
+
if node_diff.change.columns and c in node_diff.change.columns:
|
|
1088
|
+
cll_column.change_status = node_diff.change.columns[c]
|
|
1089
|
+
|
|
1090
|
+
parent_map[cll_node_id] = manifest.parent_map.get(cll_node_id, [])
|
|
1091
|
+
|
|
1092
|
+
# build the child map
|
|
1093
|
+
for parent_id, parents in parent_map.items():
|
|
1094
|
+
for parent in parents:
|
|
1095
|
+
if parent not in child_map:
|
|
1096
|
+
child_map[parent] = set()
|
|
1097
|
+
child_map[parent].add(parent_id)
|
|
1098
|
+
|
|
1099
|
+
# Find the anchor nodes
|
|
1100
|
+
anchor_node_ids = set()
|
|
1101
|
+
extra_node_ids = set()
|
|
1102
|
+
if node_id is None and column is None:
|
|
1103
|
+
if change_analysis:
|
|
1104
|
+
# If change analysis is requested, we need to find the nodes that have changes
|
|
1105
|
+
lineage_diff = self.get_lineage_diff()
|
|
1106
|
+
for nid, nd in lineage_diff.diff.items():
|
|
1107
|
+
if nd.change_status == "added":
|
|
1108
|
+
anchor_node_ids.add(nid)
|
|
1109
|
+
n = lineage_diff.current["nodes"].get(nid)
|
|
1110
|
+
n_columns = n.get("columns", {})
|
|
1111
|
+
for c in n_columns:
|
|
1112
|
+
anchor_node_ids.add(build_column_key(nid, c))
|
|
1113
|
+
continue
|
|
1114
|
+
if nd.change_status == "removed":
|
|
1115
|
+
extra_node_ids.add(nid)
|
|
1116
|
+
continue
|
|
1117
|
+
|
|
1118
|
+
node_diff = self.get_change_analysis_cached(nid)
|
|
1119
|
+
if node_diff is not None and node_diff.change is not None:
|
|
1120
|
+
extra_node_ids.add(nid)
|
|
1121
|
+
if no_cll:
|
|
1122
|
+
if node_diff.change.category in ["breaking", "partial_breaking", "unknown"]:
|
|
1123
|
+
anchor_node_ids.add(nid)
|
|
1124
|
+
else:
|
|
1125
|
+
if node_diff.change.category in ["breaking", "unknown"]:
|
|
1126
|
+
anchor_node_ids.add(nid)
|
|
1127
|
+
if node_diff.change.columns is not None:
|
|
1128
|
+
for column_name in node_diff.change.columns:
|
|
1129
|
+
anchor_node_ids.add(f"{nid}_{column_name}")
|
|
1130
|
+
else:
|
|
1131
|
+
lineage_diff = self.get_lineage_diff()
|
|
1132
|
+
anchor_node_ids = lineage_diff.diff.keys()
|
|
1133
|
+
elif node_id is not None and column is None:
|
|
1134
|
+
if change_analysis:
|
|
1135
|
+
# If change analysis is requested, we need to find the nodes that have changes
|
|
1136
|
+
node_diff = self.get_change_analysis_cached(node_id)
|
|
1137
|
+
if node_diff is not None and node_diff.change is not None:
|
|
1138
|
+
extra_node_ids.add(node_id)
|
|
1139
|
+
if no_cll:
|
|
1140
|
+
if node_diff.change.category in ["breaking", "partial_breaking", "unknown"]:
|
|
1141
|
+
anchor_node_ids.add(node_id)
|
|
1142
|
+
else:
|
|
1143
|
+
if node_diff.change.category in ["breaking", "unknown"]:
|
|
1144
|
+
anchor_node_ids.add(node_id)
|
|
1145
|
+
if node_diff.change.columns is not None:
|
|
1146
|
+
for column_name in node_diff.change.columns:
|
|
1147
|
+
anchor_node_ids.add(f"{node_id}_{column_name}")
|
|
1148
|
+
else:
|
|
1149
|
+
anchor_node_ids.add(node_id)
|
|
1150
|
+
else:
|
|
1151
|
+
anchor_node_ids.add(node_id)
|
|
1152
|
+
if not no_cll:
|
|
1153
|
+
node = nodes.get(node_id)
|
|
1154
|
+
if node:
|
|
1155
|
+
for column_name in node.columns:
|
|
1156
|
+
column_key = build_column_key(node_id, column_name)
|
|
1157
|
+
anchor_node_ids.add(column_key)
|
|
1158
|
+
else:
|
|
1159
|
+
anchor_node_ids.add(f"{node_id}_{column}")
|
|
1160
|
+
|
|
1161
|
+
cll_tracker.set_anchor_nodes(len(anchor_node_ids))
|
|
1162
|
+
result_node_ids = set(anchor_node_ids)
|
|
1163
|
+
if not no_upstream:
|
|
1164
|
+
result_node_ids = result_node_ids.union(find_upstream(anchor_node_ids, parent_map))
|
|
1165
|
+
if not no_downstream:
|
|
1166
|
+
result_node_ids = result_node_ids.union(find_downstream(anchor_node_ids, child_map))
|
|
1167
|
+
|
|
1168
|
+
# Filter the nodes and columns based on the anchor nodes
|
|
1169
|
+
if not no_filter:
|
|
1170
|
+
nodes = {k: v for k, v in nodes.items() if k in result_node_ids or k in extra_node_ids}
|
|
1171
|
+
columns = {k: v for k, v in columns.items() if k in result_node_ids or k in extra_node_ids}
|
|
1172
|
+
|
|
1173
|
+
for node in nodes.values():
|
|
1174
|
+
node.columns = {
|
|
1175
|
+
k: v for k, v in node.columns.items() if v.id in result_node_ids or v.id in extra_node_ids
|
|
1176
|
+
}
|
|
1177
|
+
|
|
1178
|
+
if change_analysis:
|
|
1179
|
+
node.impacted = node.id in result_node_ids
|
|
1180
|
+
|
|
1181
|
+
parent_map, child_map = filter_dependency_maps(parent_map, child_map, result_node_ids)
|
|
865
1182
|
|
|
866
1183
|
cll_tracker.end_column_lineage()
|
|
867
|
-
cll_tracker.set_total_nodes(len(nodes))
|
|
868
|
-
log_performance(
|
|
1184
|
+
cll_tracker.set_total_nodes(len(nodes) + len(columns))
|
|
1185
|
+
log_performance("column level lineage", cll_tracker.to_dict())
|
|
869
1186
|
cll_tracker.reset()
|
|
870
1187
|
|
|
871
|
-
return
|
|
1188
|
+
return CllData(
|
|
1189
|
+
nodes=nodes,
|
|
1190
|
+
columns=columns,
|
|
1191
|
+
parent_map=parent_map,
|
|
1192
|
+
child_map=child_map,
|
|
1193
|
+
)
|
|
872
1194
|
|
|
873
1195
|
@lru_cache(maxsize=128)
|
|
874
|
-
def get_cll_cached(self, node_id: str, base: Optional[bool] = False):
|
|
875
|
-
|
|
1196
|
+
def get_cll_cached(self, node_id: str, base: Optional[bool] = False) -> Optional[CllData]:
|
|
1197
|
+
cll_tracker = CLLPerformanceTracking()
|
|
876
1198
|
|
|
877
|
-
|
|
878
|
-
|
|
879
|
-
|
|
880
|
-
|
|
881
|
-
|
|
882
|
-
|
|
883
|
-
|
|
884
|
-
|
|
885
|
-
|
|
886
|
-
|
|
887
|
-
|
|
888
|
-
|
|
889
|
-
|
|
890
|
-
col
|
|
891
|
-
|
|
892
|
-
|
|
893
|
-
|
|
894
|
-
for cl in column_lineage.values():
|
|
895
|
-
for depend_on in cl.depends_on:
|
|
896
|
-
if depend_on.node.startswith('__'):
|
|
897
|
-
for n in nodes.values():
|
|
898
|
-
if n.get('resource_type') != 'source':
|
|
899
|
-
continue
|
|
900
|
-
# __source__table -> source.table
|
|
901
|
-
source_table = depend_on.node.lstrip("_").replace("__", ".", 1).lower()
|
|
902
|
-
if source_table in n.get('id'):
|
|
903
|
-
depend_on.node = n.get('id')
|
|
904
|
-
break
|
|
905
|
-
else:
|
|
906
|
-
for n in nodes.values():
|
|
907
|
-
if n.get('name') == depend_on.node.lower():
|
|
908
|
-
depend_on.node = n.get('id')
|
|
909
|
-
break
|
|
1199
|
+
node, parent_list = self.get_cll_node(node_id, base=base)
|
|
1200
|
+
if node is None:
|
|
1201
|
+
return None
|
|
1202
|
+
|
|
1203
|
+
cll_tracker.set_total_nodes(1)
|
|
1204
|
+
cll_tracker.start_column_lineage()
|
|
1205
|
+
|
|
1206
|
+
def _apply_all_columns(node: CllNode, transformation_type):
|
|
1207
|
+
cll_data = CllData()
|
|
1208
|
+
cll_data.nodes[node.id] = node
|
|
1209
|
+
cll_data.parent_map[node.id] = set(parent_list)
|
|
1210
|
+
for col in node.columns.values():
|
|
1211
|
+
column_id = f"{node.id}_{col.name}"
|
|
1212
|
+
col.transformation_type = transformation_type
|
|
1213
|
+
cll_data.columns[column_id] = col
|
|
1214
|
+
cll_data.parent_map[column_id] = set()
|
|
1215
|
+
return cll_data
|
|
910
1216
|
|
|
911
|
-
cll_tracker = CLLPerformanceTracking()
|
|
912
|
-
nodes = self.get_lineage_nodes_metadata(base=base)
|
|
913
1217
|
manifest = as_manifest(self.get_manifest(base))
|
|
914
|
-
|
|
915
|
-
|
|
916
|
-
|
|
1218
|
+
catalog = self.curr_catalog if base is False else self.base_catalog
|
|
1219
|
+
resource_type = node.resource_type
|
|
1220
|
+
if resource_type not in {"model", "seed", "source", "snapshot"}:
|
|
1221
|
+
return _apply_all_columns(node, "unknown")
|
|
917
1222
|
|
|
918
|
-
if resource_type ==
|
|
919
|
-
_apply_all_columns(node,
|
|
920
|
-
return
|
|
1223
|
+
if resource_type == "source" or resource_type == "seed":
|
|
1224
|
+
return _apply_all_columns(node, "source")
|
|
921
1225
|
|
|
922
|
-
if node.
|
|
923
|
-
_apply_all_columns(node,
|
|
924
|
-
return
|
|
1226
|
+
if node.raw_code is None or self.is_python_model(node.id, base=base):
|
|
1227
|
+
return _apply_all_columns(node, "unknown")
|
|
925
1228
|
|
|
926
|
-
|
|
927
|
-
|
|
928
|
-
_apply_all_columns(node, 'source', [])
|
|
929
|
-
return
|
|
1229
|
+
if node.name == "metricflow_time_spine":
|
|
1230
|
+
return _apply_all_columns(node, "source")
|
|
930
1231
|
|
|
931
|
-
if not node.
|
|
932
|
-
|
|
933
|
-
|
|
1232
|
+
if not node.columns:
|
|
1233
|
+
return _apply_all_columns(node, "unknown")
|
|
1234
|
+
|
|
1235
|
+
table_id_map = {}
|
|
934
1236
|
|
|
935
1237
|
def ref_func(*args):
|
|
1238
|
+
node_name: str = None
|
|
1239
|
+
project_or_package: str = None
|
|
1240
|
+
|
|
936
1241
|
if len(args) == 1:
|
|
937
|
-
|
|
938
|
-
elif len(args) > 1:
|
|
939
|
-
node = args[1]
|
|
1242
|
+
node_name = args[0]
|
|
940
1243
|
else:
|
|
941
|
-
|
|
942
|
-
|
|
1244
|
+
project_or_package = args[0]
|
|
1245
|
+
node_name = args[1]
|
|
943
1246
|
|
|
944
|
-
|
|
945
|
-
|
|
1247
|
+
for key, n in manifest.nodes.items():
|
|
1248
|
+
if n.name != node_name:
|
|
1249
|
+
continue
|
|
1250
|
+
if project_or_package is not None and n.package_name != project_or_package:
|
|
1251
|
+
continue
|
|
1252
|
+
|
|
1253
|
+
# replace id "." to "_"
|
|
1254
|
+
unique_id = n.unique_id
|
|
1255
|
+
table_name = unique_id.replace(".", "_")
|
|
1256
|
+
table_id_map[table_name.lower()] = unique_id
|
|
1257
|
+
return table_name
|
|
1258
|
+
|
|
1259
|
+
raise ValueError(f"Cannot find node {node_name} in the manifest")
|
|
1260
|
+
|
|
1261
|
+
def source_func(source_name, name):
|
|
1262
|
+
for key, n in manifest.sources.items():
|
|
1263
|
+
if n.source_name != source_name:
|
|
1264
|
+
continue
|
|
1265
|
+
if n.name != name:
|
|
1266
|
+
continue
|
|
1267
|
+
|
|
1268
|
+
# replace id "." to "_"
|
|
1269
|
+
unique_id = n.unique_id
|
|
1270
|
+
table_name = unique_id.replace(".", "_")
|
|
1271
|
+
table_id_map[table_name.lower()] = unique_id
|
|
1272
|
+
return table_name
|
|
946
1273
|
|
|
947
|
-
|
|
1274
|
+
raise ValueError(f"Cannot find source {source_name}.{name} in the manifest")
|
|
1275
|
+
|
|
1276
|
+
raw_code = node.raw_code
|
|
948
1277
|
jinja_context = dict(
|
|
949
1278
|
ref=ref_func,
|
|
950
1279
|
source=source_func,
|
|
951
1280
|
)
|
|
952
1281
|
|
|
953
1282
|
schema = {}
|
|
954
|
-
|
|
955
|
-
|
|
956
|
-
|
|
957
|
-
|
|
958
|
-
|
|
959
|
-
|
|
960
|
-
|
|
961
|
-
|
|
962
|
-
|
|
963
|
-
|
|
964
|
-
|
|
965
|
-
schema[name] = {
|
|
966
|
-
name: column.get('type') for name, column in columns.items()
|
|
967
|
-
}
|
|
1283
|
+
if catalog is not None:
|
|
1284
|
+
for parent_id in parent_list:
|
|
1285
|
+
table_name = parent_id.replace(".", "_")
|
|
1286
|
+
columns = {}
|
|
1287
|
+
if parent_id in catalog.nodes:
|
|
1288
|
+
for col_name, col_metadata in catalog.nodes[parent_id].columns.items():
|
|
1289
|
+
columns[col_name] = col_metadata.type
|
|
1290
|
+
if parent_id in catalog.sources:
|
|
1291
|
+
for col_name, col_metadata in catalog.sources[parent_id].columns.items():
|
|
1292
|
+
columns[col_name] = col_metadata.type
|
|
1293
|
+
schema[table_name] = columns
|
|
968
1294
|
|
|
969
1295
|
try:
|
|
970
|
-
# provide a manifest to speedup and not pollute the manifest
|
|
971
1296
|
compiled_sql = self.generate_sql(raw_code, base=base, context=jinja_context, provided_manifest=manifest)
|
|
972
1297
|
dialect = self.adapter.type()
|
|
973
|
-
# find adapter type from the manifest, otherwise we use the adapter type from the adapter
|
|
974
1298
|
if self.get_manifest(base).metadata.adapter_type is not None:
|
|
975
1299
|
dialect = self.get_manifest(base).metadata.adapter_type
|
|
976
|
-
|
|
1300
|
+
m2c, c2c_map = cll(compiled_sql, schema=schema, dialect=dialect)
|
|
977
1301
|
except RecceException:
|
|
978
|
-
# TODO: provide parsing error message if needed
|
|
979
|
-
_apply_all_columns(node, 'unknown', [])
|
|
980
1302
|
cll_tracker.increment_sqlglot_error_nodes()
|
|
981
|
-
return
|
|
1303
|
+
return _apply_all_columns(node, "unknown")
|
|
982
1304
|
except Exception:
|
|
983
|
-
_apply_all_columns(node, 'unknown', [])
|
|
984
1305
|
cll_tracker.increment_other_error_nodes()
|
|
985
|
-
return
|
|
986
|
-
|
|
987
|
-
|
|
1306
|
+
return _apply_all_columns(node, "unknown")
|
|
1307
|
+
|
|
1308
|
+
# Add cll dependency to the node.
|
|
1309
|
+
cll_data = CllData()
|
|
1310
|
+
cll_data.nodes[node.id] = node
|
|
1311
|
+
cll_data.columns = {f"{node.id}_{col.name}": col for col in node.columns.values()}
|
|
1312
|
+
|
|
1313
|
+
# parent map for node
|
|
1314
|
+
depends_on = set(parent_list)
|
|
1315
|
+
for d in m2c:
|
|
1316
|
+
parent_key = f"{table_id_map[d.node.lower()]}_{d.column}"
|
|
1317
|
+
depends_on.add(parent_key)
|
|
1318
|
+
cll_data.parent_map[node_id] = depends_on
|
|
1319
|
+
|
|
1320
|
+
# parent map for columns
|
|
1321
|
+
for name, column in node.columns.items():
|
|
1322
|
+
depends_on = set()
|
|
1323
|
+
column_id = f"{node.id}_{name}"
|
|
1324
|
+
if name in c2c_map:
|
|
1325
|
+
for d in c2c_map[name].depends_on:
|
|
1326
|
+
parent_key = f"{table_id_map[d.node.lower()]}_{d.column}"
|
|
1327
|
+
depends_on.add(parent_key)
|
|
1328
|
+
column.transformation_type = c2c_map[name].transformation_type
|
|
1329
|
+
cll_data.parent_map[column_id] = set(depends_on)
|
|
988
1330
|
|
|
989
|
-
|
|
990
|
-
|
|
991
|
-
|
|
992
|
-
|
|
1331
|
+
cll_tracker.end_column_lineage()
|
|
1332
|
+
log_performance("column level lineage per node", cll_tracker.to_dict())
|
|
1333
|
+
cll_tracker.reset()
|
|
1334
|
+
return cll_data
|
|
993
1335
|
|
|
994
|
-
|
|
995
|
-
def get_lineage_nodes_metadata(self, base: Optional[bool] = False):
|
|
1336
|
+
def get_cll_node(self, node_id: str, base: Optional[bool] = False) -> Tuple[Optional[CllNode], list[str]]:
|
|
996
1337
|
manifest = self.curr_manifest if base is False else self.base_manifest
|
|
997
1338
|
catalog = self.curr_catalog if base is False else self.base_catalog
|
|
998
|
-
|
|
1339
|
+
parent_list = []
|
|
1340
|
+
node = None
|
|
999
1341
|
|
|
1000
|
-
|
|
1001
|
-
|
|
1002
|
-
|
|
1003
|
-
|
|
1342
|
+
# model, seed, snapshot
|
|
1343
|
+
if node_id in manifest.nodes:
|
|
1344
|
+
found = manifest.nodes[node_id]
|
|
1345
|
+
unique_id = found.unique_id
|
|
1346
|
+
node = CllNode.build_cll_node(manifest, "nodes", node_id)
|
|
1347
|
+
if hasattr(found.depends_on, "nodes"):
|
|
1348
|
+
parent_list = found.depends_on.nodes
|
|
1004
1349
|
|
|
1005
|
-
if
|
|
1006
|
-
|
|
1350
|
+
if catalog is not None and node is not None and unique_id in catalog.nodes:
|
|
1351
|
+
columns = {}
|
|
1352
|
+
for col_name, col_metadata in catalog.nodes[unique_id].columns.items():
|
|
1353
|
+
column_id = f"{unique_id}_{col_name}"
|
|
1354
|
+
col = CllColumn(id=column_id, name=col_name, table_id=unique_id, type=col_metadata.type)
|
|
1355
|
+
columns[col_name] = col
|
|
1356
|
+
node.columns = columns
|
|
1007
1357
|
|
|
1008
|
-
|
|
1009
|
-
|
|
1010
|
-
|
|
1011
|
-
|
|
1012
|
-
|
|
1013
|
-
|
|
1358
|
+
# source
|
|
1359
|
+
if node_id in manifest.sources:
|
|
1360
|
+
found = manifest.sources[node_id]
|
|
1361
|
+
unique_id = found.unique_id
|
|
1362
|
+
node = CllNode.build_cll_node(manifest, "sources", node_id)
|
|
1363
|
+
parent_list = []
|
|
1014
1364
|
|
|
1015
|
-
if catalog is not None and unique_id in catalog.
|
|
1365
|
+
if catalog is not None and node is not None and unique_id in catalog.sources:
|
|
1016
1366
|
columns = {}
|
|
1017
|
-
for col_name, col_metadata in catalog.
|
|
1018
|
-
|
|
1367
|
+
for col_name, col_metadata in catalog.sources[unique_id].columns.items():
|
|
1368
|
+
column_id = f"{unique_id}_{col_name}"
|
|
1369
|
+
col = CllColumn(id=column_id, name=col_name, table_id=unique_id, type=col_metadata.type)
|
|
1019
1370
|
columns[col_name] = col
|
|
1020
|
-
|
|
1371
|
+
node.columns = columns
|
|
1021
1372
|
|
|
1022
|
-
|
|
1023
|
-
|
|
1373
|
+
# exposure
|
|
1374
|
+
if node_id in manifest.exposures:
|
|
1375
|
+
found = manifest.exposures[node_id]
|
|
1376
|
+
node = CllNode.build_cll_node(manifest, "exposures", node_id)
|
|
1377
|
+
if hasattr(found.depends_on, "nodes"):
|
|
1378
|
+
parent_list = found.depends_on.nodes
|
|
1024
1379
|
|
|
1025
|
-
|
|
1026
|
-
|
|
1027
|
-
|
|
1028
|
-
|
|
1029
|
-
|
|
1380
|
+
if hasattr(manifest, "semantic_models") and node_id in manifest.semantic_models:
|
|
1381
|
+
found = manifest.semantic_models[node_id]
|
|
1382
|
+
node = CllNode.build_cll_node(manifest, "semantic_models", node_id)
|
|
1383
|
+
if hasattr(found.depends_on, "nodes"):
|
|
1384
|
+
parent_list = found.depends_on.nodes
|
|
1030
1385
|
|
|
1031
|
-
|
|
1032
|
-
|
|
1033
|
-
|
|
1034
|
-
|
|
1035
|
-
|
|
1036
|
-
}
|
|
1037
|
-
for col_name, col_metadata in catalog.sources[unique_id].columns.items()
|
|
1038
|
-
}
|
|
1386
|
+
if node_id in manifest.metrics:
|
|
1387
|
+
found = manifest.metrics[node_id]
|
|
1388
|
+
node = CllNode.build_cll_node(manifest, "metrics", node_id)
|
|
1389
|
+
if hasattr(found.depends_on, "nodes"):
|
|
1390
|
+
parent_list = found.depends_on.nodes
|
|
1039
1391
|
|
|
1040
|
-
return
|
|
1392
|
+
return node, parent_list
|
|
1041
1393
|
|
|
1042
1394
|
def get_manifests_by_id(self, unique_id: str):
|
|
1043
1395
|
curr_manifest = self.get_manifest(base=False)
|
|
1044
1396
|
base_manifest = self.get_manifest(base=True)
|
|
1045
1397
|
if unique_id in curr_manifest.nodes.keys() or unique_id in base_manifest.nodes.keys():
|
|
1046
1398
|
return {
|
|
1047
|
-
|
|
1048
|
-
|
|
1399
|
+
"current": curr_manifest.nodes.get(unique_id),
|
|
1400
|
+
"base": base_manifest.nodes.get(unique_id),
|
|
1049
1401
|
}
|
|
1050
1402
|
return None
|
|
1051
1403
|
|
|
@@ -1068,39 +1420,40 @@ class DbtAdapter(BaseAdapter):
|
|
|
1068
1420
|
if self.base_path:
|
|
1069
1421
|
self.artifacts_observer.schedule(event_handler, self.base_path, recursive=False)
|
|
1070
1422
|
self.artifacts_observer.start()
|
|
1071
|
-
logger.info(
|
|
1423
|
+
logger.info("Start monitoring dbt artifacts")
|
|
1072
1424
|
|
|
1073
1425
|
def stop_monitor_artifacts(self):
|
|
1074
1426
|
if self.artifacts_files:
|
|
1075
1427
|
self.artifacts_observer.stop()
|
|
1076
1428
|
self.artifacts_observer.join()
|
|
1077
|
-
logger.info(
|
|
1429
|
+
logger.info("Stop monitoring artifacts")
|
|
1078
1430
|
|
|
1079
1431
|
def start_monitor_base_env(self, callback: Callable = None):
|
|
1080
|
-
target_base_dir = os.path.join(self.runtime_config.project_root,
|
|
1432
|
+
target_base_dir = os.path.join(self.runtime_config.project_root, "target-base")
|
|
1081
1433
|
base_env_files = {
|
|
1082
|
-
os.path.join(target_base_dir,
|
|
1083
|
-
os.path.join(target_base_dir,
|
|
1434
|
+
os.path.join(target_base_dir, "manifest.json"),
|
|
1435
|
+
os.path.join(target_base_dir, "catalog.json"),
|
|
1084
1436
|
}
|
|
1085
1437
|
event_handler = EnvironmentEventHandler(self.base_env_observer, base_env_files, callback=callback)
|
|
1086
1438
|
self.base_env_observer.schedule(event_handler, self.runtime_config.project_root, recursive=True)
|
|
1087
1439
|
self.base_env_observer.start()
|
|
1088
|
-
logger.info(
|
|
1440
|
+
logger.info("Start monitoring base environment")
|
|
1089
1441
|
|
|
1090
1442
|
def stop_monitor_base_env(self):
|
|
1091
1443
|
if self.base_env_observer.is_alive():
|
|
1092
1444
|
self.base_env_observer.stop()
|
|
1093
1445
|
self.base_env_observer.join()
|
|
1094
|
-
logger.info(
|
|
1095
|
-
|
|
1096
|
-
def set_artifacts(
|
|
1097
|
-
|
|
1098
|
-
|
|
1099
|
-
|
|
1100
|
-
|
|
1101
|
-
|
|
1102
|
-
|
|
1103
|
-
|
|
1446
|
+
logger.info("Stop monitoring base environment")
|
|
1447
|
+
|
|
1448
|
+
def set_artifacts(
|
|
1449
|
+
self,
|
|
1450
|
+
base_manifest: WritableManifest,
|
|
1451
|
+
curr_manifest: WritableManifest,
|
|
1452
|
+
manifest: Manifest,
|
|
1453
|
+
previous_manifest: Manifest,
|
|
1454
|
+
base_catalog: CatalogArtifact,
|
|
1455
|
+
curr_catalog: CatalogArtifact,
|
|
1456
|
+
):
|
|
1104
1457
|
self.curr_manifest = curr_manifest
|
|
1105
1458
|
self.base_manifest = base_manifest
|
|
1106
1459
|
self.manifest = manifest
|
|
@@ -1109,7 +1462,7 @@ class DbtAdapter(BaseAdapter):
|
|
|
1109
1462
|
self.previous_state = previous_state(
|
|
1110
1463
|
Path(self.base_path),
|
|
1111
1464
|
Path(self.runtime_config.target_path),
|
|
1112
|
-
Path(self.runtime_config.project_root)
|
|
1465
|
+
Path(self.runtime_config.project_root),
|
|
1113
1466
|
)
|
|
1114
1467
|
self.previous_state.manifest = previous_manifest
|
|
1115
1468
|
|
|
@@ -1131,19 +1484,22 @@ class DbtAdapter(BaseAdapter):
|
|
|
1131
1484
|
# we capture the original manifest as base and only update the current
|
|
1132
1485
|
target_type = os.path.basename(os.path.dirname(refresh_file_path))
|
|
1133
1486
|
if self.target_path and target_type == os.path.basename(self.target_path):
|
|
1134
|
-
if refresh_file_path.endswith(
|
|
1487
|
+
if refresh_file_path.endswith("manifest.json"):
|
|
1135
1488
|
self.curr_manifest = load_manifest(path=refresh_file_path)
|
|
1136
1489
|
self.manifest = as_manifest(self.curr_manifest)
|
|
1137
1490
|
self.get_cll_cached.cache_clear()
|
|
1138
|
-
self.
|
|
1139
|
-
elif refresh_file_path.endswith(
|
|
1491
|
+
self.get_change_analysis_cached.cache_clear()
|
|
1492
|
+
elif refresh_file_path.endswith("catalog.json"):
|
|
1140
1493
|
self.curr_catalog = load_catalog(path=refresh_file_path)
|
|
1141
|
-
self.
|
|
1494
|
+
self.get_cll_cached.cache_clear()
|
|
1495
|
+
self.get_change_analysis_cached.cache_clear()
|
|
1142
1496
|
elif self.base_path and target_type == os.path.basename(self.base_path):
|
|
1143
|
-
if refresh_file_path.endswith(
|
|
1497
|
+
if refresh_file_path.endswith("manifest.json"):
|
|
1144
1498
|
self.base_manifest = load_manifest(path=refresh_file_path)
|
|
1145
|
-
|
|
1499
|
+
self.get_change_analysis_cached.cache_clear()
|
|
1500
|
+
elif refresh_file_path.endswith("catalog.json"):
|
|
1146
1501
|
self.base_catalog = load_catalog(path=refresh_file_path)
|
|
1502
|
+
self.get_change_analysis_cached.cache_clear()
|
|
1147
1503
|
|
|
1148
1504
|
def create_relation(self, model, base=False):
|
|
1149
1505
|
node = self.find_node_by_name(model, base)
|
|
@@ -1157,42 +1513,69 @@ class DbtAdapter(BaseAdapter):
|
|
|
1157
1513
|
select: Optional[str] = None,
|
|
1158
1514
|
exclude: Optional[str] = None,
|
|
1159
1515
|
packages: Optional[list[str]] = None,
|
|
1160
|
-
view_mode: Optional[Literal[
|
|
1516
|
+
view_mode: Optional[Literal["all", "changed_models"]] = None,
|
|
1161
1517
|
) -> Set[str]:
|
|
1162
|
-
from dbt.graph import NodeSelector
|
|
1163
|
-
from dbt.compilation import Compiler
|
|
1164
|
-
from dbt.graph import parse_difference, SelectionIntersection, SelectionUnion
|
|
1165
1518
|
import dbt.compilation
|
|
1519
|
+
from dbt.compilation import Compiler
|
|
1520
|
+
from dbt.graph import (
|
|
1521
|
+
NodeSelector,
|
|
1522
|
+
SelectionIntersection,
|
|
1523
|
+
SelectionUnion,
|
|
1524
|
+
parse_difference,
|
|
1525
|
+
)
|
|
1166
1526
|
|
|
1167
1527
|
select_list = [select] if select else None
|
|
1168
1528
|
exclude_list = [exclude] if exclude else None
|
|
1169
1529
|
|
|
1170
1530
|
def _parse_difference(include, exclude):
|
|
1171
|
-
if dbt_version <
|
|
1531
|
+
if dbt_version < "v1.8":
|
|
1172
1532
|
return parse_difference(include, exclude, "eager")
|
|
1173
1533
|
else:
|
|
1174
1534
|
return parse_difference(include, exclude)
|
|
1175
1535
|
|
|
1176
1536
|
specs = [_parse_difference(select_list, exclude_list)]
|
|
1177
1537
|
|
|
1538
|
+
# If packages is not provided, use the project name from manifest metadata as default
|
|
1539
|
+
if packages is None:
|
|
1540
|
+
if (
|
|
1541
|
+
self.manifest.metadata
|
|
1542
|
+
and hasattr(self.manifest.metadata, "project_name")
|
|
1543
|
+
and self.manifest.metadata.project_name
|
|
1544
|
+
):
|
|
1545
|
+
packages = [self.manifest.metadata.project_name]
|
|
1546
|
+
|
|
1178
1547
|
if packages is not None:
|
|
1179
|
-
package_spec = SelectionUnion([_parse_difference([f
|
|
1548
|
+
package_spec = SelectionUnion([_parse_difference([f"package:{p}"], None) for p in packages])
|
|
1180
1549
|
specs.append(package_spec)
|
|
1181
|
-
if view_mode and view_mode ==
|
|
1182
|
-
specs.append(_parse_difference([
|
|
1550
|
+
if view_mode and view_mode == "changed_models":
|
|
1551
|
+
specs.append(_parse_difference(["1+state:modified+"], None))
|
|
1183
1552
|
spec = SelectionIntersection(specs)
|
|
1184
1553
|
|
|
1185
1554
|
manifest = Manifest()
|
|
1555
|
+
manifest.metadata.adapter_type = self.adapter.type()
|
|
1186
1556
|
manifest_prev = self.previous_state.manifest
|
|
1187
1557
|
manifest_curr = self.manifest
|
|
1188
1558
|
|
|
1189
|
-
manifest.nodes = {**
|
|
1559
|
+
manifest.nodes = {**manifest_curr.nodes}
|
|
1560
|
+
# # mark a node is removed if the node id is no in the curr nodes
|
|
1561
|
+
for node_id, node in manifest_prev.nodes.items():
|
|
1562
|
+
if node_id not in manifest.nodes:
|
|
1563
|
+
node_dict = node.to_dict()
|
|
1564
|
+
if "raw_code" in node_dict:
|
|
1565
|
+
node_dict["raw_code"] = "__removed__"
|
|
1566
|
+
node_class = type(node)
|
|
1567
|
+
removed_node = node_class.from_dict(node_dict)
|
|
1568
|
+
manifest.nodes[node_id] = removed_node
|
|
1569
|
+
|
|
1190
1570
|
manifest.macros = {**manifest_prev.macros, **manifest_curr.macros}
|
|
1191
1571
|
manifest.sources = {**manifest_prev.sources, **manifest_curr.sources}
|
|
1192
1572
|
manifest.exposures = {**manifest_prev.exposures, **manifest_curr.exposures}
|
|
1193
1573
|
manifest.metrics = {**manifest_prev.metrics, **manifest_curr.metrics}
|
|
1194
|
-
if hasattr(manifest_prev,
|
|
1195
|
-
manifest.semantic_models = {
|
|
1574
|
+
if hasattr(manifest_prev, "semantic_models"):
|
|
1575
|
+
manifest.semantic_models = {
|
|
1576
|
+
**manifest_prev.semantic_models,
|
|
1577
|
+
**manifest_curr.semantic_models,
|
|
1578
|
+
}
|
|
1196
1579
|
|
|
1197
1580
|
compiler = Compiler(self.runtime_config)
|
|
1198
1581
|
# disable to print compile states
|
|
@@ -1207,28 +1590,28 @@ class DbtAdapter(BaseAdapter):
|
|
|
1207
1590
|
return selector.get_selected(spec)
|
|
1208
1591
|
|
|
1209
1592
|
def export_artifacts(self) -> ArtifactsRoot:
|
|
1210
|
-
|
|
1593
|
+
"""
|
|
1211
1594
|
Export the artifacts from the current state
|
|
1212
|
-
|
|
1595
|
+
"""
|
|
1213
1596
|
artifacts = ArtifactsRoot()
|
|
1214
1597
|
|
|
1215
1598
|
def _load_artifact(artifact):
|
|
1216
1599
|
return artifact.to_dict() if artifact else None
|
|
1217
1600
|
|
|
1218
1601
|
artifacts.base = {
|
|
1219
|
-
|
|
1220
|
-
|
|
1602
|
+
"manifest": _load_artifact(self.base_manifest),
|
|
1603
|
+
"catalog": _load_artifact(self.base_catalog),
|
|
1221
1604
|
}
|
|
1222
1605
|
artifacts.current = {
|
|
1223
|
-
|
|
1224
|
-
|
|
1606
|
+
"manifest": _load_artifact(self.curr_manifest),
|
|
1607
|
+
"catalog": _load_artifact(self.curr_catalog),
|
|
1225
1608
|
}
|
|
1226
1609
|
return artifacts
|
|
1227
1610
|
|
|
1228
1611
|
def export_artifacts_from_file(self) -> ArtifactsRoot:
|
|
1229
|
-
|
|
1612
|
+
"""
|
|
1230
1613
|
Export the artifacts from the state file. This is the old implementation
|
|
1231
|
-
|
|
1614
|
+
"""
|
|
1232
1615
|
artifacts = ArtifactsRoot()
|
|
1233
1616
|
target_path = self.runtime_config.target_path
|
|
1234
1617
|
target_base_path = self.base_path
|
|
@@ -1237,18 +1620,18 @@ class DbtAdapter(BaseAdapter):
|
|
|
1237
1620
|
if not os.path.isfile(path):
|
|
1238
1621
|
return None
|
|
1239
1622
|
|
|
1240
|
-
with open(path,
|
|
1623
|
+
with open(path, "r", encoding="utf-8") as f:
|
|
1241
1624
|
json_content = f.read()
|
|
1242
1625
|
return json.loads(json_content)
|
|
1243
1626
|
|
|
1244
1627
|
project_root = self.runtime_config.project_root
|
|
1245
1628
|
artifacts.base = {
|
|
1246
|
-
|
|
1247
|
-
|
|
1629
|
+
"manifest": _load_artifact(os.path.join(project_root, target_base_path, "manifest.json")),
|
|
1630
|
+
"catalog": _load_artifact(os.path.join(project_root, target_base_path, "catalog.json")),
|
|
1248
1631
|
}
|
|
1249
1632
|
artifacts.current = {
|
|
1250
|
-
|
|
1251
|
-
|
|
1633
|
+
"manifest": _load_artifact(os.path.join(project_root, target_path, "manifest.json")),
|
|
1634
|
+
"catalog": _load_artifact(os.path.join(project_root, target_path, "catalog.json")),
|
|
1252
1635
|
}
|
|
1253
1636
|
return artifacts
|
|
1254
1637
|
|
|
@@ -1256,7 +1639,7 @@ class DbtAdapter(BaseAdapter):
|
|
|
1256
1639
|
# Merge the artifacts from the state file or cloud
|
|
1257
1640
|
def _select_artifact(
|
|
1258
1641
|
original: Union[WritableManifest, CatalogArtifact],
|
|
1259
|
-
new: Union[WritableManifest, CatalogArtifact]
|
|
1642
|
+
new: Union[WritableManifest, CatalogArtifact],
|
|
1260
1643
|
):
|
|
1261
1644
|
if merge:
|
|
1262
1645
|
if not original:
|
|
@@ -1267,16 +1650,16 @@ class DbtAdapter(BaseAdapter):
|
|
|
1267
1650
|
else:
|
|
1268
1651
|
return new
|
|
1269
1652
|
|
|
1270
|
-
self.base_manifest = _select_artifact(self.base_manifest, load_manifest(data=artifacts.base.get(
|
|
1271
|
-
self.curr_manifest = _select_artifact(self.curr_manifest, load_manifest(data=artifacts.current.get(
|
|
1272
|
-
self.base_catalog = _select_artifact(self.base_catalog, load_catalog(data=artifacts.base.get(
|
|
1273
|
-
self.curr_catalog = _select_artifact(self.curr_catalog, load_catalog(data=artifacts.current.get(
|
|
1653
|
+
self.base_manifest = _select_artifact(self.base_manifest, load_manifest(data=artifacts.base.get("manifest")))
|
|
1654
|
+
self.curr_manifest = _select_artifact(self.curr_manifest, load_manifest(data=artifacts.current.get("manifest")))
|
|
1655
|
+
self.base_catalog = _select_artifact(self.base_catalog, load_catalog(data=artifacts.base.get("catalog")))
|
|
1656
|
+
self.curr_catalog = _select_artifact(self.curr_catalog, load_catalog(data=artifacts.current.get("catalog")))
|
|
1274
1657
|
|
|
1275
1658
|
self.manifest = as_manifest(self.curr_manifest)
|
|
1276
1659
|
self.previous_state = previous_state(
|
|
1277
1660
|
Path(self.base_path),
|
|
1278
1661
|
Path(self.runtime_config.target_path),
|
|
1279
|
-
Path(self.runtime_config.project_root)
|
|
1662
|
+
Path(self.runtime_config.project_root),
|
|
1280
1663
|
)
|
|
1281
1664
|
self.previous_state.manifest = as_manifest(self.base_manifest)
|
|
1282
1665
|
|
|
@@ -1292,7 +1675,8 @@ class DbtAdapter(BaseAdapter):
|
|
|
1292
1675
|
|
|
1293
1676
|
if not self.curr_manifest or not self.base_manifest:
|
|
1294
1677
|
raise Exception(
|
|
1295
|
-
|
|
1678
|
+
"No enough dbt artifacts in the state file. Please use the latest recce to generate the recce state"
|
|
1679
|
+
)
|
|
1296
1680
|
|
|
1297
1681
|
@contextmanager
|
|
1298
1682
|
def connection_named(self, name: str) -> Iterator[None]:
|