recce-nightly 1.10.0.20250625__py3-none-any.whl → 1.30.0.20251221__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of recce-nightly might be problematic. Click here for more details.
- recce/VERSION +1 -1
- recce/__init__.py +5 -0
- recce/adapter/dbt_adapter/__init__.py +343 -245
- recce/apis/check_api.py +20 -14
- recce/apis/check_events_api.py +353 -0
- recce/apis/check_func.py +5 -5
- recce/apis/run_func.py +32 -3
- recce/artifact.py +76 -3
- recce/cli.py +705 -82
- recce/config.py +2 -2
- recce/connect_to_cloud.py +1 -1
- recce/core.py +3 -3
- recce/data/404/index.html +2 -0
- recce/data/404.html +2 -22
- recce/data/__next.@lineage.!KHNsb3Qp.__PAGE__.txt +7 -0
- recce/data/__next.@lineage.!KHNsb3Qp.txt +4 -0
- recce/data/__next.__PAGE__.txt +6 -0
- recce/data/__next._full.txt +32 -0
- recce/data/__next._head.txt +8 -0
- recce/data/__next._index.txt +14 -0
- recce/data/__next._tree.txt +8 -0
- recce/data/_next/static/chunks/025a7e3e3f9f40ae.js +1 -0
- recce/data/_next/static/chunks/0ce56d67ef5779ca.js +4 -0
- recce/data/_next/static/chunks/1a6a78780155dac7.js +48 -0
- recce/data/_next/static/chunks/1de8485918b9182a.css +2 -0
- recce/data/_next/static/chunks/1e4b1b50d1e34993.js +1 -0
- recce/data/_next/static/chunks/206d5d181e4c738e.js +1 -0
- recce/data/_next/static/chunks/2c357efc34c5b859.js +25 -0
- recce/data/_next/static/chunks/2e9d95d2d48c479c.js +1 -0
- recce/data/_next/static/chunks/2f016dc4a3edad2e.js +2 -0
- recce/data/_next/static/chunks/313251962d698f7c.js +1 -0
- recce/data/_next/static/chunks/3a9f021f38eb5574.css +1 -0
- recce/data/_next/static/chunks/40079da8d2b8f651.js +1 -0
- recce/data/_next/static/chunks/4599182bffb64661.js +38 -0
- recce/data/_next/static/chunks/4e62f6e184173580.js +1 -0
- recce/data/_next/static/chunks/5c4dfb0d09eaa401.js +1 -0
- recce/data/_next/static/chunks/69e4f06ccfdfc3ac.js +1 -0
- recce/data/_next/static/chunks/6b206cb4707d6bee.js +1 -0
- recce/data/_next/static/chunks/6d8557f062aa4386.css +1 -0
- recce/data/_next/static/chunks/7fbe3650bd83b6b5.js +1 -0
- recce/data/_next/static/chunks/83fa823a825674f6.js +1 -0
- recce/data/_next/static/chunks/848a6c9b5f55f7ed.js +1 -0
- recce/data/_next/static/chunks/859462b0858aef88.css +2 -0
- recce/data/_next/static/chunks/923964f18c87d0f1.css +1 -0
- recce/data/_next/static/chunks/939390f911895d7c.js +48 -0
- recce/data/_next/static/chunks/99a9817237a07f43.js +1 -0
- recce/data/_next/static/chunks/9fed8b4b2b924054.js +5 -0
- recce/data/_next/static/chunks/b6949f6c5892110c.js +1 -0
- recce/data/_next/static/chunks/b851a1d3f8149828.js +1 -0
- recce/data/_next/static/chunks/c734f9ad957de0b4.js +1 -0
- recce/data/_next/static/chunks/cdde321b0ec75717.js +2 -0
- recce/data/_next/static/chunks/d0f91117d77ff844.css +1 -0
- recce/data/_next/static/chunks/d6c8667911c2500f.js +1 -0
- recce/data/_next/static/chunks/da8dab68c02752cf.js +74 -0
- recce/data/_next/static/chunks/dc074049c9d12d97.js +109 -0
- recce/data/_next/static/chunks/ee7f1a8227342421.js +1 -0
- recce/data/_next/static/chunks/fa2f4e56c2fccc73.js +1 -0
- recce/data/_next/static/chunks/turbopack-1fad664f62979b93.js +3 -0
- recce/data/_next/static/media/favicon.a8d38d84.ico +0 -0
- recce/data/_next/static/media/montserrat-cyrillic-800-normal.d80d830d.woff2 +0 -0
- recce/data/_next/static/media/{montserrat-cyrillic-800-normal.bd5c9f50.woff → montserrat-cyrillic-800-normal.f9d58125.woff} +0 -0
- recce/data/_next/static/media/montserrat-cyrillic-ext-800-normal.076c2a93.woff2 +0 -0
- recce/data/_next/static/media/montserrat-latin-800-normal.cde454cc.woff2 +0 -0
- recce/data/_next/static/media/{montserrat-latin-800-normal.fc315020.woff → montserrat-latin-800-normal.d5761935.woff} +0 -0
- recce/data/_next/static/media/montserrat-latin-ext-800-normal.40ec0659.woff2 +0 -0
- recce/data/_next/static/media/{montserrat-latin-ext-800-normal.2e5381b2.woff → montserrat-latin-ext-800-normal.b671449b.woff} +0 -0
- recce/data/_next/static/media/{montserrat-vietnamese-800-normal.20c545e6.woff → montserrat-vietnamese-800-normal.9f7b8541.woff} +0 -0
- recce/data/_next/static/media/montserrat-vietnamese-800-normal.f9eb854e.woff2 +0 -0
- recce/data/_next/static/nX-Uz0AH6Tc6hIQUFGqaB/_buildManifest.js +11 -0
- recce/data/_next/static/nX-Uz0AH6Tc6hIQUFGqaB/_clientMiddlewareManifest.json +1 -0
- recce/data/_not-found/__next._full.txt +24 -0
- recce/data/_not-found/__next._head.txt +8 -0
- recce/data/_not-found/__next._index.txt +13 -0
- recce/data/_not-found/__next._not-found.__PAGE__.txt +5 -0
- recce/data/_not-found/__next._not-found.txt +4 -0
- recce/data/_not-found/__next._tree.txt +6 -0
- recce/data/_not-found/index.html +2 -0
- recce/data/_not-found/index.txt +24 -0
- recce/data/auth_callback.html +1 -1
- recce/data/checks/__next.@lineage.__DEFAULT__.txt +7 -0
- recce/data/checks/__next._full.txt +39 -0
- recce/data/checks/__next._head.txt +8 -0
- recce/data/checks/__next._index.txt +14 -0
- recce/data/checks/__next._tree.txt +8 -0
- recce/data/checks/__next.checks.__PAGE__.txt +10 -0
- recce/data/checks/__next.checks.txt +4 -0
- recce/data/checks/index.html +2 -0
- recce/data/checks/index.txt +39 -0
- recce/data/index.html +2 -27
- recce/data/index.txt +32 -8
- recce/data/lineage/__next.@lineage.__DEFAULT__.txt +7 -0
- recce/data/lineage/__next._full.txt +39 -0
- recce/data/lineage/__next._head.txt +8 -0
- recce/data/lineage/__next._index.txt +14 -0
- recce/data/lineage/__next._tree.txt +8 -0
- recce/data/lineage/__next.lineage.__PAGE__.txt +10 -0
- recce/data/lineage/__next.lineage.txt +4 -0
- recce/data/lineage/index.html +2 -0
- recce/data/lineage/index.txt +39 -0
- recce/data/query/__next.@lineage.__DEFAULT__.txt +7 -0
- recce/data/query/__next._full.txt +37 -0
- recce/data/query/__next._head.txt +8 -0
- recce/data/query/__next._index.txt +14 -0
- recce/data/query/__next._tree.txt +8 -0
- recce/data/query/__next.query.__PAGE__.txt +9 -0
- recce/data/query/__next.query.txt +4 -0
- recce/data/query/index.html +2 -0
- recce/data/query/index.txt +37 -0
- recce/event/CONFIG.bak +1 -0
- recce/event/__init__.py +9 -8
- recce/event/collector.py +6 -2
- recce/event/track.py +10 -0
- recce/github.py +1 -1
- recce/mcp_server.py +725 -0
- recce/models/check.py +433 -15
- recce/models/types.py +61 -2
- recce/pull_request.py +1 -1
- recce/run.py +37 -17
- recce/server.py +216 -21
- recce/state/__init__.py +31 -0
- recce/state/cloud.py +644 -0
- recce/state/const.py +26 -0
- recce/state/local.py +56 -0
- recce/state/state.py +119 -0
- recce/state/state_loader.py +174 -0
- recce/summary.py +25 -3
- recce/tasks/dataframe.py +63 -1
- recce/tasks/query.py +40 -3
- recce/tasks/rowcount.py +4 -1
- recce/tasks/schema.py +4 -1
- recce/tasks/utils.py +147 -0
- recce/tasks/valuediff.py +85 -57
- recce/util/api_token.py +11 -2
- recce/util/breaking.py +10 -1
- recce/util/cll.py +1 -2
- recce/util/cloud/__init__.py +15 -0
- recce/util/cloud/base.py +115 -0
- recce/util/cloud/check_events.py +190 -0
- recce/util/cloud/checks.py +242 -0
- recce/util/io.py +2 -2
- recce/util/lineage.py +19 -18
- recce/util/perf_tracking.py +85 -0
- recce/util/recce_cloud.py +254 -5
- recce/util/startup_perf.py +121 -0
- recce/yaml/__init__.py +2 -2
- {recce_nightly-1.10.0.20250625.dist-info → recce_nightly-1.30.0.20251221.dist-info}/METADATA +91 -71
- recce_nightly-1.30.0.20251221.dist-info/RECORD +183 -0
- {recce_nightly-1.10.0.20250625.dist-info → recce_nightly-1.30.0.20251221.dist-info}/WHEEL +1 -2
- recce/data/_next/static/abCX3x3UoIdRLEDWxx4xd/_buildManifest.js +0 -1
- recce/data/_next/static/chunks/181-acc61ddada3bc0ca.js +0 -43
- recce/data/_next/static/chunks/1bff33f1-1ef85cf5e658a751.js +0 -1
- recce/data/_next/static/chunks/217-879a84d70f7a907c.js +0 -2
- recce/data/_next/static/chunks/29e3cc0d-60045b2e47aa3916.js +0 -1
- recce/data/_next/static/chunks/36e1c10d-8e7be4a6c1f6ab2d.js +0 -1
- recce/data/_next/static/chunks/3998a672-03adacad07b346ac.js +0 -1
- recce/data/_next/static/chunks/3a92ee20-1081c360214f9602.js +0 -1
- recce/data/_next/static/chunks/42-cd3c06533f5fd47c.js +0 -9
- recce/data/_next/static/chunks/450c323b-fd94e7ffaa4a5efa.js +0 -1
- recce/data/_next/static/chunks/47d8844f-929aed9b1c73a905.js +0 -1
- recce/data/_next/static/chunks/608-3b079b544e5d5f5e.js +0 -15
- recce/data/_next/static/chunks/6dc81886-adbfa45836061d79.js +0 -1
- recce/data/_next/static/chunks/7a8a3e83-edf6dc64b5d5f0a5.js +0 -1
- recce/data/_next/static/chunks/7f27ae6c-d5f0438edd5c2a5b.js +0 -1
- recce/data/_next/static/chunks/86730205-cfb14e3f051bab35.js +0 -1
- recce/data/_next/static/chunks/8d700b6a.8bb140898499c512.js +0 -1
- recce/data/_next/static/chunks/92-607cd1af83c41f43.js +0 -1
- recce/data/_next/static/chunks/9746af58-a42b7d169cacadf0.js +0 -1
- recce/data/_next/static/chunks/a30376cd-de84559016d7e133.js +0 -1
- recce/data/_next/static/chunks/app/_not-found/page-01ed58b7f971d311.js +0 -1
- recce/data/_next/static/chunks/app/layout-177a410a97e0d018.js +0 -1
- recce/data/_next/static/chunks/app/page-da6e046a8235dbfc.js +0 -1
- recce/data/_next/static/chunks/b63b1b3f-4282bdcf459e075c.js +0 -1
- recce/data/_next/static/chunks/bbda5537-9ec25eb1dd62348a.js +0 -1
- recce/data/_next/static/chunks/c132bf7d-08cb668a789d6afd.js +0 -1
- recce/data/_next/static/chunks/ce84277d-2e5d1d46910cf052.js +0 -1
- recce/data/_next/static/chunks/febdd86e-c6b525341634b860.js +0 -54
- recce/data/_next/static/chunks/fee69bc6-2dbccaf9b90474e6.js +0 -1
- recce/data/_next/static/chunks/framework-ded83d71b51ce901.js +0 -1
- recce/data/_next/static/chunks/main-app-39061b0166c47f55.js +0 -1
- recce/data/_next/static/chunks/main-b5b3ae20a1405261.js +0 -1
- recce/data/_next/static/chunks/pages/_app-437c455677d62394.js +0 -1
- recce/data/_next/static/chunks/pages/_error-e7650df18ca04bde.js +0 -1
- recce/data/_next/static/chunks/webpack-7b49d5ba7e3a434d.js +0 -1
- recce/data/_next/static/css/17a96168e3a9db13.css +0 -1
- recce/data/_next/static/css/1b121dc4d36aeb4d.css +0 -3
- recce/data/_next/static/css/35c6679a098e1e34.css +0 -1
- recce/data/_next/static/css/951e2e0eea2d4a5b.css +0 -14
- recce/data/_next/static/media/montserrat-cyrillic-800-normal.22628180.woff2 +0 -0
- recce/data/_next/static/media/montserrat-cyrillic-ext-800-normal.94a63aea.woff2 +0 -0
- recce/data/_next/static/media/montserrat-latin-800-normal.6f8fa298.woff2 +0 -0
- recce/data/_next/static/media/montserrat-latin-ext-800-normal.013b84f9.woff2 +0 -0
- recce/data/_next/static/media/montserrat-vietnamese-800-normal.c0035377.woff2 +0 -0
- recce/data/_next/static/media/reload-image.79aabb7d.svg +0 -4
- recce/state.py +0 -786
- recce_nightly-1.10.0.20250625.dist-info/RECORD +0 -154
- recce_nightly-1.10.0.20250625.dist-info/top_level.txt +0 -2
- tests/__init__.py +0 -0
- tests/adapter/__init__.py +0 -0
- tests/adapter/dbt_adapter/__init__.py +0 -0
- tests/adapter/dbt_adapter/conftest.py +0 -17
- tests/adapter/dbt_adapter/dbt_test_helper.py +0 -298
- tests/adapter/dbt_adapter/test_dbt_adapter.py +0 -25
- tests/adapter/dbt_adapter/test_dbt_cll.py +0 -384
- tests/adapter/dbt_adapter/test_selector.py +0 -202
- tests/tasks/__init__.py +0 -0
- tests/tasks/conftest.py +0 -4
- tests/tasks/test_histogram.py +0 -129
- tests/tasks/test_lineage.py +0 -55
- tests/tasks/test_preset_checks.py +0 -64
- tests/tasks/test_profile.py +0 -397
- tests/tasks/test_query.py +0 -151
- tests/tasks/test_row_count.py +0 -135
- tests/tasks/test_schema.py +0 -122
- tests/tasks/test_top_k.py +0 -77
- tests/tasks/test_valuediff.py +0 -85
- tests/test_cli.py +0 -133
- tests/test_config.py +0 -43
- tests/test_connect_to_cloud.py +0 -82
- tests/test_core.py +0 -29
- tests/test_dbt.py +0 -36
- tests/test_pull_request.py +0 -130
- tests/test_server.py +0 -104
- tests/test_state.py +0 -134
- tests/test_summary.py +0 -65
- /recce/data/_next/static/chunks/{polyfills-42372ed130431b0a.js → a6dad97d9634a72d.js} +0 -0
- /recce/data/_next/static/media/{montserrat-cyrillic-ext-800-normal.e6e0d8d0.woff → montserrat-cyrillic-ext-800-normal.a4fa76b5.woff} +0 -0
- /recce/data/_next/static/{abCX3x3UoIdRLEDWxx4xd → nX-Uz0AH6Tc6hIQUFGqaB}/_ssgManifest.js +0 -0
- {recce_nightly-1.10.0.20250625.dist-info → recce_nightly-1.30.0.20251221.dist-info}/entry_points.txt +0 -0
- {recce_nightly-1.10.0.20250625.dist-info → recce_nightly-1.30.0.20251221.dist-info}/licenses/LICENSE +0 -0
|
@@ -3,6 +3,7 @@ import logging
|
|
|
3
3
|
import os
|
|
4
4
|
import uuid
|
|
5
5
|
from contextlib import contextmanager
|
|
6
|
+
from copy import deepcopy
|
|
6
7
|
from dataclasses import dataclass, fields
|
|
7
8
|
from errno import ENOENT
|
|
8
9
|
from functools import lru_cache
|
|
@@ -25,12 +26,13 @@ from recce.event import log_performance
|
|
|
25
26
|
from recce.exceptions import RecceException
|
|
26
27
|
from recce.util.cll import CLLPerformanceTracking, cll
|
|
27
28
|
from recce.util.lineage import (
|
|
29
|
+
build_column_key,
|
|
28
30
|
filter_dependency_maps,
|
|
29
|
-
filter_lineage_vertices,
|
|
30
|
-
find_column_dependencies,
|
|
31
31
|
find_downstream,
|
|
32
32
|
find_upstream,
|
|
33
33
|
)
|
|
34
|
+
from recce.util.perf_tracking import LineagePerfTracker
|
|
35
|
+
from recce.util.startup_perf import track_timing
|
|
34
36
|
|
|
35
37
|
from ...tasks.profile import ProfileTask
|
|
36
38
|
from ...util.breaking import BreakingPerformanceTracking, parse_change_category
|
|
@@ -109,6 +111,7 @@ from dbt.config.runtime import RuntimeConfig # noqa: E402
|
|
|
109
111
|
from dbt.contracts.graph.manifest import ( # noqa: E402
|
|
110
112
|
MacroManifest,
|
|
111
113
|
Manifest,
|
|
114
|
+
ManifestMetadata,
|
|
112
115
|
WritableManifest,
|
|
113
116
|
)
|
|
114
117
|
from dbt.contracts.graph.nodes import ManifestNode # noqa: E402
|
|
@@ -208,9 +211,12 @@ def as_manifest(m: WritableManifest) -> Manifest:
|
|
|
208
211
|
new_data = {k: v for k, v in data.items() if k in all_fields}
|
|
209
212
|
return Manifest(**new_data)
|
|
210
213
|
else:
|
|
211
|
-
|
|
214
|
+
result = Manifest.from_writable_manifest(m)
|
|
215
|
+
result.metadata = ManifestMetadata(**m.metadata.__dict__)
|
|
216
|
+
return result
|
|
212
217
|
|
|
213
218
|
|
|
219
|
+
@track_timing(record_size=True)
|
|
214
220
|
def load_manifest(path: str = None, data: dict = None):
|
|
215
221
|
if path is not None:
|
|
216
222
|
if not os.path.isfile(path):
|
|
@@ -220,6 +226,7 @@ def load_manifest(path: str = None, data: dict = None):
|
|
|
220
226
|
return WritableManifest.upgrade_schema_version(data)
|
|
221
227
|
|
|
222
228
|
|
|
229
|
+
@track_timing(record_size=True)
|
|
223
230
|
def load_catalog(path: str = None, data: dict = None):
|
|
224
231
|
if path is not None:
|
|
225
232
|
if not os.path.isfile(path):
|
|
@@ -278,7 +285,7 @@ class DbtArgs:
|
|
|
278
285
|
target_path: Optional[str] = (None,)
|
|
279
286
|
project_only_flags: Optional[Dict[str, Any]] = None
|
|
280
287
|
which: Optional[str] = None
|
|
281
|
-
state_modified_compare_more_unrendered_values: Optional[bool] =
|
|
288
|
+
state_modified_compare_more_unrendered_values: Optional[bool] = True # new flag added since dbt v1.9
|
|
282
289
|
|
|
283
290
|
|
|
284
291
|
@dataclass
|
|
@@ -407,7 +414,7 @@ class DbtAdapter(BaseAdapter):
|
|
|
407
414
|
|
|
408
415
|
if self.adapter.connections.TYPE == "databricks":
|
|
409
416
|
# reference: get_columns_in_relation (dbt/adapters/databricks/impl.py)
|
|
410
|
-
from dbt.adapters.databricks import DatabricksColumn
|
|
417
|
+
from dbt.adapters.databricks.column import DatabricksColumn
|
|
411
418
|
|
|
412
419
|
rows = columns
|
|
413
420
|
columns = []
|
|
@@ -472,6 +479,7 @@ class DbtAdapter(BaseAdapter):
|
|
|
472
479
|
|
|
473
480
|
return result
|
|
474
481
|
|
|
482
|
+
@track_timing("artifact_load")
|
|
475
483
|
def load_artifacts(self):
|
|
476
484
|
"""
|
|
477
485
|
Load the artifacts from the 'target' and 'target-base' directory
|
|
@@ -487,16 +495,20 @@ class DbtAdapter(BaseAdapter):
|
|
|
487
495
|
|
|
488
496
|
# load the artifacts
|
|
489
497
|
path = os.path.join(project_root, target_path, "manifest.json")
|
|
490
|
-
curr_manifest = load_manifest(path=path)
|
|
498
|
+
curr_manifest = load_manifest(path=path, timing_name="curr_manifest")
|
|
491
499
|
if curr_manifest is None:
|
|
492
500
|
raise FileNotFoundError(ENOENT, os.strerror(ENOENT), path)
|
|
493
501
|
path = os.path.join(project_root, target_base_path, "manifest.json")
|
|
494
|
-
base_manifest = load_manifest(path=path)
|
|
502
|
+
base_manifest = load_manifest(path=path, timing_name="base_manifest")
|
|
495
503
|
if base_manifest is None:
|
|
496
504
|
raise FileNotFoundError(ENOENT, os.strerror(ENOENT), path)
|
|
497
505
|
|
|
498
|
-
curr_catalog = load_catalog(
|
|
499
|
-
|
|
506
|
+
curr_catalog = load_catalog(
|
|
507
|
+
path=os.path.join(project_root, target_path, "catalog.json"), timing_name="curr_catalog"
|
|
508
|
+
)
|
|
509
|
+
base_catalog = load_catalog(
|
|
510
|
+
path=os.path.join(project_root, target_base_path, "catalog.json"), timing_name="base_catalog"
|
|
511
|
+
)
|
|
500
512
|
|
|
501
513
|
# set the value if all the artifacts are loaded successfully
|
|
502
514
|
self.curr_manifest = curr_manifest
|
|
@@ -599,7 +611,15 @@ class DbtAdapter(BaseAdapter):
|
|
|
599
611
|
return node.compiled_code
|
|
600
612
|
else:
|
|
601
613
|
from dbt.clients import jinja
|
|
602
|
-
from dbt.context.providers import
|
|
614
|
+
from dbt.context.providers import (
|
|
615
|
+
generate_runtime_macro_context,
|
|
616
|
+
generate_runtime_model_context,
|
|
617
|
+
)
|
|
618
|
+
|
|
619
|
+
# Set up macro resolver for dbt >= 1.8
|
|
620
|
+
macro_manifest = MacroManifest(manifest.macros)
|
|
621
|
+
self.adapter.set_macro_resolver(macro_manifest)
|
|
622
|
+
self.adapter.set_macro_context_generator(generate_runtime_macro_context)
|
|
603
623
|
|
|
604
624
|
jinja_ctx = generate_runtime_model_context(node, self.runtime_config, manifest)
|
|
605
625
|
jinja_ctx.update(context)
|
|
@@ -658,8 +678,8 @@ class DbtAdapter(BaseAdapter):
|
|
|
658
678
|
@lru_cache(maxsize=2)
|
|
659
679
|
def get_lineage_cached(self, base: Optional[bool] = False, cache_key=0):
|
|
660
680
|
if base is False:
|
|
661
|
-
|
|
662
|
-
|
|
681
|
+
perf_tracker = LineagePerfTracker()
|
|
682
|
+
perf_tracker.start_lineage()
|
|
663
683
|
|
|
664
684
|
manifest = self.curr_manifest if base is False else self.base_manifest
|
|
665
685
|
catalog = self.curr_catalog if base is False else self.base_catalog
|
|
@@ -736,6 +756,7 @@ class DbtAdapter(BaseAdapter):
|
|
|
736
756
|
nodes[unique_id] = {
|
|
737
757
|
"id": source["unique_id"],
|
|
738
758
|
"name": source["name"],
|
|
759
|
+
"source_name": source["source_name"],
|
|
739
760
|
"resource_type": source["resource_type"],
|
|
740
761
|
"package_name": source["package_name"],
|
|
741
762
|
"config": source["config"],
|
|
@@ -777,10 +798,10 @@ class DbtAdapter(BaseAdapter):
|
|
|
777
798
|
parent_map = self.build_parent_map(nodes, base)
|
|
778
799
|
|
|
779
800
|
if base is False:
|
|
780
|
-
|
|
781
|
-
|
|
782
|
-
log_performance("model lineage",
|
|
783
|
-
|
|
801
|
+
perf_tracker.end_lineage()
|
|
802
|
+
perf_tracker.set_total_nodes(len(nodes))
|
|
803
|
+
log_performance("model lineage", perf_tracker.to_dict())
|
|
804
|
+
perf_tracker.reset()
|
|
784
805
|
|
|
785
806
|
return dict(
|
|
786
807
|
parent_map=parent_map,
|
|
@@ -793,15 +814,43 @@ class DbtAdapter(BaseAdapter):
|
|
|
793
814
|
def _get_lineage_diff_cached(self, cache_key) -> LineageDiff:
|
|
794
815
|
base = self.get_lineage(base=True)
|
|
795
816
|
current = self.get_lineage(base=False)
|
|
796
|
-
keys = {*base.get("nodes", {}).keys(), *current.get("nodes", {}).keys()}
|
|
797
817
|
|
|
798
|
-
|
|
799
|
-
|
|
800
|
-
|
|
818
|
+
modified_nodes = self.select_nodes(select="state:modified")
|
|
819
|
+
diff = {}
|
|
820
|
+
for node_id in modified_nodes:
|
|
821
|
+
base_node = base.get("nodes", {}).get(node_id)
|
|
822
|
+
curr_node = current.get("nodes", {}).get(node_id)
|
|
823
|
+
if base_node and curr_node:
|
|
824
|
+
diff[node_id] = NodeDiff(change_status="modified")
|
|
825
|
+
elif base_node:
|
|
826
|
+
diff[node_id] = NodeDiff(change_status="removed")
|
|
827
|
+
elif curr_node:
|
|
828
|
+
diff[node_id] = NodeDiff(change_status="added")
|
|
829
|
+
|
|
830
|
+
return LineageDiff(
|
|
831
|
+
base=base,
|
|
832
|
+
current=current,
|
|
833
|
+
diff=diff,
|
|
834
|
+
)
|
|
835
|
+
|
|
836
|
+
@lru_cache(maxsize=128)
|
|
837
|
+
def get_change_analysis_cached(self, node_id: str):
|
|
838
|
+
breaking_perf_tracker = BreakingPerformanceTracking()
|
|
839
|
+
lineage_diff = self.get_lineage_diff()
|
|
840
|
+
diff = lineage_diff.diff
|
|
841
|
+
|
|
842
|
+
if node_id not in diff or diff[node_id].change_status != "modified":
|
|
843
|
+
return diff.get(node_id)
|
|
844
|
+
|
|
845
|
+
breaking_perf_tracker.increment_modified_nodes()
|
|
846
|
+
breaking_perf_tracker.start_lineage_diff()
|
|
847
|
+
|
|
848
|
+
base = lineage_diff.base
|
|
849
|
+
current = lineage_diff.current
|
|
801
850
|
|
|
802
851
|
base_manifest = as_manifest(self.get_manifest(True))
|
|
803
852
|
curr_manifest = as_manifest(self.get_manifest(False))
|
|
804
|
-
|
|
853
|
+
breaking_perf_tracker.record_checkpoint("manifest")
|
|
805
854
|
|
|
806
855
|
def ref_func(*args):
|
|
807
856
|
if len(args) == 1:
|
|
@@ -821,111 +870,106 @@ class DbtAdapter(BaseAdapter):
|
|
|
821
870
|
source=source_func,
|
|
822
871
|
)
|
|
823
872
|
|
|
824
|
-
|
|
825
|
-
|
|
826
|
-
|
|
827
|
-
|
|
828
|
-
curr_node
|
|
829
|
-
|
|
830
|
-
|
|
831
|
-
|
|
832
|
-
|
|
833
|
-
|
|
834
|
-
|
|
835
|
-
|
|
836
|
-
|
|
837
|
-
|
|
838
|
-
|
|
839
|
-
|
|
840
|
-
|
|
841
|
-
|
|
842
|
-
|
|
843
|
-
|
|
844
|
-
|
|
845
|
-
|
|
846
|
-
|
|
847
|
-
|
|
848
|
-
|
|
849
|
-
|
|
850
|
-
|
|
851
|
-
|
|
852
|
-
|
|
853
|
-
|
|
854
|
-
|
|
855
|
-
|
|
856
|
-
|
|
857
|
-
|
|
858
|
-
|
|
859
|
-
|
|
860
|
-
|
|
861
|
-
|
|
862
|
-
|
|
863
|
-
|
|
864
|
-
|
|
865
|
-
|
|
866
|
-
|
|
867
|
-
|
|
868
|
-
|
|
869
|
-
|
|
870
|
-
|
|
871
|
-
|
|
872
|
-
|
|
873
|
-
|
|
874
|
-
|
|
875
|
-
|
|
876
|
-
|
|
877
|
-
curr_sql,
|
|
878
|
-
old_schema=base_schema,
|
|
879
|
-
new_schema=curr_schema,
|
|
880
|
-
dialect=dialect,
|
|
881
|
-
perf_tracking=perf_tracking,
|
|
882
|
-
)
|
|
883
|
-
|
|
884
|
-
# Make sure that the case of the column names are the same
|
|
885
|
-
changed_columns = {
|
|
886
|
-
column.lower(): change_status for column, change_status in (change.columns or {}).items()
|
|
887
|
-
}
|
|
888
|
-
changed_columns_names = set(changed_columns)
|
|
889
|
-
changed_columns_final = {}
|
|
890
|
-
|
|
891
|
-
base_columns = base_node.get("columns") or {}
|
|
892
|
-
curr_columns = curr_node.get("columns") or {}
|
|
893
|
-
columns_names = set(base_columns) | set(curr_columns)
|
|
873
|
+
base_node = base.get("nodes", {}).get(node_id)
|
|
874
|
+
curr_node = current.get("nodes", {}).get(node_id)
|
|
875
|
+
change = NodeChange(category="unknown")
|
|
876
|
+
if (
|
|
877
|
+
curr_node.get("resource_type") in ["model", "snapshot"]
|
|
878
|
+
and curr_node.get("raw_code") is not None
|
|
879
|
+
and base_node.get("raw_code") is not None
|
|
880
|
+
):
|
|
881
|
+
try:
|
|
882
|
+
|
|
883
|
+
def _get_schema(lineage):
|
|
884
|
+
schema = {}
|
|
885
|
+
nodes = lineage["nodes"]
|
|
886
|
+
parent_list = lineage["parent_map"].get(node_id, [])
|
|
887
|
+
for parent_id in parent_list:
|
|
888
|
+
parent_node = nodes.get(parent_id)
|
|
889
|
+
if parent_node is None:
|
|
890
|
+
continue
|
|
891
|
+
columns = parent_node.get("columns") or {}
|
|
892
|
+
name = parent_node.get("name")
|
|
893
|
+
if parent_node.get("resource_type") == "source":
|
|
894
|
+
parts = parent_id.split(".")
|
|
895
|
+
source = parts[2]
|
|
896
|
+
table = parts[3]
|
|
897
|
+
source = source.replace("-", "_")
|
|
898
|
+
name = f"__{source}__{table}"
|
|
899
|
+
schema[name] = {name: column.get("type") for name, column in columns.items()}
|
|
900
|
+
return schema
|
|
901
|
+
|
|
902
|
+
base_sql = self.generate_sql(
|
|
903
|
+
base_node.get("raw_code"),
|
|
904
|
+
context=jinja_context,
|
|
905
|
+
provided_manifest=base_manifest,
|
|
906
|
+
)
|
|
907
|
+
curr_sql = self.generate_sql(
|
|
908
|
+
curr_node.get("raw_code"),
|
|
909
|
+
context=jinja_context,
|
|
910
|
+
provided_manifest=curr_manifest,
|
|
911
|
+
)
|
|
912
|
+
base_schema = _get_schema(base)
|
|
913
|
+
curr_schema = _get_schema(current)
|
|
914
|
+
dialect = self.adapter.connections.TYPE
|
|
915
|
+
if curr_manifest.metadata.adapter_type is not None:
|
|
916
|
+
dialect = curr_manifest.metadata.adapter_type
|
|
917
|
+
|
|
918
|
+
change = parse_change_category(
|
|
919
|
+
base_sql,
|
|
920
|
+
curr_sql,
|
|
921
|
+
old_schema=base_schema,
|
|
922
|
+
new_schema=curr_schema,
|
|
923
|
+
dialect=dialect,
|
|
924
|
+
perf_tracking=breaking_perf_tracker,
|
|
925
|
+
)
|
|
894
926
|
|
|
895
|
-
|
|
896
|
-
|
|
897
|
-
|
|
927
|
+
# Make sure that the case of the column names are the same
|
|
928
|
+
changed_columns = {
|
|
929
|
+
column.lower(): change_status for column, change_status in (change.columns or {}).items()
|
|
930
|
+
}
|
|
931
|
+
changed_columns_names = set(changed_columns)
|
|
932
|
+
changed_columns_final = {}
|
|
898
933
|
|
|
899
|
-
|
|
900
|
-
|
|
901
|
-
|
|
934
|
+
base_columns = base_node.get("columns") or {}
|
|
935
|
+
curr_columns = curr_node.get("columns") or {}
|
|
936
|
+
columns_names = set(base_columns) | set(curr_columns)
|
|
902
937
|
|
|
903
|
-
|
|
904
|
-
|
|
905
|
-
|
|
906
|
-
elif curr_node:
|
|
907
|
-
diff[key] = NodeDiff(change_status="added")
|
|
938
|
+
for column_name in columns_names:
|
|
939
|
+
if column_name.lower() in changed_columns_names:
|
|
940
|
+
changed_columns_final[column_name] = changed_columns[column_name.lower()]
|
|
908
941
|
|
|
909
|
-
|
|
910
|
-
|
|
942
|
+
change.columns = changed_columns_final
|
|
943
|
+
except Exception:
|
|
944
|
+
# TODO: telemetry
|
|
945
|
+
pass
|
|
911
946
|
|
|
912
|
-
|
|
913
|
-
|
|
914
|
-
|
|
915
|
-
|
|
916
|
-
|
|
947
|
+
breaking_perf_tracker.end_lineage_diff()
|
|
948
|
+
log_performance("change analysis per node", breaking_perf_tracker.to_dict())
|
|
949
|
+
breaking_perf_tracker.reset()
|
|
950
|
+
node_diff = diff.get(node_id)
|
|
951
|
+
node_diff.change = change
|
|
952
|
+
return node_diff
|
|
917
953
|
|
|
918
954
|
def get_cll(
|
|
919
955
|
self,
|
|
920
956
|
node_id: Optional[str] = None,
|
|
921
957
|
column: Optional[str] = None,
|
|
922
958
|
change_analysis: Optional[bool] = False,
|
|
923
|
-
|
|
924
|
-
|
|
925
|
-
|
|
959
|
+
no_cll: Optional[bool] = False,
|
|
960
|
+
no_upstream: Optional[bool] = False,
|
|
961
|
+
no_downstream: Optional[bool] = False,
|
|
926
962
|
no_filter: Optional[bool] = False,
|
|
927
963
|
) -> CllData:
|
|
928
|
-
cll_tracker =
|
|
964
|
+
cll_tracker = LineagePerfTracker()
|
|
965
|
+
cll_tracker.set_params(
|
|
966
|
+
has_node=node_id is not None,
|
|
967
|
+
has_column=column is not None,
|
|
968
|
+
change_analysis=change_analysis,
|
|
969
|
+
no_cll=no_cll,
|
|
970
|
+
no_upstream=no_upstream,
|
|
971
|
+
no_downstream=no_downstream,
|
|
972
|
+
)
|
|
929
973
|
cll_tracker.start_column_lineage()
|
|
930
974
|
|
|
931
975
|
manifest = self.curr_manifest
|
|
@@ -936,47 +980,114 @@ class DbtAdapter(BaseAdapter):
|
|
|
936
980
|
cll_node_ids = {node_id}
|
|
937
981
|
else:
|
|
938
982
|
lineage_diff = self.get_lineage_diff()
|
|
939
|
-
cll_node_ids = lineage_diff.diff.keys()
|
|
983
|
+
cll_node_ids = set(lineage_diff.diff.keys())
|
|
984
|
+
|
|
985
|
+
cll_tracker.set_init_nodes(len(cll_node_ids))
|
|
940
986
|
|
|
941
987
|
nodes = {}
|
|
942
988
|
columns = {}
|
|
943
989
|
parent_map = {}
|
|
944
990
|
child_map = {}
|
|
945
991
|
|
|
946
|
-
if
|
|
992
|
+
if not no_upstream:
|
|
947
993
|
cll_node_ids = cll_node_ids.union(find_upstream(cll_node_ids, manifest_dict.get("parent_map")))
|
|
948
|
-
if
|
|
994
|
+
if not no_downstream:
|
|
949
995
|
cll_node_ids = cll_node_ids.union(find_downstream(cll_node_ids, manifest_dict.get("child_map")))
|
|
950
996
|
|
|
951
|
-
if
|
|
997
|
+
if not no_cll:
|
|
998
|
+
allowed_related_nodes = set()
|
|
999
|
+
for key in ["sources", "nodes", "exposures", "metrics"]:
|
|
1000
|
+
attr = getattr(manifest, key)
|
|
1001
|
+
allowed_related_nodes.update(set(attr.keys()))
|
|
1002
|
+
if hasattr(manifest, "semantic_models"):
|
|
1003
|
+
attr = getattr(manifest, "semantic_models")
|
|
1004
|
+
allowed_related_nodes.update(set(attr.keys()))
|
|
952
1005
|
for cll_node_id in cll_node_ids:
|
|
953
|
-
if
|
|
954
|
-
cll_node_id not in manifest.sources
|
|
955
|
-
and cll_node_id not in manifest.nodes
|
|
956
|
-
and cll_node_id not in manifest.exposures
|
|
957
|
-
):
|
|
1006
|
+
if cll_node_id not in allowed_related_nodes:
|
|
958
1007
|
continue
|
|
959
|
-
cll_data_one = self.get_cll_cached(cll_node_id, base=False)
|
|
1008
|
+
cll_data_one = deepcopy(self.get_cll_cached(cll_node_id, base=False))
|
|
1009
|
+
cll_tracker.increment_cll_nodes()
|
|
960
1010
|
if cll_data_one is None:
|
|
961
1011
|
continue
|
|
962
1012
|
|
|
963
|
-
|
|
964
|
-
|
|
965
|
-
|
|
966
|
-
|
|
967
|
-
|
|
968
|
-
|
|
969
|
-
|
|
970
|
-
|
|
1013
|
+
nodes[cll_node_id] = cll_data_one.nodes.get(cll_node_id)
|
|
1014
|
+
node_diff = None
|
|
1015
|
+
if change_analysis:
|
|
1016
|
+
node_diff = self.get_change_analysis_cached(cll_node_id)
|
|
1017
|
+
cll_tracker.increment_change_analysis_nodes()
|
|
1018
|
+
if node_diff is not None:
|
|
1019
|
+
nodes[cll_node_id].change_status = node_diff.change_status
|
|
1020
|
+
if node_diff.change is not None:
|
|
1021
|
+
nodes[cll_node_id].change_category = node_diff.change.category
|
|
971
1022
|
for c_id, c in cll_data_one.columns.items():
|
|
972
1023
|
columns[c_id] = c
|
|
973
|
-
if node_diff is not None
|
|
974
|
-
|
|
975
|
-
|
|
976
|
-
|
|
1024
|
+
if node_diff is not None:
|
|
1025
|
+
if node_diff.change_status == "added":
|
|
1026
|
+
c.change_status = "added"
|
|
1027
|
+
elif node_diff.change_status == "removed":
|
|
1028
|
+
c.change_status = "removed"
|
|
1029
|
+
elif node_diff.change is not None and node_diff.change.columns is not None:
|
|
1030
|
+
column_diff = node_diff.change.columns.get(c.name)
|
|
1031
|
+
if column_diff:
|
|
1032
|
+
c.change_status = column_diff
|
|
977
1033
|
|
|
978
1034
|
for p_id, parents in cll_data_one.parent_map.items():
|
|
979
1035
|
parent_map[p_id] = parents
|
|
1036
|
+
else:
|
|
1037
|
+
for cll_node_id in cll_node_ids:
|
|
1038
|
+
cll_node = None
|
|
1039
|
+
cll_node_columns: Dict[str, CllColumn] = {}
|
|
1040
|
+
|
|
1041
|
+
if cll_node_id in manifest.sources:
|
|
1042
|
+
cll_node = CllNode.build_cll_node(manifest, "sources", cll_node_id)
|
|
1043
|
+
if self.curr_catalog and cll_node_id in self.curr_catalog.sources:
|
|
1044
|
+
cll_node_columns = {
|
|
1045
|
+
column.name: CllColumn(
|
|
1046
|
+
id=f"{cll_node_id}_{column.name}",
|
|
1047
|
+
table_id=cll_node_id,
|
|
1048
|
+
name=column.name,
|
|
1049
|
+
type=column.type,
|
|
1050
|
+
)
|
|
1051
|
+
for column in self.curr_catalog.sources[cll_node_id].columns.values()
|
|
1052
|
+
}
|
|
1053
|
+
elif cll_node_id in manifest.nodes:
|
|
1054
|
+
cll_node = CllNode.build_cll_node(manifest, "nodes", cll_node_id)
|
|
1055
|
+
if self.curr_catalog and cll_node_id in self.curr_catalog.nodes:
|
|
1056
|
+
cll_node_columns = {
|
|
1057
|
+
column.name: CllColumn(
|
|
1058
|
+
id=f"{cll_node_id}_{column.name}",
|
|
1059
|
+
table_id=cll_node_id,
|
|
1060
|
+
name=column.name,
|
|
1061
|
+
type=column.type,
|
|
1062
|
+
)
|
|
1063
|
+
for column in self.curr_catalog.nodes[cll_node_id].columns.values()
|
|
1064
|
+
}
|
|
1065
|
+
elif cll_node_id in manifest.exposures:
|
|
1066
|
+
cll_node = CllNode.build_cll_node(manifest, "exposures", cll_node_id)
|
|
1067
|
+
elif hasattr(manifest, "semantic_models") and cll_node_id in manifest.semantic_models:
|
|
1068
|
+
cll_node = CllNode.build_cll_node(manifest, "semantic_models", cll_node_id)
|
|
1069
|
+
elif cll_node_id in manifest.metrics:
|
|
1070
|
+
cll_node = CllNode.build_cll_node(manifest, "metrics", cll_node_id)
|
|
1071
|
+
|
|
1072
|
+
if not cll_node:
|
|
1073
|
+
continue
|
|
1074
|
+
nodes[cll_node_id] = cll_node
|
|
1075
|
+
|
|
1076
|
+
node_diff = None
|
|
1077
|
+
if change_analysis:
|
|
1078
|
+
node_diff = self.get_change_analysis_cached(cll_node_id)
|
|
1079
|
+
cll_tracker.increment_change_analysis_nodes()
|
|
1080
|
+
if node_diff is not None:
|
|
1081
|
+
cll_node.change_status = node_diff.change_status
|
|
1082
|
+
if node_diff.change is not None:
|
|
1083
|
+
cll_node.change_category = node_diff.change.category
|
|
1084
|
+
for c, cll_column in cll_node_columns.items():
|
|
1085
|
+
cll_node.columns[c] = cll_column
|
|
1086
|
+
columns[cll_column.id] = cll_column
|
|
1087
|
+
if node_diff.change.columns and c in node_diff.change.columns:
|
|
1088
|
+
cll_column.change_status = node_diff.change.columns[c]
|
|
1089
|
+
|
|
1090
|
+
parent_map[cll_node_id] = manifest.parent_map.get(cll_node_id, [])
|
|
980
1091
|
|
|
981
1092
|
# build the child map
|
|
982
1093
|
for parent_id, parents in parent_map.items():
|
|
@@ -987,47 +1098,90 @@ class DbtAdapter(BaseAdapter):
|
|
|
987
1098
|
|
|
988
1099
|
# Find the anchor nodes
|
|
989
1100
|
anchor_node_ids = set()
|
|
1101
|
+
extra_node_ids = set()
|
|
990
1102
|
if node_id is None and column is None:
|
|
991
1103
|
if change_analysis:
|
|
992
1104
|
# If change analysis is requested, we need to find the nodes that have changes
|
|
993
|
-
|
|
994
|
-
|
|
995
|
-
|
|
996
|
-
|
|
997
|
-
|
|
1105
|
+
lineage_diff = self.get_lineage_diff()
|
|
1106
|
+
for nid, nd in lineage_diff.diff.items():
|
|
1107
|
+
if nd.change_status == "added":
|
|
1108
|
+
anchor_node_ids.add(nid)
|
|
1109
|
+
n = lineage_diff.current["nodes"].get(nid)
|
|
1110
|
+
n_columns = n.get("columns", {})
|
|
1111
|
+
for c in n_columns:
|
|
1112
|
+
anchor_node_ids.add(build_column_key(nid, c))
|
|
1113
|
+
continue
|
|
1114
|
+
if nd.change_status == "removed":
|
|
1115
|
+
extra_node_ids.add(nid)
|
|
1116
|
+
continue
|
|
1117
|
+
|
|
1118
|
+
node_diff = self.get_change_analysis_cached(nid)
|
|
1119
|
+
if node_diff is not None and node_diff.change is not None:
|
|
1120
|
+
extra_node_ids.add(nid)
|
|
1121
|
+
if no_cll:
|
|
1122
|
+
if node_diff.change.category in ["breaking", "partial_breaking", "unknown"]:
|
|
1123
|
+
anchor_node_ids.add(nid)
|
|
1124
|
+
else:
|
|
1125
|
+
if node_diff.change.category in ["breaking", "unknown"]:
|
|
1126
|
+
anchor_node_ids.add(nid)
|
|
1127
|
+
if node_diff.change.columns is not None:
|
|
1128
|
+
for column_name in node_diff.change.columns:
|
|
1129
|
+
anchor_node_ids.add(f"{nid}_{column_name}")
|
|
998
1130
|
else:
|
|
999
1131
|
lineage_diff = self.get_lineage_diff()
|
|
1000
1132
|
anchor_node_ids = lineage_diff.diff.keys()
|
|
1001
1133
|
elif node_id is not None and column is None:
|
|
1002
1134
|
if change_analysis:
|
|
1003
1135
|
# If change analysis is requested, we need to find the nodes that have changes
|
|
1004
|
-
node_diff = self.
|
|
1005
|
-
if node_diff:
|
|
1006
|
-
|
|
1007
|
-
|
|
1008
|
-
|
|
1009
|
-
|
|
1136
|
+
node_diff = self.get_change_analysis_cached(node_id)
|
|
1137
|
+
if node_diff is not None and node_diff.change is not None:
|
|
1138
|
+
extra_node_ids.add(node_id)
|
|
1139
|
+
if no_cll:
|
|
1140
|
+
if node_diff.change.category in ["breaking", "partial_breaking", "unknown"]:
|
|
1141
|
+
anchor_node_ids.add(node_id)
|
|
1142
|
+
else:
|
|
1143
|
+
if node_diff.change.category in ["breaking", "unknown"]:
|
|
1144
|
+
anchor_node_ids.add(node_id)
|
|
1145
|
+
if node_diff.change.columns is not None:
|
|
1146
|
+
for column_name in node_diff.change.columns:
|
|
1147
|
+
anchor_node_ids.add(f"{node_id}_{column_name}")
|
|
1010
1148
|
else:
|
|
1011
1149
|
anchor_node_ids.add(node_id)
|
|
1012
1150
|
else:
|
|
1013
1151
|
anchor_node_ids.add(node_id)
|
|
1152
|
+
if not no_cll:
|
|
1153
|
+
node = nodes.get(node_id)
|
|
1154
|
+
if node:
|
|
1155
|
+
for column_name in node.columns:
|
|
1156
|
+
column_key = build_column_key(node_id, column_name)
|
|
1157
|
+
anchor_node_ids.add(column_key)
|
|
1014
1158
|
else:
|
|
1015
1159
|
anchor_node_ids.add(f"{node_id}_{column}")
|
|
1016
1160
|
|
|
1161
|
+
cll_tracker.set_anchor_nodes(len(anchor_node_ids))
|
|
1017
1162
|
result_node_ids = set(anchor_node_ids)
|
|
1018
|
-
if
|
|
1163
|
+
if not no_upstream:
|
|
1019
1164
|
result_node_ids = result_node_ids.union(find_upstream(anchor_node_ids, parent_map))
|
|
1020
|
-
if
|
|
1165
|
+
if not no_downstream:
|
|
1021
1166
|
result_node_ids = result_node_ids.union(find_downstream(anchor_node_ids, child_map))
|
|
1022
1167
|
|
|
1023
1168
|
# Filter the nodes and columns based on the anchor nodes
|
|
1024
1169
|
if not no_filter:
|
|
1025
|
-
nodes = {k: v for k, v in nodes.items() if k in result_node_ids}
|
|
1026
|
-
columns = {k: v for k, v in columns.items() if k in result_node_ids}
|
|
1170
|
+
nodes = {k: v for k, v in nodes.items() if k in result_node_ids or k in extra_node_ids}
|
|
1171
|
+
columns = {k: v for k, v in columns.items() if k in result_node_ids or k in extra_node_ids}
|
|
1172
|
+
|
|
1173
|
+
for node in nodes.values():
|
|
1174
|
+
node.columns = {
|
|
1175
|
+
k: v for k, v in node.columns.items() if v.id in result_node_ids or v.id in extra_node_ids
|
|
1176
|
+
}
|
|
1177
|
+
|
|
1178
|
+
if change_analysis:
|
|
1179
|
+
node.impacted = node.id in result_node_ids
|
|
1180
|
+
|
|
1027
1181
|
parent_map, child_map = filter_dependency_maps(parent_map, child_map, result_node_ids)
|
|
1028
1182
|
|
|
1029
1183
|
cll_tracker.end_column_lineage()
|
|
1030
|
-
cll_tracker.set_total_nodes(len(nodes))
|
|
1184
|
+
cll_tracker.set_total_nodes(len(nodes) + len(columns))
|
|
1031
1185
|
log_performance("column level lineage", cll_tracker.to_dict())
|
|
1032
1186
|
cll_tracker.reset()
|
|
1033
1187
|
|
|
@@ -1046,6 +1200,9 @@ class DbtAdapter(BaseAdapter):
|
|
|
1046
1200
|
if node is None:
|
|
1047
1201
|
return None
|
|
1048
1202
|
|
|
1203
|
+
cll_tracker.set_total_nodes(1)
|
|
1204
|
+
cll_tracker.start_column_lineage()
|
|
1205
|
+
|
|
1049
1206
|
def _apply_all_columns(node: CllNode, transformation_type):
|
|
1050
1207
|
cll_data = CllData()
|
|
1051
1208
|
cll_data.nodes[node.id] = node
|
|
@@ -1170,6 +1327,10 @@ class DbtAdapter(BaseAdapter):
|
|
|
1170
1327
|
depends_on.add(parent_key)
|
|
1171
1328
|
column.transformation_type = c2c_map[name].transformation_type
|
|
1172
1329
|
cll_data.parent_map[column_id] = set(depends_on)
|
|
1330
|
+
|
|
1331
|
+
cll_tracker.end_column_lineage()
|
|
1332
|
+
log_performance("column level lineage per node", cll_tracker.to_dict())
|
|
1333
|
+
cll_tracker.reset()
|
|
1173
1334
|
return cll_data
|
|
1174
1335
|
|
|
1175
1336
|
def get_cll_node(self, node_id: str, base: Optional[bool] = False) -> Tuple[Optional[CllNode], list[str]]:
|
|
@@ -1181,21 +1342,12 @@ class DbtAdapter(BaseAdapter):
|
|
|
1181
1342
|
# model, seed, snapshot
|
|
1182
1343
|
if node_id in manifest.nodes:
|
|
1183
1344
|
found = manifest.nodes[node_id]
|
|
1184
|
-
if found.resource_type not in ["model", "seed", "snapshot"]:
|
|
1185
|
-
return None, []
|
|
1186
|
-
|
|
1187
1345
|
unique_id = found.unique_id
|
|
1188
|
-
node = CllNode(
|
|
1189
|
-
id=found.unique_id,
|
|
1190
|
-
name=found.name,
|
|
1191
|
-
package_name=found.package_name,
|
|
1192
|
-
resource_type=found.resource_type,
|
|
1193
|
-
raw_code=found.raw_code,
|
|
1194
|
-
)
|
|
1346
|
+
node = CllNode.build_cll_node(manifest, "nodes", node_id)
|
|
1195
1347
|
if hasattr(found.depends_on, "nodes"):
|
|
1196
1348
|
parent_list = found.depends_on.nodes
|
|
1197
1349
|
|
|
1198
|
-
if catalog is not None and unique_id in catalog.nodes:
|
|
1350
|
+
if catalog is not None and node is not None and unique_id in catalog.nodes:
|
|
1199
1351
|
columns = {}
|
|
1200
1352
|
for col_name, col_metadata in catalog.nodes[unique_id].columns.items():
|
|
1201
1353
|
column_id = f"{unique_id}_{col_name}"
|
|
@@ -1207,17 +1359,10 @@ class DbtAdapter(BaseAdapter):
|
|
|
1207
1359
|
if node_id in manifest.sources:
|
|
1208
1360
|
found = manifest.sources[node_id]
|
|
1209
1361
|
unique_id = found.unique_id
|
|
1210
|
-
|
|
1211
|
-
node = CllNode(
|
|
1212
|
-
id=found.unique_id,
|
|
1213
|
-
name=found.name,
|
|
1214
|
-
package_name=found.package_name,
|
|
1215
|
-
resource_type=found.resource_type,
|
|
1216
|
-
source_name=found.source_name,
|
|
1217
|
-
)
|
|
1362
|
+
node = CllNode.build_cll_node(manifest, "sources", node_id)
|
|
1218
1363
|
parent_list = []
|
|
1219
1364
|
|
|
1220
|
-
if catalog is not None and unique_id in catalog.sources:
|
|
1365
|
+
if catalog is not None and node is not None and unique_id in catalog.sources:
|
|
1221
1366
|
columns = {}
|
|
1222
1367
|
for col_name, col_metadata in catalog.sources[unique_id].columns.items():
|
|
1223
1368
|
column_id = f"{unique_id}_{col_name}"
|
|
@@ -1228,13 +1373,19 @@ class DbtAdapter(BaseAdapter):
|
|
|
1228
1373
|
# exposure
|
|
1229
1374
|
if node_id in manifest.exposures:
|
|
1230
1375
|
found = manifest.exposures[node_id]
|
|
1376
|
+
node = CllNode.build_cll_node(manifest, "exposures", node_id)
|
|
1377
|
+
if hasattr(found.depends_on, "nodes"):
|
|
1378
|
+
parent_list = found.depends_on.nodes
|
|
1231
1379
|
|
|
1232
|
-
|
|
1233
|
-
|
|
1234
|
-
|
|
1235
|
-
|
|
1236
|
-
|
|
1237
|
-
|
|
1380
|
+
if hasattr(manifest, "semantic_models") and node_id in manifest.semantic_models:
|
|
1381
|
+
found = manifest.semantic_models[node_id]
|
|
1382
|
+
node = CllNode.build_cll_node(manifest, "semantic_models", node_id)
|
|
1383
|
+
if hasattr(found.depends_on, "nodes"):
|
|
1384
|
+
parent_list = found.depends_on.nodes
|
|
1385
|
+
|
|
1386
|
+
if node_id in manifest.metrics:
|
|
1387
|
+
found = manifest.metrics[node_id]
|
|
1388
|
+
node = CllNode.build_cll_node(manifest, "metrics", node_id)
|
|
1238
1389
|
if hasattr(found.depends_on, "nodes"):
|
|
1239
1390
|
parent_list = found.depends_on.nodes
|
|
1240
1391
|
|
|
@@ -1250,73 +1401,6 @@ class DbtAdapter(BaseAdapter):
|
|
|
1250
1401
|
}
|
|
1251
1402
|
return None
|
|
1252
1403
|
|
|
1253
|
-
def get_impact_radius(self, node_id: str) -> CllData:
|
|
1254
|
-
impacted_nodes = self.get_impacted_nodes(node_id)
|
|
1255
|
-
impacted_cll = self.get_impacted_cll(node_id)
|
|
1256
|
-
|
|
1257
|
-
# merge impact radius
|
|
1258
|
-
return self._merge_cll_data(impacted_nodes, impacted_cll)
|
|
1259
|
-
|
|
1260
|
-
def get_impacted_nodes(self, node_id: str) -> CllData:
|
|
1261
|
-
lineage_diff = self.get_lineage_diff()
|
|
1262
|
-
diff_info = lineage_diff.diff.get(node_id)
|
|
1263
|
-
if diff_info is None:
|
|
1264
|
-
return CllData()
|
|
1265
|
-
change_category = diff_info.change.category
|
|
1266
|
-
|
|
1267
|
-
if change_category == "breaking":
|
|
1268
|
-
cll = self.get_cll(node_id, no_filter=True)
|
|
1269
|
-
_, downstream = find_column_dependencies(node_id, cll.parent_map, cll.child_map)
|
|
1270
|
-
relevant_columns = {node_id}
|
|
1271
|
-
relevant_columns.update(downstream)
|
|
1272
|
-
nodes, columns = filter_lineage_vertices(cll.nodes, cll.columns, relevant_columns)
|
|
1273
|
-
p_map, c_map = filter_dependency_maps(cll.parent_map, cll.child_map, relevant_columns)
|
|
1274
|
-
|
|
1275
|
-
return CllData(nodes=nodes, columns=columns, parent_map=p_map, child_map=c_map)
|
|
1276
|
-
|
|
1277
|
-
return CllData()
|
|
1278
|
-
|
|
1279
|
-
def get_impacted_cll(self, node_id: str) -> CllData:
|
|
1280
|
-
lineage_diff = self.get_lineage_diff()
|
|
1281
|
-
diff_info = lineage_diff.diff.get(node_id)
|
|
1282
|
-
if diff_info is None:
|
|
1283
|
-
return CllData()
|
|
1284
|
-
change_columns = diff_info.change.columns
|
|
1285
|
-
|
|
1286
|
-
cll = self.get_cll(node_id, no_filter=True)
|
|
1287
|
-
relevant_columns = set()
|
|
1288
|
-
for col, change_status in change_columns.items():
|
|
1289
|
-
if change_status == "removed":
|
|
1290
|
-
continue
|
|
1291
|
-
target_column = f"{node_id}_{col}"
|
|
1292
|
-
_, downstream = find_column_dependencies(target_column, cll.parent_map, cll.child_map)
|
|
1293
|
-
relevant_columns.add(target_column)
|
|
1294
|
-
relevant_columns.update(downstream)
|
|
1295
|
-
|
|
1296
|
-
nodes, columns = filter_lineage_vertices(cll.nodes, cll.columns, relevant_columns)
|
|
1297
|
-
p_map, c_map = filter_dependency_maps(cll.parent_map, cll.child_map, relevant_columns)
|
|
1298
|
-
|
|
1299
|
-
return CllData(nodes=nodes, columns=columns, parent_map=p_map, child_map=c_map)
|
|
1300
|
-
|
|
1301
|
-
@staticmethod
|
|
1302
|
-
def _merge_cll_data(base: CllData, target: CllData) -> CllData:
|
|
1303
|
-
merged_nodes = {**base.nodes, **target.nodes}
|
|
1304
|
-
merged_columns = {**base.columns, **target.columns}
|
|
1305
|
-
|
|
1306
|
-
merged_parent_map = {}
|
|
1307
|
-
merged_keys = set(base.parent_map.keys()).union(set(target.parent_map.keys()))
|
|
1308
|
-
for key in merged_keys:
|
|
1309
|
-
merged_parent_map[key] = base.parent_map.get(key, set()).union(target.parent_map.get(key, set()))
|
|
1310
|
-
|
|
1311
|
-
merged_child_map = {}
|
|
1312
|
-
merged_keys = set(base.child_map.keys()).union(set(target.child_map.keys()))
|
|
1313
|
-
for key in merged_keys:
|
|
1314
|
-
merged_child_map[key] = base.child_map.get(key, set()).union(target.child_map.get(key, set()))
|
|
1315
|
-
|
|
1316
|
-
return CllData(
|
|
1317
|
-
nodes=merged_nodes, columns=merged_columns, parent_map=merged_parent_map, child_map=merged_child_map
|
|
1318
|
-
)
|
|
1319
|
-
|
|
1320
1404
|
def build_name_to_unique_id_index(self) -> Dict[str, str]:
|
|
1321
1405
|
name_to_unique_id = {}
|
|
1322
1406
|
curr_manifest = self.get_manifest(base=False)
|
|
@@ -1404,13 +1488,18 @@ class DbtAdapter(BaseAdapter):
|
|
|
1404
1488
|
self.curr_manifest = load_manifest(path=refresh_file_path)
|
|
1405
1489
|
self.manifest = as_manifest(self.curr_manifest)
|
|
1406
1490
|
self.get_cll_cached.cache_clear()
|
|
1491
|
+
self.get_change_analysis_cached.cache_clear()
|
|
1407
1492
|
elif refresh_file_path.endswith("catalog.json"):
|
|
1408
1493
|
self.curr_catalog = load_catalog(path=refresh_file_path)
|
|
1494
|
+
self.get_cll_cached.cache_clear()
|
|
1495
|
+
self.get_change_analysis_cached.cache_clear()
|
|
1409
1496
|
elif self.base_path and target_type == os.path.basename(self.base_path):
|
|
1410
1497
|
if refresh_file_path.endswith("manifest.json"):
|
|
1411
1498
|
self.base_manifest = load_manifest(path=refresh_file_path)
|
|
1499
|
+
self.get_change_analysis_cached.cache_clear()
|
|
1412
1500
|
elif refresh_file_path.endswith("catalog.json"):
|
|
1413
1501
|
self.base_catalog = load_catalog(path=refresh_file_path)
|
|
1502
|
+
self.get_change_analysis_cached.cache_clear()
|
|
1414
1503
|
|
|
1415
1504
|
def create_relation(self, model, base=False):
|
|
1416
1505
|
node = self.find_node_by_name(model, base)
|
|
@@ -1446,6 +1535,15 @@ class DbtAdapter(BaseAdapter):
|
|
|
1446
1535
|
|
|
1447
1536
|
specs = [_parse_difference(select_list, exclude_list)]
|
|
1448
1537
|
|
|
1538
|
+
# If packages is not provided, use the project name from manifest metadata as default
|
|
1539
|
+
if packages is None:
|
|
1540
|
+
if (
|
|
1541
|
+
self.manifest.metadata
|
|
1542
|
+
and hasattr(self.manifest.metadata, "project_name")
|
|
1543
|
+
and self.manifest.metadata.project_name
|
|
1544
|
+
):
|
|
1545
|
+
packages = [self.manifest.metadata.project_name]
|
|
1546
|
+
|
|
1449
1547
|
if packages is not None:
|
|
1450
1548
|
package_spec = SelectionUnion([_parse_difference([f"package:{p}"], None) for p in packages])
|
|
1451
1549
|
specs.append(package_spec)
|
|
@@ -1522,7 +1620,7 @@ class DbtAdapter(BaseAdapter):
|
|
|
1522
1620
|
if not os.path.isfile(path):
|
|
1523
1621
|
return None
|
|
1524
1622
|
|
|
1525
|
-
with open(path, "r") as f:
|
|
1623
|
+
with open(path, "r", encoding="utf-8") as f:
|
|
1526
1624
|
json_content = f.read()
|
|
1527
1625
|
return json.loads(json_content)
|
|
1528
1626
|
|