recce-nightly 1.9.0.20250623__py3-none-any.whl → 1.25.0.20251112a2066__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- recce/VERSION +1 -1
- recce/__init__.py +5 -0
- recce/adapter/dbt_adapter/__init__.py +318 -240
- recce/artifact.py +76 -3
- recce/cli.py +703 -71
- recce/config.py +3 -3
- recce/connect_to_cloud.py +138 -0
- recce/core.py +3 -3
- recce/data/404.html +1 -22
- recce/data/__next.__PAGE__.txt +10 -0
- recce/data/__next._full.txt +23 -0
- recce/data/__next._index.txt +8 -0
- recce/data/__next._tree.txt +12 -0
- recce/data/_next/static/6LypcDXgyuSaiSCrsmUub/_buildManifest.js +11 -0
- recce/data/_next/static/6LypcDXgyuSaiSCrsmUub/_clientMiddlewareManifest.json +1 -0
- recce/data/_next/static/chunks/0a2b2dd4b57049c2.js +1 -0
- recce/data/_next/static/chunks/19c10d219a6a21ff.js +1 -0
- recce/data/_next/static/chunks/24fd885c7180a612.js +1 -0
- recce/data/_next/static/chunks/27e66b2eab4adc32.js +19 -0
- recce/data/_next/static/chunks/71f88fcc615bf282.js +1 -0
- recce/data/_next/static/chunks/917619ab62a32388.js +1 -0
- recce/data/_next/static/chunks/93ba5a62932b704f.js +4 -0
- recce/data/_next/static/chunks/a43a2a5e06d5a92b.js +1 -0
- recce/data/_next/static/chunks/a6c78b24bd8b84fc.js +1 -0
- recce/data/_next/static/chunks/b2610ba997ff8c4f.js +110 -0
- recce/data/_next/static/chunks/ba2d87265a68599d.css +2 -0
- recce/data/_next/static/chunks/c117fd1c1382dd83.js +11 -0
- recce/data/_next/static/chunks/c9425ca46eebdde9.js +1 -0
- recce/data/_next/static/chunks/cc8a9eadba012be0.css +6 -0
- recce/data/_next/static/chunks/e124bccf574a3361.css +1 -0
- recce/data/_next/static/chunks/e392ad92847c3e17.js +1 -0
- recce/data/_next/static/chunks/e4ce95efe88dae79.js +11 -0
- recce/data/_next/static/chunks/e69c777814fea6ed.js +2 -0
- recce/data/_next/static/chunks/turbopack-21cfd73037ff57ab.js +3 -0
- recce/data/_next/static/media/favicon.a8d38d84.ico +0 -0
- recce/data/_next/static/media/montserrat-cyrillic-800-normal.d80d830d.woff2 +0 -0
- recce/data/_next/static/media/{montserrat-cyrillic-800-normal.bd5c9f50.woff → montserrat-cyrillic-800-normal.f9d58125.woff} +0 -0
- recce/data/_next/static/media/montserrat-cyrillic-ext-800-normal.076c2a93.woff2 +0 -0
- recce/data/_next/static/media/montserrat-latin-800-normal.cde454cc.woff2 +0 -0
- recce/data/_next/static/media/{montserrat-latin-800-normal.fc315020.woff → montserrat-latin-800-normal.d5761935.woff} +0 -0
- recce/data/_next/static/media/montserrat-latin-ext-800-normal.40ec0659.woff2 +0 -0
- recce/data/_next/static/media/{montserrat-latin-ext-800-normal.2e5381b2.woff → montserrat-latin-ext-800-normal.b671449b.woff} +0 -0
- recce/data/_next/static/media/{montserrat-vietnamese-800-normal.20c545e6.woff → montserrat-vietnamese-800-normal.9f7b8541.woff} +0 -0
- recce/data/_next/static/media/montserrat-vietnamese-800-normal.f9eb854e.woff2 +0 -0
- recce/data/_not-found/__next._full.txt +17 -0
- recce/data/_not-found/__next._index.txt +8 -0
- recce/data/_not-found/__next._not-found.__PAGE__.txt +5 -0
- recce/data/_not-found/__next._not-found.txt +4 -0
- recce/data/_not-found/__next._tree.txt +10 -0
- recce/data/_not-found.html +1 -0
- recce/data/_not-found.txt +17 -0
- recce/data/auth_callback.html +68 -0
- recce/data/index.html +1 -27
- recce/data/index.txt +23 -8
- recce/event/__init__.py +9 -8
- recce/event/collector.py +6 -2
- recce/event/track.py +10 -0
- recce/github.py +1 -1
- recce/mcp_server.py +632 -0
- recce/models/types.py +23 -2
- recce/pull_request.py +1 -1
- recce/run.py +23 -16
- recce/server.py +194 -19
- recce/state/__init__.py +31 -0
- recce/state/cloud.py +632 -0
- recce/state/const.py +26 -0
- recce/state/local.py +56 -0
- recce/state/state.py +119 -0
- recce/state/state_loader.py +174 -0
- recce/summary.py +2 -1
- recce/tasks/dataframe.py +59 -2
- recce/tasks/rowcount.py +4 -1
- recce/tasks/schema.py +4 -1
- recce/tasks/valuediff.py +1 -1
- recce/util/api_token.py +11 -2
- recce/util/breaking.py +9 -0
- recce/util/cll.py +1 -2
- recce/util/io.py +2 -2
- recce/util/lineage.py +19 -18
- recce/util/perf_tracking.py +85 -0
- recce/util/recce_cloud.py +229 -5
- recce/yaml/__init__.py +2 -2
- recce_cloud/__init__.py +15 -0
- recce_cloud/api/__init__.py +17 -0
- recce_cloud/api/base.py +104 -0
- recce_cloud/api/client.py +150 -0
- recce_cloud/api/exceptions.py +26 -0
- recce_cloud/api/factory.py +63 -0
- recce_cloud/api/github.py +72 -0
- recce_cloud/api/gitlab.py +78 -0
- recce_cloud/artifact.py +57 -0
- recce_cloud/ci_providers/__init__.py +9 -0
- recce_cloud/ci_providers/base.py +82 -0
- recce_cloud/ci_providers/detector.py +147 -0
- recce_cloud/ci_providers/github_actions.py +136 -0
- recce_cloud/ci_providers/gitlab_ci.py +130 -0
- recce_cloud/cli.py +303 -0
- recce_cloud/upload.py +213 -0
- {recce_nightly-1.9.0.20250623.dist-info → recce_nightly-1.25.0.20251112a2066.dist-info}/METADATA +31 -27
- recce_nightly-1.25.0.20251112a2066.dist-info/RECORD +178 -0
- {recce_nightly-1.9.0.20250623.dist-info → recce_nightly-1.25.0.20251112a2066.dist-info}/top_level.txt +1 -0
- tests/adapter/dbt_adapter/test_dbt_cll.py +412 -79
- tests/recce_cloud/__init__.py +0 -0
- tests/recce_cloud/test_ci_providers.py +351 -0
- tests/recce_cloud/test_cli.py +372 -0
- tests/recce_cloud/test_client.py +273 -0
- tests/recce_cloud/test_platform_clients.py +279 -0
- tests/test_cli.py +106 -3
- tests/test_cli_mcp_optional.py +45 -0
- tests/test_cloud_listing_cli.py +324 -0
- tests/test_connect_to_cloud.py +82 -0
- tests/test_core.py +148 -3
- tests/test_mcp_server.py +332 -0
- tests/test_server.py +6 -6
- tests/test_summary.py +14 -6
- recce/data/_next/static/WrRUb3nV8BhAZG_R8kVma/_buildManifest.js +0 -1
- recce/data/_next/static/chunks/181-acc61ddada3bc0ca.js +0 -43
- recce/data/_next/static/chunks/1bff33f1-1ef85cf5e658a751.js +0 -1
- recce/data/_next/static/chunks/217-879a84d70f7a907c.js +0 -2
- recce/data/_next/static/chunks/29e3cc0d-60045b2e47aa3916.js +0 -1
- recce/data/_next/static/chunks/36e1c10d-8e7be4a6c1f6ab2d.js +0 -1
- recce/data/_next/static/chunks/3998a672-03adacad07b346ac.js +0 -1
- recce/data/_next/static/chunks/3a92ee20-1081c360214f9602.js +0 -1
- recce/data/_next/static/chunks/42-cd3c06533f5fd47c.js +0 -9
- recce/data/_next/static/chunks/450c323b-fd94e7ffaa4a5efa.js +0 -1
- recce/data/_next/static/chunks/47d8844f-929aed9b1c73a905.js +0 -1
- recce/data/_next/static/chunks/608-3b079b544e5d5f5e.js +0 -15
- recce/data/_next/static/chunks/6dc81886-adbfa45836061d79.js +0 -1
- recce/data/_next/static/chunks/7a8a3e83-edf6dc64b5d5f0a5.js +0 -1
- recce/data/_next/static/chunks/7f27ae6c-d5f0438edd5c2a5b.js +0 -1
- recce/data/_next/static/chunks/86730205-cfb14e3f051bab35.js +0 -1
- recce/data/_next/static/chunks/8d700b6a.8bb140898499c512.js +0 -1
- recce/data/_next/static/chunks/92-7ab55ae02606193c.js +0 -1
- recce/data/_next/static/chunks/9746af58-a42b7d169cacadf0.js +0 -1
- recce/data/_next/static/chunks/a30376cd-de84559016d7e133.js +0 -1
- recce/data/_next/static/chunks/app/_not-found/page-01ed58b7f971d311.js +0 -1
- recce/data/_next/static/chunks/app/layout-177a410a97e0d018.js +0 -1
- recce/data/_next/static/chunks/app/page-59241c42b7dd4fcf.js +0 -1
- recce/data/_next/static/chunks/b63b1b3f-4282bdcf459e075c.js +0 -1
- recce/data/_next/static/chunks/bbda5537-9ec25eb1dd62348a.js +0 -1
- recce/data/_next/static/chunks/c132bf7d-08cb668a789d6afd.js +0 -1
- recce/data/_next/static/chunks/ce84277d-2e5d1d46910cf052.js +0 -1
- recce/data/_next/static/chunks/febdd86e-c6b525341634b860.js +0 -54
- recce/data/_next/static/chunks/fee69bc6-2dbccaf9b90474e6.js +0 -1
- recce/data/_next/static/chunks/framework-ded83d71b51ce901.js +0 -1
- recce/data/_next/static/chunks/main-app-39061b0166c47f55.js +0 -1
- recce/data/_next/static/chunks/main-b5b3ae20a1405261.js +0 -1
- recce/data/_next/static/chunks/pages/_app-437c455677d62394.js +0 -1
- recce/data/_next/static/chunks/pages/_error-e7650df18ca04bde.js +0 -1
- recce/data/_next/static/chunks/webpack-7b49d5ba7e3a434d.js +0 -1
- recce/data/_next/static/css/17a96168e3a9db13.css +0 -1
- recce/data/_next/static/css/1b121dc4d36aeb4d.css +0 -3
- recce/data/_next/static/css/35c6679a098e1e34.css +0 -1
- recce/data/_next/static/css/951e2e0eea2d4a5b.css +0 -14
- recce/data/_next/static/media/montserrat-cyrillic-800-normal.22628180.woff2 +0 -0
- recce/data/_next/static/media/montserrat-cyrillic-ext-800-normal.94a63aea.woff2 +0 -0
- recce/data/_next/static/media/montserrat-latin-800-normal.6f8fa298.woff2 +0 -0
- recce/data/_next/static/media/montserrat-latin-ext-800-normal.013b84f9.woff2 +0 -0
- recce/data/_next/static/media/montserrat-vietnamese-800-normal.c0035377.woff2 +0 -0
- recce/state.py +0 -785
- recce_nightly-1.9.0.20250623.dist-info/RECORD +0 -151
- tests/test_state.py +0 -134
- /recce/data/_next/static/{WrRUb3nV8BhAZG_R8kVma → 6LypcDXgyuSaiSCrsmUub}/_ssgManifest.js +0 -0
- /recce/data/_next/static/chunks/{polyfills-42372ed130431b0a.js → a6dad97d9634a72d.js} +0 -0
- /recce/data/_next/static/media/{montserrat-cyrillic-ext-800-normal.e6e0d8d0.woff → montserrat-cyrillic-ext-800-normal.a4fa76b5.woff} +0 -0
- /recce/data/_next/static/media/{reload-image.79aabb7d.svg → reload-image.7aa931c7.svg} +0 -0
- {recce_nightly-1.9.0.20250623.dist-info → recce_nightly-1.25.0.20251112a2066.dist-info}/WHEEL +0 -0
- {recce_nightly-1.9.0.20250623.dist-info → recce_nightly-1.25.0.20251112a2066.dist-info}/entry_points.txt +0 -0
- {recce_nightly-1.9.0.20250623.dist-info → recce_nightly-1.25.0.20251112a2066.dist-info}/licenses/LICENSE +0 -0
|
@@ -3,6 +3,7 @@ import logging
|
|
|
3
3
|
import os
|
|
4
4
|
import uuid
|
|
5
5
|
from contextlib import contextmanager
|
|
6
|
+
from copy import deepcopy
|
|
6
7
|
from dataclasses import dataclass, fields
|
|
7
8
|
from errno import ENOENT
|
|
8
9
|
from functools import lru_cache
|
|
@@ -25,12 +26,12 @@ from recce.event import log_performance
|
|
|
25
26
|
from recce.exceptions import RecceException
|
|
26
27
|
from recce.util.cll import CLLPerformanceTracking, cll
|
|
27
28
|
from recce.util.lineage import (
|
|
29
|
+
build_column_key,
|
|
28
30
|
filter_dependency_maps,
|
|
29
|
-
filter_lineage_vertices,
|
|
30
|
-
find_column_dependencies,
|
|
31
31
|
find_downstream,
|
|
32
32
|
find_upstream,
|
|
33
33
|
)
|
|
34
|
+
from recce.util.perf_tracking import LineagePerfTracker
|
|
34
35
|
|
|
35
36
|
from ...tasks.profile import ProfileTask
|
|
36
37
|
from ...util.breaking import BreakingPerformanceTracking, parse_change_category
|
|
@@ -278,7 +279,7 @@ class DbtArgs:
|
|
|
278
279
|
target_path: Optional[str] = (None,)
|
|
279
280
|
project_only_flags: Optional[Dict[str, Any]] = None
|
|
280
281
|
which: Optional[str] = None
|
|
281
|
-
state_modified_compare_more_unrendered_values: Optional[bool] =
|
|
282
|
+
state_modified_compare_more_unrendered_values: Optional[bool] = True # new flag added since dbt v1.9
|
|
282
283
|
|
|
283
284
|
|
|
284
285
|
@dataclass
|
|
@@ -407,7 +408,7 @@ class DbtAdapter(BaseAdapter):
|
|
|
407
408
|
|
|
408
409
|
if self.adapter.connections.TYPE == "databricks":
|
|
409
410
|
# reference: get_columns_in_relation (dbt/adapters/databricks/impl.py)
|
|
410
|
-
from dbt.adapters.databricks import DatabricksColumn
|
|
411
|
+
from dbt.adapters.databricks.column import DatabricksColumn
|
|
411
412
|
|
|
412
413
|
rows = columns
|
|
413
414
|
columns = []
|
|
@@ -599,7 +600,15 @@ class DbtAdapter(BaseAdapter):
|
|
|
599
600
|
return node.compiled_code
|
|
600
601
|
else:
|
|
601
602
|
from dbt.clients import jinja
|
|
602
|
-
from dbt.context.providers import
|
|
603
|
+
from dbt.context.providers import (
|
|
604
|
+
generate_runtime_macro_context,
|
|
605
|
+
generate_runtime_model_context,
|
|
606
|
+
)
|
|
607
|
+
|
|
608
|
+
# Set up macro resolver for dbt >= 1.8
|
|
609
|
+
macro_manifest = MacroManifest(manifest.macros)
|
|
610
|
+
self.adapter.set_macro_resolver(macro_manifest)
|
|
611
|
+
self.adapter.set_macro_context_generator(generate_runtime_macro_context)
|
|
603
612
|
|
|
604
613
|
jinja_ctx = generate_runtime_model_context(node, self.runtime_config, manifest)
|
|
605
614
|
jinja_ctx.update(context)
|
|
@@ -658,8 +667,8 @@ class DbtAdapter(BaseAdapter):
|
|
|
658
667
|
@lru_cache(maxsize=2)
|
|
659
668
|
def get_lineage_cached(self, base: Optional[bool] = False, cache_key=0):
|
|
660
669
|
if base is False:
|
|
661
|
-
|
|
662
|
-
|
|
670
|
+
perf_tracker = LineagePerfTracker()
|
|
671
|
+
perf_tracker.start_lineage()
|
|
663
672
|
|
|
664
673
|
manifest = self.curr_manifest if base is False else self.base_manifest
|
|
665
674
|
catalog = self.curr_catalog if base is False else self.base_catalog
|
|
@@ -736,6 +745,7 @@ class DbtAdapter(BaseAdapter):
|
|
|
736
745
|
nodes[unique_id] = {
|
|
737
746
|
"id": source["unique_id"],
|
|
738
747
|
"name": source["name"],
|
|
748
|
+
"source_name": source["source_name"],
|
|
739
749
|
"resource_type": source["resource_type"],
|
|
740
750
|
"package_name": source["package_name"],
|
|
741
751
|
"config": source["config"],
|
|
@@ -777,10 +787,10 @@ class DbtAdapter(BaseAdapter):
|
|
|
777
787
|
parent_map = self.build_parent_map(nodes, base)
|
|
778
788
|
|
|
779
789
|
if base is False:
|
|
780
|
-
|
|
781
|
-
|
|
782
|
-
log_performance("model lineage",
|
|
783
|
-
|
|
790
|
+
perf_tracker.end_lineage()
|
|
791
|
+
perf_tracker.set_total_nodes(len(nodes))
|
|
792
|
+
log_performance("model lineage", perf_tracker.to_dict())
|
|
793
|
+
perf_tracker.reset()
|
|
784
794
|
|
|
785
795
|
return dict(
|
|
786
796
|
parent_map=parent_map,
|
|
@@ -793,15 +803,43 @@ class DbtAdapter(BaseAdapter):
|
|
|
793
803
|
def _get_lineage_diff_cached(self, cache_key) -> LineageDiff:
|
|
794
804
|
base = self.get_lineage(base=True)
|
|
795
805
|
current = self.get_lineage(base=False)
|
|
796
|
-
keys = {*base.get("nodes", {}).keys(), *current.get("nodes", {}).keys()}
|
|
797
806
|
|
|
798
|
-
|
|
799
|
-
|
|
800
|
-
|
|
807
|
+
modified_nodes = self.select_nodes(select="state:modified")
|
|
808
|
+
diff = {}
|
|
809
|
+
for node_id in modified_nodes:
|
|
810
|
+
base_node = base.get("nodes", {}).get(node_id)
|
|
811
|
+
curr_node = current.get("nodes", {}).get(node_id)
|
|
812
|
+
if base_node and curr_node:
|
|
813
|
+
diff[node_id] = NodeDiff(change_status="modified")
|
|
814
|
+
elif base_node:
|
|
815
|
+
diff[node_id] = NodeDiff(change_status="removed")
|
|
816
|
+
elif curr_node:
|
|
817
|
+
diff[node_id] = NodeDiff(change_status="added")
|
|
818
|
+
|
|
819
|
+
return LineageDiff(
|
|
820
|
+
base=base,
|
|
821
|
+
current=current,
|
|
822
|
+
diff=diff,
|
|
823
|
+
)
|
|
824
|
+
|
|
825
|
+
@lru_cache(maxsize=128)
|
|
826
|
+
def get_change_analysis_cached(self, node_id: str):
|
|
827
|
+
breaking_perf_tracker = BreakingPerformanceTracking()
|
|
828
|
+
lineage_diff = self.get_lineage_diff()
|
|
829
|
+
diff = lineage_diff.diff
|
|
830
|
+
|
|
831
|
+
if node_id not in diff or diff[node_id].change_status != "modified":
|
|
832
|
+
return diff.get(node_id)
|
|
833
|
+
|
|
834
|
+
breaking_perf_tracker.increment_modified_nodes()
|
|
835
|
+
breaking_perf_tracker.start_lineage_diff()
|
|
836
|
+
|
|
837
|
+
base = lineage_diff.base
|
|
838
|
+
current = lineage_diff.current
|
|
801
839
|
|
|
802
840
|
base_manifest = as_manifest(self.get_manifest(True))
|
|
803
841
|
curr_manifest = as_manifest(self.get_manifest(False))
|
|
804
|
-
|
|
842
|
+
breaking_perf_tracker.record_checkpoint("manifest")
|
|
805
843
|
|
|
806
844
|
def ref_func(*args):
|
|
807
845
|
if len(args) == 1:
|
|
@@ -821,111 +859,106 @@ class DbtAdapter(BaseAdapter):
|
|
|
821
859
|
source=source_func,
|
|
822
860
|
)
|
|
823
861
|
|
|
824
|
-
|
|
825
|
-
|
|
826
|
-
|
|
827
|
-
|
|
828
|
-
curr_node
|
|
829
|
-
|
|
830
|
-
|
|
831
|
-
|
|
832
|
-
|
|
833
|
-
|
|
834
|
-
|
|
835
|
-
|
|
836
|
-
|
|
837
|
-
|
|
838
|
-
|
|
839
|
-
|
|
840
|
-
|
|
841
|
-
|
|
842
|
-
|
|
843
|
-
|
|
844
|
-
|
|
845
|
-
|
|
846
|
-
|
|
847
|
-
|
|
848
|
-
|
|
849
|
-
|
|
850
|
-
|
|
851
|
-
|
|
852
|
-
|
|
853
|
-
|
|
854
|
-
|
|
855
|
-
|
|
856
|
-
|
|
857
|
-
|
|
858
|
-
|
|
859
|
-
|
|
860
|
-
|
|
861
|
-
|
|
862
|
-
|
|
863
|
-
|
|
864
|
-
|
|
865
|
-
|
|
866
|
-
|
|
867
|
-
|
|
868
|
-
|
|
869
|
-
|
|
870
|
-
|
|
871
|
-
|
|
872
|
-
|
|
873
|
-
|
|
874
|
-
|
|
875
|
-
|
|
876
|
-
|
|
877
|
-
curr_sql,
|
|
878
|
-
old_schema=base_schema,
|
|
879
|
-
new_schema=curr_schema,
|
|
880
|
-
dialect=dialect,
|
|
881
|
-
perf_tracking=perf_tracking,
|
|
882
|
-
)
|
|
883
|
-
|
|
884
|
-
# Make sure that the case of the column names are the same
|
|
885
|
-
changed_columns = {
|
|
886
|
-
column.lower(): change_status for column, change_status in (change.columns or {}).items()
|
|
887
|
-
}
|
|
888
|
-
changed_columns_names = set(changed_columns)
|
|
889
|
-
changed_columns_final = {}
|
|
862
|
+
base_node = base.get("nodes", {}).get(node_id)
|
|
863
|
+
curr_node = current.get("nodes", {}).get(node_id)
|
|
864
|
+
change = NodeChange(category="unknown")
|
|
865
|
+
if (
|
|
866
|
+
curr_node.get("resource_type") in ["model", "snapshot"]
|
|
867
|
+
and curr_node.get("raw_code") is not None
|
|
868
|
+
and base_node.get("raw_code") is not None
|
|
869
|
+
):
|
|
870
|
+
try:
|
|
871
|
+
|
|
872
|
+
def _get_schema(lineage):
|
|
873
|
+
schema = {}
|
|
874
|
+
nodes = lineage["nodes"]
|
|
875
|
+
parent_list = lineage["parent_map"].get(node_id, [])
|
|
876
|
+
for parent_id in parent_list:
|
|
877
|
+
parent_node = nodes.get(parent_id)
|
|
878
|
+
if parent_node is None:
|
|
879
|
+
continue
|
|
880
|
+
columns = parent_node.get("columns") or {}
|
|
881
|
+
name = parent_node.get("name")
|
|
882
|
+
if parent_node.get("resource_type") == "source":
|
|
883
|
+
parts = parent_id.split(".")
|
|
884
|
+
source = parts[2]
|
|
885
|
+
table = parts[3]
|
|
886
|
+
source = source.replace("-", "_")
|
|
887
|
+
name = f"__{source}__{table}"
|
|
888
|
+
schema[name] = {name: column.get("type") for name, column in columns.items()}
|
|
889
|
+
return schema
|
|
890
|
+
|
|
891
|
+
base_sql = self.generate_sql(
|
|
892
|
+
base_node.get("raw_code"),
|
|
893
|
+
context=jinja_context,
|
|
894
|
+
provided_manifest=base_manifest,
|
|
895
|
+
)
|
|
896
|
+
curr_sql = self.generate_sql(
|
|
897
|
+
curr_node.get("raw_code"),
|
|
898
|
+
context=jinja_context,
|
|
899
|
+
provided_manifest=curr_manifest,
|
|
900
|
+
)
|
|
901
|
+
base_schema = _get_schema(base)
|
|
902
|
+
curr_schema = _get_schema(current)
|
|
903
|
+
dialect = self.adapter.connections.TYPE
|
|
904
|
+
if curr_manifest.metadata.adapter_type is not None:
|
|
905
|
+
dialect = curr_manifest.metadata.adapter_type
|
|
906
|
+
|
|
907
|
+
change = parse_change_category(
|
|
908
|
+
base_sql,
|
|
909
|
+
curr_sql,
|
|
910
|
+
old_schema=base_schema,
|
|
911
|
+
new_schema=curr_schema,
|
|
912
|
+
dialect=dialect,
|
|
913
|
+
perf_tracking=breaking_perf_tracker,
|
|
914
|
+
)
|
|
890
915
|
|
|
891
|
-
|
|
892
|
-
|
|
893
|
-
|
|
916
|
+
# Make sure that the case of the column names are the same
|
|
917
|
+
changed_columns = {
|
|
918
|
+
column.lower(): change_status for column, change_status in (change.columns or {}).items()
|
|
919
|
+
}
|
|
920
|
+
changed_columns_names = set(changed_columns)
|
|
921
|
+
changed_columns_final = {}
|
|
894
922
|
|
|
895
|
-
|
|
896
|
-
|
|
897
|
-
|
|
923
|
+
base_columns = base_node.get("columns") or {}
|
|
924
|
+
curr_columns = curr_node.get("columns") or {}
|
|
925
|
+
columns_names = set(base_columns) | set(curr_columns)
|
|
898
926
|
|
|
899
|
-
|
|
900
|
-
|
|
901
|
-
|
|
927
|
+
for column_name in columns_names:
|
|
928
|
+
if column_name.lower() in changed_columns_names:
|
|
929
|
+
changed_columns_final[column_name] = changed_columns[column_name.lower()]
|
|
902
930
|
|
|
903
|
-
|
|
904
|
-
|
|
905
|
-
|
|
906
|
-
|
|
907
|
-
diff[key] = NodeDiff(change_status="added")
|
|
931
|
+
change.columns = changed_columns_final
|
|
932
|
+
except Exception:
|
|
933
|
+
# TODO: telemetry
|
|
934
|
+
pass
|
|
908
935
|
|
|
909
|
-
|
|
910
|
-
log_performance("
|
|
911
|
-
|
|
912
|
-
|
|
913
|
-
|
|
914
|
-
|
|
915
|
-
diff=diff,
|
|
916
|
-
)
|
|
936
|
+
breaking_perf_tracker.end_lineage_diff()
|
|
937
|
+
log_performance("change analysis per node", breaking_perf_tracker.to_dict())
|
|
938
|
+
breaking_perf_tracker.reset()
|
|
939
|
+
node_diff = diff.get(node_id)
|
|
940
|
+
node_diff.change = change
|
|
941
|
+
return node_diff
|
|
917
942
|
|
|
918
943
|
def get_cll(
|
|
919
944
|
self,
|
|
920
945
|
node_id: Optional[str] = None,
|
|
921
946
|
column: Optional[str] = None,
|
|
922
947
|
change_analysis: Optional[bool] = False,
|
|
923
|
-
|
|
924
|
-
|
|
925
|
-
|
|
948
|
+
no_cll: Optional[bool] = False,
|
|
949
|
+
no_upstream: Optional[bool] = False,
|
|
950
|
+
no_downstream: Optional[bool] = False,
|
|
926
951
|
no_filter: Optional[bool] = False,
|
|
927
952
|
) -> CllData:
|
|
928
|
-
cll_tracker =
|
|
953
|
+
cll_tracker = LineagePerfTracker()
|
|
954
|
+
cll_tracker.set_params(
|
|
955
|
+
has_node=node_id is not None,
|
|
956
|
+
has_column=column is not None,
|
|
957
|
+
change_analysis=change_analysis,
|
|
958
|
+
no_cll=no_cll,
|
|
959
|
+
no_upstream=no_upstream,
|
|
960
|
+
no_downstream=no_downstream,
|
|
961
|
+
)
|
|
929
962
|
cll_tracker.start_column_lineage()
|
|
930
963
|
|
|
931
964
|
manifest = self.curr_manifest
|
|
@@ -936,47 +969,114 @@ class DbtAdapter(BaseAdapter):
|
|
|
936
969
|
cll_node_ids = {node_id}
|
|
937
970
|
else:
|
|
938
971
|
lineage_diff = self.get_lineage_diff()
|
|
939
|
-
cll_node_ids = lineage_diff.diff.keys()
|
|
972
|
+
cll_node_ids = set(lineage_diff.diff.keys())
|
|
973
|
+
|
|
974
|
+
cll_tracker.set_init_nodes(len(cll_node_ids))
|
|
940
975
|
|
|
941
976
|
nodes = {}
|
|
942
977
|
columns = {}
|
|
943
978
|
parent_map = {}
|
|
944
979
|
child_map = {}
|
|
945
980
|
|
|
946
|
-
if
|
|
981
|
+
if not no_upstream:
|
|
947
982
|
cll_node_ids = cll_node_ids.union(find_upstream(cll_node_ids, manifest_dict.get("parent_map")))
|
|
948
|
-
if
|
|
983
|
+
if not no_downstream:
|
|
949
984
|
cll_node_ids = cll_node_ids.union(find_downstream(cll_node_ids, manifest_dict.get("child_map")))
|
|
950
985
|
|
|
951
|
-
if
|
|
986
|
+
if not no_cll:
|
|
987
|
+
allowed_related_nodes = set()
|
|
988
|
+
for key in ["sources", "nodes", "exposures", "metrics"]:
|
|
989
|
+
attr = getattr(manifest, key)
|
|
990
|
+
allowed_related_nodes.update(set(attr.keys()))
|
|
991
|
+
if hasattr(manifest, "semantic_models"):
|
|
992
|
+
attr = getattr(manifest, "semantic_models")
|
|
993
|
+
allowed_related_nodes.update(set(attr.keys()))
|
|
952
994
|
for cll_node_id in cll_node_ids:
|
|
953
|
-
if
|
|
954
|
-
cll_node_id not in manifest.sources
|
|
955
|
-
and cll_node_id not in manifest.nodes
|
|
956
|
-
and cll_node_id not in manifest.exposures
|
|
957
|
-
):
|
|
995
|
+
if cll_node_id not in allowed_related_nodes:
|
|
958
996
|
continue
|
|
959
|
-
cll_data_one = self.get_cll_cached(cll_node_id, base=False)
|
|
997
|
+
cll_data_one = deepcopy(self.get_cll_cached(cll_node_id, base=False))
|
|
998
|
+
cll_tracker.increment_cll_nodes()
|
|
960
999
|
if cll_data_one is None:
|
|
961
1000
|
continue
|
|
962
1001
|
|
|
963
|
-
|
|
964
|
-
|
|
965
|
-
|
|
966
|
-
|
|
967
|
-
|
|
968
|
-
|
|
969
|
-
|
|
970
|
-
|
|
1002
|
+
nodes[cll_node_id] = cll_data_one.nodes.get(cll_node_id)
|
|
1003
|
+
node_diff = None
|
|
1004
|
+
if change_analysis:
|
|
1005
|
+
node_diff = self.get_change_analysis_cached(cll_node_id)
|
|
1006
|
+
cll_tracker.increment_change_analysis_nodes()
|
|
1007
|
+
if node_diff is not None:
|
|
1008
|
+
nodes[cll_node_id].change_status = node_diff.change_status
|
|
1009
|
+
if node_diff.change is not None:
|
|
1010
|
+
nodes[cll_node_id].change_category = node_diff.change.category
|
|
971
1011
|
for c_id, c in cll_data_one.columns.items():
|
|
972
1012
|
columns[c_id] = c
|
|
973
|
-
if node_diff is not None
|
|
974
|
-
|
|
975
|
-
|
|
976
|
-
|
|
1013
|
+
if node_diff is not None:
|
|
1014
|
+
if node_diff.change_status == "added":
|
|
1015
|
+
c.change_status = "added"
|
|
1016
|
+
elif node_diff.change_status == "removed":
|
|
1017
|
+
c.change_status = "removed"
|
|
1018
|
+
elif node_diff.change is not None and node_diff.change.columns is not None:
|
|
1019
|
+
column_diff = node_diff.change.columns.get(c.name)
|
|
1020
|
+
if column_diff:
|
|
1021
|
+
c.change_status = column_diff
|
|
977
1022
|
|
|
978
1023
|
for p_id, parents in cll_data_one.parent_map.items():
|
|
979
1024
|
parent_map[p_id] = parents
|
|
1025
|
+
else:
|
|
1026
|
+
for cll_node_id in cll_node_ids:
|
|
1027
|
+
cll_node = None
|
|
1028
|
+
cll_node_columns: Dict[str, CllColumn] = {}
|
|
1029
|
+
|
|
1030
|
+
if cll_node_id in manifest.sources:
|
|
1031
|
+
cll_node = CllNode.build_cll_node(manifest, "sources", cll_node_id)
|
|
1032
|
+
if self.curr_catalog and cll_node_id in self.curr_catalog.sources:
|
|
1033
|
+
cll_node_columns = {
|
|
1034
|
+
column.name: CllColumn(
|
|
1035
|
+
id=f"{cll_node_id}_{column.name}",
|
|
1036
|
+
table_id=cll_node_id,
|
|
1037
|
+
name=column.name,
|
|
1038
|
+
type=column.type,
|
|
1039
|
+
)
|
|
1040
|
+
for column in self.curr_catalog.sources[cll_node_id].columns.values()
|
|
1041
|
+
}
|
|
1042
|
+
elif cll_node_id in manifest.nodes:
|
|
1043
|
+
cll_node = CllNode.build_cll_node(manifest, "nodes", cll_node_id)
|
|
1044
|
+
if self.curr_catalog and cll_node_id in self.curr_catalog.nodes:
|
|
1045
|
+
cll_node_columns = {
|
|
1046
|
+
column.name: CllColumn(
|
|
1047
|
+
id=f"{cll_node_id}_{column.name}",
|
|
1048
|
+
table_id=cll_node_id,
|
|
1049
|
+
name=column.name,
|
|
1050
|
+
type=column.type,
|
|
1051
|
+
)
|
|
1052
|
+
for column in self.curr_catalog.nodes[cll_node_id].columns.values()
|
|
1053
|
+
}
|
|
1054
|
+
elif cll_node_id in manifest.exposures:
|
|
1055
|
+
cll_node = CllNode.build_cll_node(manifest, "exposures", cll_node_id)
|
|
1056
|
+
elif hasattr(manifest, "semantic_models") and cll_node_id in manifest.semantic_models:
|
|
1057
|
+
cll_node = CllNode.build_cll_node(manifest, "semantic_models", cll_node_id)
|
|
1058
|
+
elif cll_node_id in manifest.metrics:
|
|
1059
|
+
cll_node = CllNode.build_cll_node(manifest, "metrics", cll_node_id)
|
|
1060
|
+
|
|
1061
|
+
if not cll_node:
|
|
1062
|
+
continue
|
|
1063
|
+
nodes[cll_node_id] = cll_node
|
|
1064
|
+
|
|
1065
|
+
node_diff = None
|
|
1066
|
+
if change_analysis:
|
|
1067
|
+
node_diff = self.get_change_analysis_cached(cll_node_id)
|
|
1068
|
+
cll_tracker.increment_change_analysis_nodes()
|
|
1069
|
+
if node_diff is not None:
|
|
1070
|
+
cll_node.change_status = node_diff.change_status
|
|
1071
|
+
if node_diff.change is not None:
|
|
1072
|
+
cll_node.change_category = node_diff.change.category
|
|
1073
|
+
for c, cll_column in cll_node_columns.items():
|
|
1074
|
+
cll_node.columns[c] = cll_column
|
|
1075
|
+
columns[cll_column.id] = cll_column
|
|
1076
|
+
if node_diff.change.columns and c in node_diff.change.columns:
|
|
1077
|
+
cll_column.change_status = node_diff.change.columns[c]
|
|
1078
|
+
|
|
1079
|
+
parent_map[cll_node_id] = manifest.parent_map.get(cll_node_id, [])
|
|
980
1080
|
|
|
981
1081
|
# build the child map
|
|
982
1082
|
for parent_id, parents in parent_map.items():
|
|
@@ -987,47 +1087,90 @@ class DbtAdapter(BaseAdapter):
|
|
|
987
1087
|
|
|
988
1088
|
# Find the anchor nodes
|
|
989
1089
|
anchor_node_ids = set()
|
|
1090
|
+
extra_node_ids = set()
|
|
990
1091
|
if node_id is None and column is None:
|
|
991
1092
|
if change_analysis:
|
|
992
1093
|
# If change analysis is requested, we need to find the nodes that have changes
|
|
993
|
-
|
|
994
|
-
|
|
995
|
-
|
|
996
|
-
|
|
997
|
-
|
|
1094
|
+
lineage_diff = self.get_lineage_diff()
|
|
1095
|
+
for nid, nd in lineage_diff.diff.items():
|
|
1096
|
+
if nd.change_status == "added":
|
|
1097
|
+
anchor_node_ids.add(nid)
|
|
1098
|
+
n = lineage_diff.current["nodes"].get(nid)
|
|
1099
|
+
n_columns = n.get("columns", {})
|
|
1100
|
+
for c in n_columns:
|
|
1101
|
+
anchor_node_ids.add(build_column_key(nid, c))
|
|
1102
|
+
continue
|
|
1103
|
+
if nd.change_status == "removed":
|
|
1104
|
+
extra_node_ids.add(nid)
|
|
1105
|
+
continue
|
|
1106
|
+
|
|
1107
|
+
node_diff = self.get_change_analysis_cached(nid)
|
|
1108
|
+
if node_diff is not None and node_diff.change is not None:
|
|
1109
|
+
extra_node_ids.add(nid)
|
|
1110
|
+
if no_cll:
|
|
1111
|
+
if node_diff.change.category in ["breaking", "partial_breaking", "unknown"]:
|
|
1112
|
+
anchor_node_ids.add(nid)
|
|
1113
|
+
else:
|
|
1114
|
+
if node_diff.change.category in ["breaking", "unknown"]:
|
|
1115
|
+
anchor_node_ids.add(nid)
|
|
1116
|
+
if node_diff.change.columns is not None:
|
|
1117
|
+
for column_name in node_diff.change.columns:
|
|
1118
|
+
anchor_node_ids.add(f"{nid}_{column_name}")
|
|
998
1119
|
else:
|
|
999
1120
|
lineage_diff = self.get_lineage_diff()
|
|
1000
1121
|
anchor_node_ids = lineage_diff.diff.keys()
|
|
1001
1122
|
elif node_id is not None and column is None:
|
|
1002
1123
|
if change_analysis:
|
|
1003
1124
|
# If change analysis is requested, we need to find the nodes that have changes
|
|
1004
|
-
node_diff = self.
|
|
1005
|
-
if node_diff:
|
|
1006
|
-
|
|
1007
|
-
|
|
1008
|
-
|
|
1009
|
-
|
|
1125
|
+
node_diff = self.get_change_analysis_cached(node_id)
|
|
1126
|
+
if node_diff is not None and node_diff.change is not None:
|
|
1127
|
+
extra_node_ids.add(node_id)
|
|
1128
|
+
if no_cll:
|
|
1129
|
+
if node_diff.change.category in ["breaking", "partial_breaking", "unknown"]:
|
|
1130
|
+
anchor_node_ids.add(node_id)
|
|
1131
|
+
else:
|
|
1132
|
+
if node_diff.change.category in ["breaking", "unknown"]:
|
|
1133
|
+
anchor_node_ids.add(node_id)
|
|
1134
|
+
if node_diff.change.columns is not None:
|
|
1135
|
+
for column_name in node_diff.change.columns:
|
|
1136
|
+
anchor_node_ids.add(f"{node_id}_{column_name}")
|
|
1010
1137
|
else:
|
|
1011
1138
|
anchor_node_ids.add(node_id)
|
|
1012
1139
|
else:
|
|
1013
1140
|
anchor_node_ids.add(node_id)
|
|
1141
|
+
if not no_cll:
|
|
1142
|
+
node = nodes.get(node_id)
|
|
1143
|
+
if node:
|
|
1144
|
+
for column_name in node.columns:
|
|
1145
|
+
column_key = build_column_key(node_id, column_name)
|
|
1146
|
+
anchor_node_ids.add(column_key)
|
|
1014
1147
|
else:
|
|
1015
1148
|
anchor_node_ids.add(f"{node_id}_{column}")
|
|
1016
1149
|
|
|
1150
|
+
cll_tracker.set_anchor_nodes(len(anchor_node_ids))
|
|
1017
1151
|
result_node_ids = set(anchor_node_ids)
|
|
1018
|
-
if
|
|
1152
|
+
if not no_upstream:
|
|
1019
1153
|
result_node_ids = result_node_ids.union(find_upstream(anchor_node_ids, parent_map))
|
|
1020
|
-
if
|
|
1154
|
+
if not no_downstream:
|
|
1021
1155
|
result_node_ids = result_node_ids.union(find_downstream(anchor_node_ids, child_map))
|
|
1022
1156
|
|
|
1023
1157
|
# Filter the nodes and columns based on the anchor nodes
|
|
1024
1158
|
if not no_filter:
|
|
1025
|
-
nodes = {k: v for k, v in nodes.items() if k in result_node_ids}
|
|
1026
|
-
columns = {k: v for k, v in columns.items() if k in result_node_ids}
|
|
1159
|
+
nodes = {k: v for k, v in nodes.items() if k in result_node_ids or k in extra_node_ids}
|
|
1160
|
+
columns = {k: v for k, v in columns.items() if k in result_node_ids or k in extra_node_ids}
|
|
1161
|
+
|
|
1162
|
+
for node in nodes.values():
|
|
1163
|
+
node.columns = {
|
|
1164
|
+
k: v for k, v in node.columns.items() if v.id in result_node_ids or v.id in extra_node_ids
|
|
1165
|
+
}
|
|
1166
|
+
|
|
1167
|
+
if change_analysis:
|
|
1168
|
+
node.impacted = node.id in result_node_ids
|
|
1169
|
+
|
|
1027
1170
|
parent_map, child_map = filter_dependency_maps(parent_map, child_map, result_node_ids)
|
|
1028
1171
|
|
|
1029
1172
|
cll_tracker.end_column_lineage()
|
|
1030
|
-
cll_tracker.set_total_nodes(len(nodes))
|
|
1173
|
+
cll_tracker.set_total_nodes(len(nodes) + len(columns))
|
|
1031
1174
|
log_performance("column level lineage", cll_tracker.to_dict())
|
|
1032
1175
|
cll_tracker.reset()
|
|
1033
1176
|
|
|
@@ -1046,6 +1189,9 @@ class DbtAdapter(BaseAdapter):
|
|
|
1046
1189
|
if node is None:
|
|
1047
1190
|
return None
|
|
1048
1191
|
|
|
1192
|
+
cll_tracker.set_total_nodes(1)
|
|
1193
|
+
cll_tracker.start_column_lineage()
|
|
1194
|
+
|
|
1049
1195
|
def _apply_all_columns(node: CllNode, transformation_type):
|
|
1050
1196
|
cll_data = CllData()
|
|
1051
1197
|
cll_data.nodes[node.id] = node
|
|
@@ -1170,6 +1316,10 @@ class DbtAdapter(BaseAdapter):
|
|
|
1170
1316
|
depends_on.add(parent_key)
|
|
1171
1317
|
column.transformation_type = c2c_map[name].transformation_type
|
|
1172
1318
|
cll_data.parent_map[column_id] = set(depends_on)
|
|
1319
|
+
|
|
1320
|
+
cll_tracker.end_column_lineage()
|
|
1321
|
+
log_performance("column level lineage per node", cll_tracker.to_dict())
|
|
1322
|
+
cll_tracker.reset()
|
|
1173
1323
|
return cll_data
|
|
1174
1324
|
|
|
1175
1325
|
def get_cll_node(self, node_id: str, base: Optional[bool] = False) -> Tuple[Optional[CllNode], list[str]]:
|
|
@@ -1181,21 +1331,12 @@ class DbtAdapter(BaseAdapter):
|
|
|
1181
1331
|
# model, seed, snapshot
|
|
1182
1332
|
if node_id in manifest.nodes:
|
|
1183
1333
|
found = manifest.nodes[node_id]
|
|
1184
|
-
if found.resource_type not in ["model", "seed", "snapshot"]:
|
|
1185
|
-
return None, []
|
|
1186
|
-
|
|
1187
1334
|
unique_id = found.unique_id
|
|
1188
|
-
node = CllNode(
|
|
1189
|
-
id=found.unique_id,
|
|
1190
|
-
name=found.name,
|
|
1191
|
-
package_name=found.package_name,
|
|
1192
|
-
resource_type=found.resource_type,
|
|
1193
|
-
raw_code=found.raw_code,
|
|
1194
|
-
)
|
|
1335
|
+
node = CllNode.build_cll_node(manifest, "nodes", node_id)
|
|
1195
1336
|
if hasattr(found.depends_on, "nodes"):
|
|
1196
1337
|
parent_list = found.depends_on.nodes
|
|
1197
1338
|
|
|
1198
|
-
if catalog is not None and unique_id in catalog.nodes:
|
|
1339
|
+
if catalog is not None and node is not None and unique_id in catalog.nodes:
|
|
1199
1340
|
columns = {}
|
|
1200
1341
|
for col_name, col_metadata in catalog.nodes[unique_id].columns.items():
|
|
1201
1342
|
column_id = f"{unique_id}_{col_name}"
|
|
@@ -1207,17 +1348,10 @@ class DbtAdapter(BaseAdapter):
|
|
|
1207
1348
|
if node_id in manifest.sources:
|
|
1208
1349
|
found = manifest.sources[node_id]
|
|
1209
1350
|
unique_id = found.unique_id
|
|
1210
|
-
|
|
1211
|
-
node = CllNode(
|
|
1212
|
-
id=found.unique_id,
|
|
1213
|
-
name=found.name,
|
|
1214
|
-
package_name=found.package_name,
|
|
1215
|
-
resource_type=found.resource_type,
|
|
1216
|
-
source_name=found.source_name,
|
|
1217
|
-
)
|
|
1351
|
+
node = CllNode.build_cll_node(manifest, "sources", node_id)
|
|
1218
1352
|
parent_list = []
|
|
1219
1353
|
|
|
1220
|
-
if catalog is not None and unique_id in catalog.sources:
|
|
1354
|
+
if catalog is not None and node is not None and unique_id in catalog.sources:
|
|
1221
1355
|
columns = {}
|
|
1222
1356
|
for col_name, col_metadata in catalog.sources[unique_id].columns.items():
|
|
1223
1357
|
column_id = f"{unique_id}_{col_name}"
|
|
@@ -1228,13 +1362,19 @@ class DbtAdapter(BaseAdapter):
|
|
|
1228
1362
|
# exposure
|
|
1229
1363
|
if node_id in manifest.exposures:
|
|
1230
1364
|
found = manifest.exposures[node_id]
|
|
1365
|
+
node = CllNode.build_cll_node(manifest, "exposures", node_id)
|
|
1366
|
+
if hasattr(found.depends_on, "nodes"):
|
|
1367
|
+
parent_list = found.depends_on.nodes
|
|
1231
1368
|
|
|
1232
|
-
|
|
1233
|
-
|
|
1234
|
-
|
|
1235
|
-
|
|
1236
|
-
|
|
1237
|
-
|
|
1369
|
+
if hasattr(manifest, "semantic_models") and node_id in manifest.semantic_models:
|
|
1370
|
+
found = manifest.semantic_models[node_id]
|
|
1371
|
+
node = CllNode.build_cll_node(manifest, "semantic_models", node_id)
|
|
1372
|
+
if hasattr(found.depends_on, "nodes"):
|
|
1373
|
+
parent_list = found.depends_on.nodes
|
|
1374
|
+
|
|
1375
|
+
if node_id in manifest.metrics:
|
|
1376
|
+
found = manifest.metrics[node_id]
|
|
1377
|
+
node = CllNode.build_cll_node(manifest, "metrics", node_id)
|
|
1238
1378
|
if hasattr(found.depends_on, "nodes"):
|
|
1239
1379
|
parent_list = found.depends_on.nodes
|
|
1240
1380
|
|
|
@@ -1250,73 +1390,6 @@ class DbtAdapter(BaseAdapter):
|
|
|
1250
1390
|
}
|
|
1251
1391
|
return None
|
|
1252
1392
|
|
|
1253
|
-
def get_impact_radius(self, node_id: str) -> CllData:
|
|
1254
|
-
impacted_nodes = self.get_impacted_nodes(node_id)
|
|
1255
|
-
impacted_cll = self.get_impacted_cll(node_id)
|
|
1256
|
-
|
|
1257
|
-
# merge impact radius
|
|
1258
|
-
return self._merge_cll_data(impacted_nodes, impacted_cll)
|
|
1259
|
-
|
|
1260
|
-
def get_impacted_nodes(self, node_id: str) -> CllData:
|
|
1261
|
-
lineage_diff = self.get_lineage_diff()
|
|
1262
|
-
diff_info = lineage_diff.diff.get(node_id)
|
|
1263
|
-
if diff_info is None:
|
|
1264
|
-
return CllData()
|
|
1265
|
-
change_category = diff_info.change.category
|
|
1266
|
-
|
|
1267
|
-
if change_category == "breaking":
|
|
1268
|
-
cll = self.get_cll(node_id, no_filter=True)
|
|
1269
|
-
_, downstream = find_column_dependencies(node_id, cll.parent_map, cll.child_map)
|
|
1270
|
-
relevant_columns = {node_id}
|
|
1271
|
-
relevant_columns.update(downstream)
|
|
1272
|
-
nodes, columns = filter_lineage_vertices(cll.nodes, cll.columns, relevant_columns)
|
|
1273
|
-
p_map, c_map = filter_dependency_maps(cll.parent_map, cll.child_map, relevant_columns)
|
|
1274
|
-
|
|
1275
|
-
return CllData(nodes=nodes, columns=columns, parent_map=p_map, child_map=c_map)
|
|
1276
|
-
|
|
1277
|
-
return CllData()
|
|
1278
|
-
|
|
1279
|
-
def get_impacted_cll(self, node_id: str) -> CllData:
|
|
1280
|
-
lineage_diff = self.get_lineage_diff()
|
|
1281
|
-
diff_info = lineage_diff.diff.get(node_id)
|
|
1282
|
-
if diff_info is None:
|
|
1283
|
-
return CllData()
|
|
1284
|
-
change_columns = diff_info.change.columns
|
|
1285
|
-
|
|
1286
|
-
cll = self.get_cll(node_id, no_filter=True)
|
|
1287
|
-
relevant_columns = set()
|
|
1288
|
-
for col, change_status in change_columns.items():
|
|
1289
|
-
if change_status == "removed":
|
|
1290
|
-
continue
|
|
1291
|
-
target_column = f"{node_id}_{col}"
|
|
1292
|
-
_, downstream = find_column_dependencies(target_column, cll.parent_map, cll.child_map)
|
|
1293
|
-
relevant_columns.add(target_column)
|
|
1294
|
-
relevant_columns.update(downstream)
|
|
1295
|
-
|
|
1296
|
-
nodes, columns = filter_lineage_vertices(cll.nodes, cll.columns, relevant_columns)
|
|
1297
|
-
p_map, c_map = filter_dependency_maps(cll.parent_map, cll.child_map, relevant_columns)
|
|
1298
|
-
|
|
1299
|
-
return CllData(nodes=nodes, columns=columns, parent_map=p_map, child_map=c_map)
|
|
1300
|
-
|
|
1301
|
-
@staticmethod
|
|
1302
|
-
def _merge_cll_data(base: CllData, target: CllData) -> CllData:
|
|
1303
|
-
merged_nodes = {**base.nodes, **target.nodes}
|
|
1304
|
-
merged_columns = {**base.columns, **target.columns}
|
|
1305
|
-
|
|
1306
|
-
merged_parent_map = {}
|
|
1307
|
-
merged_keys = set(base.parent_map.keys()).union(set(target.parent_map.keys()))
|
|
1308
|
-
for key in merged_keys:
|
|
1309
|
-
merged_parent_map[key] = base.parent_map.get(key, set()).union(target.parent_map.get(key, set()))
|
|
1310
|
-
|
|
1311
|
-
merged_child_map = {}
|
|
1312
|
-
merged_keys = set(base.child_map.keys()).union(set(target.child_map.keys()))
|
|
1313
|
-
for key in merged_keys:
|
|
1314
|
-
merged_child_map[key] = base.child_map.get(key, set()).union(target.child_map.get(key, set()))
|
|
1315
|
-
|
|
1316
|
-
return CllData(
|
|
1317
|
-
nodes=merged_nodes, columns=merged_columns, parent_map=merged_parent_map, child_map=merged_child_map
|
|
1318
|
-
)
|
|
1319
|
-
|
|
1320
1393
|
def build_name_to_unique_id_index(self) -> Dict[str, str]:
|
|
1321
1394
|
name_to_unique_id = {}
|
|
1322
1395
|
curr_manifest = self.get_manifest(base=False)
|
|
@@ -1404,13 +1477,18 @@ class DbtAdapter(BaseAdapter):
|
|
|
1404
1477
|
self.curr_manifest = load_manifest(path=refresh_file_path)
|
|
1405
1478
|
self.manifest = as_manifest(self.curr_manifest)
|
|
1406
1479
|
self.get_cll_cached.cache_clear()
|
|
1480
|
+
self.get_change_analysis_cached.cache_clear()
|
|
1407
1481
|
elif refresh_file_path.endswith("catalog.json"):
|
|
1408
1482
|
self.curr_catalog = load_catalog(path=refresh_file_path)
|
|
1483
|
+
self.get_cll_cached.cache_clear()
|
|
1484
|
+
self.get_change_analysis_cached.cache_clear()
|
|
1409
1485
|
elif self.base_path and target_type == os.path.basename(self.base_path):
|
|
1410
1486
|
if refresh_file_path.endswith("manifest.json"):
|
|
1411
1487
|
self.base_manifest = load_manifest(path=refresh_file_path)
|
|
1488
|
+
self.get_change_analysis_cached.cache_clear()
|
|
1412
1489
|
elif refresh_file_path.endswith("catalog.json"):
|
|
1413
1490
|
self.base_catalog = load_catalog(path=refresh_file_path)
|
|
1491
|
+
self.get_change_analysis_cached.cache_clear()
|
|
1414
1492
|
|
|
1415
1493
|
def create_relation(self, model, base=False):
|
|
1416
1494
|
node = self.find_node_by_name(model, base)
|
|
@@ -1522,7 +1600,7 @@ class DbtAdapter(BaseAdapter):
|
|
|
1522
1600
|
if not os.path.isfile(path):
|
|
1523
1601
|
return None
|
|
1524
1602
|
|
|
1525
|
-
with open(path, "r") as f:
|
|
1603
|
+
with open(path, "r", encoding="utf-8") as f:
|
|
1526
1604
|
json_content = f.read()
|
|
1527
1605
|
return json.loads(json_content)
|
|
1528
1606
|
|