recce-nightly 1.2.0.20250506__py3-none-any.whl → 1.26.0.20251124__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of recce-nightly might be problematic; consult the registry's security report for more details.

Files changed (213)
  1. recce/VERSION +1 -1
  2. recce/__init__.py +27 -22
  3. recce/adapter/base.py +11 -14
  4. recce/adapter/dbt_adapter/__init__.py +810 -480
  5. recce/adapter/dbt_adapter/dbt_version.py +3 -0
  6. recce/adapter/sqlmesh_adapter.py +24 -35
  7. recce/apis/check_api.py +39 -28
  8. recce/apis/check_func.py +33 -27
  9. recce/apis/run_api.py +25 -19
  10. recce/apis/run_func.py +29 -23
  11. recce/artifact.py +119 -51
  12. recce/cli.py +1299 -323
  13. recce/config.py +42 -33
  14. recce/connect_to_cloud.py +138 -0
  15. recce/core.py +55 -47
  16. recce/data/404.html +1 -1
  17. recce/data/__next.__PAGE__.txt +10 -0
  18. recce/data/__next._full.txt +23 -0
  19. recce/data/__next._head.txt +8 -0
  20. recce/data/__next._index.txt +8 -0
  21. recce/data/__next._tree.txt +5 -0
  22. recce/data/_next/static/52aV_JrNUZU6dMFgvTQEO/_buildManifest.js +11 -0
  23. recce/data/_next/static/52aV_JrNUZU6dMFgvTQEO/_clientMiddlewareManifest.json +1 -0
  24. recce/data/_next/static/chunks/02b996c7f6a29a06.js +4 -0
  25. recce/data/_next/static/chunks/19c10d219a6a21ff.js +1 -0
  26. recce/data/_next/static/chunks/2df9ec28a061971d.js +11 -0
  27. recce/data/_next/static/chunks/3098c987393bda15.js +1 -0
  28. recce/data/_next/static/chunks/393dc43e483f717a.css +2 -0
  29. recce/data/_next/static/chunks/399e8d91a7e45073.js +2 -0
  30. recce/data/_next/static/chunks/4d0186f631230245.js +1 -0
  31. recce/data/_next/static/chunks/5794ba9e10a9c060.js +11 -0
  32. recce/data/_next/static/chunks/715761c929a3f28b.js +110 -0
  33. recce/data/_next/static/chunks/71f88fcc615bf282.js +1 -0
  34. recce/data/_next/static/chunks/80d2a95eaf1201ea.js +1 -0
  35. recce/data/_next/static/chunks/9979c6109bbbee35.js +1 -0
  36. recce/data/_next/static/chunks/99d638224186c118.js +1 -0
  37. recce/data/_next/static/chunks/d003eb36240e92f3.js +1 -0
  38. recce/data/_next/static/chunks/d3167cdfec4fc351.js +1 -0
  39. recce/data/_next/static/chunks/e124bccf574a3361.css +1 -0
  40. recce/data/_next/static/chunks/f40141db1bdb46f0.css +6 -0
  41. recce/data/_next/static/chunks/fcc53a88741a52f9.js +1 -0
  42. recce/data/_next/static/chunks/turbopack-b1920d28cfb1f28d.js +3 -0
  43. recce/data/_next/static/media/favicon.a8d38d84.ico +0 -0
  44. recce/data/_next/static/media/montserrat-cyrillic-800-normal.d80d830d.woff2 +0 -0
  45. recce/data/_next/static/media/montserrat-cyrillic-800-normal.f9d58125.woff +0 -0
  46. recce/data/_next/static/media/montserrat-cyrillic-ext-800-normal.076c2a93.woff2 +0 -0
  47. recce/data/_next/static/media/montserrat-cyrillic-ext-800-normal.a4fa76b5.woff +0 -0
  48. recce/data/_next/static/media/montserrat-latin-800-normal.cde454cc.woff2 +0 -0
  49. recce/data/_next/static/media/montserrat-latin-800-normal.d5761935.woff +0 -0
  50. recce/data/_next/static/media/montserrat-latin-ext-800-normal.40ec0659.woff2 +0 -0
  51. recce/data/_next/static/media/montserrat-latin-ext-800-normal.b671449b.woff +0 -0
  52. recce/data/_next/static/media/montserrat-vietnamese-800-normal.9f7b8541.woff +0 -0
  53. recce/data/_next/static/media/montserrat-vietnamese-800-normal.f9eb854e.woff2 +0 -0
  54. recce/data/_next/static/media/reload-image.7aa931c7.svg +4 -0
  55. recce/data/_not-found/__next._full.txt +17 -0
  56. recce/data/_not-found/__next._head.txt +8 -0
  57. recce/data/_not-found/__next._index.txt +8 -0
  58. recce/data/_not-found/__next._not-found.__PAGE__.txt +5 -0
  59. recce/data/_not-found/__next._not-found.txt +4 -0
  60. recce/data/_not-found/__next._tree.txt +3 -0
  61. recce/data/_not-found.html +1 -0
  62. recce/data/_not-found.txt +17 -0
  63. recce/data/auth_callback.html +68 -0
  64. recce/data/imgs/reload-image.svg +4 -0
  65. recce/data/index.html +1 -27
  66. recce/data/index.txt +23 -7
  67. recce/diff.py +6 -12
  68. recce/event/__init__.py +86 -74
  69. recce/event/collector.py +33 -22
  70. recce/event/track.py +49 -27
  71. recce/exceptions.py +1 -1
  72. recce/git.py +7 -7
  73. recce/github.py +57 -53
  74. recce/mcp_server.py +716 -0
  75. recce/models/__init__.py +4 -1
  76. recce/models/check.py +6 -7
  77. recce/models/run.py +1 -0
  78. recce/models/types.py +131 -28
  79. recce/pull_request.py +27 -25
  80. recce/run.py +165 -121
  81. recce/server.py +303 -111
  82. recce/state/__init__.py +31 -0
  83. recce/state/cloud.py +632 -0
  84. recce/state/const.py +26 -0
  85. recce/state/local.py +56 -0
  86. recce/state/state.py +119 -0
  87. recce/state/state_loader.py +174 -0
  88. recce/summary.py +188 -143
  89. recce/tasks/__init__.py +19 -3
  90. recce/tasks/core.py +11 -13
  91. recce/tasks/dataframe.py +82 -18
  92. recce/tasks/histogram.py +69 -34
  93. recce/tasks/lineage.py +2 -2
  94. recce/tasks/profile.py +152 -86
  95. recce/tasks/query.py +139 -87
  96. recce/tasks/rowcount.py +37 -31
  97. recce/tasks/schema.py +18 -15
  98. recce/tasks/top_k.py +35 -35
  99. recce/tasks/valuediff.py +216 -152
  100. recce/util/__init__.py +3 -0
  101. recce/util/api_token.py +80 -0
  102. recce/util/breaking.py +87 -85
  103. recce/util/cll.py +274 -219
  104. recce/util/io.py +22 -17
  105. recce/util/lineage.py +65 -16
  106. recce/util/logger.py +1 -1
  107. recce/util/onboarding_state.py +45 -0
  108. recce/util/perf_tracking.py +85 -0
  109. recce/util/recce_cloud.py +322 -72
  110. recce/util/singleton.py +4 -4
  111. recce/yaml/__init__.py +7 -10
  112. recce_cloud/__init__.py +24 -0
  113. recce_cloud/api/__init__.py +17 -0
  114. recce_cloud/api/base.py +111 -0
  115. recce_cloud/api/client.py +150 -0
  116. recce_cloud/api/exceptions.py +26 -0
  117. recce_cloud/api/factory.py +63 -0
  118. recce_cloud/api/github.py +76 -0
  119. recce_cloud/api/gitlab.py +82 -0
  120. recce_cloud/artifact.py +57 -0
  121. recce_cloud/ci_providers/__init__.py +9 -0
  122. recce_cloud/ci_providers/base.py +82 -0
  123. recce_cloud/ci_providers/detector.py +147 -0
  124. recce_cloud/ci_providers/github_actions.py +136 -0
  125. recce_cloud/ci_providers/gitlab_ci.py +130 -0
  126. recce_cloud/cli.py +245 -0
  127. recce_cloud/upload.py +214 -0
  128. {recce_nightly-1.2.0.20250506.dist-info → recce_nightly-1.26.0.20251124.dist-info}/METADATA +68 -37
  129. recce_nightly-1.26.0.20251124.dist-info/RECORD +180 -0
  130. {recce_nightly-1.2.0.20250506.dist-info → recce_nightly-1.26.0.20251124.dist-info}/WHEEL +1 -1
  131. {recce_nightly-1.2.0.20250506.dist-info → recce_nightly-1.26.0.20251124.dist-info}/top_level.txt +1 -0
  132. tests/adapter/dbt_adapter/conftest.py +9 -5
  133. tests/adapter/dbt_adapter/dbt_test_helper.py +37 -22
  134. tests/adapter/dbt_adapter/test_dbt_adapter.py +0 -15
  135. tests/adapter/dbt_adapter/test_dbt_cll.py +656 -41
  136. tests/adapter/dbt_adapter/test_selector.py +22 -21
  137. tests/recce_cloud/__init__.py +0 -0
  138. tests/recce_cloud/test_ci_providers.py +351 -0
  139. tests/recce_cloud/test_cli.py +372 -0
  140. tests/recce_cloud/test_client.py +273 -0
  141. tests/recce_cloud/test_platform_clients.py +333 -0
  142. tests/tasks/conftest.py +1 -1
  143. tests/tasks/test_histogram.py +58 -66
  144. tests/tasks/test_lineage.py +36 -23
  145. tests/tasks/test_preset_checks.py +45 -31
  146. tests/tasks/test_profile.py +339 -15
  147. tests/tasks/test_query.py +46 -46
  148. tests/tasks/test_row_count.py +65 -46
  149. tests/tasks/test_schema.py +65 -42
  150. tests/tasks/test_top_k.py +22 -18
  151. tests/tasks/test_valuediff.py +43 -32
  152. tests/test_cli.py +174 -60
  153. tests/test_cli_mcp_optional.py +45 -0
  154. tests/test_cloud_listing_cli.py +324 -0
  155. tests/test_config.py +7 -9
  156. tests/test_connect_to_cloud.py +82 -0
  157. tests/test_core.py +151 -4
  158. tests/test_dbt.py +7 -7
  159. tests/test_mcp_server.py +332 -0
  160. tests/test_pull_request.py +1 -1
  161. tests/test_server.py +25 -19
  162. tests/test_summary.py +29 -17
  163. recce/data/_next/static/Kcbs3GEIyH2LxgLYat0es/_buildManifest.js +0 -1
  164. recce/data/_next/static/chunks/1f229bf6-d9fe92e56db8d93b.js +0 -1
  165. recce/data/_next/static/chunks/29e3cc0d-8c150e37dff9631b.js +0 -1
  166. recce/data/_next/static/chunks/368-7587b306577df275.js +0 -65
  167. recce/data/_next/static/chunks/36e1c10d-bb0210cbd6573a8d.js +0 -1
  168. recce/data/_next/static/chunks/3998a672-eaad84bdd88cc73e.js +0 -1
  169. recce/data/_next/static/chunks/3a92ee20-3b5d922d4157af5e.js +0 -1
  170. recce/data/_next/static/chunks/450c323b-1bb5db526e54435a.js +0 -1
  171. recce/data/_next/static/chunks/47d8844f-79a1b53c66a7d7ec.js +0 -1
  172. recce/data/_next/static/chunks/6dc81886-c94b9b91bc2c3caf.js +0 -1
  173. recce/data/_next/static/chunks/6ef81909-694dc38134099299.js +0 -1
  174. recce/data/_next/static/chunks/700-3b65fc3666820d00.js +0 -2
  175. recce/data/_next/static/chunks/7a8a3e83-d7fa409d97b38b2b.js +0 -1
  176. recce/data/_next/static/chunks/7f27ae6c-413f6b869a04183a.js +0 -1
  177. recce/data/_next/static/chunks/8d700b6a-f0b1f6b9e0d97ce2.js +0 -1
  178. recce/data/_next/static/chunks/9746af58-d74bef4d03eea6ab.js +0 -1
  179. recce/data/_next/static/chunks/a30376cd-7d806e1602f2dc3a.js +0 -1
  180. recce/data/_next/static/chunks/app/_not-found/page-8a886fa0855c3105.js +0 -1
  181. recce/data/_next/static/chunks/app/layout-9102e22cb73f74d6.js +0 -1
  182. recce/data/_next/static/chunks/app/page-cee661090afbd6aa.js +0 -1
  183. recce/data/_next/static/chunks/b63b1b3f-7395c74e11a14e95.js +0 -1
  184. recce/data/_next/static/chunks/c132bf7d-8102037f9ccf372a.js +0 -1
  185. recce/data/_next/static/chunks/c1ceaa8b-a1e442154d23515e.js +0 -1
  186. recce/data/_next/static/chunks/cd9f8d63-cf0d5a7b0f7a92e8.js +0 -54
  187. recce/data/_next/static/chunks/ce84277d-f42c2c58049cea2d.js +0 -1
  188. recce/data/_next/static/chunks/e24bf851-0f8cbc99656833e7.js +0 -1
  189. recce/data/_next/static/chunks/fee69bc6-f17d36c080742e74.js +0 -1
  190. recce/data/_next/static/chunks/framework-ded83d71b51ce901.js +0 -1
  191. recce/data/_next/static/chunks/main-a0859f1f36d0aa6c.js +0 -1
  192. recce/data/_next/static/chunks/main-app-0225a2255968e566.js +0 -1
  193. recce/data/_next/static/chunks/pages/_app-d5672bf3d8b6371b.js +0 -1
  194. recce/data/_next/static/chunks/pages/_error-ed75be3f25588548.js +0 -1
  195. recce/data/_next/static/chunks/webpack-567d72f0bc0820d5.js +0 -1
  196. recce/data/_next/static/css/c9ecb46a4b21c126.css +0 -14
  197. recce/data/_next/static/media/montserrat-cyrillic-800-normal.22628180.woff2 +0 -0
  198. recce/data/_next/static/media/montserrat-cyrillic-800-normal.31d693bb.woff +0 -0
  199. recce/data/_next/static/media/montserrat-cyrillic-ext-800-normal.7e2c1e62.woff +0 -0
  200. recce/data/_next/static/media/montserrat-cyrillic-ext-800-normal.94a63aea.woff2 +0 -0
  201. recce/data/_next/static/media/montserrat-latin-800-normal.6f8fa298.woff2 +0 -0
  202. recce/data/_next/static/media/montserrat-latin-800-normal.97e20d5e.woff +0 -0
  203. recce/data/_next/static/media/montserrat-latin-ext-800-normal.013b84f9.woff2 +0 -0
  204. recce/data/_next/static/media/montserrat-latin-ext-800-normal.aff52ab0.woff +0 -0
  205. recce/data/_next/static/media/montserrat-vietnamese-800-normal.5f21869b.woff +0 -0
  206. recce/data/_next/static/media/montserrat-vietnamese-800-normal.c0035377.woff2 +0 -0
  207. recce/state.py +0 -753
  208. recce_nightly-1.2.0.20250506.dist-info/RECORD +0 -142
  209. tests/test_state.py +0 -123
  210. /recce/data/_next/static/{Kcbs3GEIyH2LxgLYat0es → 52aV_JrNUZU6dMFgvTQEO}/_ssgManifest.js +0 -0
  211. /recce/data/_next/static/chunks/{polyfills-42372ed130431b0a.js → a6dad97d9634a72d.js} +0 -0
  212. {recce_nightly-1.2.0.20250506.dist-info → recce_nightly-1.26.0.20251124.dist-info}/entry_points.txt +0 -0
  213. {recce_nightly-1.2.0.20250506.dist-info → recce_nightly-1.26.0.20251124.dist-info}/licenses/LICENSE +0 -0
@@ -8,14 +8,33 @@ from dataclasses import dataclass, fields
8
8
  from errno import ENOENT
9
9
  from functools import lru_cache
10
10
  from pathlib import Path
11
- from typing import Callable, Dict, List, Optional, Tuple, Iterator, Any, Set, Union, Literal, Type
11
+ from typing import (
12
+ Any,
13
+ Callable,
14
+ Dict,
15
+ Iterator,
16
+ List,
17
+ Literal,
18
+ Optional,
19
+ Set,
20
+ Tuple,
21
+ Type,
22
+ Union,
23
+ )
12
24
 
13
25
  from recce.event import log_performance
14
26
  from recce.exceptions import RecceException
15
- from recce.util.cll import cll, CLLPerformanceTracking
16
- from recce.util.lineage import find_upstream, find_downstream
27
+ from recce.util.cll import CLLPerformanceTracking, cll
28
+ from recce.util.lineage import (
29
+ build_column_key,
30
+ filter_dependency_maps,
31
+ find_downstream,
32
+ find_upstream,
33
+ )
34
+ from recce.util.perf_tracking import LineagePerfTracker
35
+
17
36
  from ...tasks.profile import ProfileTask
18
- from ...util.breaking import parse_change_category, BreakingPerformanceTracking
37
+ from ...util.breaking import BreakingPerformanceTracking, parse_change_category
19
38
 
20
39
  try:
21
40
  import agate
@@ -30,11 +49,30 @@ from watchdog.observers import Observer
30
49
 
31
50
  from recce.adapter.base import BaseAdapter
32
51
  from recce.state import ArtifactsRoot
33
- from .dbt_version import DbtVersion
52
+
34
53
  from ...models import RunType
35
- from ...models.types import LineageDiff, NodeDiff, NodeChange
36
- from ...tasks import Task, QueryTask, QueryBaseTask, QueryDiffTask, ValueDiffTask, ValueDiffDetailTask, ProfileDiffTask, \
37
- RowCountTask, RowCountDiffTask, TopKDiffTask, HistogramDiffTask
54
+ from ...models.types import (
55
+ CllColumn,
56
+ CllData,
57
+ CllNode,
58
+ LineageDiff,
59
+ NodeChange,
60
+ NodeDiff,
61
+ )
62
+ from ...tasks import (
63
+ HistogramDiffTask,
64
+ ProfileDiffTask,
65
+ QueryBaseTask,
66
+ QueryDiffTask,
67
+ QueryTask,
68
+ RowCountDiffTask,
69
+ RowCountTask,
70
+ Task,
71
+ TopKDiffTask,
72
+ ValueDiffDetailTask,
73
+ ValueDiffTask,
74
+ )
75
+ from .dbt_version import DbtVersion
38
76
 
39
77
  dbt_supported_registry: Dict[RunType, Type[Task]] = {
40
78
  RunType.QUERY: QueryTask,
@@ -56,7 +94,7 @@ get_adapter_orig = dbt.adapters.factory.get_adapter
56
94
 
57
95
 
58
96
  def get_adapter(config):
59
- if hasattr(config, 'adapter'):
97
+ if hasattr(config, "adapter"):
60
98
  return config.adapter
61
99
  else:
62
100
  return get_adapter_orig(config)
@@ -69,7 +107,11 @@ from dbt.adapters.base import Column # noqa: E402
69
107
  from dbt.adapters.factory import get_adapter_class_by_name # noqa: E402
70
108
  from dbt.adapters.sql import SQLAdapter # noqa: E402
71
109
  from dbt.config.runtime import RuntimeConfig # noqa: E402
72
- from dbt.contracts.graph.manifest import Manifest, WritableManifest, MacroManifest # noqa: E402
110
+ from dbt.contracts.graph.manifest import ( # noqa: E402
111
+ MacroManifest,
112
+ Manifest,
113
+ WritableManifest,
114
+ )
73
115
  from dbt.contracts.graph.nodes import ManifestNode # noqa: E402
74
116
  from dbt.contracts.results import CatalogArtifact # noqa: E402
75
117
  from dbt.flags import set_from_args # noqa: E402
@@ -78,7 +120,7 @@ from dbt.parser.sql import SqlBlockParser # noqa: E402
78
120
 
79
121
  dbt_version = DbtVersion()
80
122
 
81
- if dbt_version < 'v1.8':
123
+ if dbt_version < "v1.8":
82
124
  from dbt.contracts.connection import Connection
83
125
  else:
84
126
  from dbt.adapters.contracts.connection import Connection
@@ -86,19 +128,22 @@ else:
86
128
 
87
129
  @contextmanager
88
130
  def silence_no_nodes_warning():
89
- if dbt_version >= 'v1.8':
131
+ if dbt_version >= "v1.8":
90
132
  from dbt.events.types import NoNodesForSelectionCriteria
91
133
  from dbt_common.events.functions import WARN_ERROR_OPTIONS
134
+
92
135
  WARN_ERROR_OPTIONS.silence.append(NoNodesForSelectionCriteria.__name__)
93
136
  try:
94
137
  yield
95
138
  finally:
96
- if dbt_version >= 'v1.8':
139
+ if dbt_version >= "v1.8":
97
140
  from dbt_common.events.functions import WARN_ERROR_OPTIONS
141
+
98
142
  WARN_ERROR_OPTIONS.silence.pop()
99
143
 
100
144
 
101
- logger = logging.getLogger('uvicorn')
145
+ logger = logging.getLogger("uvicorn")
146
+ MIN_DBT_NODE_COMPOSITION = 3
102
147
 
103
148
 
104
149
  class ArtifactsEventHandler(FileSystemEventHandler):
@@ -147,16 +192,18 @@ class EnvironmentEventHandler(FileSystemEventHandler):
147
192
 
148
193
 
149
194
  def merge_tables(tables: List[agate.Table]) -> agate.Table:
150
- if dbt_version < 'v1.8':
195
+ if dbt_version < "v1.8":
151
196
  from dbt.clients.agate_helper import merge_tables
197
+
152
198
  return merge_tables(tables)
153
199
  else:
154
200
  from dbt_common.clients.agate_helper import merge_tables
201
+
155
202
  return merge_tables(tables)
156
203
 
157
204
 
158
205
  def as_manifest(m: WritableManifest) -> Manifest:
159
- if dbt_version < 'v1.8':
206
+ if dbt_version < "v1.8":
160
207
  data = m.__dict__
161
208
  all_fields = set([x.name for x in fields(Manifest)])
162
209
  new_data = {k: v for k, v in data.items() if k in all_fields}
@@ -184,12 +231,13 @@ def load_catalog(path: str = None, data: dict = None):
184
231
 
185
232
 
186
233
  def previous_state(state_path: Path, target_path: Path, project_root: Path) -> PreviousState:
187
- if dbt_version < 'v1.5.2':
234
+ if dbt_version < "v1.5.2":
188
235
  return PreviousState(state_path, target_path)
189
236
  else:
190
237
  try:
191
238
  # Overwrite the level_tag method temporarily to avoid the warning message
192
- from dbt.events.types import WarnStateTargetEqual, EventLevel
239
+ from dbt.events.types import EventLevel, WarnStateTargetEqual
240
+
193
241
  original_level_tag_func = WarnStateTargetEqual.level_tag
194
242
  WarnStateTargetEqual.level_tag = lambda x: EventLevel.DEBUG
195
243
  except ImportError:
@@ -209,12 +257,12 @@ def previous_state(state_path: Path, target_path: Path, project_root: Path) -> P
209
257
  def default_profiles_dir():
210
258
  # Precedence: DBT_PROFILES_DIR > current working directory > ~/.dbt/
211
259
  # https://docs.getdbt.com/docs/core/connect-data-platform/connection-profiles#advanced-customizing-a-profile-directory
212
- if os.getenv('DBT_PROFILES_DIR'):
213
- return os.getenv('DBT_PROFILES_DIR')
214
- elif os.path.exists(os.path.join(os.getcwd(), 'profiles.yml')):
260
+ if os.getenv("DBT_PROFILES_DIR"):
261
+ return os.getenv("DBT_PROFILES_DIR")
262
+ elif os.path.exists(os.path.join(os.getcwd(), "profiles.yml")):
215
263
  return os.getcwd()
216
264
  else:
217
- return os.path.expanduser('~/.dbt/')
265
+ return os.path.expanduser("~/.dbt/")
218
266
 
219
267
 
220
268
  @dataclass()
@@ -222,15 +270,16 @@ class DbtArgs:
222
270
  """
223
271
  Used for RuntimeConfig.from_args
224
272
  """
225
- threads: Optional[int] = 1,
226
- target: Optional[str] = None,
227
- profiles_dir: Optional[str] = None,
228
- project_dir: Optional[str] = None,
229
- profile: Optional[str] = None,
230
- target_path: Optional[str] = None,
273
+
274
+ threads: Optional[int] = (1,)
275
+ target: Optional[str] = (None,)
276
+ profiles_dir: Optional[str] = (None,)
277
+ project_dir: Optional[str] = (None,)
278
+ profile: Optional[str] = (None,)
279
+ target_path: Optional[str] = (None,)
231
280
  project_only_flags: Optional[Dict[str, Any]] = None
232
281
  which: Optional[str] = None
233
- state_modified_compare_more_unrendered_values: Optional[bool] = False # new flag added since dbt v1.9
282
+ state_modified_compare_more_unrendered_values: Optional[bool] = True # new flag added since dbt v1.9
234
283
 
235
284
 
236
285
  @dataclass
@@ -258,32 +307,18 @@ class DbtAdapter(BaseAdapter):
258
307
 
259
308
  def support_tasks(self):
260
309
  support_map = {run_type.value: True for run_type in dbt_supported_registry}
261
- supported_dbt_packages = set([package.package_name for package in self.manifest.macros.values()])
262
-
263
- if 'dbt_profiler' not in supported_dbt_packages:
264
- support_map[RunType.PROFILE_DIFF.value] = False
265
- support_map[RunType.PROFILE.value] = False
266
-
267
- if 'audit_helper' not in supported_dbt_packages:
268
- support_map[RunType.VALUE_DIFF.value] = False
269
- support_map[RunType.VALUE_DIFF_DETAIL.value] = False
270
- support_map['query_diff_with_primary_key'] = False
271
310
 
272
311
  return support_map
273
312
 
274
313
  @classmethod
275
- def load(cls,
276
- no_artifacts=False,
277
- review=False,
278
- **kwargs):
279
-
280
- target = kwargs.get('target')
281
- target_path = kwargs.get('target_path', 'target')
282
- target_base_path = kwargs.get('target_base_path', 'target-base')
314
+ def load(cls, no_artifacts=False, review=False, **kwargs):
315
+ target = kwargs.get("target")
316
+ target_path = kwargs.get("target_path", "target")
317
+ target_base_path = kwargs.get("target_base_path", "target-base")
283
318
 
284
- profile_name = kwargs.get('profile')
285
- project_dir = kwargs.get('project_dir')
286
- profiles_dir = kwargs.get('profiles_dir')
319
+ profile_name = kwargs.get("profile")
320
+ project_dir = kwargs.get("project_dir")
321
+ profiles_dir = kwargs.get("profiles_dir")
287
322
 
288
323
  if profiles_dir is None:
289
324
  profiles_dir = default_profiles_dir()
@@ -297,21 +332,25 @@ class DbtAdapter(BaseAdapter):
297
332
  profiles_dir=profiles_dir,
298
333
  profile=profile_name,
299
334
  project_only_flags={},
300
- which='list'
335
+ which="list",
301
336
  )
302
337
  set_from_args(args, args)
303
338
 
304
339
  from dbt.exceptions import DbtProjectError
340
+
305
341
  try:
306
342
  # adapter
307
- if dbt_version < 'v1.8':
343
+ if dbt_version < "v1.8":
308
344
  runtime_config = RuntimeConfig.from_args(args)
309
345
  adapter_name = runtime_config.credentials.type
310
346
  adapter_cls = get_adapter_class_by_name(adapter_name)
311
347
  adapter: SQLAdapter = adapter_cls(runtime_config)
312
348
  else:
313
- from dbt_common.context import set_invocation_context, get_invocation_context
314
349
  from dbt.mp_context import get_mp_context
350
+ from dbt_common.context import (
351
+ get_invocation_context,
352
+ set_invocation_context,
353
+ )
315
354
 
316
355
  set_invocation_context({})
317
356
  get_invocation_context()._env = dict(os.environ)
@@ -320,6 +359,7 @@ class DbtAdapter(BaseAdapter):
320
359
  adapter_cls = get_adapter_class_by_name(adapter_name)
321
360
  adapter: SQLAdapter = adapter_cls(runtime_config, get_mp_context())
322
361
  from dbt.adapters.factory import FACTORY
362
+
323
363
  FACTORY.adapters[adapter_name] = adapter
324
364
 
325
365
  adapter.connections.set_connection_name()
@@ -329,7 +369,7 @@ class DbtAdapter(BaseAdapter):
329
369
  runtime_config=runtime_config,
330
370
  adapter=adapter,
331
371
  review_mode=review,
332
- base_path=target_base_path
372
+ base_path=target_base_path,
333
373
  )
334
374
  except DbtProjectError as e:
335
375
  raise e
@@ -350,27 +390,26 @@ class DbtAdapter(BaseAdapter):
350
390
 
351
391
  def get_columns(self, model: str, base=False) -> List[Column]:
352
392
  relation = self.create_relation(model, base)
353
- get_columns_macro = 'get_columns_in_relation'
354
- if self.adapter.connections.TYPE == 'databricks':
355
- get_columns_macro = 'get_columns_comments'
393
+ get_columns_macro = "get_columns_in_relation"
394
+ if self.adapter.connections.TYPE == "databricks":
395
+ get_columns_macro = "get_columns_comments"
356
396
 
357
- if dbt_version < 'v1.8':
397
+ if dbt_version < "v1.8":
358
398
  columns = self.adapter.execute_macro(
359
- get_columns_macro,
360
- kwargs={"relation": relation},
361
- manifest=self.manifest)
399
+ get_columns_macro, kwargs={"relation": relation}, manifest=self.manifest
400
+ )
362
401
  else:
363
402
  from dbt.context.providers import generate_runtime_macro_context
403
+
364
404
  macro_manifest = MacroManifest(self.manifest.macros)
365
405
  self.adapter.set_macro_resolver(macro_manifest)
366
406
  self.adapter.set_macro_context_generator(generate_runtime_macro_context)
367
- columns = self.adapter.execute_macro(
368
- get_columns_macro,
369
- kwargs={"relation": relation})
407
+ columns = self.adapter.execute_macro(get_columns_macro, kwargs={"relation": relation})
370
408
 
371
- if self.adapter.connections.TYPE == 'databricks':
409
+ if self.adapter.connections.TYPE == "databricks":
372
410
  # reference: get_columns_in_relation (dbt/adapters/databricks/impl.py)
373
- from dbt.adapters.databricks import DatabricksColumn
411
+ from dbt.adapters.databricks.column import DatabricksColumn
412
+
374
413
  rows = columns
375
414
  columns = []
376
415
  for row in rows:
@@ -378,7 +417,9 @@ class DbtAdapter(BaseAdapter):
378
417
  break
379
418
  columns.append(
380
419
  DatabricksColumn(
381
- column=row["col_name"], dtype=row["data_type"], comment=row["comment"]
420
+ column=row["col_name"],
421
+ dtype=row["data_type"],
422
+ comment=row["comment"],
382
423
  )
383
424
  )
384
425
  return columns
@@ -389,29 +430,29 @@ class DbtAdapter(BaseAdapter):
389
430
  manifest = self.curr_manifest if base is False else self.base_manifest
390
431
  manifest_dict = manifest.to_dict()
391
432
 
392
- node = manifest_dict['nodes'].get(model_id)
433
+ node = manifest_dict["nodes"].get(model_id)
393
434
  if node is None:
394
435
  return {}
395
436
 
396
- node_name = node['name']
397
- with self.adapter.connection_named('model'):
437
+ node_name = node["name"]
438
+ with self.adapter.connection_named("model"):
398
439
  columns = [column for column in self.get_columns(node_name, base=base)]
399
440
 
400
- child_map: List[str] = manifest_dict['child_map'][model_id]
441
+ child_map: List[str] = manifest_dict["child_map"][model_id]
401
442
  cols_not_null = []
402
443
  cols_unique = []
403
444
 
404
445
  for child in child_map:
405
- comps = child.split('.')
446
+ comps = child.split(".")
406
447
  child_type = comps[0]
407
448
  child_name = comps[2]
408
449
 
409
- not_null_prefix = f'not_null_{node_name}_'
410
- if child_type == 'test' and child_name.startswith(not_null_prefix):
411
- cols_not_null.append(child_name[len(not_null_prefix):])
412
- unique_prefix = f'unique_{node_name}_'
413
- if child_type == 'test' and child_name.startswith(unique_prefix):
414
- cols_unique.append(child_name[len(unique_prefix):])
450
+ not_null_prefix = f"not_null_{node_name}_"
451
+ if child_type == "test" and child_name.startswith(not_null_prefix):
452
+ cols_not_null.append(child_name[len(not_null_prefix) :])
453
+ unique_prefix = f"unique_{node_name}_"
454
+ if child_type == "test" and child_name.startswith(unique_prefix):
455
+ cols_unique.append(child_name[len(unique_prefix) :])
415
456
 
416
457
  columns_info = {}
417
458
  primary_key = None
@@ -419,16 +460,16 @@ class DbtAdapter(BaseAdapter):
419
460
  col_name = c.column
420
461
  col = dict(name=col_name, type=c.dtype)
421
462
  if col_name in cols_not_null:
422
- col['not_null'] = True
463
+ col["not_null"] = True
423
464
  if col_name in cols_unique:
424
- col['unique'] = True
465
+ col["unique"] = True
425
466
  if not primary_key:
426
467
  primary_key = col_name
427
468
  columns_info[col_name] = col
428
469
 
429
470
  result = dict(columns=columns_info)
430
471
  if primary_key:
431
- result['primary_key'] = primary_key
472
+ result["primary_key"] = primary_key
432
473
 
433
474
  return result
434
475
 
@@ -437,7 +478,7 @@ class DbtAdapter(BaseAdapter):
437
478
  Load the artifacts from the 'target' and 'target-base' directory
438
479
  """
439
480
  if self.runtime_config is None:
440
- raise Exception('Cannot find the dbt project configuration')
481
+ raise Exception("Cannot find the dbt project configuration")
441
482
 
442
483
  project_root = self.runtime_config.project_root
443
484
  target_path = self.runtime_config.target_path
@@ -446,17 +487,17 @@ class DbtAdapter(BaseAdapter):
446
487
  self.base_path = os.path.join(project_root, target_base_path)
447
488
 
448
489
  # load the artifacts
449
- path = os.path.join(project_root, target_path, 'manifest.json')
490
+ path = os.path.join(project_root, target_path, "manifest.json")
450
491
  curr_manifest = load_manifest(path=path)
451
492
  if curr_manifest is None:
452
493
  raise FileNotFoundError(ENOENT, os.strerror(ENOENT), path)
453
- path = os.path.join(project_root, target_base_path, 'manifest.json')
494
+ path = os.path.join(project_root, target_base_path, "manifest.json")
454
495
  base_manifest = load_manifest(path=path)
455
496
  if base_manifest is None:
456
497
  raise FileNotFoundError(ENOENT, os.strerror(ENOENT), path)
457
498
 
458
- curr_catalog = load_catalog(path=os.path.join(project_root, target_path, 'catalog.json'))
459
- base_catalog = load_catalog(path=os.path.join(project_root, target_base_path, 'catalog.json'))
499
+ curr_catalog = load_catalog(path=os.path.join(project_root, target_path, "catalog.json"))
500
+ base_catalog = load_catalog(path=os.path.join(project_root, target_base_path, "catalog.json"))
460
501
 
461
502
  # set the value if all the artifacts are loaded successfully
462
503
  self.curr_manifest = curr_manifest
@@ -474,22 +515,21 @@ class DbtAdapter(BaseAdapter):
474
515
 
475
516
  # set the file paths to watch
476
517
  self.artifacts_files = [
477
- os.path.join(project_root, target_path, 'manifest.json'),
478
- os.path.join(project_root, target_path, 'catalog.json'),
479
- os.path.join(project_root, target_base_path, 'manifest.json'),
480
- os.path.join(project_root, target_base_path, 'catalog.json'),
518
+ os.path.join(project_root, target_path, "manifest.json"),
519
+ os.path.join(project_root, target_path, "catalog.json"),
520
+ os.path.join(project_root, target_base_path, "manifest.json"),
521
+ os.path.join(project_root, target_base_path, "catalog.json"),
481
522
  ]
482
523
 
483
524
  def is_python_model(self, node_id: str, base: Optional[bool] = False):
484
525
  manifest = self.curr_manifest if base is False else self.base_manifest
485
526
  model = manifest.nodes.get(node_id)
486
- if hasattr(model, 'language'):
487
- return model.language == 'python'
527
+ if hasattr(model, "language"):
528
+ return model.language == "python"
488
529
 
489
530
  return False
490
531
 
491
532
  def find_node_by_name(self, node_name, base=False) -> Optional[ManifestNode]:
492
-
493
533
  manifest = self.curr_manifest if base is False else self.base_manifest
494
534
 
495
535
  for key, node in manifest.nodes.items():
@@ -499,22 +539,22 @@ class DbtAdapter(BaseAdapter):
499
539
  return None
500
540
 
501
541
  def get_node_name_by_id(self, unique_id):
502
- if unique_id.startswith('source.'):
542
+ if unique_id.startswith("source."):
503
543
  if unique_id in self.curr_manifest.sources:
504
544
  return self.curr_manifest.sources[unique_id].name
505
545
  elif unique_id in self.base_manifest.sources:
506
546
  return self.base_manifest.sources[unique_id].name
507
- elif unique_id.startswith('metric.'):
547
+ elif unique_id.startswith("metric."):
508
548
  if unique_id in self.curr_manifest.metrics:
509
549
  return self.curr_manifest.metrics[unique_id].name
510
550
  elif unique_id in self.base_manifest.metrics:
511
551
  return self.base_manifest.metrics[unique_id].name
512
- elif unique_id.startswith('exposure.'):
552
+ elif unique_id.startswith("exposure."):
513
553
  if unique_id in self.curr_manifest.exposures:
514
554
  return self.curr_manifest.exposures[unique_id].name
515
555
  elif unique_id in self.base_manifest.exposures:
516
556
  return self.base_manifest.exposures[unique_id].name
517
- elif unique_id.startswith('semantic_model.'):
557
+ elif unique_id.startswith("semantic_model."):
518
558
  if unique_id in self.curr_manifest.semantic_models:
519
559
  return self.curr_manifest.semantic_models[unique_id].name
520
560
  elif unique_id in self.base_manifest.semantic_models:
@@ -529,14 +569,24 @@ class DbtAdapter(BaseAdapter):
529
569
  def get_manifest(self, base: bool):
530
570
  return self.curr_manifest if base is False else self.base_manifest
531
571
 
532
- def generate_sql(self, sql_template: str, base: bool = False, context=None, provided_manifest=None):
572
+ def generate_sql(
573
+ self,
574
+ sql_template: str,
575
+ base: bool = False,
576
+ context=None,
577
+ provided_manifest=None,
578
+ ):
533
579
  if context is None:
534
580
  context = {}
535
581
  manifest = provided_manifest if provided_manifest is not None else as_manifest(self.get_manifest(base))
536
582
  parser = SqlBlockParser(self.runtime_config, manifest, self.runtime_config)
537
583
 
538
- if dbt_version >= dbt_version.parse('v1.8'):
539
- from dbt_common.context import set_invocation_context, get_invocation_context
584
+ if dbt_version >= dbt_version.parse("v1.8"):
585
+ from dbt_common.context import (
586
+ get_invocation_context,
587
+ set_invocation_context,
588
+ )
589
+
540
590
  set_invocation_context({})
541
591
  get_invocation_context()._env = dict(os.environ)
542
592
 
@@ -544,21 +594,35 @@ class DbtAdapter(BaseAdapter):
544
594
  node = parser.parse_remote(sql_template, node_id)
545
595
  process_node(self.runtime_config, manifest, node)
546
596
 
547
- if dbt_version < dbt_version.parse('v1.8'):
597
+ if dbt_version < dbt_version.parse("v1.8"):
548
598
  compiler = self.adapter.get_compiler()
549
599
  compiler.compile_node(node, manifest, context)
550
600
  return node.compiled_code
551
601
  else:
552
- from dbt.context.providers import generate_runtime_model_context
553
602
  from dbt.clients import jinja
603
+ from dbt.context.providers import (
604
+ generate_runtime_macro_context,
605
+ generate_runtime_model_context,
606
+ )
607
+
608
+ # Set up macro resolver for dbt >= 1.8
609
+ macro_manifest = MacroManifest(manifest.macros)
610
+ self.adapter.set_macro_resolver(macro_manifest)
611
+ self.adapter.set_macro_context_generator(generate_runtime_macro_context)
612
+
554
613
  jinja_ctx = generate_runtime_model_context(node, self.runtime_config, manifest)
555
614
  jinja_ctx.update(context)
556
615
  compiled_code = jinja.get_rendered(sql_template, jinja_ctx, node)
557
616
  return compiled_code
558
617
 
559
- def execute(self, sql: str, auto_begin: bool = False, fetch: bool = False, limit: Optional[int] = None) -> Tuple[
560
- any, agate.Table]:
561
- if dbt_version < dbt_version.parse('v1.6'):
618
+ def execute(
619
+ self,
620
+ sql: str,
621
+ auto_begin: bool = False,
622
+ fetch: bool = False,
623
+ limit: Optional[int] = None,
624
+ ) -> Tuple[any, agate.Table]:
625
+ if dbt_version < dbt_version.parse("v1.6"):
562
626
  return self.adapter.execute(sql, auto_begin=auto_begin, fetch=fetch)
563
627
 
564
628
  return self.adapter.execute(sql, auto_begin=auto_begin, fetch=fetch, limit=limit)
@@ -569,7 +633,7 @@ class DbtAdapter(BaseAdapter):
569
633
 
570
634
  node_ids = nodes.keys()
571
635
  parent_map = {}
572
- for k, parents in manifest_dict['parent_map'].items():
636
+ for k, parents in manifest_dict["parent_map"].items():
573
637
  if k not in node_ids:
574
638
  continue
575
639
  parent_map[k] = [parent for parent in parents if parent in node_ids]
@@ -580,8 +644,8 @@ class DbtAdapter(BaseAdapter):
580
644
  manifest = self.curr_manifest if base is False else self.base_manifest
581
645
  manifest_dict = manifest.to_dict()
582
646
 
583
- if node_id in manifest_dict['parent_map']:
584
- return manifest_dict['parent_map'][node_id]
647
+ if node_id in manifest_dict["parent_map"]:
648
+ return manifest_dict["parent_map"][node_id]
585
649
 
586
650
  def get_lineage(self, base: Optional[bool] = False):
587
651
  manifest = self.curr_manifest if base is False else self.base_manifest
@@ -590,19 +654,21 @@ class DbtAdapter(BaseAdapter):
590
654
  return self.get_lineage_cached(base, cache_key)
591
655
 
592
656
  def get_lineage_diff(self) -> LineageDiff:
593
- cache_key = hash((
594
- id(self.base_manifest),
595
- id(self.base_catalog),
596
- id(self.curr_manifest),
597
- id(self.curr_catalog),
598
- ))
657
+ cache_key = hash(
658
+ (
659
+ id(self.base_manifest),
660
+ id(self.base_catalog),
661
+ id(self.curr_manifest),
662
+ id(self.curr_catalog),
663
+ )
664
+ )
599
665
  return self._get_lineage_diff_cached(cache_key)
600
666
 
601
667
  @lru_cache(maxsize=2)
602
668
  def get_lineage_cached(self, base: Optional[bool] = False, cache_key=0):
603
669
  if base is False:
604
- cll_tracker = CLLPerformanceTracking()
605
- cll_tracker.start_lineage()
670
+ perf_tracker = LineagePerfTracker()
671
+ perf_tracker.start_lineage()
606
672
 
607
673
  manifest = self.curr_manifest if base is False else self.base_manifest
608
674
  catalog = self.curr_catalog if base is False else self.base_catalog
@@ -614,48 +680,48 @@ class DbtAdapter(BaseAdapter):
614
680
 
615
681
  nodes = {}
616
682
 
617
- for node in manifest_dict['nodes'].values():
618
- unique_id = node['unique_id']
619
- resource_type = node['resource_type']
683
+ for node in manifest_dict["nodes"].values():
684
+ unique_id = node["unique_id"]
685
+ resource_type = node["resource_type"]
620
686
 
621
- if resource_type not in ['model', 'seed', 'exposure', 'snapshot']:
687
+ if resource_type not in ["model", "seed", "exposure", "snapshot"]:
622
688
  continue
623
689
 
624
690
  nodes[unique_id] = {
625
- 'id': node['unique_id'],
626
- 'name': node['name'],
627
- 'resource_type': node['resource_type'],
628
- 'package_name': node['package_name'],
629
- 'schema': node['schema'],
630
- 'config': node['config'],
631
- 'checksum': node['checksum'],
632
- 'raw_code': node['raw_code'],
691
+ "id": node["unique_id"],
692
+ "name": node["name"],
693
+ "resource_type": node["resource_type"],
694
+ "package_name": node["package_name"],
695
+ "schema": node["schema"],
696
+ "config": node["config"],
697
+ "checksum": node["checksum"],
698
+ "raw_code": node["raw_code"],
633
699
  }
634
700
 
635
701
  # List of <type>.<package_name>.<node_name>.<hash>
636
702
  # model.jaffle_shop.customer_segments
637
703
  # test.jaffle_shop.not_null_customers_customer_id.5c9bf9911d
638
704
  # test.jaffle_shop.unique_customers_customer_id.c5af1ff4b1
639
- child_map: List[str] = manifest_dict['child_map'][unique_id]
705
+ child_map: List[str] = manifest_dict["child_map"][unique_id]
640
706
  cols_not_null = []
641
707
  cols_unique = []
642
708
 
643
709
  for child in child_map:
644
- node_name = node['name']
645
- comps = child.split('.')
646
- if len(comps) < 2:
710
+ node_name = node["name"]
711
+ comps = child.split(".")
712
+ if len(comps) < MIN_DBT_NODE_COMPOSITION:
647
713
  # only happens in unittest
648
714
  continue
649
715
 
650
716
  child_type = comps[0]
651
717
  child_name = comps[2]
652
718
 
653
- not_null_prefix = f'not_null_{node_name}_'
654
- if child_type == 'test' and child_name.startswith(not_null_prefix):
655
- cols_not_null.append(child_name[len(not_null_prefix):])
656
- unique_prefix = f'unique_{node_name}_'
657
- if child_type == 'test' and child_name.startswith(unique_prefix):
658
- cols_unique.append(child_name[len(unique_prefix):])
719
+ not_null_prefix = f"not_null_{node_name}_"
720
+ if child_type == "test" and child_name.startswith(not_null_prefix):
721
+ cols_not_null.append(child_name[len(not_null_prefix) :])
722
+ unique_prefix = f"unique_{node_name}_"
723
+ if child_type == "test" and child_name.startswith(unique_prefix):
724
+ cols_unique.append(child_name[len(unique_prefix) :])
659
725
 
660
726
  if catalog is not None and unique_id in catalog.nodes:
661
727
  columns = {}
@@ -663,70 +729,68 @@ class DbtAdapter(BaseAdapter):
663
729
  for col_name, col_metadata in catalog.nodes[unique_id].columns.items():
664
730
  col = dict(name=col_name, type=col_metadata.type)
665
731
  if col_name in cols_not_null:
666
- col['not_null'] = True
732
+ col["not_null"] = True
667
733
  if col_name in cols_unique:
668
- col['unique'] = True
734
+ col["unique"] = True
669
735
  if not primary_key:
670
736
  primary_key = col_name
671
737
  columns[col_name] = col
672
- nodes[unique_id]['columns'] = columns
738
+ nodes[unique_id]["columns"] = columns
673
739
  if primary_key:
674
- nodes[unique_id]['primary_key'] = primary_key
740
+ nodes[unique_id]["primary_key"] = primary_key
675
741
 
676
- for source in manifest_dict['sources'].values():
677
- unique_id = source['unique_id']
742
+ for source in manifest_dict["sources"].values():
743
+ unique_id = source["unique_id"]
678
744
 
679
745
  nodes[unique_id] = {
680
- 'id': source['unique_id'],
681
- 'name': source['name'],
682
- 'resource_type': source['resource_type'],
683
- 'package_name': source['package_name'],
684
- 'config': source['config'],
746
+ "id": source["unique_id"],
747
+ "name": source["name"],
748
+ "source_name": source["source_name"],
749
+ "resource_type": source["resource_type"],
750
+ "package_name": source["package_name"],
751
+ "config": source["config"],
685
752
  }
686
753
 
687
754
  if catalog is not None and unique_id in catalog.sources:
688
- nodes[unique_id]['columns'] = {
689
- col_name: {
690
- 'name': col_name,
691
- 'type': col_metadata.type
692
- }
755
+ nodes[unique_id]["columns"] = {
756
+ col_name: {"name": col_name, "type": col_metadata.type}
693
757
  for col_name, col_metadata in catalog.sources[unique_id].columns.items()
694
758
  }
695
759
 
696
- for exposure in manifest_dict['exposures'].values():
697
- nodes[exposure['unique_id']] = {
698
- 'id': exposure['unique_id'],
699
- 'name': exposure['name'],
700
- 'resource_type': exposure['resource_type'],
701
- 'package_name': exposure['package_name'],
702
- 'config': exposure['config'],
760
+ for exposure in manifest_dict["exposures"].values():
761
+ nodes[exposure["unique_id"]] = {
762
+ "id": exposure["unique_id"],
763
+ "name": exposure["name"],
764
+ "resource_type": exposure["resource_type"],
765
+ "package_name": exposure["package_name"],
766
+ "config": exposure["config"],
703
767
  }
704
- for metric in manifest_dict['metrics'].values():
705
- nodes[metric['unique_id']] = {
706
- 'id': metric['unique_id'],
707
- 'name': metric['name'],
708
- 'resource_type': metric['resource_type'],
709
- 'package_name': metric['package_name'],
710
- 'config': metric['config'],
768
+ for metric in manifest_dict["metrics"].values():
769
+ nodes[metric["unique_id"]] = {
770
+ "id": metric["unique_id"],
771
+ "name": metric["name"],
772
+ "resource_type": metric["resource_type"],
773
+ "package_name": metric["package_name"],
774
+ "config": metric["config"],
711
775
  }
712
776
 
713
- if 'semantic_models' in manifest_dict:
714
- for semantic_models in manifest_dict['semantic_models'].values():
715
- nodes[semantic_models['unique_id']] = {
716
- 'id': semantic_models['unique_id'],
717
- 'name': semantic_models['name'],
718
- 'resource_type': semantic_models['resource_type'],
719
- 'package_name': semantic_models['package_name'],
720
- 'config': semantic_models['config'],
777
+ if "semantic_models" in manifest_dict:
778
+ for semantic_models in manifest_dict["semantic_models"].values():
779
+ nodes[semantic_models["unique_id"]] = {
780
+ "id": semantic_models["unique_id"],
781
+ "name": semantic_models["name"],
782
+ "resource_type": semantic_models["resource_type"],
783
+ "package_name": semantic_models["package_name"],
784
+ "config": semantic_models["config"],
721
785
  }
722
786
 
723
787
  parent_map = self.build_parent_map(nodes, base)
724
788
 
725
789
  if base is False:
726
- cll_tracker.end_lineage()
727
- cll_tracker.set_total_nodes(len(nodes))
728
- log_performance('model lineage', cll_tracker.to_dict())
729
- cll_tracker.reset()
790
+ perf_tracker.end_lineage()
791
+ perf_tracker.set_total_nodes(len(nodes))
792
+ log_performance("model lineage", perf_tracker.to_dict())
793
+ perf_tracker.reset()
730
794
 
731
795
  return dict(
732
796
  parent_map=parent_map,
@@ -739,18 +803,43 @@ class DbtAdapter(BaseAdapter):
739
803
  def _get_lineage_diff_cached(self, cache_key) -> LineageDiff:
740
804
  base = self.get_lineage(base=True)
741
805
  current = self.get_lineage(base=False)
742
- keys = {
743
- *base.get('nodes', {}).keys(),
744
- *current.get('nodes', {}).keys()
745
- }
746
806
 
747
- # Start to diff
748
- perf_tracking = BreakingPerformanceTracking()
749
- perf_tracking.start_lineage_diff()
807
+ modified_nodes = self.select_nodes(select="state:modified")
808
+ diff = {}
809
+ for node_id in modified_nodes:
810
+ base_node = base.get("nodes", {}).get(node_id)
811
+ curr_node = current.get("nodes", {}).get(node_id)
812
+ if base_node and curr_node:
813
+ diff[node_id] = NodeDiff(change_status="modified")
814
+ elif base_node:
815
+ diff[node_id] = NodeDiff(change_status="removed")
816
+ elif curr_node:
817
+ diff[node_id] = NodeDiff(change_status="added")
818
+
819
+ return LineageDiff(
820
+ base=base,
821
+ current=current,
822
+ diff=diff,
823
+ )
824
+
825
+ @lru_cache(maxsize=128)
826
+ def get_change_analysis_cached(self, node_id: str):
827
+ breaking_perf_tracker = BreakingPerformanceTracking()
828
+ lineage_diff = self.get_lineage_diff()
829
+ diff = lineage_diff.diff
830
+
831
+ if node_id not in diff or diff[node_id].change_status != "modified":
832
+ return diff.get(node_id)
833
+
834
+ breaking_perf_tracker.increment_modified_nodes()
835
+ breaking_perf_tracker.start_lineage_diff()
836
+
837
+ base = lineage_diff.base
838
+ current = lineage_diff.current
750
839
 
751
840
  base_manifest = as_manifest(self.get_manifest(True))
752
841
  curr_manifest = as_manifest(self.get_manifest(False))
753
- perf_tracking.record_checkpoint('manifest')
842
+ breaking_perf_tracker.record_checkpoint("manifest")
754
843
 
755
844
  def ref_func(*args):
756
845
  if len(args) == 1:
@@ -762,7 +851,7 @@ class DbtAdapter(BaseAdapter):
762
851
  return node
763
852
 
764
853
  def source_func(source_name, table_name):
765
- source_name = source_name.replace('-', '_')
854
+ source_name = source_name.replace("-", "_")
766
855
  return f"__{source_name}__{table_name}"
767
856
 
768
857
  jinja_context = dict(
@@ -770,305 +859,534 @@ class DbtAdapter(BaseAdapter):
770
859
  source=source_func,
771
860
  )
772
861
 
773
- # for each node, compare the base and current lineage
774
- diff = {}
775
- for key in keys:
776
- base_node = base.get('nodes', {}).get(key)
777
- curr_node = current.get('nodes', {}).get(key)
778
- if base_node and curr_node:
779
- base_checksum = base_node.get('checksum', {}).get('checksum')
780
- curr_checksum = curr_node.get('checksum', {}).get('checksum')
781
- change = None
782
- if base_checksum is None or curr_checksum is None or base_checksum == curr_checksum:
783
- continue
784
-
785
- if curr_node.get('resource_type') == 'model':
786
- try:
787
- perf_tracking.increment_modified_nodes()
788
-
789
- def _get_schema(lineage):
790
- schema = {}
791
- nodes = lineage['nodes']
792
- parent_list = lineage['parent_map'].get(key, [])
793
- for parent_id in parent_list:
794
- parent_node = nodes.get(parent_id)
795
- if parent_node is None:
796
- continue
797
- columns = parent_node.get('columns') or {}
798
- name = parent_node.get('name')
799
- if parent_node.get('resource_type') == 'source':
800
- parts = parent_id.split('.')
801
- source = parts[2]
802
- table = parts[3]
803
- source = source.replace('-', '_')
804
- name = f"__{source}__{table}"
805
- schema[name] = {
806
- name: column.get('type') for name, column in columns.items()
807
- }
808
- return schema
809
-
810
- base_sql = self.generate_sql(
811
- base_node.get('raw_code'),
812
- context=jinja_context,
813
- provided_manifest=base_manifest
814
- )
815
- curr_sql = self.generate_sql(
816
- curr_node.get('raw_code'),
817
- context=jinja_context,
818
- provided_manifest=curr_manifest
819
- )
820
- base_schema = _get_schema(base)
821
- curr_schema = _get_schema(current)
822
- dialect = self.adapter.connections.TYPE
823
- if curr_manifest.metadata.adapter_type is not None:
824
- dialect = curr_manifest.metadata.adapter_type
825
-
826
- change = parse_change_category(
827
- base_sql,
828
- curr_sql,
829
- old_schema=base_schema,
830
- new_schema=curr_schema,
831
- dialect=dialect,
832
- perf_tracking=perf_tracking,
833
- )
834
-
835
- # Make sure that the case of the column names are the same
836
- changed_columns = {
837
- column.lower(): change_status
838
- for column, change_status in (change.columns or {}).items()
839
- }
840
- changed_columns_names = set(changed_columns)
841
- changed_columns_final = {}
842
-
843
- base_columns = base_node.get('columns') or {}
844
- curr_columns = curr_node.get('columns') or {}
845
- columns_names = set(base_columns) | set(curr_columns)
846
-
847
- for column_name in columns_names:
848
- if column_name.lower() in changed_columns_names:
849
- changed_columns_final[column_name] = changed_columns[column_name.lower()]
850
-
851
- change.columns = changed_columns_final
852
- except Exception:
853
- change = NodeChange(category='unknown')
854
-
855
- diff[key] = NodeDiff(change_status='modified', change=change)
856
- elif base_node:
857
- diff[key] = NodeDiff(change_status='removed')
858
- elif curr_node:
859
- diff[key] = NodeDiff(change_status='added')
860
-
861
- perf_tracking.end_lineage_diff()
862
- log_performance('model lineage diff', perf_tracking.to_dict())
862
+ base_node = base.get("nodes", {}).get(node_id)
863
+ curr_node = current.get("nodes", {}).get(node_id)
864
+ change = NodeChange(category="unknown")
865
+ if (
866
+ curr_node.get("resource_type") in ["model", "snapshot"]
867
+ and curr_node.get("raw_code") is not None
868
+ and base_node.get("raw_code") is not None
869
+ ):
870
+ try:
871
+
872
+ def _get_schema(lineage):
873
+ schema = {}
874
+ nodes = lineage["nodes"]
875
+ parent_list = lineage["parent_map"].get(node_id, [])
876
+ for parent_id in parent_list:
877
+ parent_node = nodes.get(parent_id)
878
+ if parent_node is None:
879
+ continue
880
+ columns = parent_node.get("columns") or {}
881
+ name = parent_node.get("name")
882
+ if parent_node.get("resource_type") == "source":
883
+ parts = parent_id.split(".")
884
+ source = parts[2]
885
+ table = parts[3]
886
+ source = source.replace("-", "_")
887
+ name = f"__{source}__{table}"
888
+ schema[name] = {name: column.get("type") for name, column in columns.items()}
889
+ return schema
890
+
891
+ base_sql = self.generate_sql(
892
+ base_node.get("raw_code"),
893
+ context=jinja_context,
894
+ provided_manifest=base_manifest,
895
+ )
896
+ curr_sql = self.generate_sql(
897
+ curr_node.get("raw_code"),
898
+ context=jinja_context,
899
+ provided_manifest=curr_manifest,
900
+ )
901
+ base_schema = _get_schema(base)
902
+ curr_schema = _get_schema(current)
903
+ dialect = self.adapter.connections.TYPE
904
+ if curr_manifest.metadata.adapter_type is not None:
905
+ dialect = curr_manifest.metadata.adapter_type
906
+
907
+ change = parse_change_category(
908
+ base_sql,
909
+ curr_sql,
910
+ old_schema=base_schema,
911
+ new_schema=curr_schema,
912
+ dialect=dialect,
913
+ perf_tracking=breaking_perf_tracker,
914
+ )
863
915
 
864
- return LineageDiff(
865
- base=base,
866
- current=current,
867
- diff=diff,
916
+ # Make sure that the case of the column names are the same
917
+ changed_columns = {
918
+ column.lower(): change_status for column, change_status in (change.columns or {}).items()
919
+ }
920
+ changed_columns_names = set(changed_columns)
921
+ changed_columns_final = {}
922
+
923
+ base_columns = base_node.get("columns") or {}
924
+ curr_columns = curr_node.get("columns") or {}
925
+ columns_names = set(base_columns) | set(curr_columns)
926
+
927
+ for column_name in columns_names:
928
+ if column_name.lower() in changed_columns_names:
929
+ changed_columns_final[column_name] = changed_columns[column_name.lower()]
930
+
931
+ change.columns = changed_columns_final
932
+ except Exception:
933
+ # TODO: telemetry
934
+ pass
935
+
936
+ breaking_perf_tracker.end_lineage_diff()
937
+ log_performance("change analysis per node", breaking_perf_tracker.to_dict())
938
+ breaking_perf_tracker.reset()
939
+ node_diff = diff.get(node_id)
940
+ node_diff.change = change
941
+ return node_diff
942
+
943
+ def get_cll(
944
+ self,
945
+ node_id: Optional[str] = None,
946
+ column: Optional[str] = None,
947
+ change_analysis: Optional[bool] = False,
948
+ no_cll: Optional[bool] = False,
949
+ no_upstream: Optional[bool] = False,
950
+ no_downstream: Optional[bool] = False,
951
+ no_filter: Optional[bool] = False,
952
+ ) -> CllData:
953
+ cll_tracker = LineagePerfTracker()
954
+ cll_tracker.set_params(
955
+ has_node=node_id is not None,
956
+ has_column=column is not None,
957
+ change_analysis=change_analysis,
958
+ no_cll=no_cll,
959
+ no_upstream=no_upstream,
960
+ no_downstream=no_downstream,
868
961
  )
869
-
870
- def get_cll_by_node_id(self, node_id: str, base: Optional[bool] = False):
871
- cll_tracker = CLLPerformanceTracking()
872
962
  cll_tracker.start_column_lineage()
873
963
 
874
- manifest = self.curr_manifest if base is False else self.base_manifest
964
+ manifest = self.curr_manifest
875
965
  manifest_dict = manifest.to_dict()
876
966
 
877
- parent_ids = find_upstream(node_id, manifest_dict.get('parent_map'))
878
- child_ids = find_downstream(node_id, manifest_dict.get('child_map'))
879
- cll_node_ids = parent_ids.union(child_ids)
880
- cll_node_ids.add(node_id)
967
+ # Find related model nodes
968
+ if node_id is not None:
969
+ cll_node_ids = {node_id}
970
+ else:
971
+ lineage_diff = self.get_lineage_diff()
972
+ cll_node_ids = set(lineage_diff.diff.keys())
973
+
974
+ cll_tracker.set_init_nodes(len(cll_node_ids))
881
975
 
882
- node_manifest = self.get_lineage_nodes_metadata(base=base)
883
976
  nodes = {}
884
- for node_id in cll_node_ids:
885
- if node_id not in node_manifest:
886
- continue
887
- nodes[node_id] = self.get_cll_cached(node_id, base=base)
977
+ columns = {}
978
+ parent_map = {}
979
+ child_map = {}
980
+
981
+ if not no_upstream:
982
+ cll_node_ids = cll_node_ids.union(find_upstream(cll_node_ids, manifest_dict.get("parent_map")))
983
+ if not no_downstream:
984
+ cll_node_ids = cll_node_ids.union(find_downstream(cll_node_ids, manifest_dict.get("child_map")))
985
+
986
+ if not no_cll:
987
+ allowed_related_nodes = set()
988
+ for key in ["sources", "nodes", "exposures", "metrics"]:
989
+ attr = getattr(manifest, key)
990
+ allowed_related_nodes.update(set(attr.keys()))
991
+ if hasattr(manifest, "semantic_models"):
992
+ attr = getattr(manifest, "semantic_models")
993
+ allowed_related_nodes.update(set(attr.keys()))
994
+ for cll_node_id in cll_node_ids:
995
+ if cll_node_id not in allowed_related_nodes:
996
+ continue
997
+ cll_data_one = deepcopy(self.get_cll_cached(cll_node_id, base=False))
998
+ cll_tracker.increment_cll_nodes()
999
+ if cll_data_one is None:
1000
+ continue
1001
+
1002
+ nodes[cll_node_id] = cll_data_one.nodes.get(cll_node_id)
1003
+ node_diff = None
1004
+ if change_analysis:
1005
+ node_diff = self.get_change_analysis_cached(cll_node_id)
1006
+ cll_tracker.increment_change_analysis_nodes()
1007
+ if node_diff is not None:
1008
+ nodes[cll_node_id].change_status = node_diff.change_status
1009
+ if node_diff.change is not None:
1010
+ nodes[cll_node_id].change_category = node_diff.change.category
1011
+ for c_id, c in cll_data_one.columns.items():
1012
+ columns[c_id] = c
1013
+ if node_diff is not None:
1014
+ if node_diff.change_status == "added":
1015
+ c.change_status = "added"
1016
+ elif node_diff.change_status == "removed":
1017
+ c.change_status = "removed"
1018
+ elif node_diff.change is not None and node_diff.change.columns is not None:
1019
+ column_diff = node_diff.change.columns.get(c.name)
1020
+ if column_diff:
1021
+ c.change_status = column_diff
1022
+
1023
+ for p_id, parents in cll_data_one.parent_map.items():
1024
+ parent_map[p_id] = parents
1025
+ else:
1026
+ for cll_node_id in cll_node_ids:
1027
+ cll_node = None
1028
+ cll_node_columns: Dict[str, CllColumn] = {}
1029
+
1030
+ if cll_node_id in manifest.sources:
1031
+ cll_node = CllNode.build_cll_node(manifest, "sources", cll_node_id)
1032
+ if self.curr_catalog and cll_node_id in self.curr_catalog.sources:
1033
+ cll_node_columns = {
1034
+ column.name: CllColumn(
1035
+ id=f"{cll_node_id}_{column.name}",
1036
+ table_id=cll_node_id,
1037
+ name=column.name,
1038
+ type=column.type,
1039
+ )
1040
+ for column in self.curr_catalog.sources[cll_node_id].columns.values()
1041
+ }
1042
+ elif cll_node_id in manifest.nodes:
1043
+ cll_node = CllNode.build_cll_node(manifest, "nodes", cll_node_id)
1044
+ if self.curr_catalog and cll_node_id in self.curr_catalog.nodes:
1045
+ cll_node_columns = {
1046
+ column.name: CllColumn(
1047
+ id=f"{cll_node_id}_{column.name}",
1048
+ table_id=cll_node_id,
1049
+ name=column.name,
1050
+ type=column.type,
1051
+ )
1052
+ for column in self.curr_catalog.nodes[cll_node_id].columns.values()
1053
+ }
1054
+ elif cll_node_id in manifest.exposures:
1055
+ cll_node = CllNode.build_cll_node(manifest, "exposures", cll_node_id)
1056
+ elif hasattr(manifest, "semantic_models") and cll_node_id in manifest.semantic_models:
1057
+ cll_node = CllNode.build_cll_node(manifest, "semantic_models", cll_node_id)
1058
+ elif cll_node_id in manifest.metrics:
1059
+ cll_node = CllNode.build_cll_node(manifest, "metrics", cll_node_id)
1060
+
1061
+ if not cll_node:
1062
+ continue
1063
+ nodes[cll_node_id] = cll_node
1064
+
1065
+ node_diff = None
1066
+ if change_analysis:
1067
+ node_diff = self.get_change_analysis_cached(cll_node_id)
1068
+ cll_tracker.increment_change_analysis_nodes()
1069
+ if node_diff is not None:
1070
+ cll_node.change_status = node_diff.change_status
1071
+ if node_diff.change is not None:
1072
+ cll_node.change_category = node_diff.change.category
1073
+ for c, cll_column in cll_node_columns.items():
1074
+ cll_node.columns[c] = cll_column
1075
+ columns[cll_column.id] = cll_column
1076
+ if node_diff.change.columns and c in node_diff.change.columns:
1077
+ cll_column.change_status = node_diff.change.columns[c]
1078
+
1079
+ parent_map[cll_node_id] = manifest.parent_map.get(cll_node_id, [])
1080
+
1081
+ # build the child map
1082
+ for parent_id, parents in parent_map.items():
1083
+ for parent in parents:
1084
+ if parent not in child_map:
1085
+ child_map[parent] = set()
1086
+ child_map[parent].add(parent_id)
1087
+
1088
+ # Find the anchor nodes
1089
+ anchor_node_ids = set()
1090
+ extra_node_ids = set()
1091
+ if node_id is None and column is None:
1092
+ if change_analysis:
1093
+ # If change analysis is requested, we need to find the nodes that have changes
1094
+ lineage_diff = self.get_lineage_diff()
1095
+ for nid, nd in lineage_diff.diff.items():
1096
+ if nd.change_status == "added":
1097
+ anchor_node_ids.add(nid)
1098
+ n = lineage_diff.current["nodes"].get(nid)
1099
+ n_columns = n.get("columns", {})
1100
+ for c in n_columns:
1101
+ anchor_node_ids.add(build_column_key(nid, c))
1102
+ continue
1103
+ if nd.change_status == "removed":
1104
+ extra_node_ids.add(nid)
1105
+ continue
1106
+
1107
+ node_diff = self.get_change_analysis_cached(nid)
1108
+ if node_diff is not None and node_diff.change is not None:
1109
+ extra_node_ids.add(nid)
1110
+ if no_cll:
1111
+ if node_diff.change.category in ["breaking", "partial_breaking", "unknown"]:
1112
+ anchor_node_ids.add(nid)
1113
+ else:
1114
+ if node_diff.change.category in ["breaking", "unknown"]:
1115
+ anchor_node_ids.add(nid)
1116
+ if node_diff.change.columns is not None:
1117
+ for column_name in node_diff.change.columns:
1118
+ anchor_node_ids.add(f"{nid}_{column_name}")
1119
+ else:
1120
+ lineage_diff = self.get_lineage_diff()
1121
+ anchor_node_ids = lineage_diff.diff.keys()
1122
+ elif node_id is not None and column is None:
1123
+ if change_analysis:
1124
+ # If change analysis is requested, we need to find the nodes that have changes
1125
+ node_diff = self.get_change_analysis_cached(node_id)
1126
+ if node_diff is not None and node_diff.change is not None:
1127
+ extra_node_ids.add(node_id)
1128
+ if no_cll:
1129
+ if node_diff.change.category in ["breaking", "partial_breaking", "unknown"]:
1130
+ anchor_node_ids.add(node_id)
1131
+ else:
1132
+ if node_diff.change.category in ["breaking", "unknown"]:
1133
+ anchor_node_ids.add(node_id)
1134
+ if node_diff.change.columns is not None:
1135
+ for column_name in node_diff.change.columns:
1136
+ anchor_node_ids.add(f"{node_id}_{column_name}")
1137
+ else:
1138
+ anchor_node_ids.add(node_id)
1139
+ else:
1140
+ anchor_node_ids.add(node_id)
1141
+ if not no_cll:
1142
+ node = nodes.get(node_id)
1143
+ if node:
1144
+ for column_name in node.columns:
1145
+ column_key = build_column_key(node_id, column_name)
1146
+ anchor_node_ids.add(column_key)
1147
+ else:
1148
+ anchor_node_ids.add(f"{node_id}_{column}")
1149
+
1150
+ cll_tracker.set_anchor_nodes(len(anchor_node_ids))
1151
+ result_node_ids = set(anchor_node_ids)
1152
+ if not no_upstream:
1153
+ result_node_ids = result_node_ids.union(find_upstream(anchor_node_ids, parent_map))
1154
+ if not no_downstream:
1155
+ result_node_ids = result_node_ids.union(find_downstream(anchor_node_ids, child_map))
1156
+
1157
+ # Filter the nodes and columns based on the anchor nodes
1158
+ if not no_filter:
1159
+ nodes = {k: v for k, v in nodes.items() if k in result_node_ids or k in extra_node_ids}
1160
+ columns = {k: v for k, v in columns.items() if k in result_node_ids or k in extra_node_ids}
1161
+
1162
+ for node in nodes.values():
1163
+ node.columns = {
1164
+ k: v for k, v in node.columns.items() if v.id in result_node_ids or v.id in extra_node_ids
1165
+ }
1166
+
1167
+ if change_analysis:
1168
+ node.impacted = node.id in result_node_ids
1169
+
1170
+ parent_map, child_map = filter_dependency_maps(parent_map, child_map, result_node_ids)
888
1171
 
889
1172
  cll_tracker.end_column_lineage()
890
- cll_tracker.set_total_nodes(len(nodes))
891
- log_performance('column level lineage', cll_tracker.to_dict())
1173
+ cll_tracker.set_total_nodes(len(nodes) + len(columns))
1174
+ log_performance("column level lineage", cll_tracker.to_dict())
892
1175
  cll_tracker.reset()
893
1176
 
894
- return dict(nodes=nodes)
1177
+ return CllData(
1178
+ nodes=nodes,
1179
+ columns=columns,
1180
+ parent_map=parent_map,
1181
+ child_map=child_map,
1182
+ )
895
1183
 
896
1184
  @lru_cache(maxsize=128)
897
- def get_cll_cached(self, node_id: str, base: Optional[bool] = False):
898
- nodes = self.get_lineage_nodes_metadata(base=base)
1185
+ def get_cll_cached(self, node_id: str, base: Optional[bool] = False) -> Optional[CllData]:
1186
+ cll_tracker = CLLPerformanceTracking()
899
1187
 
900
- manifest = self.curr_manifest if base is False else self.base_manifest
901
- manifest_dict = manifest.to_dict()
902
- parent_list = []
903
- if node_id in manifest_dict['parent_map']:
904
- parent_list = manifest_dict['parent_map'][node_id]
905
-
906
- node = deepcopy(nodes[node_id])
907
- self.append_column_lineage(node, parent_list, base)
908
- return node
909
-
910
- def append_column_lineage(self, node: Dict, parent_list: List, base: Optional[bool] = False):
911
- def _apply_all_columns(node, trans_type, depends_on):
912
- for col in node.get('columns', {}).values():
913
- col['transformation_type'] = trans_type
914
- col['depends_on'] = depends_on
915
-
916
- def _depend_node_to_id(column_lineage, nodes):
917
- for cl in column_lineage.values():
918
- for depend_on in cl.depends_on:
919
- if depend_on.node.startswith('__'):
920
- for n in nodes.values():
921
- if n.get('resource_type') != 'source':
922
- continue
923
- # __source__table -> source.table
924
- source_table = depend_on.node.lstrip("_").replace("__", ".", 1).lower()
925
- if source_table in n.get('id'):
926
- depend_on.node = n.get('id')
927
- break
928
- else:
929
- for n in nodes.values():
930
- if n.get('name') == depend_on.node.lower():
931
- depend_on.node = n.get('id')
932
- break
1188
+ node, parent_list = self.get_cll_node(node_id, base=base)
1189
+ if node is None:
1190
+ return None
1191
+
1192
+ cll_tracker.set_total_nodes(1)
1193
+ cll_tracker.start_column_lineage()
1194
+
1195
+ def _apply_all_columns(node: CllNode, transformation_type):
1196
+ cll_data = CllData()
1197
+ cll_data.nodes[node.id] = node
1198
+ cll_data.parent_map[node.id] = set(parent_list)
1199
+ for col in node.columns.values():
1200
+ column_id = f"{node.id}_{col.name}"
1201
+ col.transformation_type = transformation_type
1202
+ cll_data.columns[column_id] = col
1203
+ cll_data.parent_map[column_id] = set()
1204
+ return cll_data
933
1205
 
934
- cll_tracker = CLLPerformanceTracking()
935
- nodes = self.get_lineage_nodes_metadata(base=base)
936
1206
  manifest = as_manifest(self.get_manifest(base))
937
- resource_type = node.get('resource_type')
938
- if resource_type not in {'model', 'seed', 'source', 'snapshot'}:
939
- return
1207
+ catalog = self.curr_catalog if base is False else self.base_catalog
1208
+ resource_type = node.resource_type
1209
+ if resource_type not in {"model", "seed", "source", "snapshot"}:
1210
+ return _apply_all_columns(node, "unknown")
940
1211
 
941
- if resource_type == 'source' or resource_type == 'seed':
942
- _apply_all_columns(node, 'source', [])
943
- return
1212
+ if resource_type == "source" or resource_type == "seed":
1213
+ return _apply_all_columns(node, "source")
944
1214
 
945
- if node.get('raw_code') is None or self.is_python_model(node.get('id'), base=base):
946
- _apply_all_columns(node, 'unknown', [])
947
- return
1215
+ if node.raw_code is None or self.is_python_model(node.id, base=base):
1216
+ return _apply_all_columns(node, "unknown")
948
1217
 
949
- # dbt <= 1.8, MetricFlow expects the time spine table to be named metricflow_time_spine
950
- if node.get('name') == 'metricflow_time_spine':
951
- _apply_all_columns(node, 'source', [])
952
- return
1218
+ if node.name == "metricflow_time_spine":
1219
+ return _apply_all_columns(node, "source")
953
1220
 
954
- if not node.get('columns', {}):
955
- # no catalog
956
- return
1221
+ if not node.columns:
1222
+ return _apply_all_columns(node, "unknown")
1223
+
1224
+ table_id_map = {}
957
1225
 
958
1226
  def ref_func(*args):
1227
+ node_name: str = None
1228
+ project_or_package: str = None
1229
+
959
1230
  if len(args) == 1:
960
- node = args[0]
961
- elif len(args) > 1:
962
- node = args[1]
1231
+ node_name = args[0]
963
1232
  else:
964
- return None
965
- return node
1233
+ project_or_package = args[0]
1234
+ node_name = args[1]
966
1235
 
967
- def source_func(source_name, table_name):
968
- return f"__{source_name}__{table_name}"
1236
+ for key, n in manifest.nodes.items():
1237
+ if n.name != node_name:
1238
+ continue
1239
+ if project_or_package is not None and n.package_name != project_or_package:
1240
+ continue
1241
+
1242
+ # replace id "." to "_"
1243
+ unique_id = n.unique_id
1244
+ table_name = unique_id.replace(".", "_")
1245
+ table_id_map[table_name.lower()] = unique_id
1246
+ return table_name
1247
+
1248
+ raise ValueError(f"Cannot find node {node_name} in the manifest")
1249
+
1250
+ def source_func(source_name, name):
1251
+ for key, n in manifest.sources.items():
1252
+ if n.source_name != source_name:
1253
+ continue
1254
+ if n.name != name:
1255
+ continue
969
1256
 
970
- raw_code = node.get('raw_code')
1257
+ # replace id "." to "_"
1258
+ unique_id = n.unique_id
1259
+ table_name = unique_id.replace(".", "_")
1260
+ table_id_map[table_name.lower()] = unique_id
1261
+ return table_name
1262
+
1263
+ raise ValueError(f"Cannot find source {source_name}.{name} in the manifest")
1264
+
1265
+ raw_code = node.raw_code
971
1266
  jinja_context = dict(
972
1267
  ref=ref_func,
973
1268
  source=source_func,
974
1269
  )
975
1270
 
976
1271
  schema = {}
977
- for parent_id in parent_list:
978
- parent_node = nodes.get(parent_id)
979
- if parent_node is None:
980
- continue
981
- columns = parent_node.get('columns') or {}
982
- name = parent_node.get('name')
983
- if parent_node.get('resource_type') == 'source':
984
- parts = parent_id.split('.')
985
- source = parts[2]
986
- table = parts[3]
987
- name = f"__{source}__{table}"
988
- schema[name] = {
989
- name: column.get('type') for name, column in columns.items()
990
- }
1272
+ if catalog is not None:
1273
+ for parent_id in parent_list:
1274
+ table_name = parent_id.replace(".", "_")
1275
+ columns = {}
1276
+ if parent_id in catalog.nodes:
1277
+ for col_name, col_metadata in catalog.nodes[parent_id].columns.items():
1278
+ columns[col_name] = col_metadata.type
1279
+ if parent_id in catalog.sources:
1280
+ for col_name, col_metadata in catalog.sources[parent_id].columns.items():
1281
+ columns[col_name] = col_metadata.type
1282
+ schema[table_name] = columns
991
1283
 
992
1284
  try:
993
- # provide a manifest to speedup and not pollute the manifest
994
1285
  compiled_sql = self.generate_sql(raw_code, base=base, context=jinja_context, provided_manifest=manifest)
995
1286
  dialect = self.adapter.type()
996
- # find adapter type from the manifest, otherwise we use the adapter type from the adapter
997
1287
  if self.get_manifest(base).metadata.adapter_type is not None:
998
1288
  dialect = self.get_manifest(base).metadata.adapter_type
999
- column_lineage = cll(compiled_sql, schema=schema, dialect=dialect)
1289
+ m2c, c2c_map = cll(compiled_sql, schema=schema, dialect=dialect)
1000
1290
  except RecceException:
1001
- # TODO: provide parsing error message if needed
1002
- _apply_all_columns(node, 'unknown', [])
1003
1291
  cll_tracker.increment_sqlglot_error_nodes()
1004
- return
1292
+ return _apply_all_columns(node, "unknown")
1005
1293
  except Exception:
1006
- _apply_all_columns(node, 'unknown', [])
1007
1294
  cll_tracker.increment_other_error_nodes()
1008
- return
1009
-
1010
- _depend_node_to_id(column_lineage, nodes)
1295
+ return _apply_all_columns(node, "unknown")
1296
+
1297
+ # Add cll dependency to the node.
1298
+ cll_data = CllData()
1299
+ cll_data.nodes[node.id] = node
1300
+ cll_data.columns = {f"{node.id}_{col.name}": col for col in node.columns.values()}
1301
+
1302
+ # parent map for node
1303
+ depends_on = set(parent_list)
1304
+ for d in m2c:
1305
+ parent_key = f"{table_id_map[d.node.lower()]}_{d.column}"
1306
+ depends_on.add(parent_key)
1307
+ cll_data.parent_map[node_id] = depends_on
1308
+
1309
+ # parent map for columns
1310
+ for name, column in node.columns.items():
1311
+ depends_on = set()
1312
+ column_id = f"{node.id}_{name}"
1313
+ if name in c2c_map:
1314
+ for d in c2c_map[name].depends_on:
1315
+ parent_key = f"{table_id_map[d.node.lower()]}_{d.column}"
1316
+ depends_on.add(parent_key)
1317
+ column.transformation_type = c2c_map[name].transformation_type
1318
+ cll_data.parent_map[column_id] = set(depends_on)
1011
1319
 
1012
- for name, column in node.get('columns', {}).items():
1013
- if name in column_lineage:
1014
- column['depends_on'] = column_lineage[name].depends_on
1015
- column['transformation_type'] = column_lineage[name].type
1320
+ cll_tracker.end_column_lineage()
1321
+ log_performance("column level lineage per node", cll_tracker.to_dict())
1322
+ cll_tracker.reset()
1323
+ return cll_data
1016
1324
 
1017
- @lru_cache(maxsize=2)
1018
- def get_lineage_nodes_metadata(self, base: Optional[bool] = False):
1325
+ def get_cll_node(self, node_id: str, base: Optional[bool] = False) -> Tuple[Optional[CllNode], list[str]]:
1019
1326
  manifest = self.curr_manifest if base is False else self.base_manifest
1020
1327
  catalog = self.curr_catalog if base is False else self.base_catalog
1021
- manifest_dict = manifest.to_dict()
1328
+ parent_list = []
1329
+ node = None
1022
1330
 
1023
- nodes = {}
1024
- for node in manifest_dict['nodes'].values():
1025
- unique_id = node['unique_id']
1026
- resource_type = node['resource_type']
1331
+ # model, seed, snapshot
1332
+ if node_id in manifest.nodes:
1333
+ found = manifest.nodes[node_id]
1334
+ unique_id = found.unique_id
1335
+ node = CllNode.build_cll_node(manifest, "nodes", node_id)
1336
+ if hasattr(found.depends_on, "nodes"):
1337
+ parent_list = found.depends_on.nodes
1027
1338
 
1028
- if resource_type not in ['model', 'seed', 'exposure', 'snapshot']:
1029
- continue
1339
+ if catalog is not None and node is not None and unique_id in catalog.nodes:
1340
+ columns = {}
1341
+ for col_name, col_metadata in catalog.nodes[unique_id].columns.items():
1342
+ column_id = f"{unique_id}_{col_name}"
1343
+ col = CllColumn(id=column_id, name=col_name, table_id=unique_id, type=col_metadata.type)
1344
+ columns[col_name] = col
1345
+ node.columns = columns
1030
1346
 
1031
- nodes[unique_id] = {
1032
- 'id': node['unique_id'],
1033
- 'name': node['name'],
1034
- 'resource_type': node['resource_type'],
1035
- 'raw_code': node['raw_code'],
1036
- }
1347
+ # source
1348
+ if node_id in manifest.sources:
1349
+ found = manifest.sources[node_id]
1350
+ unique_id = found.unique_id
1351
+ node = CllNode.build_cll_node(manifest, "sources", node_id)
1352
+ parent_list = []
1037
1353
 
1038
- if catalog is not None and unique_id in catalog.nodes:
1354
+ if catalog is not None and node is not None and unique_id in catalog.sources:
1039
1355
  columns = {}
1040
- for col_name, col_metadata in catalog.nodes[unique_id].columns.items():
1041
- col = dict(name=col_name, type=col_metadata.type)
1356
+ for col_name, col_metadata in catalog.sources[unique_id].columns.items():
1357
+ column_id = f"{unique_id}_{col_name}"
1358
+ col = CllColumn(id=column_id, name=col_name, table_id=unique_id, type=col_metadata.type)
1042
1359
  columns[col_name] = col
1043
- nodes[unique_id]['columns'] = columns
1360
+ node.columns = columns
1044
1361
 
1045
- for source in manifest_dict['sources'].values():
1046
- unique_id = source['unique_id']
1362
+ # exposure
1363
+ if node_id in manifest.exposures:
1364
+ found = manifest.exposures[node_id]
1365
+ node = CllNode.build_cll_node(manifest, "exposures", node_id)
1366
+ if hasattr(found.depends_on, "nodes"):
1367
+ parent_list = found.depends_on.nodes
1047
1368
 
1048
- nodes[unique_id] = {
1049
- 'id': source['unique_id'],
1050
- 'name': source['name'],
1051
- 'resource_type': source['resource_type'],
1052
- }
1369
+ if hasattr(manifest, "semantic_models") and node_id in manifest.semantic_models:
1370
+ found = manifest.semantic_models[node_id]
1371
+ node = CllNode.build_cll_node(manifest, "semantic_models", node_id)
1372
+ if hasattr(found.depends_on, "nodes"):
1373
+ parent_list = found.depends_on.nodes
1053
1374
 
1054
- if catalog is not None and unique_id in catalog.sources:
1055
- nodes[unique_id]['columns'] = {
1056
- col_name: {
1057
- 'name': col_name,
1058
- 'type': col_metadata.type
1059
- }
1060
- for col_name, col_metadata in catalog.sources[unique_id].columns.items()
1061
- }
1375
+ if node_id in manifest.metrics:
1376
+ found = manifest.metrics[node_id]
1377
+ node = CllNode.build_cll_node(manifest, "metrics", node_id)
1378
+ if hasattr(found.depends_on, "nodes"):
1379
+ parent_list = found.depends_on.nodes
1062
1380
 
1063
- return nodes
1381
+ return node, parent_list
1064
1382
 
1065
1383
  def get_manifests_by_id(self, unique_id: str):
1066
1384
  curr_manifest = self.get_manifest(base=False)
1067
1385
  base_manifest = self.get_manifest(base=True)
1068
1386
  if unique_id in curr_manifest.nodes.keys() or unique_id in base_manifest.nodes.keys():
1069
1387
  return {
1070
- 'current': curr_manifest.nodes.get(unique_id),
1071
- 'base': base_manifest.nodes.get(unique_id)
1388
+ "current": curr_manifest.nodes.get(unique_id),
1389
+ "base": base_manifest.nodes.get(unique_id),
1072
1390
  }
1073
1391
  return None
1074
1392
 
@@ -1091,39 +1409,40 @@ class DbtAdapter(BaseAdapter):
1091
1409
  if self.base_path:
1092
1410
  self.artifacts_observer.schedule(event_handler, self.base_path, recursive=False)
1093
1411
  self.artifacts_observer.start()
1094
- logger.info('Start monitoring dbt artifacts')
1412
+ logger.info("Start monitoring dbt artifacts")
1095
1413
 
1096
1414
  def stop_monitor_artifacts(self):
1097
1415
  if self.artifacts_files:
1098
1416
  self.artifacts_observer.stop()
1099
1417
  self.artifacts_observer.join()
1100
- logger.info('Stop monitoring artifacts')
1418
+ logger.info("Stop monitoring artifacts")
1101
1419
 
1102
1420
  def start_monitor_base_env(self, callback: Callable = None):
1103
- target_base_dir = os.path.join(self.runtime_config.project_root, 'target-base')
1421
+ target_base_dir = os.path.join(self.runtime_config.project_root, "target-base")
1104
1422
  base_env_files = {
1105
- os.path.join(target_base_dir, 'manifest.json'),
1106
- os.path.join(target_base_dir, 'catalog.json'),
1423
+ os.path.join(target_base_dir, "manifest.json"),
1424
+ os.path.join(target_base_dir, "catalog.json"),
1107
1425
  }
1108
1426
  event_handler = EnvironmentEventHandler(self.base_env_observer, base_env_files, callback=callback)
1109
1427
  self.base_env_observer.schedule(event_handler, self.runtime_config.project_root, recursive=True)
1110
1428
  self.base_env_observer.start()
1111
- logger.info('Start monitoring base environment')
1429
+ logger.info("Start monitoring base environment")
1112
1430
 
1113
1431
  def stop_monitor_base_env(self):
1114
1432
  if self.base_env_observer.is_alive():
1115
1433
  self.base_env_observer.stop()
1116
1434
  self.base_env_observer.join()
1117
- logger.info('Stop monitoring base environment')
1118
-
1119
- def set_artifacts(self,
1120
- base_manifest: WritableManifest,
1121
- curr_manifest: WritableManifest,
1122
- manifest: Manifest,
1123
- previous_manifest: Manifest,
1124
- base_catalog: CatalogArtifact,
1125
- curr_catalog: CatalogArtifact,
1126
- ):
1435
+ logger.info("Stop monitoring base environment")
1436
+
1437
+ def set_artifacts(
1438
+ self,
1439
+ base_manifest: WritableManifest,
1440
+ curr_manifest: WritableManifest,
1441
+ manifest: Manifest,
1442
+ previous_manifest: Manifest,
1443
+ base_catalog: CatalogArtifact,
1444
+ curr_catalog: CatalogArtifact,
1445
+ ):
1127
1446
  self.curr_manifest = curr_manifest
1128
1447
  self.base_manifest = base_manifest
1129
1448
  self.manifest = manifest
@@ -1132,7 +1451,7 @@ class DbtAdapter(BaseAdapter):
1132
1451
  self.previous_state = previous_state(
1133
1452
  Path(self.base_path),
1134
1453
  Path(self.runtime_config.target_path),
1135
- Path(self.runtime_config.project_root)
1454
+ Path(self.runtime_config.project_root),
1136
1455
  )
1137
1456
  self.previous_state.manifest = previous_manifest
1138
1457
 
@@ -1154,19 +1473,22 @@ class DbtAdapter(BaseAdapter):
1154
1473
  # we capture the original manifest as base and only update the current
1155
1474
  target_type = os.path.basename(os.path.dirname(refresh_file_path))
1156
1475
  if self.target_path and target_type == os.path.basename(self.target_path):
1157
- if refresh_file_path.endswith('manifest.json'):
1476
+ if refresh_file_path.endswith("manifest.json"):
1158
1477
  self.curr_manifest = load_manifest(path=refresh_file_path)
1159
1478
  self.manifest = as_manifest(self.curr_manifest)
1160
1479
  self.get_cll_cached.cache_clear()
1161
- self.get_lineage_nodes_metadata.cache_clear()
1162
- elif refresh_file_path.endswith('catalog.json'):
1480
+ self.get_change_analysis_cached.cache_clear()
1481
+ elif refresh_file_path.endswith("catalog.json"):
1163
1482
  self.curr_catalog = load_catalog(path=refresh_file_path)
1164
- self.get_lineage_nodes_metadata.cache_clear()
1483
+ self.get_cll_cached.cache_clear()
1484
+ self.get_change_analysis_cached.cache_clear()
1165
1485
  elif self.base_path and target_type == os.path.basename(self.base_path):
1166
- if refresh_file_path.endswith('manifest.json'):
1486
+ if refresh_file_path.endswith("manifest.json"):
1167
1487
  self.base_manifest = load_manifest(path=refresh_file_path)
1168
- elif refresh_file_path.endswith('catalog.json'):
1488
+ self.get_change_analysis_cached.cache_clear()
1489
+ elif refresh_file_path.endswith("catalog.json"):
1169
1490
  self.base_catalog = load_catalog(path=refresh_file_path)
1491
+ self.get_change_analysis_cached.cache_clear()
1170
1492
 
1171
1493
  def create_relation(self, model, base=False):
1172
1494
  node = self.find_node_by_name(model, base)
@@ -1180,18 +1502,22 @@ class DbtAdapter(BaseAdapter):
1180
1502
  select: Optional[str] = None,
1181
1503
  exclude: Optional[str] = None,
1182
1504
  packages: Optional[list[str]] = None,
1183
- view_mode: Optional[Literal['all', 'changed_models']] = None,
1505
+ view_mode: Optional[Literal["all", "changed_models"]] = None,
1184
1506
  ) -> Set[str]:
1185
- from dbt.graph import NodeSelector
1186
- from dbt.compilation import Compiler
1187
- from dbt.graph import parse_difference, SelectionIntersection, SelectionUnion
1188
1507
  import dbt.compilation
1508
+ from dbt.compilation import Compiler
1509
+ from dbt.graph import (
1510
+ NodeSelector,
1511
+ SelectionIntersection,
1512
+ SelectionUnion,
1513
+ parse_difference,
1514
+ )
1189
1515
 
1190
1516
  select_list = [select] if select else None
1191
1517
  exclude_list = [exclude] if exclude else None
1192
1518
 
1193
1519
  def _parse_difference(include, exclude):
1194
- if dbt_version < 'v1.8':
1520
+ if dbt_version < "v1.8":
1195
1521
  return parse_difference(include, exclude, "eager")
1196
1522
  else:
1197
1523
  return parse_difference(include, exclude)
@@ -1199,10 +1525,10 @@ class DbtAdapter(BaseAdapter):
1199
1525
  specs = [_parse_difference(select_list, exclude_list)]
1200
1526
 
1201
1527
  if packages is not None:
1202
- package_spec = SelectionUnion([_parse_difference([f'package:{p}'], None) for p in packages])
1528
+ package_spec = SelectionUnion([_parse_difference([f"package:{p}"], None) for p in packages])
1203
1529
  specs.append(package_spec)
1204
- if view_mode and view_mode == 'changed_models':
1205
- specs.append(_parse_difference(['1+state:modified+'], None))
1530
+ if view_mode and view_mode == "changed_models":
1531
+ specs.append(_parse_difference(["1+state:modified+"], None))
1206
1532
  spec = SelectionIntersection(specs)
1207
1533
 
1208
1534
  manifest = Manifest()
@@ -1215,8 +1541,8 @@ class DbtAdapter(BaseAdapter):
1215
1541
  for node_id, node in manifest_prev.nodes.items():
1216
1542
  if node_id not in manifest.nodes:
1217
1543
  node_dict = node.to_dict()
1218
- if 'raw_code' in node_dict:
1219
- node_dict['raw_code'] = "__removed__"
1544
+ if "raw_code" in node_dict:
1545
+ node_dict["raw_code"] = "__removed__"
1220
1546
  node_class = type(node)
1221
1547
  removed_node = node_class.from_dict(node_dict)
1222
1548
  manifest.nodes[node_id] = removed_node
@@ -1225,8 +1551,11 @@ class DbtAdapter(BaseAdapter):
1225
1551
  manifest.sources = {**manifest_prev.sources, **manifest_curr.sources}
1226
1552
  manifest.exposures = {**manifest_prev.exposures, **manifest_curr.exposures}
1227
1553
  manifest.metrics = {**manifest_prev.metrics, **manifest_curr.metrics}
1228
- if hasattr(manifest_prev, 'semantic_models'):
1229
- manifest.semantic_models = {**manifest_prev.semantic_models, **manifest_curr.semantic_models}
1554
+ if hasattr(manifest_prev, "semantic_models"):
1555
+ manifest.semantic_models = {
1556
+ **manifest_prev.semantic_models,
1557
+ **manifest_curr.semantic_models,
1558
+ }
1230
1559
 
1231
1560
  compiler = Compiler(self.runtime_config)
1232
1561
  # disable to print compile states
@@ -1241,28 +1570,28 @@ class DbtAdapter(BaseAdapter):
1241
1570
  return selector.get_selected(spec)
1242
1571
 
1243
1572
  def export_artifacts(self) -> ArtifactsRoot:
1244
- '''
1573
+ """
1245
1574
  Export the artifacts from the current state
1246
- '''
1575
+ """
1247
1576
  artifacts = ArtifactsRoot()
1248
1577
 
1249
1578
  def _load_artifact(artifact):
1250
1579
  return artifact.to_dict() if artifact else None
1251
1580
 
1252
1581
  artifacts.base = {
1253
- 'manifest': _load_artifact(self.base_manifest),
1254
- 'catalog': _load_artifact(self.base_catalog),
1582
+ "manifest": _load_artifact(self.base_manifest),
1583
+ "catalog": _load_artifact(self.base_catalog),
1255
1584
  }
1256
1585
  artifacts.current = {
1257
- 'manifest': _load_artifact(self.curr_manifest),
1258
- 'catalog': _load_artifact(self.curr_catalog),
1586
+ "manifest": _load_artifact(self.curr_manifest),
1587
+ "catalog": _load_artifact(self.curr_catalog),
1259
1588
  }
1260
1589
  return artifacts
1261
1590
 
1262
1591
  def export_artifacts_from_file(self) -> ArtifactsRoot:
1263
- '''
1592
+ """
1264
1593
  Export the artifacts from the state file. This is the old implementation
1265
- '''
1594
+ """
1266
1595
  artifacts = ArtifactsRoot()
1267
1596
  target_path = self.runtime_config.target_path
1268
1597
  target_base_path = self.base_path
@@ -1271,18 +1600,18 @@ class DbtAdapter(BaseAdapter):
1271
1600
  if not os.path.isfile(path):
1272
1601
  return None
1273
1602
 
1274
- with open(path, 'r') as f:
1603
+ with open(path, "r", encoding="utf-8") as f:
1275
1604
  json_content = f.read()
1276
1605
  return json.loads(json_content)
1277
1606
 
1278
1607
  project_root = self.runtime_config.project_root
1279
1608
  artifacts.base = {
1280
- 'manifest': _load_artifact(os.path.join(project_root, target_base_path, 'manifest.json')),
1281
- 'catalog': _load_artifact(os.path.join(project_root, target_base_path, 'catalog.json')),
1609
+ "manifest": _load_artifact(os.path.join(project_root, target_base_path, "manifest.json")),
1610
+ "catalog": _load_artifact(os.path.join(project_root, target_base_path, "catalog.json")),
1282
1611
  }
1283
1612
  artifacts.current = {
1284
- 'manifest': _load_artifact(os.path.join(project_root, target_path, 'manifest.json')),
1285
- 'catalog': _load_artifact(os.path.join(project_root, target_path, 'catalog.json')),
1613
+ "manifest": _load_artifact(os.path.join(project_root, target_path, "manifest.json")),
1614
+ "catalog": _load_artifact(os.path.join(project_root, target_path, "catalog.json")),
1286
1615
  }
1287
1616
  return artifacts
1288
1617
 
@@ -1290,7 +1619,7 @@ class DbtAdapter(BaseAdapter):
1290
1619
  # Merge the artifacts from the state file or cloud
1291
1620
  def _select_artifact(
1292
1621
  original: Union[WritableManifest, CatalogArtifact],
1293
- new: Union[WritableManifest, CatalogArtifact]
1622
+ new: Union[WritableManifest, CatalogArtifact],
1294
1623
  ):
1295
1624
  if merge:
1296
1625
  if not original:
@@ -1301,16 +1630,16 @@ class DbtAdapter(BaseAdapter):
1301
1630
  else:
1302
1631
  return new
1303
1632
 
1304
- self.base_manifest = _select_artifact(self.base_manifest, load_manifest(data=artifacts.base.get('manifest')))
1305
- self.curr_manifest = _select_artifact(self.curr_manifest, load_manifest(data=artifacts.current.get('manifest')))
1306
- self.base_catalog = _select_artifact(self.base_catalog, load_catalog(data=artifacts.base.get('catalog')))
1307
- self.curr_catalog = _select_artifact(self.curr_catalog, load_catalog(data=artifacts.current.get('catalog')))
1633
+ self.base_manifest = _select_artifact(self.base_manifest, load_manifest(data=artifacts.base.get("manifest")))
1634
+ self.curr_manifest = _select_artifact(self.curr_manifest, load_manifest(data=artifacts.current.get("manifest")))
1635
+ self.base_catalog = _select_artifact(self.base_catalog, load_catalog(data=artifacts.base.get("catalog")))
1636
+ self.curr_catalog = _select_artifact(self.curr_catalog, load_catalog(data=artifacts.current.get("catalog")))
1308
1637
 
1309
1638
  self.manifest = as_manifest(self.curr_manifest)
1310
1639
  self.previous_state = previous_state(
1311
1640
  Path(self.base_path),
1312
1641
  Path(self.runtime_config.target_path),
1313
- Path(self.runtime_config.project_root)
1642
+ Path(self.runtime_config.project_root),
1314
1643
  )
1315
1644
  self.previous_state.manifest = as_manifest(self.base_manifest)
1316
1645
 
@@ -1326,7 +1655,8 @@ class DbtAdapter(BaseAdapter):
1326
1655
 
1327
1656
  if not self.curr_manifest or not self.base_manifest:
1328
1657
  raise Exception(
1329
- 'No enough dbt artifacts in the state file. Please use the latest recce to generate the recce state')
1658
+ "No enough dbt artifacts in the state file. Please use the latest recce to generate the recce state"
1659
+ )
1330
1660
 
1331
1661
  @contextmanager
1332
1662
  def connection_named(self, name: str) -> Iterator[None]: