recce-nightly 0.62.0.20250417__py3-none-any.whl → 1.30.0.20251221__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of recce-nightly might be problematic. Click here for more details.

Files changed (245)
  1. recce/VERSION +1 -1
  2. recce/__init__.py +27 -22
  3. recce/adapter/base.py +11 -14
  4. recce/adapter/dbt_adapter/__init__.py +845 -461
  5. recce/adapter/dbt_adapter/dbt_version.py +3 -0
  6. recce/adapter/sqlmesh_adapter.py +24 -35
  7. recce/apis/check_api.py +59 -42
  8. recce/apis/check_events_api.py +353 -0
  9. recce/apis/check_func.py +41 -35
  10. recce/apis/run_api.py +25 -19
  11. recce/apis/run_func.py +64 -25
  12. recce/artifact.py +119 -51
  13. recce/cli.py +1301 -324
  14. recce/config.py +43 -34
  15. recce/connect_to_cloud.py +138 -0
  16. recce/core.py +55 -47
  17. recce/data/404/index.html +2 -0
  18. recce/data/404.html +2 -1
  19. recce/data/__next.@lineage.!KHNsb3Qp.__PAGE__.txt +7 -0
  20. recce/data/__next.@lineage.!KHNsb3Qp.txt +4 -0
  21. recce/data/__next.__PAGE__.txt +6 -0
  22. recce/data/__next._full.txt +32 -0
  23. recce/data/__next._head.txt +8 -0
  24. recce/data/__next._index.txt +14 -0
  25. recce/data/__next._tree.txt +8 -0
  26. recce/data/_next/static/chunks/025a7e3e3f9f40ae.js +1 -0
  27. recce/data/_next/static/chunks/0ce56d67ef5779ca.js +4 -0
  28. recce/data/_next/static/chunks/1a6a78780155dac7.js +48 -0
  29. recce/data/_next/static/chunks/1de8485918b9182a.css +2 -0
  30. recce/data/_next/static/chunks/1e4b1b50d1e34993.js +1 -0
  31. recce/data/_next/static/chunks/206d5d181e4c738e.js +1 -0
  32. recce/data/_next/static/chunks/2c357efc34c5b859.js +25 -0
  33. recce/data/_next/static/chunks/2e9d95d2d48c479c.js +1 -0
  34. recce/data/_next/static/chunks/2f016dc4a3edad2e.js +2 -0
  35. recce/data/_next/static/chunks/313251962d698f7c.js +1 -0
  36. recce/data/_next/static/chunks/3a9f021f38eb5574.css +1 -0
  37. recce/data/_next/static/chunks/40079da8d2b8f651.js +1 -0
  38. recce/data/_next/static/chunks/4599182bffb64661.js +38 -0
  39. recce/data/_next/static/chunks/4e62f6e184173580.js +1 -0
  40. recce/data/_next/static/chunks/5c4dfb0d09eaa401.js +1 -0
  41. recce/data/_next/static/chunks/69e4f06ccfdfc3ac.js +1 -0
  42. recce/data/_next/static/chunks/6b206cb4707d6bee.js +1 -0
  43. recce/data/_next/static/chunks/6d8557f062aa4386.css +1 -0
  44. recce/data/_next/static/chunks/7fbe3650bd83b6b5.js +1 -0
  45. recce/data/_next/static/chunks/83fa823a825674f6.js +1 -0
  46. recce/data/_next/static/chunks/848a6c9b5f55f7ed.js +1 -0
  47. recce/data/_next/static/chunks/859462b0858aef88.css +2 -0
  48. recce/data/_next/static/chunks/923964f18c87d0f1.css +1 -0
  49. recce/data/_next/static/chunks/939390f911895d7c.js +48 -0
  50. recce/data/_next/static/chunks/99a9817237a07f43.js +1 -0
  51. recce/data/_next/static/chunks/9fed8b4b2b924054.js +5 -0
  52. recce/data/_next/static/chunks/b6949f6c5892110c.js +1 -0
  53. recce/data/_next/static/chunks/b851a1d3f8149828.js +1 -0
  54. recce/data/_next/static/chunks/c734f9ad957de0b4.js +1 -0
  55. recce/data/_next/static/chunks/cdde321b0ec75717.js +2 -0
  56. recce/data/_next/static/chunks/d0f91117d77ff844.css +1 -0
  57. recce/data/_next/static/chunks/d6c8667911c2500f.js +1 -0
  58. recce/data/_next/static/chunks/da8dab68c02752cf.js +74 -0
  59. recce/data/_next/static/chunks/dc074049c9d12d97.js +109 -0
  60. recce/data/_next/static/chunks/ee7f1a8227342421.js +1 -0
  61. recce/data/_next/static/chunks/fa2f4e56c2fccc73.js +1 -0
  62. recce/data/_next/static/chunks/turbopack-1fad664f62979b93.js +3 -0
  63. recce/data/_next/static/media/favicon.a8d38d84.ico +0 -0
  64. recce/data/_next/static/media/montserrat-cyrillic-800-normal.d80d830d.woff2 +0 -0
  65. recce/data/_next/static/media/montserrat-cyrillic-800-normal.f9d58125.woff +0 -0
  66. recce/data/_next/static/media/montserrat-cyrillic-ext-800-normal.076c2a93.woff2 +0 -0
  67. recce/data/_next/static/media/montserrat-cyrillic-ext-800-normal.a4fa76b5.woff +0 -0
  68. recce/data/_next/static/media/montserrat-latin-800-normal.cde454cc.woff2 +0 -0
  69. recce/data/_next/static/media/montserrat-latin-800-normal.d5761935.woff +0 -0
  70. recce/data/_next/static/media/montserrat-latin-ext-800-normal.40ec0659.woff2 +0 -0
  71. recce/data/_next/static/media/montserrat-latin-ext-800-normal.b671449b.woff +0 -0
  72. recce/data/_next/static/media/montserrat-vietnamese-800-normal.9f7b8541.woff +0 -0
  73. recce/data/_next/static/media/montserrat-vietnamese-800-normal.f9eb854e.woff2 +0 -0
  74. recce/data/_next/static/nX-Uz0AH6Tc6hIQUFGqaB/_buildManifest.js +11 -0
  75. recce/data/_next/static/nX-Uz0AH6Tc6hIQUFGqaB/_clientMiddlewareManifest.json +1 -0
  76. recce/data/_not-found/__next._full.txt +24 -0
  77. recce/data/_not-found/__next._head.txt +8 -0
  78. recce/data/_not-found/__next._index.txt +13 -0
  79. recce/data/_not-found/__next._not-found.__PAGE__.txt +5 -0
  80. recce/data/_not-found/__next._not-found.txt +4 -0
  81. recce/data/_not-found/__next._tree.txt +6 -0
  82. recce/data/_not-found/index.html +2 -0
  83. recce/data/_not-found/index.txt +24 -0
  84. recce/data/auth_callback.html +68 -0
  85. recce/data/checks/__next.@lineage.__DEFAULT__.txt +7 -0
  86. recce/data/checks/__next._full.txt +39 -0
  87. recce/data/checks/__next._head.txt +8 -0
  88. recce/data/checks/__next._index.txt +14 -0
  89. recce/data/checks/__next._tree.txt +8 -0
  90. recce/data/checks/__next.checks.__PAGE__.txt +10 -0
  91. recce/data/checks/__next.checks.txt +4 -0
  92. recce/data/checks/index.html +2 -0
  93. recce/data/checks/index.txt +39 -0
  94. recce/data/imgs/reload-image.svg +4 -0
  95. recce/data/index.html +2 -27
  96. recce/data/index.txt +32 -7
  97. recce/data/lineage/__next.@lineage.__DEFAULT__.txt +7 -0
  98. recce/data/lineage/__next._full.txt +39 -0
  99. recce/data/lineage/__next._head.txt +8 -0
  100. recce/data/lineage/__next._index.txt +14 -0
  101. recce/data/lineage/__next._tree.txt +8 -0
  102. recce/data/lineage/__next.lineage.__PAGE__.txt +10 -0
  103. recce/data/lineage/__next.lineage.txt +4 -0
  104. recce/data/lineage/index.html +2 -0
  105. recce/data/lineage/index.txt +39 -0
  106. recce/data/query/__next.@lineage.__DEFAULT__.txt +7 -0
  107. recce/data/query/__next._full.txt +37 -0
  108. recce/data/query/__next._head.txt +8 -0
  109. recce/data/query/__next._index.txt +14 -0
  110. recce/data/query/__next._tree.txt +8 -0
  111. recce/data/query/__next.query.__PAGE__.txt +9 -0
  112. recce/data/query/__next.query.txt +4 -0
  113. recce/data/query/index.html +2 -0
  114. recce/data/query/index.txt +37 -0
  115. recce/diff.py +6 -12
  116. recce/event/CONFIG.bak +1 -0
  117. recce/event/__init__.py +86 -74
  118. recce/event/collector.py +33 -22
  119. recce/event/track.py +49 -27
  120. recce/exceptions.py +1 -1
  121. recce/git.py +7 -7
  122. recce/github.py +57 -53
  123. recce/mcp_server.py +725 -0
  124. recce/models/__init__.py +4 -1
  125. recce/models/check.py +438 -21
  126. recce/models/run.py +1 -0
  127. recce/models/types.py +134 -28
  128. recce/pull_request.py +27 -25
  129. recce/run.py +179 -122
  130. recce/server.py +394 -104
  131. recce/state/__init__.py +31 -0
  132. recce/state/cloud.py +644 -0
  133. recce/state/const.py +26 -0
  134. recce/state/local.py +56 -0
  135. recce/state/state.py +119 -0
  136. recce/state/state_loader.py +174 -0
  137. recce/summary.py +196 -149
  138. recce/tasks/__init__.py +19 -3
  139. recce/tasks/core.py +11 -13
  140. recce/tasks/dataframe.py +82 -18
  141. recce/tasks/histogram.py +69 -34
  142. recce/tasks/lineage.py +2 -2
  143. recce/tasks/profile.py +152 -86
  144. recce/tasks/query.py +180 -89
  145. recce/tasks/rowcount.py +37 -31
  146. recce/tasks/schema.py +18 -15
  147. recce/tasks/top_k.py +35 -35
  148. recce/tasks/utils.py +147 -0
  149. recce/tasks/valuediff.py +247 -155
  150. recce/util/__init__.py +3 -0
  151. recce/util/api_token.py +80 -0
  152. recce/util/breaking.py +105 -100
  153. recce/util/cll.py +274 -219
  154. recce/util/cloud/__init__.py +15 -0
  155. recce/util/cloud/base.py +115 -0
  156. recce/util/cloud/check_events.py +190 -0
  157. recce/util/cloud/checks.py +242 -0
  158. recce/util/io.py +22 -17
  159. recce/util/lineage.py +65 -16
  160. recce/util/logger.py +1 -1
  161. recce/util/onboarding_state.py +45 -0
  162. recce/util/perf_tracking.py +85 -0
  163. recce/util/recce_cloud.py +347 -72
  164. recce/util/singleton.py +4 -4
  165. recce/util/startup_perf.py +121 -0
  166. recce/yaml/__init__.py +7 -10
  167. recce_nightly-1.30.0.20251221.dist-info/METADATA +195 -0
  168. recce_nightly-1.30.0.20251221.dist-info/RECORD +183 -0
  169. {recce_nightly-0.62.0.20250417.dist-info → recce_nightly-1.30.0.20251221.dist-info}/WHEEL +1 -2
  170. recce/data/_next/static/chunks/1f229bf6-d9fe92e56db8d93b.js +0 -1
  171. recce/data/_next/static/chunks/29e3cc0d-8c150e37dff9631b.js +0 -1
  172. recce/data/_next/static/chunks/36e1c10d-bb0210cbd6573a8d.js +0 -1
  173. recce/data/_next/static/chunks/3998a672-eaad84bdd88cc73e.js +0 -1
  174. recce/data/_next/static/chunks/450c323b-1bb5db526e54435a.js +0 -1
  175. recce/data/_next/static/chunks/47d8844f-79a1b53c66a7d7ec.js +0 -1
  176. recce/data/_next/static/chunks/500-e51c92a025a51234.js +0 -65
  177. recce/data/_next/static/chunks/6dc81886-c94b9b91bc2c3caf.js +0 -1
  178. recce/data/_next/static/chunks/700-3b65fc3666820d00.js +0 -2
  179. recce/data/_next/static/chunks/7a8a3e83-d7fa409d97b38b2b.js +0 -1
  180. recce/data/_next/static/chunks/7f27ae6c-413f6b869a04183a.js +0 -1
  181. recce/data/_next/static/chunks/9746af58-d74bef4d03eea6ab.js +0 -1
  182. recce/data/_next/static/chunks/a30376cd-7d806e1602f2dc3a.js +0 -1
  183. recce/data/_next/static/chunks/app/_not-found/page-8a886fa0855c3105.js +0 -1
  184. recce/data/_next/static/chunks/app/layout-9102e22cb73f74d6.js +0 -1
  185. recce/data/_next/static/chunks/app/page-9adc25782272ed2e.js +0 -1
  186. recce/data/_next/static/chunks/b63b1b3f-7395c74e11a14e95.js +0 -1
  187. recce/data/_next/static/chunks/c132bf7d-8102037f9ccf372a.js +0 -1
  188. recce/data/_next/static/chunks/c1ceaa8b-a1e442154d23515e.js +0 -1
  189. recce/data/_next/static/chunks/cd9f8d63-cf0d5a7b0f7a92e8.js +0 -54
  190. recce/data/_next/static/chunks/ce84277d-f42c2c58049cea2d.js +0 -1
  191. recce/data/_next/static/chunks/e24bf851-0f8cbc99656833e7.js +0 -1
  192. recce/data/_next/static/chunks/fee69bc6-f17d36c080742e74.js +0 -1
  193. recce/data/_next/static/chunks/framework-ded83d71b51ce901.js +0 -1
  194. recce/data/_next/static/chunks/main-a0859f1f36d0aa6c.js +0 -1
  195. recce/data/_next/static/chunks/main-app-0225a2255968e566.js +0 -1
  196. recce/data/_next/static/chunks/pages/_app-d5672bf3d8b6371b.js +0 -1
  197. recce/data/_next/static/chunks/pages/_error-ed75be3f25588548.js +0 -1
  198. recce/data/_next/static/chunks/webpack-567d72f0bc0820d5.js +0 -1
  199. recce/data/_next/static/css/c9ecb46a4b21c126.css +0 -14
  200. recce/data/_next/static/media/montserrat-cyrillic-800-normal.22628180.woff2 +0 -0
  201. recce/data/_next/static/media/montserrat-cyrillic-800-normal.31d693bb.woff +0 -0
  202. recce/data/_next/static/media/montserrat-cyrillic-ext-800-normal.7e2c1e62.woff +0 -0
  203. recce/data/_next/static/media/montserrat-cyrillic-ext-800-normal.94a63aea.woff2 +0 -0
  204. recce/data/_next/static/media/montserrat-latin-800-normal.6f8fa298.woff2 +0 -0
  205. recce/data/_next/static/media/montserrat-latin-800-normal.97e20d5e.woff +0 -0
  206. recce/data/_next/static/media/montserrat-latin-ext-800-normal.013b84f9.woff2 +0 -0
  207. recce/data/_next/static/media/montserrat-latin-ext-800-normal.aff52ab0.woff +0 -0
  208. recce/data/_next/static/media/montserrat-vietnamese-800-normal.5f21869b.woff +0 -0
  209. recce/data/_next/static/media/montserrat-vietnamese-800-normal.c0035377.woff2 +0 -0
  210. recce/data/_next/static/qiyFlux77VkhxiceAJe_F/_buildManifest.js +0 -1
  211. recce/state.py +0 -753
  212. recce_nightly-0.62.0.20250417.dist-info/METADATA +0 -311
  213. recce_nightly-0.62.0.20250417.dist-info/RECORD +0 -139
  214. recce_nightly-0.62.0.20250417.dist-info/top_level.txt +0 -2
  215. tests/__init__.py +0 -0
  216. tests/adapter/__init__.py +0 -0
  217. tests/adapter/dbt_adapter/__init__.py +0 -0
  218. tests/adapter/dbt_adapter/conftest.py +0 -13
  219. tests/adapter/dbt_adapter/dbt_test_helper.py +0 -283
  220. tests/adapter/dbt_adapter/test_dbt_adapter.py +0 -40
  221. tests/adapter/dbt_adapter/test_dbt_cll.py +0 -102
  222. tests/adapter/dbt_adapter/test_selector.py +0 -177
  223. tests/tasks/__init__.py +0 -0
  224. tests/tasks/conftest.py +0 -4
  225. tests/tasks/test_histogram.py +0 -137
  226. tests/tasks/test_lineage.py +0 -42
  227. tests/tasks/test_preset_checks.py +0 -50
  228. tests/tasks/test_profile.py +0 -73
  229. tests/tasks/test_query.py +0 -151
  230. tests/tasks/test_row_count.py +0 -116
  231. tests/tasks/test_schema.py +0 -99
  232. tests/tasks/test_top_k.py +0 -73
  233. tests/tasks/test_valuediff.py +0 -74
  234. tests/test_cli.py +0 -122
  235. tests/test_config.py +0 -45
  236. tests/test_core.py +0 -27
  237. tests/test_dbt.py +0 -36
  238. tests/test_pull_request.py +0 -130
  239. tests/test_server.py +0 -98
  240. tests/test_state.py +0 -123
  241. tests/test_summary.py +0 -57
  242. /recce/data/_next/static/chunks/{polyfills-42372ed130431b0a.js → a6dad97d9634a72d.js} +0 -0
  243. /recce/data/_next/static/{qiyFlux77VkhxiceAJe_F → nX-Uz0AH6Tc6hIQUFGqaB}/_ssgManifest.js +0 -0
  244. {recce_nightly-0.62.0.20250417.dist-info → recce_nightly-1.30.0.20251221.dist-info}/entry_points.txt +0 -0
  245. {recce_nightly-0.62.0.20250417.dist-info → recce_nightly-1.30.0.20251221.dist-info}/licenses/LICENSE +0 -0
@@ -8,14 +8,34 @@ from dataclasses import dataclass, fields
8
8
  from errno import ENOENT
9
9
  from functools import lru_cache
10
10
  from pathlib import Path
11
- from typing import Callable, Dict, List, Optional, Tuple, Iterator, Any, Set, Union, Literal, Type
11
+ from typing import (
12
+ Any,
13
+ Callable,
14
+ Dict,
15
+ Iterator,
16
+ List,
17
+ Literal,
18
+ Optional,
19
+ Set,
20
+ Tuple,
21
+ Type,
22
+ Union,
23
+ )
12
24
 
13
25
  from recce.event import log_performance
14
26
  from recce.exceptions import RecceException
15
- from recce.util.cll import cll, CLLPerformanceTracking
16
- from recce.util.lineage import find_upstream, find_downstream
27
+ from recce.util.cll import CLLPerformanceTracking, cll
28
+ from recce.util.lineage import (
29
+ build_column_key,
30
+ filter_dependency_maps,
31
+ find_downstream,
32
+ find_upstream,
33
+ )
34
+ from recce.util.perf_tracking import LineagePerfTracker
35
+ from recce.util.startup_perf import track_timing
36
+
17
37
  from ...tasks.profile import ProfileTask
18
- from ...util.breaking import parse_change_category, BreakingPerformanceTracking
38
+ from ...util.breaking import BreakingPerformanceTracking, parse_change_category
19
39
 
20
40
  try:
21
41
  import agate
@@ -30,11 +50,30 @@ from watchdog.observers import Observer
30
50
 
31
51
  from recce.adapter.base import BaseAdapter
32
52
  from recce.state import ArtifactsRoot
33
- from .dbt_version import DbtVersion
53
+
34
54
  from ...models import RunType
35
- from ...models.types import LineageDiff, NodeDiff, NodeChange
36
- from ...tasks import Task, QueryTask, QueryBaseTask, QueryDiffTask, ValueDiffTask, ValueDiffDetailTask, ProfileDiffTask, \
37
- RowCountTask, RowCountDiffTask, TopKDiffTask, HistogramDiffTask
55
+ from ...models.types import (
56
+ CllColumn,
57
+ CllData,
58
+ CllNode,
59
+ LineageDiff,
60
+ NodeChange,
61
+ NodeDiff,
62
+ )
63
+ from ...tasks import (
64
+ HistogramDiffTask,
65
+ ProfileDiffTask,
66
+ QueryBaseTask,
67
+ QueryDiffTask,
68
+ QueryTask,
69
+ RowCountDiffTask,
70
+ RowCountTask,
71
+ Task,
72
+ TopKDiffTask,
73
+ ValueDiffDetailTask,
74
+ ValueDiffTask,
75
+ )
76
+ from .dbt_version import DbtVersion
38
77
 
39
78
  dbt_supported_registry: Dict[RunType, Type[Task]] = {
40
79
  RunType.QUERY: QueryTask,
@@ -56,7 +95,7 @@ get_adapter_orig = dbt.adapters.factory.get_adapter
56
95
 
57
96
 
58
97
  def get_adapter(config):
59
- if hasattr(config, 'adapter'):
98
+ if hasattr(config, "adapter"):
60
99
  return config.adapter
61
100
  else:
62
101
  return get_adapter_orig(config)
@@ -69,7 +108,12 @@ from dbt.adapters.base import Column # noqa: E402
69
108
  from dbt.adapters.factory import get_adapter_class_by_name # noqa: E402
70
109
  from dbt.adapters.sql import SQLAdapter # noqa: E402
71
110
  from dbt.config.runtime import RuntimeConfig # noqa: E402
72
- from dbt.contracts.graph.manifest import Manifest, WritableManifest, MacroManifest # noqa: E402
111
+ from dbt.contracts.graph.manifest import ( # noqa: E402
112
+ MacroManifest,
113
+ Manifest,
114
+ ManifestMetadata,
115
+ WritableManifest,
116
+ )
73
117
  from dbt.contracts.graph.nodes import ManifestNode # noqa: E402
74
118
  from dbt.contracts.results import CatalogArtifact # noqa: E402
75
119
  from dbt.flags import set_from_args # noqa: E402
@@ -78,7 +122,7 @@ from dbt.parser.sql import SqlBlockParser # noqa: E402
78
122
 
79
123
  dbt_version = DbtVersion()
80
124
 
81
- if dbt_version < 'v1.8':
125
+ if dbt_version < "v1.8":
82
126
  from dbt.contracts.connection import Connection
83
127
  else:
84
128
  from dbt.adapters.contracts.connection import Connection
@@ -86,19 +130,22 @@ else:
86
130
 
87
131
  @contextmanager
88
132
  def silence_no_nodes_warning():
89
- if dbt_version >= 'v1.8':
133
+ if dbt_version >= "v1.8":
90
134
  from dbt.events.types import NoNodesForSelectionCriteria
91
135
  from dbt_common.events.functions import WARN_ERROR_OPTIONS
136
+
92
137
  WARN_ERROR_OPTIONS.silence.append(NoNodesForSelectionCriteria.__name__)
93
138
  try:
94
139
  yield
95
140
  finally:
96
- if dbt_version >= 'v1.8':
141
+ if dbt_version >= "v1.8":
97
142
  from dbt_common.events.functions import WARN_ERROR_OPTIONS
143
+
98
144
  WARN_ERROR_OPTIONS.silence.pop()
99
145
 
100
146
 
101
- logger = logging.getLogger('uvicorn')
147
+ logger = logging.getLogger("uvicorn")
148
+ MIN_DBT_NODE_COMPOSITION = 3
102
149
 
103
150
 
104
151
  class ArtifactsEventHandler(FileSystemEventHandler):
@@ -147,24 +194,29 @@ class EnvironmentEventHandler(FileSystemEventHandler):
147
194
 
148
195
 
149
196
  def merge_tables(tables: List[agate.Table]) -> agate.Table:
150
- if dbt_version < 'v1.8':
197
+ if dbt_version < "v1.8":
151
198
  from dbt.clients.agate_helper import merge_tables
199
+
152
200
  return merge_tables(tables)
153
201
  else:
154
202
  from dbt_common.clients.agate_helper import merge_tables
203
+
155
204
  return merge_tables(tables)
156
205
 
157
206
 
158
207
  def as_manifest(m: WritableManifest) -> Manifest:
159
- if dbt_version < 'v1.8':
208
+ if dbt_version < "v1.8":
160
209
  data = m.__dict__
161
210
  all_fields = set([x.name for x in fields(Manifest)])
162
211
  new_data = {k: v for k, v in data.items() if k in all_fields}
163
212
  return Manifest(**new_data)
164
213
  else:
165
- return Manifest.from_writable_manifest(m)
214
+ result = Manifest.from_writable_manifest(m)
215
+ result.metadata = ManifestMetadata(**m.metadata.__dict__)
216
+ return result
166
217
 
167
218
 
219
+ @track_timing(record_size=True)
168
220
  def load_manifest(path: str = None, data: dict = None):
169
221
  if path is not None:
170
222
  if not os.path.isfile(path):
@@ -174,6 +226,7 @@ def load_manifest(path: str = None, data: dict = None):
174
226
  return WritableManifest.upgrade_schema_version(data)
175
227
 
176
228
 
229
+ @track_timing(record_size=True)
177
230
  def load_catalog(path: str = None, data: dict = None):
178
231
  if path is not None:
179
232
  if not os.path.isfile(path):
@@ -184,12 +237,13 @@ def load_catalog(path: str = None, data: dict = None):
184
237
 
185
238
 
186
239
  def previous_state(state_path: Path, target_path: Path, project_root: Path) -> PreviousState:
187
- if dbt_version < 'v1.5.2':
240
+ if dbt_version < "v1.5.2":
188
241
  return PreviousState(state_path, target_path)
189
242
  else:
190
243
  try:
191
244
  # Overwrite the level_tag method temporarily to avoid the warning message
192
- from dbt.events.types import WarnStateTargetEqual, EventLevel
245
+ from dbt.events.types import EventLevel, WarnStateTargetEqual
246
+
193
247
  original_level_tag_func = WarnStateTargetEqual.level_tag
194
248
  WarnStateTargetEqual.level_tag = lambda x: EventLevel.DEBUG
195
249
  except ImportError:
@@ -209,12 +263,12 @@ def previous_state(state_path: Path, target_path: Path, project_root: Path) -> P
209
263
  def default_profiles_dir():
210
264
  # Precedence: DBT_PROFILES_DIR > current working directory > ~/.dbt/
211
265
  # https://docs.getdbt.com/docs/core/connect-data-platform/connection-profiles#advanced-customizing-a-profile-directory
212
- if os.getenv('DBT_PROFILES_DIR'):
213
- return os.getenv('DBT_PROFILES_DIR')
214
- elif os.path.exists(os.path.join(os.getcwd(), 'profiles.yml')):
266
+ if os.getenv("DBT_PROFILES_DIR"):
267
+ return os.getenv("DBT_PROFILES_DIR")
268
+ elif os.path.exists(os.path.join(os.getcwd(), "profiles.yml")):
215
269
  return os.getcwd()
216
270
  else:
217
- return os.path.expanduser('~/.dbt/')
271
+ return os.path.expanduser("~/.dbt/")
218
272
 
219
273
 
220
274
  @dataclass()
@@ -222,15 +276,16 @@ class DbtArgs:
222
276
  """
223
277
  Used for RuntimeConfig.from_args
224
278
  """
225
- threads: Optional[int] = 1,
226
- target: Optional[str] = None,
227
- profiles_dir: Optional[str] = None,
228
- project_dir: Optional[str] = None,
229
- profile: Optional[str] = None,
230
- target_path: Optional[str] = None,
279
+
280
+ threads: Optional[int] = (1,)
281
+ target: Optional[str] = (None,)
282
+ profiles_dir: Optional[str] = (None,)
283
+ project_dir: Optional[str] = (None,)
284
+ profile: Optional[str] = (None,)
285
+ target_path: Optional[str] = (None,)
231
286
  project_only_flags: Optional[Dict[str, Any]] = None
232
287
  which: Optional[str] = None
233
- state_modified_compare_more_unrendered_values: Optional[bool] = False # new flag added since dbt v1.9
288
+ state_modified_compare_more_unrendered_values: Optional[bool] = True # new flag added since dbt v1.9
234
289
 
235
290
 
236
291
  @dataclass
@@ -258,32 +313,18 @@ class DbtAdapter(BaseAdapter):
258
313
 
259
314
  def support_tasks(self):
260
315
  support_map = {run_type.value: True for run_type in dbt_supported_registry}
261
- supported_dbt_packages = set([package.package_name for package in self.manifest.macros.values()])
262
-
263
- if 'dbt_profiler' not in supported_dbt_packages:
264
- support_map[RunType.PROFILE_DIFF.value] = False
265
- support_map[RunType.PROFILE.value] = False
266
-
267
- if 'audit_helper' not in supported_dbt_packages:
268
- support_map[RunType.VALUE_DIFF.value] = False
269
- support_map[RunType.VALUE_DIFF_DETAIL.value] = False
270
- support_map['query_diff_with_primary_key'] = False
271
316
 
272
317
  return support_map
273
318
 
274
319
  @classmethod
275
- def load(cls,
276
- no_artifacts=False,
277
- review=False,
278
- **kwargs):
279
-
280
- target = kwargs.get('target')
281
- target_path = kwargs.get('target_path', 'target')
282
- target_base_path = kwargs.get('target_base_path', 'target-base')
320
+ def load(cls, no_artifacts=False, review=False, **kwargs):
321
+ target = kwargs.get("target")
322
+ target_path = kwargs.get("target_path", "target")
323
+ target_base_path = kwargs.get("target_base_path", "target-base")
283
324
 
284
- profile_name = kwargs.get('profile')
285
- project_dir = kwargs.get('project_dir')
286
- profiles_dir = kwargs.get('profiles_dir')
325
+ profile_name = kwargs.get("profile")
326
+ project_dir = kwargs.get("project_dir")
327
+ profiles_dir = kwargs.get("profiles_dir")
287
328
 
288
329
  if profiles_dir is None:
289
330
  profiles_dir = default_profiles_dir()
@@ -297,21 +338,25 @@ class DbtAdapter(BaseAdapter):
297
338
  profiles_dir=profiles_dir,
298
339
  profile=profile_name,
299
340
  project_only_flags={},
300
- which='list'
341
+ which="list",
301
342
  )
302
343
  set_from_args(args, args)
303
344
 
304
345
  from dbt.exceptions import DbtProjectError
346
+
305
347
  try:
306
348
  # adapter
307
- if dbt_version < 'v1.8':
349
+ if dbt_version < "v1.8":
308
350
  runtime_config = RuntimeConfig.from_args(args)
309
351
  adapter_name = runtime_config.credentials.type
310
352
  adapter_cls = get_adapter_class_by_name(adapter_name)
311
353
  adapter: SQLAdapter = adapter_cls(runtime_config)
312
354
  else:
313
- from dbt_common.context import set_invocation_context, get_invocation_context
314
355
  from dbt.mp_context import get_mp_context
356
+ from dbt_common.context import (
357
+ get_invocation_context,
358
+ set_invocation_context,
359
+ )
315
360
 
316
361
  set_invocation_context({})
317
362
  get_invocation_context()._env = dict(os.environ)
@@ -319,6 +364,9 @@ class DbtAdapter(BaseAdapter):
319
364
  adapter_name = runtime_config.credentials.type
320
365
  adapter_cls = get_adapter_class_by_name(adapter_name)
321
366
  adapter: SQLAdapter = adapter_cls(runtime_config, get_mp_context())
367
+ from dbt.adapters.factory import FACTORY
368
+
369
+ FACTORY.adapters[adapter_name] = adapter
322
370
 
323
371
  adapter.connections.set_connection_name()
324
372
  runtime_config.adapter = adapter
@@ -327,7 +375,7 @@ class DbtAdapter(BaseAdapter):
327
375
  runtime_config=runtime_config,
328
376
  adapter=adapter,
329
377
  review_mode=review,
330
- base_path=target_base_path
378
+ base_path=target_base_path,
331
379
  )
332
380
  except DbtProjectError as e:
333
381
  raise e
@@ -348,27 +396,26 @@ class DbtAdapter(BaseAdapter):
348
396
 
349
397
  def get_columns(self, model: str, base=False) -> List[Column]:
350
398
  relation = self.create_relation(model, base)
351
- get_columns_macro = 'get_columns_in_relation'
352
- if self.adapter.connections.TYPE == 'databricks':
353
- get_columns_macro = 'get_columns_comments'
399
+ get_columns_macro = "get_columns_in_relation"
400
+ if self.adapter.connections.TYPE == "databricks":
401
+ get_columns_macro = "get_columns_comments"
354
402
 
355
- if dbt_version < 'v1.8':
403
+ if dbt_version < "v1.8":
356
404
  columns = self.adapter.execute_macro(
357
- get_columns_macro,
358
- kwargs={"relation": relation},
359
- manifest=self.manifest)
405
+ get_columns_macro, kwargs={"relation": relation}, manifest=self.manifest
406
+ )
360
407
  else:
361
408
  from dbt.context.providers import generate_runtime_macro_context
409
+
362
410
  macro_manifest = MacroManifest(self.manifest.macros)
363
411
  self.adapter.set_macro_resolver(macro_manifest)
364
412
  self.adapter.set_macro_context_generator(generate_runtime_macro_context)
365
- columns = self.adapter.execute_macro(
366
- get_columns_macro,
367
- kwargs={"relation": relation})
413
+ columns = self.adapter.execute_macro(get_columns_macro, kwargs={"relation": relation})
368
414
 
369
- if self.adapter.connections.TYPE == 'databricks':
415
+ if self.adapter.connections.TYPE == "databricks":
370
416
  # reference: get_columns_in_relation (dbt/adapters/databricks/impl.py)
371
- from dbt.adapters.databricks import DatabricksColumn
417
+ from dbt.adapters.databricks.column import DatabricksColumn
418
+
372
419
  rows = columns
373
420
  columns = []
374
421
  for row in rows:
@@ -376,7 +423,9 @@ class DbtAdapter(BaseAdapter):
376
423
  break
377
424
  columns.append(
378
425
  DatabricksColumn(
379
- column=row["col_name"], dtype=row["data_type"], comment=row["comment"]
426
+ column=row["col_name"],
427
+ dtype=row["data_type"],
428
+ comment=row["comment"],
380
429
  )
381
430
  )
382
431
  return columns
@@ -387,29 +436,29 @@ class DbtAdapter(BaseAdapter):
387
436
  manifest = self.curr_manifest if base is False else self.base_manifest
388
437
  manifest_dict = manifest.to_dict()
389
438
 
390
- node = manifest_dict['nodes'].get(model_id)
439
+ node = manifest_dict["nodes"].get(model_id)
391
440
  if node is None:
392
441
  return {}
393
442
 
394
- node_name = node['name']
395
- with self.adapter.connection_named('model'):
443
+ node_name = node["name"]
444
+ with self.adapter.connection_named("model"):
396
445
  columns = [column for column in self.get_columns(node_name, base=base)]
397
446
 
398
- child_map: List[str] = manifest_dict['child_map'][model_id]
447
+ child_map: List[str] = manifest_dict["child_map"][model_id]
399
448
  cols_not_null = []
400
449
  cols_unique = []
401
450
 
402
451
  for child in child_map:
403
- comps = child.split('.')
452
+ comps = child.split(".")
404
453
  child_type = comps[0]
405
454
  child_name = comps[2]
406
455
 
407
- not_null_prefix = f'not_null_{node_name}_'
408
- if child_type == 'test' and child_name.startswith(not_null_prefix):
409
- cols_not_null.append(child_name[len(not_null_prefix):])
410
- unique_prefix = f'unique_{node_name}_'
411
- if child_type == 'test' and child_name.startswith(unique_prefix):
412
- cols_unique.append(child_name[len(unique_prefix):])
456
+ not_null_prefix = f"not_null_{node_name}_"
457
+ if child_type == "test" and child_name.startswith(not_null_prefix):
458
+ cols_not_null.append(child_name[len(not_null_prefix) :])
459
+ unique_prefix = f"unique_{node_name}_"
460
+ if child_type == "test" and child_name.startswith(unique_prefix):
461
+ cols_unique.append(child_name[len(unique_prefix) :])
413
462
 
414
463
  columns_info = {}
415
464
  primary_key = None
@@ -417,25 +466,26 @@ class DbtAdapter(BaseAdapter):
417
466
  col_name = c.column
418
467
  col = dict(name=col_name, type=c.dtype)
419
468
  if col_name in cols_not_null:
420
- col['not_null'] = True
469
+ col["not_null"] = True
421
470
  if col_name in cols_unique:
422
- col['unique'] = True
471
+ col["unique"] = True
423
472
  if not primary_key:
424
473
  primary_key = col_name
425
474
  columns_info[col_name] = col
426
475
 
427
476
  result = dict(columns=columns_info)
428
477
  if primary_key:
429
- result['primary_key'] = primary_key
478
+ result["primary_key"] = primary_key
430
479
 
431
480
  return result
432
481
 
482
+ @track_timing("artifact_load")
433
483
  def load_artifacts(self):
434
484
  """
435
485
  Load the artifacts from the 'target' and 'target-base' directory
436
486
  """
437
487
  if self.runtime_config is None:
438
- raise Exception('Cannot find the dbt project configuration')
488
+ raise Exception("Cannot find the dbt project configuration")
439
489
 
440
490
  project_root = self.runtime_config.project_root
441
491
  target_path = self.runtime_config.target_path
@@ -444,17 +494,21 @@ class DbtAdapter(BaseAdapter):
444
494
  self.base_path = os.path.join(project_root, target_base_path)
445
495
 
446
496
  # load the artifacts
447
- path = os.path.join(project_root, target_path, 'manifest.json')
448
- curr_manifest = load_manifest(path=path)
497
+ path = os.path.join(project_root, target_path, "manifest.json")
498
+ curr_manifest = load_manifest(path=path, timing_name="curr_manifest")
449
499
  if curr_manifest is None:
450
500
  raise FileNotFoundError(ENOENT, os.strerror(ENOENT), path)
451
- path = os.path.join(project_root, target_base_path, 'manifest.json')
452
- base_manifest = load_manifest(path=path)
501
+ path = os.path.join(project_root, target_base_path, "manifest.json")
502
+ base_manifest = load_manifest(path=path, timing_name="base_manifest")
453
503
  if base_manifest is None:
454
504
  raise FileNotFoundError(ENOENT, os.strerror(ENOENT), path)
455
505
 
456
- curr_catalog = load_catalog(path=os.path.join(project_root, target_path, 'catalog.json'))
457
- base_catalog = load_catalog(path=os.path.join(project_root, target_base_path, 'catalog.json'))
506
+ curr_catalog = load_catalog(
507
+ path=os.path.join(project_root, target_path, "catalog.json"), timing_name="curr_catalog"
508
+ )
509
+ base_catalog = load_catalog(
510
+ path=os.path.join(project_root, target_base_path, "catalog.json"), timing_name="base_catalog"
511
+ )
458
512
 
459
513
  # set the value if all the artifacts are loaded successfully
460
514
  self.curr_manifest = curr_manifest
@@ -472,22 +526,21 @@ class DbtAdapter(BaseAdapter):
472
526
 
473
527
  # set the file paths to watch
474
528
  self.artifacts_files = [
475
- os.path.join(project_root, target_path, 'manifest.json'),
476
- os.path.join(project_root, target_path, 'catalog.json'),
477
- os.path.join(project_root, target_base_path, 'manifest.json'),
478
- os.path.join(project_root, target_base_path, 'catalog.json'),
529
+ os.path.join(project_root, target_path, "manifest.json"),
530
+ os.path.join(project_root, target_path, "catalog.json"),
531
+ os.path.join(project_root, target_base_path, "manifest.json"),
532
+ os.path.join(project_root, target_base_path, "catalog.json"),
479
533
  ]
480
534
 
481
535
  def is_python_model(self, node_id: str, base: Optional[bool] = False):
482
536
  manifest = self.curr_manifest if base is False else self.base_manifest
483
537
  model = manifest.nodes.get(node_id)
484
- if hasattr(model, 'language'):
485
- return model.language == 'python'
538
+ if hasattr(model, "language"):
539
+ return model.language == "python"
486
540
 
487
541
  return False
488
542
 
489
543
  def find_node_by_name(self, node_name, base=False) -> Optional[ManifestNode]:
490
-
491
544
  manifest = self.curr_manifest if base is False else self.base_manifest
492
545
 
493
546
  for key, node in manifest.nodes.items():
@@ -497,22 +550,22 @@ class DbtAdapter(BaseAdapter):
497
550
  return None
498
551
 
499
552
  def get_node_name_by_id(self, unique_id):
500
- if unique_id.startswith('source.'):
553
+ if unique_id.startswith("source."):
501
554
  if unique_id in self.curr_manifest.sources:
502
555
  return self.curr_manifest.sources[unique_id].name
503
556
  elif unique_id in self.base_manifest.sources:
504
557
  return self.base_manifest.sources[unique_id].name
505
- elif unique_id.startswith('metric.'):
558
+ elif unique_id.startswith("metric."):
506
559
  if unique_id in self.curr_manifest.metrics:
507
560
  return self.curr_manifest.metrics[unique_id].name
508
561
  elif unique_id in self.base_manifest.metrics:
509
562
  return self.base_manifest.metrics[unique_id].name
510
- elif unique_id.startswith('exposure.'):
563
+ elif unique_id.startswith("exposure."):
511
564
  if unique_id in self.curr_manifest.exposures:
512
565
  return self.curr_manifest.exposures[unique_id].name
513
566
  elif unique_id in self.base_manifest.exposures:
514
567
  return self.base_manifest.exposures[unique_id].name
515
- elif unique_id.startswith('semantic_model.'):
568
+ elif unique_id.startswith("semantic_model."):
516
569
  if unique_id in self.curr_manifest.semantic_models:
517
570
  return self.curr_manifest.semantic_models[unique_id].name
518
571
  elif unique_id in self.base_manifest.semantic_models:
@@ -527,14 +580,24 @@ class DbtAdapter(BaseAdapter):
527
580
  def get_manifest(self, base: bool):
528
581
  return self.curr_manifest if base is False else self.base_manifest
529
582
 
530
- def generate_sql(self, sql_template: str, base: bool = False, context=None, provided_manifest=None):
583
+ def generate_sql(
584
+ self,
585
+ sql_template: str,
586
+ base: bool = False,
587
+ context=None,
588
+ provided_manifest=None,
589
+ ):
531
590
  if context is None:
532
591
  context = {}
533
592
  manifest = provided_manifest if provided_manifest is not None else as_manifest(self.get_manifest(base))
534
593
  parser = SqlBlockParser(self.runtime_config, manifest, self.runtime_config)
535
594
 
536
- if dbt_version >= dbt_version.parse('v1.8'):
537
- from dbt_common.context import set_invocation_context, get_invocation_context
595
+ if dbt_version >= dbt_version.parse("v1.8"):
596
+ from dbt_common.context import (
597
+ get_invocation_context,
598
+ set_invocation_context,
599
+ )
600
+
538
601
  set_invocation_context({})
539
602
  get_invocation_context()._env = dict(os.environ)
540
603
 
@@ -542,21 +605,35 @@ class DbtAdapter(BaseAdapter):
542
605
  node = parser.parse_remote(sql_template, node_id)
543
606
  process_node(self.runtime_config, manifest, node)
544
607
 
545
- if dbt_version < dbt_version.parse('v1.8'):
608
+ if dbt_version < dbt_version.parse("v1.8"):
546
609
  compiler = self.adapter.get_compiler()
547
610
  compiler.compile_node(node, manifest, context)
548
611
  return node.compiled_code
549
612
  else:
550
- from dbt.context.providers import generate_runtime_model_context
551
613
  from dbt.clients import jinja
614
+ from dbt.context.providers import (
615
+ generate_runtime_macro_context,
616
+ generate_runtime_model_context,
617
+ )
618
+
619
+ # Set up macro resolver for dbt >= 1.8
620
+ macro_manifest = MacroManifest(manifest.macros)
621
+ self.adapter.set_macro_resolver(macro_manifest)
622
+ self.adapter.set_macro_context_generator(generate_runtime_macro_context)
623
+
552
624
  jinja_ctx = generate_runtime_model_context(node, self.runtime_config, manifest)
553
625
  jinja_ctx.update(context)
554
626
  compiled_code = jinja.get_rendered(sql_template, jinja_ctx, node)
555
627
  return compiled_code
556
628
 
557
- def execute(self, sql: str, auto_begin: bool = False, fetch: bool = False, limit: Optional[int] = None) -> Tuple[
558
- any, agate.Table]:
559
- if dbt_version < dbt_version.parse('v1.6'):
629
+ def execute(
630
+ self,
631
+ sql: str,
632
+ auto_begin: bool = False,
633
+ fetch: bool = False,
634
+ limit: Optional[int] = None,
635
+ ) -> Tuple[any, agate.Table]:
636
+ if dbt_version < dbt_version.parse("v1.6"):
560
637
  return self.adapter.execute(sql, auto_begin=auto_begin, fetch=fetch)
561
638
 
562
639
  return self.adapter.execute(sql, auto_begin=auto_begin, fetch=fetch, limit=limit)
@@ -567,7 +644,7 @@ class DbtAdapter(BaseAdapter):
567
644
 
568
645
  node_ids = nodes.keys()
569
646
  parent_map = {}
570
- for k, parents in manifest_dict['parent_map'].items():
647
+ for k, parents in manifest_dict["parent_map"].items():
571
648
  if k not in node_ids:
572
649
  continue
573
650
  parent_map[k] = [parent for parent in parents if parent in node_ids]
@@ -578,8 +655,8 @@ class DbtAdapter(BaseAdapter):
578
655
  manifest = self.curr_manifest if base is False else self.base_manifest
579
656
  manifest_dict = manifest.to_dict()
580
657
 
581
- if node_id in manifest_dict['parent_map']:
582
- return manifest_dict['parent_map'][node_id]
658
+ if node_id in manifest_dict["parent_map"]:
659
+ return manifest_dict["parent_map"][node_id]
583
660
 
584
661
  def get_lineage(self, base: Optional[bool] = False):
585
662
  manifest = self.curr_manifest if base is False else self.base_manifest
@@ -588,19 +665,21 @@ class DbtAdapter(BaseAdapter):
588
665
  return self.get_lineage_cached(base, cache_key)
589
666
 
590
667
  def get_lineage_diff(self) -> LineageDiff:
591
- cache_key = hash((
592
- id(self.base_manifest),
593
- id(self.base_catalog),
594
- id(self.curr_manifest),
595
- id(self.curr_catalog),
596
- ))
668
+ cache_key = hash(
669
+ (
670
+ id(self.base_manifest),
671
+ id(self.base_catalog),
672
+ id(self.curr_manifest),
673
+ id(self.curr_catalog),
674
+ )
675
+ )
597
676
  return self._get_lineage_diff_cached(cache_key)
598
677
 
599
678
  @lru_cache(maxsize=2)
600
679
  def get_lineage_cached(self, base: Optional[bool] = False, cache_key=0):
601
680
  if base is False:
602
- cll_tracker = CLLPerformanceTracking()
603
- cll_tracker.start_lineage()
681
+ perf_tracker = LineagePerfTracker()
682
+ perf_tracker.start_lineage()
604
683
 
605
684
  manifest = self.curr_manifest if base is False else self.base_manifest
606
685
  catalog = self.curr_catalog if base is False else self.base_catalog
@@ -612,48 +691,48 @@ class DbtAdapter(BaseAdapter):
612
691
 
613
692
  nodes = {}
614
693
 
615
- for node in manifest_dict['nodes'].values():
616
- unique_id = node['unique_id']
617
- resource_type = node['resource_type']
694
+ for node in manifest_dict["nodes"].values():
695
+ unique_id = node["unique_id"]
696
+ resource_type = node["resource_type"]
618
697
 
619
- if resource_type not in ['model', 'seed', 'exposure', 'snapshot']:
698
+ if resource_type not in ["model", "seed", "exposure", "snapshot"]:
620
699
  continue
621
700
 
622
701
  nodes[unique_id] = {
623
- 'id': node['unique_id'],
624
- 'name': node['name'],
625
- 'resource_type': node['resource_type'],
626
- 'package_name': node['package_name'],
627
- 'schema': node['schema'],
628
- 'config': node['config'],
629
- 'checksum': node['checksum'],
630
- 'raw_code': node['raw_code'],
702
+ "id": node["unique_id"],
703
+ "name": node["name"],
704
+ "resource_type": node["resource_type"],
705
+ "package_name": node["package_name"],
706
+ "schema": node["schema"],
707
+ "config": node["config"],
708
+ "checksum": node["checksum"],
709
+ "raw_code": node["raw_code"],
631
710
  }
632
711
 
633
712
  # List of <type>.<package_name>.<node_name>.<hash>
634
713
  # model.jaffle_shop.customer_segments
635
714
  # test.jaffle_shop.not_null_customers_customer_id.5c9bf9911d
636
715
  # test.jaffle_shop.unique_customers_customer_id.c5af1ff4b1
637
- child_map: List[str] = manifest_dict['child_map'][unique_id]
716
+ child_map: List[str] = manifest_dict["child_map"][unique_id]
638
717
  cols_not_null = []
639
718
  cols_unique = []
640
719
 
641
720
  for child in child_map:
642
- node_name = node['name']
643
- comps = child.split('.')
644
- if len(comps) < 2:
721
+ node_name = node["name"]
722
+ comps = child.split(".")
723
+ if len(comps) < MIN_DBT_NODE_COMPOSITION:
645
724
  # only happens in unittest
646
725
  continue
647
726
 
648
727
  child_type = comps[0]
649
728
  child_name = comps[2]
650
729
 
651
- not_null_prefix = f'not_null_{node_name}_'
652
- if child_type == 'test' and child_name.startswith(not_null_prefix):
653
- cols_not_null.append(child_name[len(not_null_prefix):])
654
- unique_prefix = f'unique_{node_name}_'
655
- if child_type == 'test' and child_name.startswith(unique_prefix):
656
- cols_unique.append(child_name[len(unique_prefix):])
730
+ not_null_prefix = f"not_null_{node_name}_"
731
+ if child_type == "test" and child_name.startswith(not_null_prefix):
732
+ cols_not_null.append(child_name[len(not_null_prefix) :])
733
+ unique_prefix = f"unique_{node_name}_"
734
+ if child_type == "test" and child_name.startswith(unique_prefix):
735
+ cols_unique.append(child_name[len(unique_prefix) :])
657
736
 
658
737
  if catalog is not None and unique_id in catalog.nodes:
659
738
  columns = {}
@@ -661,70 +740,68 @@ class DbtAdapter(BaseAdapter):
661
740
  for col_name, col_metadata in catalog.nodes[unique_id].columns.items():
662
741
  col = dict(name=col_name, type=col_metadata.type)
663
742
  if col_name in cols_not_null:
664
- col['not_null'] = True
743
+ col["not_null"] = True
665
744
  if col_name in cols_unique:
666
- col['unique'] = True
745
+ col["unique"] = True
667
746
  if not primary_key:
668
747
  primary_key = col_name
669
748
  columns[col_name] = col
670
- nodes[unique_id]['columns'] = columns
749
+ nodes[unique_id]["columns"] = columns
671
750
  if primary_key:
672
- nodes[unique_id]['primary_key'] = primary_key
751
+ nodes[unique_id]["primary_key"] = primary_key
673
752
 
674
- for source in manifest_dict['sources'].values():
675
- unique_id = source['unique_id']
753
+ for source in manifest_dict["sources"].values():
754
+ unique_id = source["unique_id"]
676
755
 
677
756
  nodes[unique_id] = {
678
- 'id': source['unique_id'],
679
- 'name': source['name'],
680
- 'resource_type': source['resource_type'],
681
- 'package_name': source['package_name'],
682
- 'config': source['config'],
757
+ "id": source["unique_id"],
758
+ "name": source["name"],
759
+ "source_name": source["source_name"],
760
+ "resource_type": source["resource_type"],
761
+ "package_name": source["package_name"],
762
+ "config": source["config"],
683
763
  }
684
764
 
685
765
  if catalog is not None and unique_id in catalog.sources:
686
- nodes[unique_id]['columns'] = {
687
- col_name: {
688
- 'name': col_name,
689
- 'type': col_metadata.type
690
- }
766
+ nodes[unique_id]["columns"] = {
767
+ col_name: {"name": col_name, "type": col_metadata.type}
691
768
  for col_name, col_metadata in catalog.sources[unique_id].columns.items()
692
769
  }
693
770
 
694
- for exposure in manifest_dict['exposures'].values():
695
- nodes[exposure['unique_id']] = {
696
- 'id': exposure['unique_id'],
697
- 'name': exposure['name'],
698
- 'resource_type': exposure['resource_type'],
699
- 'package_name': exposure['package_name'],
700
- 'config': exposure['config'],
771
+ for exposure in manifest_dict["exposures"].values():
772
+ nodes[exposure["unique_id"]] = {
773
+ "id": exposure["unique_id"],
774
+ "name": exposure["name"],
775
+ "resource_type": exposure["resource_type"],
776
+ "package_name": exposure["package_name"],
777
+ "config": exposure["config"],
701
778
  }
702
- for metric in manifest_dict['metrics'].values():
703
- nodes[metric['unique_id']] = {
704
- 'id': metric['unique_id'],
705
- 'name': metric['name'],
706
- 'resource_type': metric['resource_type'],
707
- 'package_name': metric['package_name'],
708
- 'config': metric['config'],
779
+ for metric in manifest_dict["metrics"].values():
780
+ nodes[metric["unique_id"]] = {
781
+ "id": metric["unique_id"],
782
+ "name": metric["name"],
783
+ "resource_type": metric["resource_type"],
784
+ "package_name": metric["package_name"],
785
+ "config": metric["config"],
709
786
  }
710
787
 
711
- if 'semantic_models' in manifest_dict:
712
- for semantic_models in manifest_dict['semantic_models'].values():
713
- nodes[semantic_models['unique_id']] = {
714
- 'id': semantic_models['unique_id'],
715
- 'name': semantic_models['name'],
716
- 'resource_type': semantic_models['resource_type'],
717
- 'package_name': semantic_models['package_name'],
718
- 'config': semantic_models['config'],
788
+ if "semantic_models" in manifest_dict:
789
+ for semantic_models in manifest_dict["semantic_models"].values():
790
+ nodes[semantic_models["unique_id"]] = {
791
+ "id": semantic_models["unique_id"],
792
+ "name": semantic_models["name"],
793
+ "resource_type": semantic_models["resource_type"],
794
+ "package_name": semantic_models["package_name"],
795
+ "config": semantic_models["config"],
719
796
  }
720
797
 
721
798
  parent_map = self.build_parent_map(nodes, base)
722
799
 
723
800
  if base is False:
724
- cll_tracker.end_lineage()
725
- cll_tracker.set_total_nodes(len(nodes))
726
- log_performance('model lineage', cll_tracker.to_dict())
727
- cll_tracker.reset()
801
+ perf_tracker.end_lineage()
802
+ perf_tracker.set_total_nodes(len(nodes))
803
+ log_performance("model lineage", perf_tracker.to_dict())
804
+ perf_tracker.reset()
728
805
 
729
806
  return dict(
730
807
  parent_map=parent_map,
@@ -737,18 +814,43 @@ class DbtAdapter(BaseAdapter):
737
814
  def _get_lineage_diff_cached(self, cache_key) -> LineageDiff:
738
815
  base = self.get_lineage(base=True)
739
816
  current = self.get_lineage(base=False)
740
- keys = {
741
- *base.get('nodes', {}).keys(),
742
- *current.get('nodes', {}).keys()
743
- }
744
817
 
745
- # Start to diff
746
- perf_tracking = BreakingPerformanceTracking()
747
- perf_tracking.start_lineage_diff()
818
+ modified_nodes = self.select_nodes(select="state:modified")
819
+ diff = {}
820
+ for node_id in modified_nodes:
821
+ base_node = base.get("nodes", {}).get(node_id)
822
+ curr_node = current.get("nodes", {}).get(node_id)
823
+ if base_node and curr_node:
824
+ diff[node_id] = NodeDiff(change_status="modified")
825
+ elif base_node:
826
+ diff[node_id] = NodeDiff(change_status="removed")
827
+ elif curr_node:
828
+ diff[node_id] = NodeDiff(change_status="added")
829
+
830
+ return LineageDiff(
831
+ base=base,
832
+ current=current,
833
+ diff=diff,
834
+ )
835
+
836
+ @lru_cache(maxsize=128)
837
+ def get_change_analysis_cached(self, node_id: str):
838
+ breaking_perf_tracker = BreakingPerformanceTracking()
839
+ lineage_diff = self.get_lineage_diff()
840
+ diff = lineage_diff.diff
841
+
842
+ if node_id not in diff or diff[node_id].change_status != "modified":
843
+ return diff.get(node_id)
844
+
845
+ breaking_perf_tracker.increment_modified_nodes()
846
+ breaking_perf_tracker.start_lineage_diff()
847
+
848
+ base = lineage_diff.base
849
+ current = lineage_diff.current
748
850
 
749
851
  base_manifest = as_manifest(self.get_manifest(True))
750
852
  curr_manifest = as_manifest(self.get_manifest(False))
751
- perf_tracking.record_checkpoint('manifest')
853
+ breaking_perf_tracker.record_checkpoint("manifest")
752
854
 
753
855
  def ref_func(*args):
754
856
  if len(args) == 1:
@@ -760,7 +862,7 @@ class DbtAdapter(BaseAdapter):
760
862
  return node
761
863
 
762
864
  def source_func(source_name, table_name):
763
- source_name = source_name.replace('-', '_')
865
+ source_name = source_name.replace("-", "_")
764
866
  return f"__{source_name}__{table_name}"
765
867
 
766
868
  jinja_context = dict(
@@ -768,284 +870,534 @@ class DbtAdapter(BaseAdapter):
768
870
  source=source_func,
769
871
  )
770
872
 
771
- # for each node, compare the base and current lineage
772
- diff = {}
773
- for key in keys:
774
- base_node = base.get('nodes', {}).get(key)
775
- curr_node = current.get('nodes', {}).get(key)
776
- if base_node and curr_node:
777
- base_checksum = base_node.get('checksum', {}).get('checksum')
778
- curr_checksum = curr_node.get('checksum', {}).get('checksum')
779
- if base_checksum is None or curr_checksum is None or base_checksum == curr_checksum:
780
- continue
781
-
782
- if curr_node.get('resource_type') == 'model':
783
- try:
784
- perf_tracking.increment_modified_nodes()
785
-
786
- def _get_schema(lineage):
787
- schema = {}
788
- nodes = lineage['nodes']
789
- parent_list = lineage['parent_map'].get(key, [])
790
- for parent_id in parent_list:
791
- parent_node = nodes.get(parent_id)
792
- if parent_node is None:
793
- continue
794
- columns = parent_node.get('columns') or {}
795
- name = parent_node.get('name')
796
- if parent_node.get('resource_type') == 'source':
797
- parts = parent_id.split('.')
798
- source = parts[2]
799
- table = parts[3]
800
- source = source.replace('-', '_')
801
- name = f"__{source}__{table}"
802
- schema[name] = {
803
- name: column.get('type') for name, column in columns.items()
804
- }
805
- return schema
806
-
807
- base_sql = self.generate_sql(
808
- base_node.get('raw_code'),
809
- context=jinja_context,
810
- provided_manifest=base_manifest
811
- )
812
- curr_sql = self.generate_sql(
813
- curr_node.get('raw_code'),
814
- context=jinja_context,
815
- provided_manifest=curr_manifest
816
- )
817
- base_schema = _get_schema(base)
818
- curr_schema = _get_schema(current)
819
- dialect = self.adapter.connections.TYPE
820
-
821
- change = parse_change_category(
822
- base_sql,
823
- curr_sql,
824
- old_schema=base_schema,
825
- new_schema=curr_schema,
826
- dialect=dialect,
827
- perf_tracking=perf_tracking,
828
- )
829
- except Exception:
830
- change = NodeChange(category='unknown')
831
-
832
- diff[key] = NodeDiff(change_status='modified', change=change)
833
- elif base_node:
834
- diff[key] = NodeDiff(change_status='removed')
835
- elif curr_node:
836
- diff[key] = NodeDiff(change_status='added')
837
-
838
- perf_tracking.end_lineage_diff()
839
- log_performance('model lineage diff', perf_tracking.to_dict())
873
+ base_node = base.get("nodes", {}).get(node_id)
874
+ curr_node = current.get("nodes", {}).get(node_id)
875
+ change = NodeChange(category="unknown")
876
+ if (
877
+ curr_node.get("resource_type") in ["model", "snapshot"]
878
+ and curr_node.get("raw_code") is not None
879
+ and base_node.get("raw_code") is not None
880
+ ):
881
+ try:
882
+
883
+ def _get_schema(lineage):
884
+ schema = {}
885
+ nodes = lineage["nodes"]
886
+ parent_list = lineage["parent_map"].get(node_id, [])
887
+ for parent_id in parent_list:
888
+ parent_node = nodes.get(parent_id)
889
+ if parent_node is None:
890
+ continue
891
+ columns = parent_node.get("columns") or {}
892
+ name = parent_node.get("name")
893
+ if parent_node.get("resource_type") == "source":
894
+ parts = parent_id.split(".")
895
+ source = parts[2]
896
+ table = parts[3]
897
+ source = source.replace("-", "_")
898
+ name = f"__{source}__{table}"
899
+ schema[name] = {name: column.get("type") for name, column in columns.items()}
900
+ return schema
901
+
902
+ base_sql = self.generate_sql(
903
+ base_node.get("raw_code"),
904
+ context=jinja_context,
905
+ provided_manifest=base_manifest,
906
+ )
907
+ curr_sql = self.generate_sql(
908
+ curr_node.get("raw_code"),
909
+ context=jinja_context,
910
+ provided_manifest=curr_manifest,
911
+ )
912
+ base_schema = _get_schema(base)
913
+ curr_schema = _get_schema(current)
914
+ dialect = self.adapter.connections.TYPE
915
+ if curr_manifest.metadata.adapter_type is not None:
916
+ dialect = curr_manifest.metadata.adapter_type
917
+
918
+ change = parse_change_category(
919
+ base_sql,
920
+ curr_sql,
921
+ old_schema=base_schema,
922
+ new_schema=curr_schema,
923
+ dialect=dialect,
924
+ perf_tracking=breaking_perf_tracker,
925
+ )
840
926
 
841
- return LineageDiff(
842
- base=base,
843
- current=current,
844
- diff=diff,
927
+ # Make sure that the case of the column names are the same
928
+ changed_columns = {
929
+ column.lower(): change_status for column, change_status in (change.columns or {}).items()
930
+ }
931
+ changed_columns_names = set(changed_columns)
932
+ changed_columns_final = {}
933
+
934
+ base_columns = base_node.get("columns") or {}
935
+ curr_columns = curr_node.get("columns") or {}
936
+ columns_names = set(base_columns) | set(curr_columns)
937
+
938
+ for column_name in columns_names:
939
+ if column_name.lower() in changed_columns_names:
940
+ changed_columns_final[column_name] = changed_columns[column_name.lower()]
941
+
942
+ change.columns = changed_columns_final
943
+ except Exception:
944
+ # TODO: telemetry
945
+ pass
946
+
947
+ breaking_perf_tracker.end_lineage_diff()
948
+ log_performance("change analysis per node", breaking_perf_tracker.to_dict())
949
+ breaking_perf_tracker.reset()
950
+ node_diff = diff.get(node_id)
951
+ node_diff.change = change
952
+ return node_diff
953
+
954
+ def get_cll(
955
+ self,
956
+ node_id: Optional[str] = None,
957
+ column: Optional[str] = None,
958
+ change_analysis: Optional[bool] = False,
959
+ no_cll: Optional[bool] = False,
960
+ no_upstream: Optional[bool] = False,
961
+ no_downstream: Optional[bool] = False,
962
+ no_filter: Optional[bool] = False,
963
+ ) -> CllData:
964
+ cll_tracker = LineagePerfTracker()
965
+ cll_tracker.set_params(
966
+ has_node=node_id is not None,
967
+ has_column=column is not None,
968
+ change_analysis=change_analysis,
969
+ no_cll=no_cll,
970
+ no_upstream=no_upstream,
971
+ no_downstream=no_downstream,
845
972
  )
846
-
847
- def get_cll_by_node_id(self, node_id: str, base: Optional[bool] = False):
848
- cll_tracker = CLLPerformanceTracking()
849
973
  cll_tracker.start_column_lineage()
850
974
 
851
- manifest = self.curr_manifest if base is False else self.base_manifest
975
+ manifest = self.curr_manifest
852
976
  manifest_dict = manifest.to_dict()
853
977
 
854
- parent_ids = find_upstream(node_id, manifest_dict.get('parent_map'))
855
- child_ids = find_downstream(node_id, manifest_dict.get('child_map'))
856
- cll_node_ids = parent_ids.union(child_ids)
857
- cll_node_ids.add(node_id)
978
+ # Find related model nodes
979
+ if node_id is not None:
980
+ cll_node_ids = {node_id}
981
+ else:
982
+ lineage_diff = self.get_lineage_diff()
983
+ cll_node_ids = set(lineage_diff.diff.keys())
984
+
985
+ cll_tracker.set_init_nodes(len(cll_node_ids))
858
986
 
859
- node_manifest = self.get_lineage_nodes_metadata(base=base)
860
987
  nodes = {}
861
- for node_id in cll_node_ids:
862
- if node_id not in node_manifest:
863
- continue
864
- nodes[node_id] = self.get_cll_cached(node_id, base=base)
988
+ columns = {}
989
+ parent_map = {}
990
+ child_map = {}
991
+
992
+ if not no_upstream:
993
+ cll_node_ids = cll_node_ids.union(find_upstream(cll_node_ids, manifest_dict.get("parent_map")))
994
+ if not no_downstream:
995
+ cll_node_ids = cll_node_ids.union(find_downstream(cll_node_ids, manifest_dict.get("child_map")))
996
+
997
+ if not no_cll:
998
+ allowed_related_nodes = set()
999
+ for key in ["sources", "nodes", "exposures", "metrics"]:
1000
+ attr = getattr(manifest, key)
1001
+ allowed_related_nodes.update(set(attr.keys()))
1002
+ if hasattr(manifest, "semantic_models"):
1003
+ attr = getattr(manifest, "semantic_models")
1004
+ allowed_related_nodes.update(set(attr.keys()))
1005
+ for cll_node_id in cll_node_ids:
1006
+ if cll_node_id not in allowed_related_nodes:
1007
+ continue
1008
+ cll_data_one = deepcopy(self.get_cll_cached(cll_node_id, base=False))
1009
+ cll_tracker.increment_cll_nodes()
1010
+ if cll_data_one is None:
1011
+ continue
1012
+
1013
+ nodes[cll_node_id] = cll_data_one.nodes.get(cll_node_id)
1014
+ node_diff = None
1015
+ if change_analysis:
1016
+ node_diff = self.get_change_analysis_cached(cll_node_id)
1017
+ cll_tracker.increment_change_analysis_nodes()
1018
+ if node_diff is not None:
1019
+ nodes[cll_node_id].change_status = node_diff.change_status
1020
+ if node_diff.change is not None:
1021
+ nodes[cll_node_id].change_category = node_diff.change.category
1022
+ for c_id, c in cll_data_one.columns.items():
1023
+ columns[c_id] = c
1024
+ if node_diff is not None:
1025
+ if node_diff.change_status == "added":
1026
+ c.change_status = "added"
1027
+ elif node_diff.change_status == "removed":
1028
+ c.change_status = "removed"
1029
+ elif node_diff.change is not None and node_diff.change.columns is not None:
1030
+ column_diff = node_diff.change.columns.get(c.name)
1031
+ if column_diff:
1032
+ c.change_status = column_diff
1033
+
1034
+ for p_id, parents in cll_data_one.parent_map.items():
1035
+ parent_map[p_id] = parents
1036
+ else:
1037
+ for cll_node_id in cll_node_ids:
1038
+ cll_node = None
1039
+ cll_node_columns: Dict[str, CllColumn] = {}
1040
+
1041
+ if cll_node_id in manifest.sources:
1042
+ cll_node = CllNode.build_cll_node(manifest, "sources", cll_node_id)
1043
+ if self.curr_catalog and cll_node_id in self.curr_catalog.sources:
1044
+ cll_node_columns = {
1045
+ column.name: CllColumn(
1046
+ id=f"{cll_node_id}_{column.name}",
1047
+ table_id=cll_node_id,
1048
+ name=column.name,
1049
+ type=column.type,
1050
+ )
1051
+ for column in self.curr_catalog.sources[cll_node_id].columns.values()
1052
+ }
1053
+ elif cll_node_id in manifest.nodes:
1054
+ cll_node = CllNode.build_cll_node(manifest, "nodes", cll_node_id)
1055
+ if self.curr_catalog and cll_node_id in self.curr_catalog.nodes:
1056
+ cll_node_columns = {
1057
+ column.name: CllColumn(
1058
+ id=f"{cll_node_id}_{column.name}",
1059
+ table_id=cll_node_id,
1060
+ name=column.name,
1061
+ type=column.type,
1062
+ )
1063
+ for column in self.curr_catalog.nodes[cll_node_id].columns.values()
1064
+ }
1065
+ elif cll_node_id in manifest.exposures:
1066
+ cll_node = CllNode.build_cll_node(manifest, "exposures", cll_node_id)
1067
+ elif hasattr(manifest, "semantic_models") and cll_node_id in manifest.semantic_models:
1068
+ cll_node = CllNode.build_cll_node(manifest, "semantic_models", cll_node_id)
1069
+ elif cll_node_id in manifest.metrics:
1070
+ cll_node = CllNode.build_cll_node(manifest, "metrics", cll_node_id)
1071
+
1072
+ if not cll_node:
1073
+ continue
1074
+ nodes[cll_node_id] = cll_node
1075
+
1076
+ node_diff = None
1077
+ if change_analysis:
1078
+ node_diff = self.get_change_analysis_cached(cll_node_id)
1079
+ cll_tracker.increment_change_analysis_nodes()
1080
+ if node_diff is not None:
1081
+ cll_node.change_status = node_diff.change_status
1082
+ if node_diff.change is not None:
1083
+ cll_node.change_category = node_diff.change.category
1084
+ for c, cll_column in cll_node_columns.items():
1085
+ cll_node.columns[c] = cll_column
1086
+ columns[cll_column.id] = cll_column
1087
+ if node_diff.change.columns and c in node_diff.change.columns:
1088
+ cll_column.change_status = node_diff.change.columns[c]
1089
+
1090
+ parent_map[cll_node_id] = manifest.parent_map.get(cll_node_id, [])
1091
+
1092
+ # build the child map
1093
+ for parent_id, parents in parent_map.items():
1094
+ for parent in parents:
1095
+ if parent not in child_map:
1096
+ child_map[parent] = set()
1097
+ child_map[parent].add(parent_id)
1098
+
1099
+ # Find the anchor nodes
1100
+ anchor_node_ids = set()
1101
+ extra_node_ids = set()
1102
+ if node_id is None and column is None:
1103
+ if change_analysis:
1104
+ # If change analysis is requested, we need to find the nodes that have changes
1105
+ lineage_diff = self.get_lineage_diff()
1106
+ for nid, nd in lineage_diff.diff.items():
1107
+ if nd.change_status == "added":
1108
+ anchor_node_ids.add(nid)
1109
+ n = lineage_diff.current["nodes"].get(nid)
1110
+ n_columns = n.get("columns", {})
1111
+ for c in n_columns:
1112
+ anchor_node_ids.add(build_column_key(nid, c))
1113
+ continue
1114
+ if nd.change_status == "removed":
1115
+ extra_node_ids.add(nid)
1116
+ continue
1117
+
1118
+ node_diff = self.get_change_analysis_cached(nid)
1119
+ if node_diff is not None and node_diff.change is not None:
1120
+ extra_node_ids.add(nid)
1121
+ if no_cll:
1122
+ if node_diff.change.category in ["breaking", "partial_breaking", "unknown"]:
1123
+ anchor_node_ids.add(nid)
1124
+ else:
1125
+ if node_diff.change.category in ["breaking", "unknown"]:
1126
+ anchor_node_ids.add(nid)
1127
+ if node_diff.change.columns is not None:
1128
+ for column_name in node_diff.change.columns:
1129
+ anchor_node_ids.add(f"{nid}_{column_name}")
1130
+ else:
1131
+ lineage_diff = self.get_lineage_diff()
1132
+ anchor_node_ids = lineage_diff.diff.keys()
1133
+ elif node_id is not None and column is None:
1134
+ if change_analysis:
1135
+ # If change analysis is requested, we need to find the nodes that have changes
1136
+ node_diff = self.get_change_analysis_cached(node_id)
1137
+ if node_diff is not None and node_diff.change is not None:
1138
+ extra_node_ids.add(node_id)
1139
+ if no_cll:
1140
+ if node_diff.change.category in ["breaking", "partial_breaking", "unknown"]:
1141
+ anchor_node_ids.add(node_id)
1142
+ else:
1143
+ if node_diff.change.category in ["breaking", "unknown"]:
1144
+ anchor_node_ids.add(node_id)
1145
+ if node_diff.change.columns is not None:
1146
+ for column_name in node_diff.change.columns:
1147
+ anchor_node_ids.add(f"{node_id}_{column_name}")
1148
+ else:
1149
+ anchor_node_ids.add(node_id)
1150
+ else:
1151
+ anchor_node_ids.add(node_id)
1152
+ if not no_cll:
1153
+ node = nodes.get(node_id)
1154
+ if node:
1155
+ for column_name in node.columns:
1156
+ column_key = build_column_key(node_id, column_name)
1157
+ anchor_node_ids.add(column_key)
1158
+ else:
1159
+ anchor_node_ids.add(f"{node_id}_{column}")
1160
+
1161
+ cll_tracker.set_anchor_nodes(len(anchor_node_ids))
1162
+ result_node_ids = set(anchor_node_ids)
1163
+ if not no_upstream:
1164
+ result_node_ids = result_node_ids.union(find_upstream(anchor_node_ids, parent_map))
1165
+ if not no_downstream:
1166
+ result_node_ids = result_node_ids.union(find_downstream(anchor_node_ids, child_map))
1167
+
1168
+ # Filter the nodes and columns based on the anchor nodes
1169
+ if not no_filter:
1170
+ nodes = {k: v for k, v in nodes.items() if k in result_node_ids or k in extra_node_ids}
1171
+ columns = {k: v for k, v in columns.items() if k in result_node_ids or k in extra_node_ids}
1172
+
1173
+ for node in nodes.values():
1174
+ node.columns = {
1175
+ k: v for k, v in node.columns.items() if v.id in result_node_ids or v.id in extra_node_ids
1176
+ }
1177
+
1178
+ if change_analysis:
1179
+ node.impacted = node.id in result_node_ids
1180
+
1181
+ parent_map, child_map = filter_dependency_maps(parent_map, child_map, result_node_ids)
865
1182
 
866
1183
  cll_tracker.end_column_lineage()
867
- cll_tracker.set_total_nodes(len(nodes))
868
- log_performance('column level lineage', cll_tracker.to_dict())
1184
+ cll_tracker.set_total_nodes(len(nodes) + len(columns))
1185
+ log_performance("column level lineage", cll_tracker.to_dict())
869
1186
  cll_tracker.reset()
870
1187
 
871
- return dict(nodes=nodes)
1188
+ return CllData(
1189
+ nodes=nodes,
1190
+ columns=columns,
1191
+ parent_map=parent_map,
1192
+ child_map=child_map,
1193
+ )
872
1194
 
873
1195
  @lru_cache(maxsize=128)
874
- def get_cll_cached(self, node_id: str, base: Optional[bool] = False):
875
- nodes = self.get_lineage_nodes_metadata(base=base)
1196
+ def get_cll_cached(self, node_id: str, base: Optional[bool] = False) -> Optional[CllData]:
1197
+ cll_tracker = CLLPerformanceTracking()
876
1198
 
877
- manifest = self.curr_manifest if base is False else self.base_manifest
878
- manifest_dict = manifest.to_dict()
879
- parent_list = []
880
- if node_id in manifest_dict['parent_map']:
881
- parent_list = manifest_dict['parent_map'][node_id]
882
-
883
- node = deepcopy(nodes[node_id])
884
- self.append_column_lineage(node, parent_list, base)
885
- return node
886
-
887
- def append_column_lineage(self, node: Dict, parent_list: List, base: Optional[bool] = False):
888
- def _apply_all_columns(node, trans_type, depends_on):
889
- for col in node.get('columns', {}).values():
890
- col['transformation_type'] = trans_type
891
- col['depends_on'] = depends_on
892
-
893
- def _depend_node_to_id(column_lineage, nodes):
894
- for cl in column_lineage.values():
895
- for depend_on in cl.depends_on:
896
- if depend_on.node.startswith('__'):
897
- for n in nodes.values():
898
- if n.get('resource_type') != 'source':
899
- continue
900
- # __source__table -> source.table
901
- source_table = depend_on.node.lstrip("_").replace("__", ".", 1).lower()
902
- if source_table in n.get('id'):
903
- depend_on.node = n.get('id')
904
- break
905
- else:
906
- for n in nodes.values():
907
- if n.get('name') == depend_on.node.lower():
908
- depend_on.node = n.get('id')
909
- break
1199
+ node, parent_list = self.get_cll_node(node_id, base=base)
1200
+ if node is None:
1201
+ return None
1202
+
1203
+ cll_tracker.set_total_nodes(1)
1204
+ cll_tracker.start_column_lineage()
1205
+
1206
+ def _apply_all_columns(node: CllNode, transformation_type):
1207
+ cll_data = CllData()
1208
+ cll_data.nodes[node.id] = node
1209
+ cll_data.parent_map[node.id] = set(parent_list)
1210
+ for col in node.columns.values():
1211
+ column_id = f"{node.id}_{col.name}"
1212
+ col.transformation_type = transformation_type
1213
+ cll_data.columns[column_id] = col
1214
+ cll_data.parent_map[column_id] = set()
1215
+ return cll_data
910
1216
 
911
- cll_tracker = CLLPerformanceTracking()
912
- nodes = self.get_lineage_nodes_metadata(base=base)
913
1217
  manifest = as_manifest(self.get_manifest(base))
914
- resource_type = node.get('resource_type')
915
- if resource_type not in {'model', 'seed', 'source', 'snapshot'}:
916
- return
1218
+ catalog = self.curr_catalog if base is False else self.base_catalog
1219
+ resource_type = node.resource_type
1220
+ if resource_type not in {"model", "seed", "source", "snapshot"}:
1221
+ return _apply_all_columns(node, "unknown")
917
1222
 
918
- if resource_type == 'source' or resource_type == 'seed':
919
- _apply_all_columns(node, 'source', [])
920
- return
1223
+ if resource_type == "source" or resource_type == "seed":
1224
+ return _apply_all_columns(node, "source")
921
1225
 
922
- if node.get('raw_code') is None or self.is_python_model(node.get('id'), base=base):
923
- _apply_all_columns(node, 'unknown', [])
924
- return
1226
+ if node.raw_code is None or self.is_python_model(node.id, base=base):
1227
+ return _apply_all_columns(node, "unknown")
925
1228
 
926
- # dbt <= 1.8, MetricFlow expects the time spine table to be named metricflow_time_spine
927
- if node.get('name') == 'metricflow_time_spine':
928
- _apply_all_columns(node, 'source', [])
929
- return
1229
+ if node.name == "metricflow_time_spine":
1230
+ return _apply_all_columns(node, "source")
930
1231
 
931
- if not node.get('columns', {}):
932
- # no catalog
933
- return
1232
+ if not node.columns:
1233
+ return _apply_all_columns(node, "unknown")
1234
+
1235
+ table_id_map = {}
934
1236
 
935
1237
  def ref_func(*args):
1238
+ node_name: str = None
1239
+ project_or_package: str = None
1240
+
936
1241
  if len(args) == 1:
937
- node = args[0]
938
- elif len(args) > 1:
939
- node = args[1]
1242
+ node_name = args[0]
940
1243
  else:
941
- return None
942
- return node
1244
+ project_or_package = args[0]
1245
+ node_name = args[1]
943
1246
 
944
- def source_func(source_name, table_name):
945
- return f"__{source_name}__{table_name}"
1247
+ for key, n in manifest.nodes.items():
1248
+ if n.name != node_name:
1249
+ continue
1250
+ if project_or_package is not None and n.package_name != project_or_package:
1251
+ continue
1252
+
1253
+ # replace id "." to "_"
1254
+ unique_id = n.unique_id
1255
+ table_name = unique_id.replace(".", "_")
1256
+ table_id_map[table_name.lower()] = unique_id
1257
+ return table_name
1258
+
1259
+ raise ValueError(f"Cannot find node {node_name} in the manifest")
1260
+
1261
+ def source_func(source_name, name):
1262
+ for key, n in manifest.sources.items():
1263
+ if n.source_name != source_name:
1264
+ continue
1265
+ if n.name != name:
1266
+ continue
1267
+
1268
+ # replace id "." to "_"
1269
+ unique_id = n.unique_id
1270
+ table_name = unique_id.replace(".", "_")
1271
+ table_id_map[table_name.lower()] = unique_id
1272
+ return table_name
946
1273
 
947
- raw_code = node.get('raw_code')
1274
+ raise ValueError(f"Cannot find source {source_name}.{name} in the manifest")
1275
+
1276
+ raw_code = node.raw_code
948
1277
  jinja_context = dict(
949
1278
  ref=ref_func,
950
1279
  source=source_func,
951
1280
  )
952
1281
 
953
1282
  schema = {}
954
- for parent_id in parent_list:
955
- parent_node = nodes.get(parent_id)
956
- if parent_node is None:
957
- continue
958
- columns = parent_node.get('columns') or {}
959
- name = parent_node.get('name')
960
- if parent_node.get('resource_type') == 'source':
961
- parts = parent_id.split('.')
962
- source = parts[2]
963
- table = parts[3]
964
- name = f"__{source}__{table}"
965
- schema[name] = {
966
- name: column.get('type') for name, column in columns.items()
967
- }
1283
+ if catalog is not None:
1284
+ for parent_id in parent_list:
1285
+ table_name = parent_id.replace(".", "_")
1286
+ columns = {}
1287
+ if parent_id in catalog.nodes:
1288
+ for col_name, col_metadata in catalog.nodes[parent_id].columns.items():
1289
+ columns[col_name] = col_metadata.type
1290
+ if parent_id in catalog.sources:
1291
+ for col_name, col_metadata in catalog.sources[parent_id].columns.items():
1292
+ columns[col_name] = col_metadata.type
1293
+ schema[table_name] = columns
968
1294
 
969
1295
  try:
970
- # provide a manifest to speedup and not pollute the manifest
971
1296
  compiled_sql = self.generate_sql(raw_code, base=base, context=jinja_context, provided_manifest=manifest)
972
1297
  dialect = self.adapter.type()
973
- # find adapter type from the manifest, otherwise we use the adapter type from the adapter
974
1298
  if self.get_manifest(base).metadata.adapter_type is not None:
975
1299
  dialect = self.get_manifest(base).metadata.adapter_type
976
- column_lineage = cll(compiled_sql, schema=schema, dialect=dialect)
1300
+ m2c, c2c_map = cll(compiled_sql, schema=schema, dialect=dialect)
977
1301
  except RecceException:
978
- # TODO: provide parsing error message if needed
979
- _apply_all_columns(node, 'unknown', [])
980
1302
  cll_tracker.increment_sqlglot_error_nodes()
981
- return
1303
+ return _apply_all_columns(node, "unknown")
982
1304
  except Exception:
983
- _apply_all_columns(node, 'unknown', [])
984
1305
  cll_tracker.increment_other_error_nodes()
985
- return
986
-
987
- _depend_node_to_id(column_lineage, nodes)
1306
+ return _apply_all_columns(node, "unknown")
1307
+
1308
+ # Add cll dependency to the node.
1309
+ cll_data = CllData()
1310
+ cll_data.nodes[node.id] = node
1311
+ cll_data.columns = {f"{node.id}_{col.name}": col for col in node.columns.values()}
1312
+
1313
+ # parent map for node
1314
+ depends_on = set(parent_list)
1315
+ for d in m2c:
1316
+ parent_key = f"{table_id_map[d.node.lower()]}_{d.column}"
1317
+ depends_on.add(parent_key)
1318
+ cll_data.parent_map[node_id] = depends_on
1319
+
1320
+ # parent map for columns
1321
+ for name, column in node.columns.items():
1322
+ depends_on = set()
1323
+ column_id = f"{node.id}_{name}"
1324
+ if name in c2c_map:
1325
+ for d in c2c_map[name].depends_on:
1326
+ parent_key = f"{table_id_map[d.node.lower()]}_{d.column}"
1327
+ depends_on.add(parent_key)
1328
+ column.transformation_type = c2c_map[name].transformation_type
1329
+ cll_data.parent_map[column_id] = set(depends_on)
988
1330
 
989
- for name, column in node.get('columns', {}).items():
990
- if name in column_lineage:
991
- column['depends_on'] = column_lineage[name].depends_on
992
- column['transformation_type'] = column_lineage[name].type
1331
+ cll_tracker.end_column_lineage()
1332
+ log_performance("column level lineage per node", cll_tracker.to_dict())
1333
+ cll_tracker.reset()
1334
+ return cll_data
993
1335
 
994
- @lru_cache(maxsize=2)
995
- def get_lineage_nodes_metadata(self, base: Optional[bool] = False):
1336
+ def get_cll_node(self, node_id: str, base: Optional[bool] = False) -> Tuple[Optional[CllNode], list[str]]:
996
1337
  manifest = self.curr_manifest if base is False else self.base_manifest
997
1338
  catalog = self.curr_catalog if base is False else self.base_catalog
998
- manifest_dict = manifest.to_dict()
1339
+ parent_list = []
1340
+ node = None
999
1341
 
1000
- nodes = {}
1001
- for node in manifest_dict['nodes'].values():
1002
- unique_id = node['unique_id']
1003
- resource_type = node['resource_type']
1342
+ # model, seed, snapshot
1343
+ if node_id in manifest.nodes:
1344
+ found = manifest.nodes[node_id]
1345
+ unique_id = found.unique_id
1346
+ node = CllNode.build_cll_node(manifest, "nodes", node_id)
1347
+ if hasattr(found.depends_on, "nodes"):
1348
+ parent_list = found.depends_on.nodes
1004
1349
 
1005
- if resource_type not in ['model', 'seed', 'exposure', 'snapshot']:
1006
- continue
1350
+ if catalog is not None and node is not None and unique_id in catalog.nodes:
1351
+ columns = {}
1352
+ for col_name, col_metadata in catalog.nodes[unique_id].columns.items():
1353
+ column_id = f"{unique_id}_{col_name}"
1354
+ col = CllColumn(id=column_id, name=col_name, table_id=unique_id, type=col_metadata.type)
1355
+ columns[col_name] = col
1356
+ node.columns = columns
1007
1357
 
1008
- nodes[unique_id] = {
1009
- 'id': node['unique_id'],
1010
- 'name': node['name'],
1011
- 'resource_type': node['resource_type'],
1012
- 'raw_code': node['raw_code'],
1013
- }
1358
+ # source
1359
+ if node_id in manifest.sources:
1360
+ found = manifest.sources[node_id]
1361
+ unique_id = found.unique_id
1362
+ node = CllNode.build_cll_node(manifest, "sources", node_id)
1363
+ parent_list = []
1014
1364
 
1015
- if catalog is not None and unique_id in catalog.nodes:
1365
+ if catalog is not None and node is not None and unique_id in catalog.sources:
1016
1366
  columns = {}
1017
- for col_name, col_metadata in catalog.nodes[unique_id].columns.items():
1018
- col = dict(name=col_name, type=col_metadata.type)
1367
+ for col_name, col_metadata in catalog.sources[unique_id].columns.items():
1368
+ column_id = f"{unique_id}_{col_name}"
1369
+ col = CllColumn(id=column_id, name=col_name, table_id=unique_id, type=col_metadata.type)
1019
1370
  columns[col_name] = col
1020
- nodes[unique_id]['columns'] = columns
1371
+ node.columns = columns
1021
1372
 
1022
- for source in manifest_dict['sources'].values():
1023
- unique_id = source['unique_id']
1373
+ # exposure
1374
+ if node_id in manifest.exposures:
1375
+ found = manifest.exposures[node_id]
1376
+ node = CllNode.build_cll_node(manifest, "exposures", node_id)
1377
+ if hasattr(found.depends_on, "nodes"):
1378
+ parent_list = found.depends_on.nodes
1024
1379
 
1025
- nodes[unique_id] = {
1026
- 'id': source['unique_id'],
1027
- 'name': source['name'],
1028
- 'resource_type': source['resource_type'],
1029
- }
1380
+ if hasattr(manifest, "semantic_models") and node_id in manifest.semantic_models:
1381
+ found = manifest.semantic_models[node_id]
1382
+ node = CllNode.build_cll_node(manifest, "semantic_models", node_id)
1383
+ if hasattr(found.depends_on, "nodes"):
1384
+ parent_list = found.depends_on.nodes
1030
1385
 
1031
- if catalog is not None and unique_id in catalog.sources:
1032
- nodes[unique_id]['columns'] = {
1033
- col_name: {
1034
- 'name': col_name,
1035
- 'type': col_metadata.type
1036
- }
1037
- for col_name, col_metadata in catalog.sources[unique_id].columns.items()
1038
- }
1386
+ if node_id in manifest.metrics:
1387
+ found = manifest.metrics[node_id]
1388
+ node = CllNode.build_cll_node(manifest, "metrics", node_id)
1389
+ if hasattr(found.depends_on, "nodes"):
1390
+ parent_list = found.depends_on.nodes
1039
1391
 
1040
- return nodes
1392
+ return node, parent_list
1041
1393
 
1042
1394
  def get_manifests_by_id(self, unique_id: str):
1043
1395
  curr_manifest = self.get_manifest(base=False)
1044
1396
  base_manifest = self.get_manifest(base=True)
1045
1397
  if unique_id in curr_manifest.nodes.keys() or unique_id in base_manifest.nodes.keys():
1046
1398
  return {
1047
- 'current': curr_manifest.nodes.get(unique_id),
1048
- 'base': base_manifest.nodes.get(unique_id)
1399
+ "current": curr_manifest.nodes.get(unique_id),
1400
+ "base": base_manifest.nodes.get(unique_id),
1049
1401
  }
1050
1402
  return None
1051
1403
 
@@ -1068,39 +1420,40 @@ class DbtAdapter(BaseAdapter):
1068
1420
  if self.base_path:
1069
1421
  self.artifacts_observer.schedule(event_handler, self.base_path, recursive=False)
1070
1422
  self.artifacts_observer.start()
1071
- logger.info('Start monitoring dbt artifacts')
1423
+ logger.info("Start monitoring dbt artifacts")
1072
1424
 
1073
1425
  def stop_monitor_artifacts(self):
1074
1426
  if self.artifacts_files:
1075
1427
  self.artifacts_observer.stop()
1076
1428
  self.artifacts_observer.join()
1077
- logger.info('Stop monitoring artifacts')
1429
+ logger.info("Stop monitoring artifacts")
1078
1430
 
1079
1431
  def start_monitor_base_env(self, callback: Callable = None):
1080
- target_base_dir = os.path.join(self.runtime_config.project_root, 'target-base')
1432
+ target_base_dir = os.path.join(self.runtime_config.project_root, "target-base")
1081
1433
  base_env_files = {
1082
- os.path.join(target_base_dir, 'manifest.json'),
1083
- os.path.join(target_base_dir, 'catalog.json'),
1434
+ os.path.join(target_base_dir, "manifest.json"),
1435
+ os.path.join(target_base_dir, "catalog.json"),
1084
1436
  }
1085
1437
  event_handler = EnvironmentEventHandler(self.base_env_observer, base_env_files, callback=callback)
1086
1438
  self.base_env_observer.schedule(event_handler, self.runtime_config.project_root, recursive=True)
1087
1439
  self.base_env_observer.start()
1088
- logger.info('Start monitoring base environment')
1440
+ logger.info("Start monitoring base environment")
1089
1441
 
1090
1442
  def stop_monitor_base_env(self):
1091
1443
  if self.base_env_observer.is_alive():
1092
1444
  self.base_env_observer.stop()
1093
1445
  self.base_env_observer.join()
1094
- logger.info('Stop monitoring base environment')
1095
-
1096
- def set_artifacts(self,
1097
- base_manifest: WritableManifest,
1098
- curr_manifest: WritableManifest,
1099
- manifest: Manifest,
1100
- previous_manifest: Manifest,
1101
- base_catalog: CatalogArtifact,
1102
- curr_catalog: CatalogArtifact,
1103
- ):
1446
+ logger.info("Stop monitoring base environment")
1447
+
1448
+ def set_artifacts(
1449
+ self,
1450
+ base_manifest: WritableManifest,
1451
+ curr_manifest: WritableManifest,
1452
+ manifest: Manifest,
1453
+ previous_manifest: Manifest,
1454
+ base_catalog: CatalogArtifact,
1455
+ curr_catalog: CatalogArtifact,
1456
+ ):
1104
1457
  self.curr_manifest = curr_manifest
1105
1458
  self.base_manifest = base_manifest
1106
1459
  self.manifest = manifest
@@ -1109,7 +1462,7 @@ class DbtAdapter(BaseAdapter):
1109
1462
  self.previous_state = previous_state(
1110
1463
  Path(self.base_path),
1111
1464
  Path(self.runtime_config.target_path),
1112
- Path(self.runtime_config.project_root)
1465
+ Path(self.runtime_config.project_root),
1113
1466
  )
1114
1467
  self.previous_state.manifest = previous_manifest
1115
1468
 
@@ -1131,19 +1484,22 @@ class DbtAdapter(BaseAdapter):
1131
1484
  # we capture the original manifest as base and only update the current
1132
1485
  target_type = os.path.basename(os.path.dirname(refresh_file_path))
1133
1486
  if self.target_path and target_type == os.path.basename(self.target_path):
1134
- if refresh_file_path.endswith('manifest.json'):
1487
+ if refresh_file_path.endswith("manifest.json"):
1135
1488
  self.curr_manifest = load_manifest(path=refresh_file_path)
1136
1489
  self.manifest = as_manifest(self.curr_manifest)
1137
1490
  self.get_cll_cached.cache_clear()
1138
- self.get_lineage_nodes_metadata.cache_clear()
1139
- elif refresh_file_path.endswith('catalog.json'):
1491
+ self.get_change_analysis_cached.cache_clear()
1492
+ elif refresh_file_path.endswith("catalog.json"):
1140
1493
  self.curr_catalog = load_catalog(path=refresh_file_path)
1141
- self.get_lineage_nodes_metadata.cache_clear()
1494
+ self.get_cll_cached.cache_clear()
1495
+ self.get_change_analysis_cached.cache_clear()
1142
1496
  elif self.base_path and target_type == os.path.basename(self.base_path):
1143
- if refresh_file_path.endswith('manifest.json'):
1497
+ if refresh_file_path.endswith("manifest.json"):
1144
1498
  self.base_manifest = load_manifest(path=refresh_file_path)
1145
- elif refresh_file_path.endswith('catalog.json'):
1499
+ self.get_change_analysis_cached.cache_clear()
1500
+ elif refresh_file_path.endswith("catalog.json"):
1146
1501
  self.base_catalog = load_catalog(path=refresh_file_path)
1502
+ self.get_change_analysis_cached.cache_clear()
1147
1503
 
1148
1504
  def create_relation(self, model, base=False):
1149
1505
  node = self.find_node_by_name(model, base)
@@ -1157,42 +1513,69 @@ class DbtAdapter(BaseAdapter):
1157
1513
  select: Optional[str] = None,
1158
1514
  exclude: Optional[str] = None,
1159
1515
  packages: Optional[list[str]] = None,
1160
- view_mode: Optional[Literal['all', 'changed_models']] = None,
1516
+ view_mode: Optional[Literal["all", "changed_models"]] = None,
1161
1517
  ) -> Set[str]:
1162
- from dbt.graph import NodeSelector
1163
- from dbt.compilation import Compiler
1164
- from dbt.graph import parse_difference, SelectionIntersection, SelectionUnion
1165
1518
  import dbt.compilation
1519
+ from dbt.compilation import Compiler
1520
+ from dbt.graph import (
1521
+ NodeSelector,
1522
+ SelectionIntersection,
1523
+ SelectionUnion,
1524
+ parse_difference,
1525
+ )
1166
1526
 
1167
1527
  select_list = [select] if select else None
1168
1528
  exclude_list = [exclude] if exclude else None
1169
1529
 
1170
1530
  def _parse_difference(include, exclude):
1171
- if dbt_version < 'v1.8':
1531
+ if dbt_version < "v1.8":
1172
1532
  return parse_difference(include, exclude, "eager")
1173
1533
  else:
1174
1534
  return parse_difference(include, exclude)
1175
1535
 
1176
1536
  specs = [_parse_difference(select_list, exclude_list)]
1177
1537
 
1538
+ # If packages is not provided, use the project name from manifest metadata as default
1539
+ if packages is None:
1540
+ if (
1541
+ self.manifest.metadata
1542
+ and hasattr(self.manifest.metadata, "project_name")
1543
+ and self.manifest.metadata.project_name
1544
+ ):
1545
+ packages = [self.manifest.metadata.project_name]
1546
+
1178
1547
  if packages is not None:
1179
- package_spec = SelectionUnion([_parse_difference([f'package:{p}'], None) for p in packages])
1548
+ package_spec = SelectionUnion([_parse_difference([f"package:{p}"], None) for p in packages])
1180
1549
  specs.append(package_spec)
1181
- if view_mode and view_mode == 'changed_models':
1182
- specs.append(_parse_difference(['1+state:modified+'], None))
1550
+ if view_mode and view_mode == "changed_models":
1551
+ specs.append(_parse_difference(["1+state:modified+"], None))
1183
1552
  spec = SelectionIntersection(specs)
1184
1553
 
1185
1554
  manifest = Manifest()
1555
+ manifest.metadata.adapter_type = self.adapter.type()
1186
1556
  manifest_prev = self.previous_state.manifest
1187
1557
  manifest_curr = self.manifest
1188
1558
 
1189
- manifest.nodes = {**manifest_prev.nodes, **manifest_curr.nodes}
1559
+ manifest.nodes = {**manifest_curr.nodes}
1560
+ # # mark a node is removed if the node id is no in the curr nodes
1561
+ for node_id, node in manifest_prev.nodes.items():
1562
+ if node_id not in manifest.nodes:
1563
+ node_dict = node.to_dict()
1564
+ if "raw_code" in node_dict:
1565
+ node_dict["raw_code"] = "__removed__"
1566
+ node_class = type(node)
1567
+ removed_node = node_class.from_dict(node_dict)
1568
+ manifest.nodes[node_id] = removed_node
1569
+
1190
1570
  manifest.macros = {**manifest_prev.macros, **manifest_curr.macros}
1191
1571
  manifest.sources = {**manifest_prev.sources, **manifest_curr.sources}
1192
1572
  manifest.exposures = {**manifest_prev.exposures, **manifest_curr.exposures}
1193
1573
  manifest.metrics = {**manifest_prev.metrics, **manifest_curr.metrics}
1194
- if hasattr(manifest_prev, 'semantic_models'):
1195
- manifest.semantic_models = {**manifest_prev.semantic_models, **manifest_curr.semantic_models}
1574
+ if hasattr(manifest_prev, "semantic_models"):
1575
+ manifest.semantic_models = {
1576
+ **manifest_prev.semantic_models,
1577
+ **manifest_curr.semantic_models,
1578
+ }
1196
1579
 
1197
1580
  compiler = Compiler(self.runtime_config)
1198
1581
  # disable to print compile states
@@ -1207,28 +1590,28 @@ class DbtAdapter(BaseAdapter):
1207
1590
  return selector.get_selected(spec)
1208
1591
 
1209
1592
  def export_artifacts(self) -> ArtifactsRoot:
1210
- '''
1593
+ """
1211
1594
  Export the artifacts from the current state
1212
- '''
1595
+ """
1213
1596
  artifacts = ArtifactsRoot()
1214
1597
 
1215
1598
  def _load_artifact(artifact):
1216
1599
  return artifact.to_dict() if artifact else None
1217
1600
 
1218
1601
  artifacts.base = {
1219
- 'manifest': _load_artifact(self.base_manifest),
1220
- 'catalog': _load_artifact(self.base_catalog),
1602
+ "manifest": _load_artifact(self.base_manifest),
1603
+ "catalog": _load_artifact(self.base_catalog),
1221
1604
  }
1222
1605
  artifacts.current = {
1223
- 'manifest': _load_artifact(self.curr_manifest),
1224
- 'catalog': _load_artifact(self.curr_catalog),
1606
+ "manifest": _load_artifact(self.curr_manifest),
1607
+ "catalog": _load_artifact(self.curr_catalog),
1225
1608
  }
1226
1609
  return artifacts
1227
1610
 
1228
1611
  def export_artifacts_from_file(self) -> ArtifactsRoot:
1229
- '''
1612
+ """
1230
1613
  Export the artifacts from the state file. This is the old implementation
1231
- '''
1614
+ """
1232
1615
  artifacts = ArtifactsRoot()
1233
1616
  target_path = self.runtime_config.target_path
1234
1617
  target_base_path = self.base_path
@@ -1237,18 +1620,18 @@ class DbtAdapter(BaseAdapter):
1237
1620
  if not os.path.isfile(path):
1238
1621
  return None
1239
1622
 
1240
- with open(path, 'r') as f:
1623
+ with open(path, "r", encoding="utf-8") as f:
1241
1624
  json_content = f.read()
1242
1625
  return json.loads(json_content)
1243
1626
 
1244
1627
  project_root = self.runtime_config.project_root
1245
1628
  artifacts.base = {
1246
- 'manifest': _load_artifact(os.path.join(project_root, target_base_path, 'manifest.json')),
1247
- 'catalog': _load_artifact(os.path.join(project_root, target_base_path, 'catalog.json')),
1629
+ "manifest": _load_artifact(os.path.join(project_root, target_base_path, "manifest.json")),
1630
+ "catalog": _load_artifact(os.path.join(project_root, target_base_path, "catalog.json")),
1248
1631
  }
1249
1632
  artifacts.current = {
1250
- 'manifest': _load_artifact(os.path.join(project_root, target_path, 'manifest.json')),
1251
- 'catalog': _load_artifact(os.path.join(project_root, target_path, 'catalog.json')),
1633
+ "manifest": _load_artifact(os.path.join(project_root, target_path, "manifest.json")),
1634
+ "catalog": _load_artifact(os.path.join(project_root, target_path, "catalog.json")),
1252
1635
  }
1253
1636
  return artifacts
1254
1637
 
@@ -1256,7 +1639,7 @@ class DbtAdapter(BaseAdapter):
1256
1639
  # Merge the artifacts from the state file or cloud
1257
1640
  def _select_artifact(
1258
1641
  original: Union[WritableManifest, CatalogArtifact],
1259
- new: Union[WritableManifest, CatalogArtifact]
1642
+ new: Union[WritableManifest, CatalogArtifact],
1260
1643
  ):
1261
1644
  if merge:
1262
1645
  if not original:
@@ -1267,16 +1650,16 @@ class DbtAdapter(BaseAdapter):
1267
1650
  else:
1268
1651
  return new
1269
1652
 
1270
- self.base_manifest = _select_artifact(self.base_manifest, load_manifest(data=artifacts.base.get('manifest')))
1271
- self.curr_manifest = _select_artifact(self.curr_manifest, load_manifest(data=artifacts.current.get('manifest')))
1272
- self.base_catalog = _select_artifact(self.base_catalog, load_catalog(data=artifacts.base.get('catalog')))
1273
- self.curr_catalog = _select_artifact(self.curr_catalog, load_catalog(data=artifacts.current.get('catalog')))
1653
+ self.base_manifest = _select_artifact(self.base_manifest, load_manifest(data=artifacts.base.get("manifest")))
1654
+ self.curr_manifest = _select_artifact(self.curr_manifest, load_manifest(data=artifacts.current.get("manifest")))
1655
+ self.base_catalog = _select_artifact(self.base_catalog, load_catalog(data=artifacts.base.get("catalog")))
1656
+ self.curr_catalog = _select_artifact(self.curr_catalog, load_catalog(data=artifacts.current.get("catalog")))
1274
1657
 
1275
1658
  self.manifest = as_manifest(self.curr_manifest)
1276
1659
  self.previous_state = previous_state(
1277
1660
  Path(self.base_path),
1278
1661
  Path(self.runtime_config.target_path),
1279
- Path(self.runtime_config.project_root)
1662
+ Path(self.runtime_config.project_root),
1280
1663
  )
1281
1664
  self.previous_state.manifest = as_manifest(self.base_manifest)
1282
1665
 
@@ -1292,7 +1675,8 @@ class DbtAdapter(BaseAdapter):
1292
1675
 
1293
1676
  if not self.curr_manifest or not self.base_manifest:
1294
1677
  raise Exception(
1295
- 'No enough dbt artifacts in the state file. Please use the latest recce to generate the recce state')
1678
+ "No enough dbt artifacts in the state file. Please use the latest recce to generate the recce state"
1679
+ )
1296
1680
 
1297
1681
  @contextmanager
1298
1682
  def connection_named(self, name: str) -> Iterator[None]: