recce-nightly 1.10.0.20250625__py3-none-any.whl → 1.30.0.20251221__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of recce-nightly might be problematic. Click here for more details.

Files changed (229)
  1. recce/VERSION +1 -1
  2. recce/__init__.py +5 -0
  3. recce/adapter/dbt_adapter/__init__.py +343 -245
  4. recce/apis/check_api.py +20 -14
  5. recce/apis/check_events_api.py +353 -0
  6. recce/apis/check_func.py +5 -5
  7. recce/apis/run_func.py +32 -3
  8. recce/artifact.py +76 -3
  9. recce/cli.py +705 -82
  10. recce/config.py +2 -2
  11. recce/connect_to_cloud.py +1 -1
  12. recce/core.py +3 -3
  13. recce/data/404/index.html +2 -0
  14. recce/data/404.html +2 -22
  15. recce/data/__next.@lineage.!KHNsb3Qp.__PAGE__.txt +7 -0
  16. recce/data/__next.@lineage.!KHNsb3Qp.txt +4 -0
  17. recce/data/__next.__PAGE__.txt +6 -0
  18. recce/data/__next._full.txt +32 -0
  19. recce/data/__next._head.txt +8 -0
  20. recce/data/__next._index.txt +14 -0
  21. recce/data/__next._tree.txt +8 -0
  22. recce/data/_next/static/chunks/025a7e3e3f9f40ae.js +1 -0
  23. recce/data/_next/static/chunks/0ce56d67ef5779ca.js +4 -0
  24. recce/data/_next/static/chunks/1a6a78780155dac7.js +48 -0
  25. recce/data/_next/static/chunks/1de8485918b9182a.css +2 -0
  26. recce/data/_next/static/chunks/1e4b1b50d1e34993.js +1 -0
  27. recce/data/_next/static/chunks/206d5d181e4c738e.js +1 -0
  28. recce/data/_next/static/chunks/2c357efc34c5b859.js +25 -0
  29. recce/data/_next/static/chunks/2e9d95d2d48c479c.js +1 -0
  30. recce/data/_next/static/chunks/2f016dc4a3edad2e.js +2 -0
  31. recce/data/_next/static/chunks/313251962d698f7c.js +1 -0
  32. recce/data/_next/static/chunks/3a9f021f38eb5574.css +1 -0
  33. recce/data/_next/static/chunks/40079da8d2b8f651.js +1 -0
  34. recce/data/_next/static/chunks/4599182bffb64661.js +38 -0
  35. recce/data/_next/static/chunks/4e62f6e184173580.js +1 -0
  36. recce/data/_next/static/chunks/5c4dfb0d09eaa401.js +1 -0
  37. recce/data/_next/static/chunks/69e4f06ccfdfc3ac.js +1 -0
  38. recce/data/_next/static/chunks/6b206cb4707d6bee.js +1 -0
  39. recce/data/_next/static/chunks/6d8557f062aa4386.css +1 -0
  40. recce/data/_next/static/chunks/7fbe3650bd83b6b5.js +1 -0
  41. recce/data/_next/static/chunks/83fa823a825674f6.js +1 -0
  42. recce/data/_next/static/chunks/848a6c9b5f55f7ed.js +1 -0
  43. recce/data/_next/static/chunks/859462b0858aef88.css +2 -0
  44. recce/data/_next/static/chunks/923964f18c87d0f1.css +1 -0
  45. recce/data/_next/static/chunks/939390f911895d7c.js +48 -0
  46. recce/data/_next/static/chunks/99a9817237a07f43.js +1 -0
  47. recce/data/_next/static/chunks/9fed8b4b2b924054.js +5 -0
  48. recce/data/_next/static/chunks/b6949f6c5892110c.js +1 -0
  49. recce/data/_next/static/chunks/b851a1d3f8149828.js +1 -0
  50. recce/data/_next/static/chunks/c734f9ad957de0b4.js +1 -0
  51. recce/data/_next/static/chunks/cdde321b0ec75717.js +2 -0
  52. recce/data/_next/static/chunks/d0f91117d77ff844.css +1 -0
  53. recce/data/_next/static/chunks/d6c8667911c2500f.js +1 -0
  54. recce/data/_next/static/chunks/da8dab68c02752cf.js +74 -0
  55. recce/data/_next/static/chunks/dc074049c9d12d97.js +109 -0
  56. recce/data/_next/static/chunks/ee7f1a8227342421.js +1 -0
  57. recce/data/_next/static/chunks/fa2f4e56c2fccc73.js +1 -0
  58. recce/data/_next/static/chunks/turbopack-1fad664f62979b93.js +3 -0
  59. recce/data/_next/static/media/favicon.a8d38d84.ico +0 -0
  60. recce/data/_next/static/media/montserrat-cyrillic-800-normal.d80d830d.woff2 +0 -0
  61. recce/data/_next/static/media/{montserrat-cyrillic-800-normal.bd5c9f50.woff → montserrat-cyrillic-800-normal.f9d58125.woff} +0 -0
  62. recce/data/_next/static/media/montserrat-cyrillic-ext-800-normal.076c2a93.woff2 +0 -0
  63. recce/data/_next/static/media/montserrat-latin-800-normal.cde454cc.woff2 +0 -0
  64. recce/data/_next/static/media/{montserrat-latin-800-normal.fc315020.woff → montserrat-latin-800-normal.d5761935.woff} +0 -0
  65. recce/data/_next/static/media/montserrat-latin-ext-800-normal.40ec0659.woff2 +0 -0
  66. recce/data/_next/static/media/{montserrat-latin-ext-800-normal.2e5381b2.woff → montserrat-latin-ext-800-normal.b671449b.woff} +0 -0
  67. recce/data/_next/static/media/{montserrat-vietnamese-800-normal.20c545e6.woff → montserrat-vietnamese-800-normal.9f7b8541.woff} +0 -0
  68. recce/data/_next/static/media/montserrat-vietnamese-800-normal.f9eb854e.woff2 +0 -0
  69. recce/data/_next/static/nX-Uz0AH6Tc6hIQUFGqaB/_buildManifest.js +11 -0
  70. recce/data/_next/static/nX-Uz0AH6Tc6hIQUFGqaB/_clientMiddlewareManifest.json +1 -0
  71. recce/data/_not-found/__next._full.txt +24 -0
  72. recce/data/_not-found/__next._head.txt +8 -0
  73. recce/data/_not-found/__next._index.txt +13 -0
  74. recce/data/_not-found/__next._not-found.__PAGE__.txt +5 -0
  75. recce/data/_not-found/__next._not-found.txt +4 -0
  76. recce/data/_not-found/__next._tree.txt +6 -0
  77. recce/data/_not-found/index.html +2 -0
  78. recce/data/_not-found/index.txt +24 -0
  79. recce/data/auth_callback.html +1 -1
  80. recce/data/checks/__next.@lineage.__DEFAULT__.txt +7 -0
  81. recce/data/checks/__next._full.txt +39 -0
  82. recce/data/checks/__next._head.txt +8 -0
  83. recce/data/checks/__next._index.txt +14 -0
  84. recce/data/checks/__next._tree.txt +8 -0
  85. recce/data/checks/__next.checks.__PAGE__.txt +10 -0
  86. recce/data/checks/__next.checks.txt +4 -0
  87. recce/data/checks/index.html +2 -0
  88. recce/data/checks/index.txt +39 -0
  89. recce/data/index.html +2 -27
  90. recce/data/index.txt +32 -8
  91. recce/data/lineage/__next.@lineage.__DEFAULT__.txt +7 -0
  92. recce/data/lineage/__next._full.txt +39 -0
  93. recce/data/lineage/__next._head.txt +8 -0
  94. recce/data/lineage/__next._index.txt +14 -0
  95. recce/data/lineage/__next._tree.txt +8 -0
  96. recce/data/lineage/__next.lineage.__PAGE__.txt +10 -0
  97. recce/data/lineage/__next.lineage.txt +4 -0
  98. recce/data/lineage/index.html +2 -0
  99. recce/data/lineage/index.txt +39 -0
  100. recce/data/query/__next.@lineage.__DEFAULT__.txt +7 -0
  101. recce/data/query/__next._full.txt +37 -0
  102. recce/data/query/__next._head.txt +8 -0
  103. recce/data/query/__next._index.txt +14 -0
  104. recce/data/query/__next._tree.txt +8 -0
  105. recce/data/query/__next.query.__PAGE__.txt +9 -0
  106. recce/data/query/__next.query.txt +4 -0
  107. recce/data/query/index.html +2 -0
  108. recce/data/query/index.txt +37 -0
  109. recce/event/CONFIG.bak +1 -0
  110. recce/event/__init__.py +9 -8
  111. recce/event/collector.py +6 -2
  112. recce/event/track.py +10 -0
  113. recce/github.py +1 -1
  114. recce/mcp_server.py +725 -0
  115. recce/models/check.py +433 -15
  116. recce/models/types.py +61 -2
  117. recce/pull_request.py +1 -1
  118. recce/run.py +37 -17
  119. recce/server.py +216 -21
  120. recce/state/__init__.py +31 -0
  121. recce/state/cloud.py +644 -0
  122. recce/state/const.py +26 -0
  123. recce/state/local.py +56 -0
  124. recce/state/state.py +119 -0
  125. recce/state/state_loader.py +174 -0
  126. recce/summary.py +25 -3
  127. recce/tasks/dataframe.py +63 -1
  128. recce/tasks/query.py +40 -3
  129. recce/tasks/rowcount.py +4 -1
  130. recce/tasks/schema.py +4 -1
  131. recce/tasks/utils.py +147 -0
  132. recce/tasks/valuediff.py +85 -57
  133. recce/util/api_token.py +11 -2
  134. recce/util/breaking.py +10 -1
  135. recce/util/cll.py +1 -2
  136. recce/util/cloud/__init__.py +15 -0
  137. recce/util/cloud/base.py +115 -0
  138. recce/util/cloud/check_events.py +190 -0
  139. recce/util/cloud/checks.py +242 -0
  140. recce/util/io.py +2 -2
  141. recce/util/lineage.py +19 -18
  142. recce/util/perf_tracking.py +85 -0
  143. recce/util/recce_cloud.py +254 -5
  144. recce/util/startup_perf.py +121 -0
  145. recce/yaml/__init__.py +2 -2
  146. {recce_nightly-1.10.0.20250625.dist-info → recce_nightly-1.30.0.20251221.dist-info}/METADATA +91 -71
  147. recce_nightly-1.30.0.20251221.dist-info/RECORD +183 -0
  148. {recce_nightly-1.10.0.20250625.dist-info → recce_nightly-1.30.0.20251221.dist-info}/WHEEL +1 -2
  149. recce/data/_next/static/abCX3x3UoIdRLEDWxx4xd/_buildManifest.js +0 -1
  150. recce/data/_next/static/chunks/181-acc61ddada3bc0ca.js +0 -43
  151. recce/data/_next/static/chunks/1bff33f1-1ef85cf5e658a751.js +0 -1
  152. recce/data/_next/static/chunks/217-879a84d70f7a907c.js +0 -2
  153. recce/data/_next/static/chunks/29e3cc0d-60045b2e47aa3916.js +0 -1
  154. recce/data/_next/static/chunks/36e1c10d-8e7be4a6c1f6ab2d.js +0 -1
  155. recce/data/_next/static/chunks/3998a672-03adacad07b346ac.js +0 -1
  156. recce/data/_next/static/chunks/3a92ee20-1081c360214f9602.js +0 -1
  157. recce/data/_next/static/chunks/42-cd3c06533f5fd47c.js +0 -9
  158. recce/data/_next/static/chunks/450c323b-fd94e7ffaa4a5efa.js +0 -1
  159. recce/data/_next/static/chunks/47d8844f-929aed9b1c73a905.js +0 -1
  160. recce/data/_next/static/chunks/608-3b079b544e5d5f5e.js +0 -15
  161. recce/data/_next/static/chunks/6dc81886-adbfa45836061d79.js +0 -1
  162. recce/data/_next/static/chunks/7a8a3e83-edf6dc64b5d5f0a5.js +0 -1
  163. recce/data/_next/static/chunks/7f27ae6c-d5f0438edd5c2a5b.js +0 -1
  164. recce/data/_next/static/chunks/86730205-cfb14e3f051bab35.js +0 -1
  165. recce/data/_next/static/chunks/8d700b6a.8bb140898499c512.js +0 -1
  166. recce/data/_next/static/chunks/92-607cd1af83c41f43.js +0 -1
  167. recce/data/_next/static/chunks/9746af58-a42b7d169cacadf0.js +0 -1
  168. recce/data/_next/static/chunks/a30376cd-de84559016d7e133.js +0 -1
  169. recce/data/_next/static/chunks/app/_not-found/page-01ed58b7f971d311.js +0 -1
  170. recce/data/_next/static/chunks/app/layout-177a410a97e0d018.js +0 -1
  171. recce/data/_next/static/chunks/app/page-da6e046a8235dbfc.js +0 -1
  172. recce/data/_next/static/chunks/b63b1b3f-4282bdcf459e075c.js +0 -1
  173. recce/data/_next/static/chunks/bbda5537-9ec25eb1dd62348a.js +0 -1
  174. recce/data/_next/static/chunks/c132bf7d-08cb668a789d6afd.js +0 -1
  175. recce/data/_next/static/chunks/ce84277d-2e5d1d46910cf052.js +0 -1
  176. recce/data/_next/static/chunks/febdd86e-c6b525341634b860.js +0 -54
  177. recce/data/_next/static/chunks/fee69bc6-2dbccaf9b90474e6.js +0 -1
  178. recce/data/_next/static/chunks/framework-ded83d71b51ce901.js +0 -1
  179. recce/data/_next/static/chunks/main-app-39061b0166c47f55.js +0 -1
  180. recce/data/_next/static/chunks/main-b5b3ae20a1405261.js +0 -1
  181. recce/data/_next/static/chunks/pages/_app-437c455677d62394.js +0 -1
  182. recce/data/_next/static/chunks/pages/_error-e7650df18ca04bde.js +0 -1
  183. recce/data/_next/static/chunks/webpack-7b49d5ba7e3a434d.js +0 -1
  184. recce/data/_next/static/css/17a96168e3a9db13.css +0 -1
  185. recce/data/_next/static/css/1b121dc4d36aeb4d.css +0 -3
  186. recce/data/_next/static/css/35c6679a098e1e34.css +0 -1
  187. recce/data/_next/static/css/951e2e0eea2d4a5b.css +0 -14
  188. recce/data/_next/static/media/montserrat-cyrillic-800-normal.22628180.woff2 +0 -0
  189. recce/data/_next/static/media/montserrat-cyrillic-ext-800-normal.94a63aea.woff2 +0 -0
  190. recce/data/_next/static/media/montserrat-latin-800-normal.6f8fa298.woff2 +0 -0
  191. recce/data/_next/static/media/montserrat-latin-ext-800-normal.013b84f9.woff2 +0 -0
  192. recce/data/_next/static/media/montserrat-vietnamese-800-normal.c0035377.woff2 +0 -0
  193. recce/data/_next/static/media/reload-image.79aabb7d.svg +0 -4
  194. recce/state.py +0 -786
  195. recce_nightly-1.10.0.20250625.dist-info/RECORD +0 -154
  196. recce_nightly-1.10.0.20250625.dist-info/top_level.txt +0 -2
  197. tests/__init__.py +0 -0
  198. tests/adapter/__init__.py +0 -0
  199. tests/adapter/dbt_adapter/__init__.py +0 -0
  200. tests/adapter/dbt_adapter/conftest.py +0 -17
  201. tests/adapter/dbt_adapter/dbt_test_helper.py +0 -298
  202. tests/adapter/dbt_adapter/test_dbt_adapter.py +0 -25
  203. tests/adapter/dbt_adapter/test_dbt_cll.py +0 -384
  204. tests/adapter/dbt_adapter/test_selector.py +0 -202
  205. tests/tasks/__init__.py +0 -0
  206. tests/tasks/conftest.py +0 -4
  207. tests/tasks/test_histogram.py +0 -129
  208. tests/tasks/test_lineage.py +0 -55
  209. tests/tasks/test_preset_checks.py +0 -64
  210. tests/tasks/test_profile.py +0 -397
  211. tests/tasks/test_query.py +0 -151
  212. tests/tasks/test_row_count.py +0 -135
  213. tests/tasks/test_schema.py +0 -122
  214. tests/tasks/test_top_k.py +0 -77
  215. tests/tasks/test_valuediff.py +0 -85
  216. tests/test_cli.py +0 -133
  217. tests/test_config.py +0 -43
  218. tests/test_connect_to_cloud.py +0 -82
  219. tests/test_core.py +0 -29
  220. tests/test_dbt.py +0 -36
  221. tests/test_pull_request.py +0 -130
  222. tests/test_server.py +0 -104
  223. tests/test_state.py +0 -134
  224. tests/test_summary.py +0 -65
  225. /recce/data/_next/static/chunks/{polyfills-42372ed130431b0a.js → a6dad97d9634a72d.js} +0 -0
  226. /recce/data/_next/static/media/{montserrat-cyrillic-ext-800-normal.e6e0d8d0.woff → montserrat-cyrillic-ext-800-normal.a4fa76b5.woff} +0 -0
  227. /recce/data/_next/static/{abCX3x3UoIdRLEDWxx4xd → nX-Uz0AH6Tc6hIQUFGqaB}/_ssgManifest.js +0 -0
  228. {recce_nightly-1.10.0.20250625.dist-info → recce_nightly-1.30.0.20251221.dist-info}/entry_points.txt +0 -0
  229. {recce_nightly-1.10.0.20250625.dist-info → recce_nightly-1.30.0.20251221.dist-info}/licenses/LICENSE +0 -0
@@ -3,6 +3,7 @@ import logging
3
3
  import os
4
4
  import uuid
5
5
  from contextlib import contextmanager
6
+ from copy import deepcopy
6
7
  from dataclasses import dataclass, fields
7
8
  from errno import ENOENT
8
9
  from functools import lru_cache
@@ -25,12 +26,13 @@ from recce.event import log_performance
25
26
  from recce.exceptions import RecceException
26
27
  from recce.util.cll import CLLPerformanceTracking, cll
27
28
  from recce.util.lineage import (
29
+ build_column_key,
28
30
  filter_dependency_maps,
29
- filter_lineage_vertices,
30
- find_column_dependencies,
31
31
  find_downstream,
32
32
  find_upstream,
33
33
  )
34
+ from recce.util.perf_tracking import LineagePerfTracker
35
+ from recce.util.startup_perf import track_timing
34
36
 
35
37
  from ...tasks.profile import ProfileTask
36
38
  from ...util.breaking import BreakingPerformanceTracking, parse_change_category
@@ -109,6 +111,7 @@ from dbt.config.runtime import RuntimeConfig # noqa: E402
109
111
  from dbt.contracts.graph.manifest import ( # noqa: E402
110
112
  MacroManifest,
111
113
  Manifest,
114
+ ManifestMetadata,
112
115
  WritableManifest,
113
116
  )
114
117
  from dbt.contracts.graph.nodes import ManifestNode # noqa: E402
@@ -208,9 +211,12 @@ def as_manifest(m: WritableManifest) -> Manifest:
208
211
  new_data = {k: v for k, v in data.items() if k in all_fields}
209
212
  return Manifest(**new_data)
210
213
  else:
211
- return Manifest.from_writable_manifest(m)
214
+ result = Manifest.from_writable_manifest(m)
215
+ result.metadata = ManifestMetadata(**m.metadata.__dict__)
216
+ return result
212
217
 
213
218
 
219
+ @track_timing(record_size=True)
214
220
  def load_manifest(path: str = None, data: dict = None):
215
221
  if path is not None:
216
222
  if not os.path.isfile(path):
@@ -220,6 +226,7 @@ def load_manifest(path: str = None, data: dict = None):
220
226
  return WritableManifest.upgrade_schema_version(data)
221
227
 
222
228
 
229
+ @track_timing(record_size=True)
223
230
  def load_catalog(path: str = None, data: dict = None):
224
231
  if path is not None:
225
232
  if not os.path.isfile(path):
@@ -278,7 +285,7 @@ class DbtArgs:
278
285
  target_path: Optional[str] = (None,)
279
286
  project_only_flags: Optional[Dict[str, Any]] = None
280
287
  which: Optional[str] = None
281
- state_modified_compare_more_unrendered_values: Optional[bool] = False # new flag added since dbt v1.9
288
+ state_modified_compare_more_unrendered_values: Optional[bool] = True # new flag added since dbt v1.9
282
289
 
283
290
 
284
291
  @dataclass
@@ -407,7 +414,7 @@ class DbtAdapter(BaseAdapter):
407
414
 
408
415
  if self.adapter.connections.TYPE == "databricks":
409
416
  # reference: get_columns_in_relation (dbt/adapters/databricks/impl.py)
410
- from dbt.adapters.databricks import DatabricksColumn
417
+ from dbt.adapters.databricks.column import DatabricksColumn
411
418
 
412
419
  rows = columns
413
420
  columns = []
@@ -472,6 +479,7 @@ class DbtAdapter(BaseAdapter):
472
479
 
473
480
  return result
474
481
 
482
+ @track_timing("artifact_load")
475
483
  def load_artifacts(self):
476
484
  """
477
485
  Load the artifacts from the 'target' and 'target-base' directory
@@ -487,16 +495,20 @@ class DbtAdapter(BaseAdapter):
487
495
 
488
496
  # load the artifacts
489
497
  path = os.path.join(project_root, target_path, "manifest.json")
490
- curr_manifest = load_manifest(path=path)
498
+ curr_manifest = load_manifest(path=path, timing_name="curr_manifest")
491
499
  if curr_manifest is None:
492
500
  raise FileNotFoundError(ENOENT, os.strerror(ENOENT), path)
493
501
  path = os.path.join(project_root, target_base_path, "manifest.json")
494
- base_manifest = load_manifest(path=path)
502
+ base_manifest = load_manifest(path=path, timing_name="base_manifest")
495
503
  if base_manifest is None:
496
504
  raise FileNotFoundError(ENOENT, os.strerror(ENOENT), path)
497
505
 
498
- curr_catalog = load_catalog(path=os.path.join(project_root, target_path, "catalog.json"))
499
- base_catalog = load_catalog(path=os.path.join(project_root, target_base_path, "catalog.json"))
506
+ curr_catalog = load_catalog(
507
+ path=os.path.join(project_root, target_path, "catalog.json"), timing_name="curr_catalog"
508
+ )
509
+ base_catalog = load_catalog(
510
+ path=os.path.join(project_root, target_base_path, "catalog.json"), timing_name="base_catalog"
511
+ )
500
512
 
501
513
  # set the value if all the artifacts are loaded successfully
502
514
  self.curr_manifest = curr_manifest
@@ -599,7 +611,15 @@ class DbtAdapter(BaseAdapter):
599
611
  return node.compiled_code
600
612
  else:
601
613
  from dbt.clients import jinja
602
- from dbt.context.providers import generate_runtime_model_context
614
+ from dbt.context.providers import (
615
+ generate_runtime_macro_context,
616
+ generate_runtime_model_context,
617
+ )
618
+
619
+ # Set up macro resolver for dbt >= 1.8
620
+ macro_manifest = MacroManifest(manifest.macros)
621
+ self.adapter.set_macro_resolver(macro_manifest)
622
+ self.adapter.set_macro_context_generator(generate_runtime_macro_context)
603
623
 
604
624
  jinja_ctx = generate_runtime_model_context(node, self.runtime_config, manifest)
605
625
  jinja_ctx.update(context)
@@ -658,8 +678,8 @@ class DbtAdapter(BaseAdapter):
658
678
  @lru_cache(maxsize=2)
659
679
  def get_lineage_cached(self, base: Optional[bool] = False, cache_key=0):
660
680
  if base is False:
661
- cll_tracker = CLLPerformanceTracking()
662
- cll_tracker.start_lineage()
681
+ perf_tracker = LineagePerfTracker()
682
+ perf_tracker.start_lineage()
663
683
 
664
684
  manifest = self.curr_manifest if base is False else self.base_manifest
665
685
  catalog = self.curr_catalog if base is False else self.base_catalog
@@ -736,6 +756,7 @@ class DbtAdapter(BaseAdapter):
736
756
  nodes[unique_id] = {
737
757
  "id": source["unique_id"],
738
758
  "name": source["name"],
759
+ "source_name": source["source_name"],
739
760
  "resource_type": source["resource_type"],
740
761
  "package_name": source["package_name"],
741
762
  "config": source["config"],
@@ -777,10 +798,10 @@ class DbtAdapter(BaseAdapter):
777
798
  parent_map = self.build_parent_map(nodes, base)
778
799
 
779
800
  if base is False:
780
- cll_tracker.end_lineage()
781
- cll_tracker.set_total_nodes(len(nodes))
782
- log_performance("model lineage", cll_tracker.to_dict())
783
- cll_tracker.reset()
801
+ perf_tracker.end_lineage()
802
+ perf_tracker.set_total_nodes(len(nodes))
803
+ log_performance("model lineage", perf_tracker.to_dict())
804
+ perf_tracker.reset()
784
805
 
785
806
  return dict(
786
807
  parent_map=parent_map,
@@ -793,15 +814,43 @@ class DbtAdapter(BaseAdapter):
793
814
  def _get_lineage_diff_cached(self, cache_key) -> LineageDiff:
794
815
  base = self.get_lineage(base=True)
795
816
  current = self.get_lineage(base=False)
796
- keys = {*base.get("nodes", {}).keys(), *current.get("nodes", {}).keys()}
797
817
 
798
- # Start to diff
799
- perf_tracking = BreakingPerformanceTracking()
800
- perf_tracking.start_lineage_diff()
818
+ modified_nodes = self.select_nodes(select="state:modified")
819
+ diff = {}
820
+ for node_id in modified_nodes:
821
+ base_node = base.get("nodes", {}).get(node_id)
822
+ curr_node = current.get("nodes", {}).get(node_id)
823
+ if base_node and curr_node:
824
+ diff[node_id] = NodeDiff(change_status="modified")
825
+ elif base_node:
826
+ diff[node_id] = NodeDiff(change_status="removed")
827
+ elif curr_node:
828
+ diff[node_id] = NodeDiff(change_status="added")
829
+
830
+ return LineageDiff(
831
+ base=base,
832
+ current=current,
833
+ diff=diff,
834
+ )
835
+
836
+ @lru_cache(maxsize=128)
837
+ def get_change_analysis_cached(self, node_id: str):
838
+ breaking_perf_tracker = BreakingPerformanceTracking()
839
+ lineage_diff = self.get_lineage_diff()
840
+ diff = lineage_diff.diff
841
+
842
+ if node_id not in diff or diff[node_id].change_status != "modified":
843
+ return diff.get(node_id)
844
+
845
+ breaking_perf_tracker.increment_modified_nodes()
846
+ breaking_perf_tracker.start_lineage_diff()
847
+
848
+ base = lineage_diff.base
849
+ current = lineage_diff.current
801
850
 
802
851
  base_manifest = as_manifest(self.get_manifest(True))
803
852
  curr_manifest = as_manifest(self.get_manifest(False))
804
- perf_tracking.record_checkpoint("manifest")
853
+ breaking_perf_tracker.record_checkpoint("manifest")
805
854
 
806
855
  def ref_func(*args):
807
856
  if len(args) == 1:
@@ -821,111 +870,106 @@ class DbtAdapter(BaseAdapter):
821
870
  source=source_func,
822
871
  )
823
872
 
824
- # for each node, compare the base and current lineage
825
- diff = {}
826
- for key in keys:
827
- base_node = base.get("nodes", {}).get(key)
828
- curr_node = current.get("nodes", {}).get(key)
829
- if base_node and curr_node:
830
- base_checksum = base_node.get("checksum", {}).get("checksum")
831
- curr_checksum = curr_node.get("checksum", {}).get("checksum")
832
- change = None
833
- if base_checksum is None or curr_checksum is None or base_checksum == curr_checksum:
834
- continue
835
-
836
- if curr_node.get("resource_type") == "model":
837
- try:
838
- perf_tracking.increment_modified_nodes()
839
-
840
- def _get_schema(lineage):
841
- schema = {}
842
- nodes = lineage["nodes"]
843
- parent_list = lineage["parent_map"].get(key, [])
844
- for parent_id in parent_list:
845
- parent_node = nodes.get(parent_id)
846
- if parent_node is None:
847
- continue
848
- columns = parent_node.get("columns") or {}
849
- name = parent_node.get("name")
850
- if parent_node.get("resource_type") == "source":
851
- parts = parent_id.split(".")
852
- source = parts[2]
853
- table = parts[3]
854
- source = source.replace("-", "_")
855
- name = f"__{source}__{table}"
856
- schema[name] = {name: column.get("type") for name, column in columns.items()}
857
- return schema
858
-
859
- base_sql = self.generate_sql(
860
- base_node.get("raw_code"),
861
- context=jinja_context,
862
- provided_manifest=base_manifest,
863
- )
864
- curr_sql = self.generate_sql(
865
- curr_node.get("raw_code"),
866
- context=jinja_context,
867
- provided_manifest=curr_manifest,
868
- )
869
- base_schema = _get_schema(base)
870
- curr_schema = _get_schema(current)
871
- dialect = self.adapter.connections.TYPE
872
- if curr_manifest.metadata.adapter_type is not None:
873
- dialect = curr_manifest.metadata.adapter_type
874
-
875
- change = parse_change_category(
876
- base_sql,
877
- curr_sql,
878
- old_schema=base_schema,
879
- new_schema=curr_schema,
880
- dialect=dialect,
881
- perf_tracking=perf_tracking,
882
- )
883
-
884
- # Make sure that the case of the column names are the same
885
- changed_columns = {
886
- column.lower(): change_status for column, change_status in (change.columns or {}).items()
887
- }
888
- changed_columns_names = set(changed_columns)
889
- changed_columns_final = {}
890
-
891
- base_columns = base_node.get("columns") or {}
892
- curr_columns = curr_node.get("columns") or {}
893
- columns_names = set(base_columns) | set(curr_columns)
873
+ base_node = base.get("nodes", {}).get(node_id)
874
+ curr_node = current.get("nodes", {}).get(node_id)
875
+ change = NodeChange(category="unknown")
876
+ if (
877
+ curr_node.get("resource_type") in ["model", "snapshot"]
878
+ and curr_node.get("raw_code") is not None
879
+ and base_node.get("raw_code") is not None
880
+ ):
881
+ try:
882
+
883
+ def _get_schema(lineage):
884
+ schema = {}
885
+ nodes = lineage["nodes"]
886
+ parent_list = lineage["parent_map"].get(node_id, [])
887
+ for parent_id in parent_list:
888
+ parent_node = nodes.get(parent_id)
889
+ if parent_node is None:
890
+ continue
891
+ columns = parent_node.get("columns") or {}
892
+ name = parent_node.get("name")
893
+ if parent_node.get("resource_type") == "source":
894
+ parts = parent_id.split(".")
895
+ source = parts[2]
896
+ table = parts[3]
897
+ source = source.replace("-", "_")
898
+ name = f"__{source}__{table}"
899
+ schema[name] = {name: column.get("type") for name, column in columns.items()}
900
+ return schema
901
+
902
+ base_sql = self.generate_sql(
903
+ base_node.get("raw_code"),
904
+ context=jinja_context,
905
+ provided_manifest=base_manifest,
906
+ )
907
+ curr_sql = self.generate_sql(
908
+ curr_node.get("raw_code"),
909
+ context=jinja_context,
910
+ provided_manifest=curr_manifest,
911
+ )
912
+ base_schema = _get_schema(base)
913
+ curr_schema = _get_schema(current)
914
+ dialect = self.adapter.connections.TYPE
915
+ if curr_manifest.metadata.adapter_type is not None:
916
+ dialect = curr_manifest.metadata.adapter_type
917
+
918
+ change = parse_change_category(
919
+ base_sql,
920
+ curr_sql,
921
+ old_schema=base_schema,
922
+ new_schema=curr_schema,
923
+ dialect=dialect,
924
+ perf_tracking=breaking_perf_tracker,
925
+ )
894
926
 
895
- for column_name in columns_names:
896
- if column_name.lower() in changed_columns_names:
897
- changed_columns_final[column_name] = changed_columns[column_name.lower()]
927
+ # Make sure that the case of the column names are the same
928
+ changed_columns = {
929
+ column.lower(): change_status for column, change_status in (change.columns or {}).items()
930
+ }
931
+ changed_columns_names = set(changed_columns)
932
+ changed_columns_final = {}
898
933
 
899
- change.columns = changed_columns_final
900
- except Exception:
901
- change = NodeChange(category="unknown")
934
+ base_columns = base_node.get("columns") or {}
935
+ curr_columns = curr_node.get("columns") or {}
936
+ columns_names = set(base_columns) | set(curr_columns)
902
937
 
903
- diff[key] = NodeDiff(change_status="modified", change=change)
904
- elif base_node:
905
- diff[key] = NodeDiff(change_status="removed")
906
- elif curr_node:
907
- diff[key] = NodeDiff(change_status="added")
938
+ for column_name in columns_names:
939
+ if column_name.lower() in changed_columns_names:
940
+ changed_columns_final[column_name] = changed_columns[column_name.lower()]
908
941
 
909
- perf_tracking.end_lineage_diff()
910
- log_performance("model lineage diff", perf_tracking.to_dict())
942
+ change.columns = changed_columns_final
943
+ except Exception:
944
+ # TODO: telemetry
945
+ pass
911
946
 
912
- return LineageDiff(
913
- base=base,
914
- current=current,
915
- diff=diff,
916
- )
947
+ breaking_perf_tracker.end_lineage_diff()
948
+ log_performance("change analysis per node", breaking_perf_tracker.to_dict())
949
+ breaking_perf_tracker.reset()
950
+ node_diff = diff.get(node_id)
951
+ node_diff.change = change
952
+ return node_diff
917
953
 
918
954
  def get_cll(
919
955
  self,
920
956
  node_id: Optional[str] = None,
921
957
  column: Optional[str] = None,
922
958
  change_analysis: Optional[bool] = False,
923
- cll: Optional[bool] = True,
924
- upstream: Optional[bool] = True,
925
- downstream: Optional[bool] = True,
959
+ no_cll: Optional[bool] = False,
960
+ no_upstream: Optional[bool] = False,
961
+ no_downstream: Optional[bool] = False,
926
962
  no_filter: Optional[bool] = False,
927
963
  ) -> CllData:
928
- cll_tracker = CLLPerformanceTracking()
964
+ cll_tracker = LineagePerfTracker()
965
+ cll_tracker.set_params(
966
+ has_node=node_id is not None,
967
+ has_column=column is not None,
968
+ change_analysis=change_analysis,
969
+ no_cll=no_cll,
970
+ no_upstream=no_upstream,
971
+ no_downstream=no_downstream,
972
+ )
929
973
  cll_tracker.start_column_lineage()
930
974
 
931
975
  manifest = self.curr_manifest
@@ -936,47 +980,114 @@ class DbtAdapter(BaseAdapter):
936
980
  cll_node_ids = {node_id}
937
981
  else:
938
982
  lineage_diff = self.get_lineage_diff()
939
- cll_node_ids = lineage_diff.diff.keys()
983
+ cll_node_ids = set(lineage_diff.diff.keys())
984
+
985
+ cll_tracker.set_init_nodes(len(cll_node_ids))
940
986
 
941
987
  nodes = {}
942
988
  columns = {}
943
989
  parent_map = {}
944
990
  child_map = {}
945
991
 
946
- if upstream:
992
+ if not no_upstream:
947
993
  cll_node_ids = cll_node_ids.union(find_upstream(cll_node_ids, manifest_dict.get("parent_map")))
948
- if downstream:
994
+ if not no_downstream:
949
995
  cll_node_ids = cll_node_ids.union(find_downstream(cll_node_ids, manifest_dict.get("child_map")))
950
996
 
951
- if cll:
997
+ if not no_cll:
998
+ allowed_related_nodes = set()
999
+ for key in ["sources", "nodes", "exposures", "metrics"]:
1000
+ attr = getattr(manifest, key)
1001
+ allowed_related_nodes.update(set(attr.keys()))
1002
+ if hasattr(manifest, "semantic_models"):
1003
+ attr = getattr(manifest, "semantic_models")
1004
+ allowed_related_nodes.update(set(attr.keys()))
952
1005
  for cll_node_id in cll_node_ids:
953
- if (
954
- cll_node_id not in manifest.sources
955
- and cll_node_id not in manifest.nodes
956
- and cll_node_id not in manifest.exposures
957
- ):
1006
+ if cll_node_id not in allowed_related_nodes:
958
1007
  continue
959
- cll_data_one = self.get_cll_cached(cll_node_id, base=False)
1008
+ cll_data_one = deepcopy(self.get_cll_cached(cll_node_id, base=False))
1009
+ cll_tracker.increment_cll_nodes()
960
1010
  if cll_data_one is None:
961
1011
  continue
962
1012
 
963
- node_diff = self.get_lineage_diff().diff.get(cll_node_id) if change_analysis else None
964
- for n_id, n in cll_data_one.nodes.items():
965
- nodes[n_id] = n
966
-
967
- if node_diff is not None:
968
- n.change_status = node_diff.change_status
969
- if node_diff.change is not None:
970
- n.change_category = node_diff.change.category
1013
+ nodes[cll_node_id] = cll_data_one.nodes.get(cll_node_id)
1014
+ node_diff = None
1015
+ if change_analysis:
1016
+ node_diff = self.get_change_analysis_cached(cll_node_id)
1017
+ cll_tracker.increment_change_analysis_nodes()
1018
+ if node_diff is not None:
1019
+ nodes[cll_node_id].change_status = node_diff.change_status
1020
+ if node_diff.change is not None:
1021
+ nodes[cll_node_id].change_category = node_diff.change.category
971
1022
  for c_id, c in cll_data_one.columns.items():
972
1023
  columns[c_id] = c
973
- if node_diff is not None and node_diff.change is not None:
974
- column_diff = node_diff.change.columns.get(c.name)
975
- if column_diff:
976
- c.change_status = column_diff
1024
+ if node_diff is not None:
1025
+ if node_diff.change_status == "added":
1026
+ c.change_status = "added"
1027
+ elif node_diff.change_status == "removed":
1028
+ c.change_status = "removed"
1029
+ elif node_diff.change is not None and node_diff.change.columns is not None:
1030
+ column_diff = node_diff.change.columns.get(c.name)
1031
+ if column_diff:
1032
+ c.change_status = column_diff
977
1033
 
978
1034
  for p_id, parents in cll_data_one.parent_map.items():
979
1035
  parent_map[p_id] = parents
1036
+ else:
1037
+ for cll_node_id in cll_node_ids:
1038
+ cll_node = None
1039
+ cll_node_columns: Dict[str, CllColumn] = {}
1040
+
1041
+ if cll_node_id in manifest.sources:
1042
+ cll_node = CllNode.build_cll_node(manifest, "sources", cll_node_id)
1043
+ if self.curr_catalog and cll_node_id in self.curr_catalog.sources:
1044
+ cll_node_columns = {
1045
+ column.name: CllColumn(
1046
+ id=f"{cll_node_id}_{column.name}",
1047
+ table_id=cll_node_id,
1048
+ name=column.name,
1049
+ type=column.type,
1050
+ )
1051
+ for column in self.curr_catalog.sources[cll_node_id].columns.values()
1052
+ }
1053
+ elif cll_node_id in manifest.nodes:
1054
+ cll_node = CllNode.build_cll_node(manifest, "nodes", cll_node_id)
1055
+ if self.curr_catalog and cll_node_id in self.curr_catalog.nodes:
1056
+ cll_node_columns = {
1057
+ column.name: CllColumn(
1058
+ id=f"{cll_node_id}_{column.name}",
1059
+ table_id=cll_node_id,
1060
+ name=column.name,
1061
+ type=column.type,
1062
+ )
1063
+ for column in self.curr_catalog.nodes[cll_node_id].columns.values()
1064
+ }
1065
+ elif cll_node_id in manifest.exposures:
1066
+ cll_node = CllNode.build_cll_node(manifest, "exposures", cll_node_id)
1067
+ elif hasattr(manifest, "semantic_models") and cll_node_id in manifest.semantic_models:
1068
+ cll_node = CllNode.build_cll_node(manifest, "semantic_models", cll_node_id)
1069
+ elif cll_node_id in manifest.metrics:
1070
+ cll_node = CllNode.build_cll_node(manifest, "metrics", cll_node_id)
1071
+
1072
+ if not cll_node:
1073
+ continue
1074
+ nodes[cll_node_id] = cll_node
1075
+
1076
+ node_diff = None
1077
+ if change_analysis:
1078
+ node_diff = self.get_change_analysis_cached(cll_node_id)
1079
+ cll_tracker.increment_change_analysis_nodes()
1080
+ if node_diff is not None:
1081
+ cll_node.change_status = node_diff.change_status
1082
+ if node_diff.change is not None:
1083
+ cll_node.change_category = node_diff.change.category
1084
+ for c, cll_column in cll_node_columns.items():
1085
+ cll_node.columns[c] = cll_column
1086
+ columns[cll_column.id] = cll_column
1087
+ if node_diff.change.columns and c in node_diff.change.columns:
1088
+ cll_column.change_status = node_diff.change.columns[c]
1089
+
1090
+ parent_map[cll_node_id] = manifest.parent_map.get(cll_node_id, [])
980
1091
 
981
1092
  # build the child map
982
1093
  for parent_id, parents in parent_map.items():
@@ -987,47 +1098,90 @@ class DbtAdapter(BaseAdapter):
987
1098
 
988
1099
  # Find the anchor nodes
989
1100
  anchor_node_ids = set()
1101
+ extra_node_ids = set()
990
1102
  if node_id is None and column is None:
991
1103
  if change_analysis:
992
1104
  # If change analysis is requested, we need to find the nodes that have changes
993
- for node_id, node_diff in self.get_lineage_diff().diff.items():
994
- if node_diff.change.category == "breaking":
995
- anchor_node_ids.add(node_id)
996
- for column_name in node_diff.change.columns:
997
- anchor_node_ids.add(f"{node_id}_{column_name}")
1105
+ lineage_diff = self.get_lineage_diff()
1106
+ for nid, nd in lineage_diff.diff.items():
1107
+ if nd.change_status == "added":
1108
+ anchor_node_ids.add(nid)
1109
+ n = lineage_diff.current["nodes"].get(nid)
1110
+ n_columns = n.get("columns", {})
1111
+ for c in n_columns:
1112
+ anchor_node_ids.add(build_column_key(nid, c))
1113
+ continue
1114
+ if nd.change_status == "removed":
1115
+ extra_node_ids.add(nid)
1116
+ continue
1117
+
1118
+ node_diff = self.get_change_analysis_cached(nid)
1119
+ if node_diff is not None and node_diff.change is not None:
1120
+ extra_node_ids.add(nid)
1121
+ if no_cll:
1122
+ if node_diff.change.category in ["breaking", "partial_breaking", "unknown"]:
1123
+ anchor_node_ids.add(nid)
1124
+ else:
1125
+ if node_diff.change.category in ["breaking", "unknown"]:
1126
+ anchor_node_ids.add(nid)
1127
+ if node_diff.change.columns is not None:
1128
+ for column_name in node_diff.change.columns:
1129
+ anchor_node_ids.add(f"{nid}_{column_name}")
998
1130
  else:
999
1131
  lineage_diff = self.get_lineage_diff()
1000
1132
  anchor_node_ids = lineage_diff.diff.keys()
1001
1133
  elif node_id is not None and column is None:
1002
1134
  if change_analysis:
1003
1135
  # If change analysis is requested, we need to find the nodes that have changes
1004
- node_diff = self.get_lineage_diff().diff.get(node_id)
1005
- if node_diff:
1006
- if node_diff.change.category == "breaking":
1007
- anchor_node_ids.add(node_id)
1008
- for column_name in node_diff.change.columns:
1009
- anchor_node_ids.add(f"{node_id}_{column_name}")
1136
+ node_diff = self.get_change_analysis_cached(node_id)
1137
+ if node_diff is not None and node_diff.change is not None:
1138
+ extra_node_ids.add(node_id)
1139
+ if no_cll:
1140
+ if node_diff.change.category in ["breaking", "partial_breaking", "unknown"]:
1141
+ anchor_node_ids.add(node_id)
1142
+ else:
1143
+ if node_diff.change.category in ["breaking", "unknown"]:
1144
+ anchor_node_ids.add(node_id)
1145
+ if node_diff.change.columns is not None:
1146
+ for column_name in node_diff.change.columns:
1147
+ anchor_node_ids.add(f"{node_id}_{column_name}")
1010
1148
  else:
1011
1149
  anchor_node_ids.add(node_id)
1012
1150
  else:
1013
1151
  anchor_node_ids.add(node_id)
1152
+ if not no_cll:
1153
+ node = nodes.get(node_id)
1154
+ if node:
1155
+ for column_name in node.columns:
1156
+ column_key = build_column_key(node_id, column_name)
1157
+ anchor_node_ids.add(column_key)
1014
1158
  else:
1015
1159
  anchor_node_ids.add(f"{node_id}_{column}")
1016
1160
 
1161
+ cll_tracker.set_anchor_nodes(len(anchor_node_ids))
1017
1162
  result_node_ids = set(anchor_node_ids)
1018
- if upstream:
1163
+ if not no_upstream:
1019
1164
  result_node_ids = result_node_ids.union(find_upstream(anchor_node_ids, parent_map))
1020
- if downstream:
1165
+ if not no_downstream:
1021
1166
  result_node_ids = result_node_ids.union(find_downstream(anchor_node_ids, child_map))
1022
1167
 
1023
1168
  # Filter the nodes and columns based on the anchor nodes
1024
1169
  if not no_filter:
1025
- nodes = {k: v for k, v in nodes.items() if k in result_node_ids}
1026
- columns = {k: v for k, v in columns.items() if k in result_node_ids}
1170
+ nodes = {k: v for k, v in nodes.items() if k in result_node_ids or k in extra_node_ids}
1171
+ columns = {k: v for k, v in columns.items() if k in result_node_ids or k in extra_node_ids}
1172
+
1173
+ for node in nodes.values():
1174
+ node.columns = {
1175
+ k: v for k, v in node.columns.items() if v.id in result_node_ids or v.id in extra_node_ids
1176
+ }
1177
+
1178
+ if change_analysis:
1179
+ node.impacted = node.id in result_node_ids
1180
+
1027
1181
  parent_map, child_map = filter_dependency_maps(parent_map, child_map, result_node_ids)
1028
1182
 
1029
1183
  cll_tracker.end_column_lineage()
1030
- cll_tracker.set_total_nodes(len(nodes))
1184
+ cll_tracker.set_total_nodes(len(nodes) + len(columns))
1031
1185
  log_performance("column level lineage", cll_tracker.to_dict())
1032
1186
  cll_tracker.reset()
1033
1187
 
@@ -1046,6 +1200,9 @@ class DbtAdapter(BaseAdapter):
1046
1200
  if node is None:
1047
1201
  return None
1048
1202
 
1203
+ cll_tracker.set_total_nodes(1)
1204
+ cll_tracker.start_column_lineage()
1205
+
1049
1206
  def _apply_all_columns(node: CllNode, transformation_type):
1050
1207
  cll_data = CllData()
1051
1208
  cll_data.nodes[node.id] = node
@@ -1170,6 +1327,10 @@ class DbtAdapter(BaseAdapter):
1170
1327
  depends_on.add(parent_key)
1171
1328
  column.transformation_type = c2c_map[name].transformation_type
1172
1329
  cll_data.parent_map[column_id] = set(depends_on)
1330
+
1331
+ cll_tracker.end_column_lineage()
1332
+ log_performance("column level lineage per node", cll_tracker.to_dict())
1333
+ cll_tracker.reset()
1173
1334
  return cll_data
1174
1335
 
1175
1336
  def get_cll_node(self, node_id: str, base: Optional[bool] = False) -> Tuple[Optional[CllNode], list[str]]:
@@ -1181,21 +1342,12 @@ class DbtAdapter(BaseAdapter):
1181
1342
  # model, seed, snapshot
1182
1343
  if node_id in manifest.nodes:
1183
1344
  found = manifest.nodes[node_id]
1184
- if found.resource_type not in ["model", "seed", "snapshot"]:
1185
- return None, []
1186
-
1187
1345
  unique_id = found.unique_id
1188
- node = CllNode(
1189
- id=found.unique_id,
1190
- name=found.name,
1191
- package_name=found.package_name,
1192
- resource_type=found.resource_type,
1193
- raw_code=found.raw_code,
1194
- )
1346
+ node = CllNode.build_cll_node(manifest, "nodes", node_id)
1195
1347
  if hasattr(found.depends_on, "nodes"):
1196
1348
  parent_list = found.depends_on.nodes
1197
1349
 
1198
- if catalog is not None and unique_id in catalog.nodes:
1350
+ if catalog is not None and node is not None and unique_id in catalog.nodes:
1199
1351
  columns = {}
1200
1352
  for col_name, col_metadata in catalog.nodes[unique_id].columns.items():
1201
1353
  column_id = f"{unique_id}_{col_name}"
@@ -1207,17 +1359,10 @@ class DbtAdapter(BaseAdapter):
1207
1359
  if node_id in manifest.sources:
1208
1360
  found = manifest.sources[node_id]
1209
1361
  unique_id = found.unique_id
1210
-
1211
- node = CllNode(
1212
- id=found.unique_id,
1213
- name=found.name,
1214
- package_name=found.package_name,
1215
- resource_type=found.resource_type,
1216
- source_name=found.source_name,
1217
- )
1362
+ node = CllNode.build_cll_node(manifest, "sources", node_id)
1218
1363
  parent_list = []
1219
1364
 
1220
- if catalog is not None and unique_id in catalog.sources:
1365
+ if catalog is not None and node is not None and unique_id in catalog.sources:
1221
1366
  columns = {}
1222
1367
  for col_name, col_metadata in catalog.sources[unique_id].columns.items():
1223
1368
  column_id = f"{unique_id}_{col_name}"
@@ -1228,13 +1373,19 @@ class DbtAdapter(BaseAdapter):
1228
1373
  # exposure
1229
1374
  if node_id in manifest.exposures:
1230
1375
  found = manifest.exposures[node_id]
1376
+ node = CllNode.build_cll_node(manifest, "exposures", node_id)
1377
+ if hasattr(found.depends_on, "nodes"):
1378
+ parent_list = found.depends_on.nodes
1231
1379
 
1232
- node = CllNode(
1233
- id=found.unique_id,
1234
- name=found.name,
1235
- package_name=found.package_name,
1236
- resource_type=found.resource_type,
1237
- )
1380
+ if hasattr(manifest, "semantic_models") and node_id in manifest.semantic_models:
1381
+ found = manifest.semantic_models[node_id]
1382
+ node = CllNode.build_cll_node(manifest, "semantic_models", node_id)
1383
+ if hasattr(found.depends_on, "nodes"):
1384
+ parent_list = found.depends_on.nodes
1385
+
1386
+ if node_id in manifest.metrics:
1387
+ found = manifest.metrics[node_id]
1388
+ node = CllNode.build_cll_node(manifest, "metrics", node_id)
1238
1389
  if hasattr(found.depends_on, "nodes"):
1239
1390
  parent_list = found.depends_on.nodes
1240
1391
 
@@ -1250,73 +1401,6 @@ class DbtAdapter(BaseAdapter):
1250
1401
  }
1251
1402
  return None
1252
1403
 
1253
- def get_impact_radius(self, node_id: str) -> CllData:
1254
- impacted_nodes = self.get_impacted_nodes(node_id)
1255
- impacted_cll = self.get_impacted_cll(node_id)
1256
-
1257
- # merge impact radius
1258
- return self._merge_cll_data(impacted_nodes, impacted_cll)
1259
-
1260
- def get_impacted_nodes(self, node_id: str) -> CllData:
1261
- lineage_diff = self.get_lineage_diff()
1262
- diff_info = lineage_diff.diff.get(node_id)
1263
- if diff_info is None:
1264
- return CllData()
1265
- change_category = diff_info.change.category
1266
-
1267
- if change_category == "breaking":
1268
- cll = self.get_cll(node_id, no_filter=True)
1269
- _, downstream = find_column_dependencies(node_id, cll.parent_map, cll.child_map)
1270
- relevant_columns = {node_id}
1271
- relevant_columns.update(downstream)
1272
- nodes, columns = filter_lineage_vertices(cll.nodes, cll.columns, relevant_columns)
1273
- p_map, c_map = filter_dependency_maps(cll.parent_map, cll.child_map, relevant_columns)
1274
-
1275
- return CllData(nodes=nodes, columns=columns, parent_map=p_map, child_map=c_map)
1276
-
1277
- return CllData()
1278
-
1279
- def get_impacted_cll(self, node_id: str) -> CllData:
1280
- lineage_diff = self.get_lineage_diff()
1281
- diff_info = lineage_diff.diff.get(node_id)
1282
- if diff_info is None:
1283
- return CllData()
1284
- change_columns = diff_info.change.columns
1285
-
1286
- cll = self.get_cll(node_id, no_filter=True)
1287
- relevant_columns = set()
1288
- for col, change_status in change_columns.items():
1289
- if change_status == "removed":
1290
- continue
1291
- target_column = f"{node_id}_{col}"
1292
- _, downstream = find_column_dependencies(target_column, cll.parent_map, cll.child_map)
1293
- relevant_columns.add(target_column)
1294
- relevant_columns.update(downstream)
1295
-
1296
- nodes, columns = filter_lineage_vertices(cll.nodes, cll.columns, relevant_columns)
1297
- p_map, c_map = filter_dependency_maps(cll.parent_map, cll.child_map, relevant_columns)
1298
-
1299
- return CllData(nodes=nodes, columns=columns, parent_map=p_map, child_map=c_map)
1300
-
1301
- @staticmethod
1302
- def _merge_cll_data(base: CllData, target: CllData) -> CllData:
1303
- merged_nodes = {**base.nodes, **target.nodes}
1304
- merged_columns = {**base.columns, **target.columns}
1305
-
1306
- merged_parent_map = {}
1307
- merged_keys = set(base.parent_map.keys()).union(set(target.parent_map.keys()))
1308
- for key in merged_keys:
1309
- merged_parent_map[key] = base.parent_map.get(key, set()).union(target.parent_map.get(key, set()))
1310
-
1311
- merged_child_map = {}
1312
- merged_keys = set(base.child_map.keys()).union(set(target.child_map.keys()))
1313
- for key in merged_keys:
1314
- merged_child_map[key] = base.child_map.get(key, set()).union(target.child_map.get(key, set()))
1315
-
1316
- return CllData(
1317
- nodes=merged_nodes, columns=merged_columns, parent_map=merged_parent_map, child_map=merged_child_map
1318
- )
1319
-
1320
1404
  def build_name_to_unique_id_index(self) -> Dict[str, str]:
1321
1405
  name_to_unique_id = {}
1322
1406
  curr_manifest = self.get_manifest(base=False)
@@ -1404,13 +1488,18 @@ class DbtAdapter(BaseAdapter):
1404
1488
  self.curr_manifest = load_manifest(path=refresh_file_path)
1405
1489
  self.manifest = as_manifest(self.curr_manifest)
1406
1490
  self.get_cll_cached.cache_clear()
1491
+ self.get_change_analysis_cached.cache_clear()
1407
1492
  elif refresh_file_path.endswith("catalog.json"):
1408
1493
  self.curr_catalog = load_catalog(path=refresh_file_path)
1494
+ self.get_cll_cached.cache_clear()
1495
+ self.get_change_analysis_cached.cache_clear()
1409
1496
  elif self.base_path and target_type == os.path.basename(self.base_path):
1410
1497
  if refresh_file_path.endswith("manifest.json"):
1411
1498
  self.base_manifest = load_manifest(path=refresh_file_path)
1499
+ self.get_change_analysis_cached.cache_clear()
1412
1500
  elif refresh_file_path.endswith("catalog.json"):
1413
1501
  self.base_catalog = load_catalog(path=refresh_file_path)
1502
+ self.get_change_analysis_cached.cache_clear()
1414
1503
 
1415
1504
  def create_relation(self, model, base=False):
1416
1505
  node = self.find_node_by_name(model, base)
@@ -1446,6 +1535,15 @@ class DbtAdapter(BaseAdapter):
1446
1535
 
1447
1536
  specs = [_parse_difference(select_list, exclude_list)]
1448
1537
 
1538
+ # If packages is not provided, use the project name from manifest metadata as default
1539
+ if packages is None:
1540
+ if (
1541
+ self.manifest.metadata
1542
+ and hasattr(self.manifest.metadata, "project_name")
1543
+ and self.manifest.metadata.project_name
1544
+ ):
1545
+ packages = [self.manifest.metadata.project_name]
1546
+
1449
1547
  if packages is not None:
1450
1548
  package_spec = SelectionUnion([_parse_difference([f"package:{p}"], None) for p in packages])
1451
1549
  specs.append(package_spec)
@@ -1522,7 +1620,7 @@ class DbtAdapter(BaseAdapter):
1522
1620
  if not os.path.isfile(path):
1523
1621
  return None
1524
1622
 
1525
- with open(path, "r") as f:
1623
+ with open(path, "r", encoding="utf-8") as f:
1526
1624
  json_content = f.read()
1527
1625
  return json.loads(json_content)
1528
1626