recce-nightly 1.3.0.20250507__py3-none-any.whl → 1.4.0.20250515__py3-none-any.whl

This diff shows the contents of two publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.

Potentially problematic release.

Files changed (93)
  1. recce/VERSION +1 -1
  2. recce/__init__.py +22 -22
  3. recce/adapter/base.py +11 -14
  4. recce/adapter/dbt_adapter/__init__.py +355 -316
  5. recce/adapter/dbt_adapter/dbt_version.py +3 -0
  6. recce/adapter/sqlmesh_adapter.py +24 -35
  7. recce/apis/check_api.py +39 -28
  8. recce/apis/check_func.py +33 -27
  9. recce/apis/run_api.py +25 -19
  10. recce/apis/run_func.py +29 -23
  11. recce/artifact.py +44 -49
  12. recce/cli.py +484 -285
  13. recce/config.py +42 -33
  14. recce/core.py +52 -44
  15. recce/data/404.html +1 -1
  16. recce/data/_next/static/chunks/{368-7587b306577df275.js → 778-aef312bffb4c0312.js} +15 -15
  17. recce/data/_next/static/chunks/8d700b6a.ed11a130057c7a47.js +1 -0
  18. recce/data/_next/static/chunks/app/layout-c713a2829d3279e4.js +1 -0
  19. recce/data/_next/static/chunks/app/page-7086764277331fcb.js +1 -0
  20. recce/data/_next/static/chunks/{cd9f8d63-cf0d5a7b0f7a92e8.js → cd9f8d63-e020f408095ed77c.js} +3 -3
  21. recce/data/_next/static/chunks/webpack-b787cb1a4f2293de.js +1 -0
  22. recce/data/_next/static/css/88b8abc134cfd59a.css +3 -0
  23. recce/data/index.html +2 -2
  24. recce/data/index.txt +2 -2
  25. recce/diff.py +6 -12
  26. recce/event/__init__.py +74 -72
  27. recce/event/collector.py +27 -20
  28. recce/event/track.py +39 -27
  29. recce/exceptions.py +1 -1
  30. recce/git.py +7 -7
  31. recce/github.py +57 -53
  32. recce/models/__init__.py +1 -1
  33. recce/models/check.py +6 -7
  34. recce/models/run.py +1 -0
  35. recce/models/types.py +27 -27
  36. recce/pull_request.py +26 -24
  37. recce/run.py +148 -111
  38. recce/server.py +103 -89
  39. recce/state.py +209 -177
  40. recce/summary.py +168 -143
  41. recce/tasks/__init__.py +3 -3
  42. recce/tasks/core.py +11 -13
  43. recce/tasks/dataframe.py +19 -17
  44. recce/tasks/histogram.py +69 -34
  45. recce/tasks/lineage.py +2 -2
  46. recce/tasks/profile.py +147 -86
  47. recce/tasks/query.py +139 -87
  48. recce/tasks/rowcount.py +33 -30
  49. recce/tasks/schema.py +14 -14
  50. recce/tasks/top_k.py +35 -35
  51. recce/tasks/valuediff.py +216 -152
  52. recce/util/breaking.py +77 -84
  53. recce/util/cll.py +55 -51
  54. recce/util/io.py +19 -17
  55. recce/util/logger.py +1 -1
  56. recce/util/recce_cloud.py +70 -72
  57. recce/util/singleton.py +4 -4
  58. recce/yaml/__init__.py +7 -10
  59. {recce_nightly-1.3.0.20250507.dist-info → recce_nightly-1.4.0.20250515.dist-info}/METADATA +5 -2
  60. recce_nightly-1.4.0.20250515.dist-info/RECORD +143 -0
  61. {recce_nightly-1.3.0.20250507.dist-info → recce_nightly-1.4.0.20250515.dist-info}/WHEEL +1 -1
  62. tests/adapter/dbt_adapter/conftest.py +1 -0
  63. tests/adapter/dbt_adapter/dbt_test_helper.py +28 -18
  64. tests/adapter/dbt_adapter/test_dbt_adapter.py +0 -15
  65. tests/adapter/dbt_adapter/test_dbt_cll.py +39 -32
  66. tests/adapter/dbt_adapter/test_selector.py +22 -21
  67. tests/tasks/test_histogram.py +58 -66
  68. tests/tasks/test_lineage.py +36 -23
  69. tests/tasks/test_preset_checks.py +45 -31
  70. tests/tasks/test_profile.py +340 -15
  71. tests/tasks/test_query.py +40 -40
  72. tests/tasks/test_row_count.py +65 -46
  73. tests/tasks/test_schema.py +65 -42
  74. tests/tasks/test_top_k.py +22 -18
  75. tests/tasks/test_valuediff.py +43 -32
  76. tests/test_cli.py +71 -58
  77. tests/test_config.py +7 -9
  78. tests/test_core.py +5 -3
  79. tests/test_dbt.py +7 -7
  80. tests/test_pull_request.py +1 -1
  81. tests/test_server.py +19 -13
  82. tests/test_state.py +40 -27
  83. tests/test_summary.py +18 -14
  84. recce/data/_next/static/chunks/8d700b6a-f0b1f6b9e0d97ce2.js +0 -1
  85. recce/data/_next/static/chunks/app/layout-9102e22cb73f74d6.js +0 -1
  86. recce/data/_next/static/chunks/app/page-92f13c8fad9fae3d.js +0 -1
  87. recce/data/_next/static/chunks/webpack-567d72f0bc0820d5.js +0 -1
  88. recce_nightly-1.3.0.20250507.dist-info/RECORD +0 -142
  89. /recce/data/_next/static/{K5iKlCYhdcpq8Ea6ck9J_ → q0Xsc9Sd6PDuo1lshYpLu}/_buildManifest.js +0 -0
  90. /recce/data/_next/static/{K5iKlCYhdcpq8Ea6ck9J_ → q0Xsc9Sd6PDuo1lshYpLu}/_ssgManifest.js +0 -0
  91. {recce_nightly-1.3.0.20250507.dist-info → recce_nightly-1.4.0.20250515.dist-info}/entry_points.txt +0 -0
  92. {recce_nightly-1.3.0.20250507.dist-info → recce_nightly-1.4.0.20250515.dist-info}/licenses/LICENSE +0 -0
  93. {recce_nightly-1.3.0.20250507.dist-info → recce_nightly-1.4.0.20250515.dist-info}/top_level.txt +0 -0
recce/adapter/dbt_adapter/__init__.py

@@ -8,14 +8,27 @@ from dataclasses import dataclass, fields
 from errno import ENOENT
 from functools import lru_cache
 from pathlib import Path
-from typing import Callable, Dict, List, Optional, Tuple, Iterator, Any, Set, Union, Literal, Type
+from typing import (
+    Any,
+    Callable,
+    Dict,
+    Iterator,
+    List,
+    Literal,
+    Optional,
+    Set,
+    Tuple,
+    Type,
+    Union,
+)
 
 from recce.event import log_performance
 from recce.exceptions import RecceException
-from recce.util.cll import cll, CLLPerformanceTracking
-from recce.util.lineage import find_upstream, find_downstream
+from recce.util.cll import CLLPerformanceTracking, cll
+from recce.util.lineage import find_downstream, find_upstream
+
 from ...tasks.profile import ProfileTask
-from ...util.breaking import parse_change_category, BreakingPerformanceTracking
+from ...util.breaking import BreakingPerformanceTracking, parse_change_category
 
 try:
     import agate
@@ -30,11 +43,23 @@ from watchdog.observers import Observer
 
 from recce.adapter.base import BaseAdapter
 from recce.state import ArtifactsRoot
-from .dbt_version import DbtVersion
+
 from ...models import RunType
-from ...models.types import LineageDiff, NodeDiff, NodeChange
-from ...tasks import Task, QueryTask, QueryBaseTask, QueryDiffTask, ValueDiffTask, ValueDiffDetailTask, ProfileDiffTask, \
-    RowCountTask, RowCountDiffTask, TopKDiffTask, HistogramDiffTask
+from ...models.types import LineageDiff, NodeChange, NodeDiff
+from ...tasks import (
+    HistogramDiffTask,
+    ProfileDiffTask,
+    QueryBaseTask,
+    QueryDiffTask,
+    QueryTask,
+    RowCountDiffTask,
+    RowCountTask,
+    Task,
+    TopKDiffTask,
+    ValueDiffDetailTask,
+    ValueDiffTask,
+)
+from .dbt_version import DbtVersion
 
 dbt_supported_registry: Dict[RunType, Type[Task]] = {
     RunType.QUERY: QueryTask,
@@ -56,7 +81,7 @@ get_adapter_orig = dbt.adapters.factory.get_adapter
 
 
 def get_adapter(config):
-    if hasattr(config, 'adapter'):
+    if hasattr(config, "adapter"):
         return config.adapter
     else:
         return get_adapter_orig(config)
@@ -69,7 +94,11 @@ from dbt.adapters.base import Column  # noqa: E402
 from dbt.adapters.factory import get_adapter_class_by_name  # noqa: E402
 from dbt.adapters.sql import SQLAdapter  # noqa: E402
 from dbt.config.runtime import RuntimeConfig  # noqa: E402
-from dbt.contracts.graph.manifest import Manifest, WritableManifest, MacroManifest  # noqa: E402
+from dbt.contracts.graph.manifest import (  # noqa: E402
+    MacroManifest,
+    Manifest,
+    WritableManifest,
+)
 from dbt.contracts.graph.nodes import ManifestNode  # noqa: E402
 from dbt.contracts.results import CatalogArtifact  # noqa: E402
 from dbt.flags import set_from_args  # noqa: E402
@@ -78,7 +107,7 @@ from dbt.parser.sql import SqlBlockParser  # noqa: E402
 
 dbt_version = DbtVersion()
 
-if dbt_version < 'v1.8':
+if dbt_version < "v1.8":
     from dbt.contracts.connection import Connection
 else:
     from dbt.adapters.contracts.connection import Connection
@@ -86,19 +115,21 @@ else:
 
 @contextmanager
 def silence_no_nodes_warning():
-    if dbt_version >= 'v1.8':
+    if dbt_version >= "v1.8":
         from dbt.events.types import NoNodesForSelectionCriteria
         from dbt_common.events.functions import WARN_ERROR_OPTIONS
+
         WARN_ERROR_OPTIONS.silence.append(NoNodesForSelectionCriteria.__name__)
     try:
         yield
     finally:
-        if dbt_version >= 'v1.8':
+        if dbt_version >= "v1.8":
             from dbt_common.events.functions import WARN_ERROR_OPTIONS
+
             WARN_ERROR_OPTIONS.silence.pop()
 
 
-logger = logging.getLogger('uvicorn')
+logger = logging.getLogger("uvicorn")
 
 
 class ArtifactsEventHandler(FileSystemEventHandler):
@@ -147,16 +178,18 @@ class EnvironmentEventHandler(FileSystemEventHandler):
 
 
 def merge_tables(tables: List[agate.Table]) -> agate.Table:
-    if dbt_version < 'v1.8':
+    if dbt_version < "v1.8":
         from dbt.clients.agate_helper import merge_tables
+
         return merge_tables(tables)
     else:
         from dbt_common.clients.agate_helper import merge_tables
+
        return merge_tables(tables)
 
 
 def as_manifest(m: WritableManifest) -> Manifest:
-    if dbt_version < 'v1.8':
+    if dbt_version < "v1.8":
         data = m.__dict__
         all_fields = set([x.name for x in fields(Manifest)])
         new_data = {k: v for k, v in data.items() if k in all_fields}
@@ -184,12 +217,13 @@ def load_catalog(path: str = None, data: dict = None):
 
 
 def previous_state(state_path: Path, target_path: Path, project_root: Path) -> PreviousState:
-    if dbt_version < 'v1.5.2':
+    if dbt_version < "v1.5.2":
         return PreviousState(state_path, target_path)
     else:
         try:
             # Overwrite the level_tag method temporarily to avoid the warning message
-            from dbt.events.types import WarnStateTargetEqual, EventLevel
+            from dbt.events.types import EventLevel, WarnStateTargetEqual
+
             original_level_tag_func = WarnStateTargetEqual.level_tag
             WarnStateTargetEqual.level_tag = lambda x: EventLevel.DEBUG
         except ImportError:
@@ -209,12 +243,12 @@ def previous_state(state_path: Path, target_path: Path, project_root: Path) -> P
 def default_profiles_dir():
     # Precedence: DBT_PROFILES_DIR > current working directory > ~/.dbt/
     # https://docs.getdbt.com/docs/core/connect-data-platform/connection-profiles#advanced-customizing-a-profile-directory
-    if os.getenv('DBT_PROFILES_DIR'):
-        return os.getenv('DBT_PROFILES_DIR')
-    elif os.path.exists(os.path.join(os.getcwd(), 'profiles.yml')):
+    if os.getenv("DBT_PROFILES_DIR"):
+        return os.getenv("DBT_PROFILES_DIR")
+    elif os.path.exists(os.path.join(os.getcwd(), "profiles.yml")):
         return os.getcwd()
     else:
-        return os.path.expanduser('~/.dbt/')
+        return os.path.expanduser("~/.dbt/")
 
 
 @dataclass()
@@ -222,12 +256,13 @@ class DbtArgs:
     """
     Used for RuntimeConfig.from_args
     """
-    threads: Optional[int] = 1,
-    target: Optional[str] = None,
-    profiles_dir: Optional[str] = None,
-    project_dir: Optional[str] = None,
-    profile: Optional[str] = None,
-    target_path: Optional[str] = None,
+
+    threads: Optional[int] = (1,)
+    target: Optional[str] = (None,)
+    profiles_dir: Optional[str] = (None,)
+    project_dir: Optional[str] = (None,)
+    profile: Optional[str] = (None,)
+    target_path: Optional[str] = (None,)
     project_only_flags: Optional[Dict[str, Any]] = None
     which: Optional[str] = None
     state_modified_compare_more_unrendered_values: Optional[bool] = False  # new flag added since dbt v1.9
@@ -258,32 +293,18 @@ class DbtAdapter(BaseAdapter):
 
     def support_tasks(self):
         support_map = {run_type.value: True for run_type in dbt_supported_registry}
-        supported_dbt_packages = set([package.package_name for package in self.manifest.macros.values()])
-
-        if 'dbt_profiler' not in supported_dbt_packages:
-            support_map[RunType.PROFILE_DIFF.value] = False
-            support_map[RunType.PROFILE.value] = False
-
-        if 'audit_helper' not in supported_dbt_packages:
-            support_map[RunType.VALUE_DIFF.value] = False
-            support_map[RunType.VALUE_DIFF_DETAIL.value] = False
-            support_map['query_diff_with_primary_key'] = False
 
         return support_map
 
     @classmethod
-    def load(cls,
-             no_artifacts=False,
-             review=False,
-             **kwargs):
+    def load(cls, no_artifacts=False, review=False, **kwargs):
+        target = kwargs.get("target")
+        target_path = kwargs.get("target_path", "target")
+        target_base_path = kwargs.get("target_base_path", "target-base")
 
-        target = kwargs.get('target')
-        target_path = kwargs.get('target_path', 'target')
-        target_base_path = kwargs.get('target_base_path', 'target-base')
-
-        profile_name = kwargs.get('profile')
-        project_dir = kwargs.get('project_dir')
-        profiles_dir = kwargs.get('profiles_dir')
+        profile_name = kwargs.get("profile")
+        project_dir = kwargs.get("project_dir")
+        profiles_dir = kwargs.get("profiles_dir")
 
         if profiles_dir is None:
             profiles_dir = default_profiles_dir()
@@ -297,21 +318,25 @@ class DbtAdapter(BaseAdapter):
             profiles_dir=profiles_dir,
             profile=profile_name,
             project_only_flags={},
-            which='list'
+            which="list",
         )
         set_from_args(args, args)
 
         from dbt.exceptions import DbtProjectError
+
         try:
             # adapter
-            if dbt_version < 'v1.8':
+            if dbt_version < "v1.8":
                 runtime_config = RuntimeConfig.from_args(args)
                 adapter_name = runtime_config.credentials.type
                 adapter_cls = get_adapter_class_by_name(adapter_name)
                 adapter: SQLAdapter = adapter_cls(runtime_config)
             else:
-                from dbt_common.context import set_invocation_context, get_invocation_context
                 from dbt.mp_context import get_mp_context
+                from dbt_common.context import (
+                    get_invocation_context,
+                    set_invocation_context,
+                )
 
                 set_invocation_context({})
                 get_invocation_context()._env = dict(os.environ)
@@ -320,6 +345,7 @@ class DbtAdapter(BaseAdapter):
                 adapter_cls = get_adapter_class_by_name(adapter_name)
                 adapter: SQLAdapter = adapter_cls(runtime_config, get_mp_context())
                 from dbt.adapters.factory import FACTORY
+
                 FACTORY.adapters[adapter_name] = adapter
 
             adapter.connections.set_connection_name()
@@ -329,7 +355,7 @@ class DbtAdapter(BaseAdapter):
                 runtime_config=runtime_config,
                 adapter=adapter,
                 review_mode=review,
-                base_path=target_base_path
+                base_path=target_base_path,
             )
         except DbtProjectError as e:
             raise e
@@ -350,27 +376,26 @@ class DbtAdapter(BaseAdapter):
 
     def get_columns(self, model: str, base=False) -> List[Column]:
         relation = self.create_relation(model, base)
-        get_columns_macro = 'get_columns_in_relation'
-        if self.adapter.connections.TYPE == 'databricks':
-            get_columns_macro = 'get_columns_comments'
+        get_columns_macro = "get_columns_in_relation"
+        if self.adapter.connections.TYPE == "databricks":
+            get_columns_macro = "get_columns_comments"
 
-        if dbt_version < 'v1.8':
+        if dbt_version < "v1.8":
             columns = self.adapter.execute_macro(
-                get_columns_macro,
-                kwargs={"relation": relation},
-                manifest=self.manifest)
+                get_columns_macro, kwargs={"relation": relation}, manifest=self.manifest
+            )
         else:
             from dbt.context.providers import generate_runtime_macro_context
+
             macro_manifest = MacroManifest(self.manifest.macros)
             self.adapter.set_macro_resolver(macro_manifest)
             self.adapter.set_macro_context_generator(generate_runtime_macro_context)
-            columns = self.adapter.execute_macro(
-                get_columns_macro,
-                kwargs={"relation": relation})
+            columns = self.adapter.execute_macro(get_columns_macro, kwargs={"relation": relation})
 
-        if self.adapter.connections.TYPE == 'databricks':
+        if self.adapter.connections.TYPE == "databricks":
             # reference: get_columns_in_relation (dbt/adapters/databricks/impl.py)
             from dbt.adapters.databricks import DatabricksColumn
+
             rows = columns
             columns = []
             for row in rows:
@@ -378,7 +403,9 @@ class DbtAdapter(BaseAdapter):
                     break
                 columns.append(
                     DatabricksColumn(
-                        column=row["col_name"], dtype=row["data_type"], comment=row["comment"]
+                        column=row["col_name"],
+                        dtype=row["data_type"],
+                        comment=row["comment"],
                     )
                 )
         return columns
@@ -389,29 +416,29 @@ class DbtAdapter(BaseAdapter):
         manifest = self.curr_manifest if base is False else self.base_manifest
         manifest_dict = manifest.to_dict()
 
-        node = manifest_dict['nodes'].get(model_id)
+        node = manifest_dict["nodes"].get(model_id)
         if node is None:
             return {}
 
-        node_name = node['name']
-        with self.adapter.connection_named('model'):
+        node_name = node["name"]
+        with self.adapter.connection_named("model"):
             columns = [column for column in self.get_columns(node_name, base=base)]
 
-        child_map: List[str] = manifest_dict['child_map'][model_id]
+        child_map: List[str] = manifest_dict["child_map"][model_id]
         cols_not_null = []
         cols_unique = []
 
         for child in child_map:
-            comps = child.split('.')
+            comps = child.split(".")
             child_type = comps[0]
             child_name = comps[2]
 
-            not_null_prefix = f'not_null_{node_name}_'
-            if child_type == 'test' and child_name.startswith(not_null_prefix):
-                cols_not_null.append(child_name[len(not_null_prefix):])
-            unique_prefix = f'unique_{node_name}_'
-            if child_type == 'test' and child_name.startswith(unique_prefix):
-                cols_unique.append(child_name[len(unique_prefix):])
+            not_null_prefix = f"not_null_{node_name}_"
+            if child_type == "test" and child_name.startswith(not_null_prefix):
+                cols_not_null.append(child_name[len(not_null_prefix) :])
+            unique_prefix = f"unique_{node_name}_"
+            if child_type == "test" and child_name.startswith(unique_prefix):
+                cols_unique.append(child_name[len(unique_prefix) :])
 
         columns_info = {}
         primary_key = None
@@ -419,16 +446,16 @@ class DbtAdapter(BaseAdapter):
             col_name = c.column
             col = dict(name=col_name, type=c.dtype)
             if col_name in cols_not_null:
-                col['not_null'] = True
+                col["not_null"] = True
             if col_name in cols_unique:
-                col['unique'] = True
+                col["unique"] = True
            if not primary_key:
                 primary_key = col_name
             columns_info[col_name] = col
 
         result = dict(columns=columns_info)
         if primary_key:
-            result['primary_key'] = primary_key
+            result["primary_key"] = primary_key
 
         return result
 
@@ -437,7 +464,7 @@ class DbtAdapter(BaseAdapter):
         Load the artifacts from the 'target' and 'target-base' directory
         """
         if self.runtime_config is None:
-            raise Exception('Cannot find the dbt project configuration')
+            raise Exception("Cannot find the dbt project configuration")
 
         project_root = self.runtime_config.project_root
         target_path = self.runtime_config.target_path
@@ -446,17 +473,17 @@ class DbtAdapter(BaseAdapter):
         self.base_path = os.path.join(project_root, target_base_path)
 
         # load the artifacts
-        path = os.path.join(project_root, target_path, 'manifest.json')
+        path = os.path.join(project_root, target_path, "manifest.json")
         curr_manifest = load_manifest(path=path)
         if curr_manifest is None:
             raise FileNotFoundError(ENOENT, os.strerror(ENOENT), path)
-        path = os.path.join(project_root, target_base_path, 'manifest.json')
+        path = os.path.join(project_root, target_base_path, "manifest.json")
         base_manifest = load_manifest(path=path)
         if base_manifest is None:
             raise FileNotFoundError(ENOENT, os.strerror(ENOENT), path)
 
-        curr_catalog = load_catalog(path=os.path.join(project_root, target_path, 'catalog.json'))
-        base_catalog = load_catalog(path=os.path.join(project_root, target_base_path, 'catalog.json'))
+        curr_catalog = load_catalog(path=os.path.join(project_root, target_path, "catalog.json"))
+        base_catalog = load_catalog(path=os.path.join(project_root, target_base_path, "catalog.json"))
 
         # set the value if all the artifacts are loaded successfully
         self.curr_manifest = curr_manifest
@@ -474,22 +501,21 @@ class DbtAdapter(BaseAdapter):
 
         # set the file paths to watch
         self.artifacts_files = [
-            os.path.join(project_root, target_path, 'manifest.json'),
-            os.path.join(project_root, target_path, 'catalog.json'),
-            os.path.join(project_root, target_base_path, 'manifest.json'),
-            os.path.join(project_root, target_base_path, 'catalog.json'),
+            os.path.join(project_root, target_path, "manifest.json"),
+            os.path.join(project_root, target_path, "catalog.json"),
+            os.path.join(project_root, target_base_path, "manifest.json"),
+            os.path.join(project_root, target_base_path, "catalog.json"),
         ]
 
     def is_python_model(self, node_id: str, base: Optional[bool] = False):
         manifest = self.curr_manifest if base is False else self.base_manifest
         model = manifest.nodes.get(node_id)
-        if hasattr(model, 'language'):
-            return model.language == 'python'
+        if hasattr(model, "language"):
+            return model.language == "python"
 
         return False
 
     def find_node_by_name(self, node_name, base=False) -> Optional[ManifestNode]:
-
         manifest = self.curr_manifest if base is False else self.base_manifest
 
         for key, node in manifest.nodes.items():
@@ -499,22 +525,22 @@ class DbtAdapter(BaseAdapter):
         return None
 
     def get_node_name_by_id(self, unique_id):
-        if unique_id.startswith('source.'):
+        if unique_id.startswith("source."):
            if unique_id in self.curr_manifest.sources:
                 return self.curr_manifest.sources[unique_id].name
             elif unique_id in self.base_manifest.sources:
                 return self.base_manifest.sources[unique_id].name
-        elif unique_id.startswith('metric.'):
+        elif unique_id.startswith("metric."):
            if unique_id in self.curr_manifest.metrics:
                 return self.curr_manifest.metrics[unique_id].name
             elif unique_id in self.base_manifest.metrics:
                 return self.base_manifest.metrics[unique_id].name
-        elif unique_id.startswith('exposure.'):
+        elif unique_id.startswith("exposure."):
            if unique_id in self.curr_manifest.exposures:
                 return self.curr_manifest.exposures[unique_id].name
             elif unique_id in self.base_manifest.exposures:
                 return self.base_manifest.exposures[unique_id].name
-        elif unique_id.startswith('semantic_model.'):
+        elif unique_id.startswith("semantic_model."):
            if unique_id in self.curr_manifest.semantic_models:
                 return self.curr_manifest.semantic_models[unique_id].name
             elif unique_id in self.base_manifest.semantic_models:
@@ -529,14 +555,24 @@ class DbtAdapter(BaseAdapter):
     def get_manifest(self, base: bool):
         return self.curr_manifest if base is False else self.base_manifest
 
-    def generate_sql(self, sql_template: str, base: bool = False, context=None, provided_manifest=None):
+    def generate_sql(
+        self,
+        sql_template: str,
+        base: bool = False,
+        context=None,
+        provided_manifest=None,
+    ):
         if context is None:
             context = {}
         manifest = provided_manifest if provided_manifest is not None else as_manifest(self.get_manifest(base))
         parser = SqlBlockParser(self.runtime_config, manifest, self.runtime_config)
 
-        if dbt_version >= dbt_version.parse('v1.8'):
-            from dbt_common.context import set_invocation_context, get_invocation_context
+        if dbt_version >= dbt_version.parse("v1.8"):
+            from dbt_common.context import (
+                get_invocation_context,
+                set_invocation_context,
+            )
+
             set_invocation_context({})
             get_invocation_context()._env = dict(os.environ)
 
@@ -544,21 +580,27 @@ class DbtAdapter(BaseAdapter):
         node = parser.parse_remote(sql_template, node_id)
         process_node(self.runtime_config, manifest, node)
 
-        if dbt_version < dbt_version.parse('v1.8'):
+        if dbt_version < dbt_version.parse("v1.8"):
             compiler = self.adapter.get_compiler()
             compiler.compile_node(node, manifest, context)
             return node.compiled_code
         else:
-            from dbt.context.providers import generate_runtime_model_context
             from dbt.clients import jinja
+            from dbt.context.providers import generate_runtime_model_context
+
             jinja_ctx = generate_runtime_model_context(node, self.runtime_config, manifest)
             jinja_ctx.update(context)
             compiled_code = jinja.get_rendered(sql_template, jinja_ctx, node)
             return compiled_code
 
-    def execute(self, sql: str, auto_begin: bool = False, fetch: bool = False, limit: Optional[int] = None) -> Tuple[
-        any, agate.Table]:
-        if dbt_version < dbt_version.parse('v1.6'):
+    def execute(
+        self,
+        sql: str,
+        auto_begin: bool = False,
+        fetch: bool = False,
+        limit: Optional[int] = None,
+    ) -> Tuple[any, agate.Table]:
+        if dbt_version < dbt_version.parse("v1.6"):
             return self.adapter.execute(sql, auto_begin=auto_begin, fetch=fetch)
 
         return self.adapter.execute(sql, auto_begin=auto_begin, fetch=fetch, limit=limit)
@@ -569,7 +611,7 @@ class DbtAdapter(BaseAdapter):
 
         node_ids = nodes.keys()
         parent_map = {}
-        for k, parents in manifest_dict['parent_map'].items():
+        for k, parents in manifest_dict["parent_map"].items():
            if k not in node_ids:
                 continue
             parent_map[k] = [parent for parent in parents if parent in node_ids]
@@ -580,8 +622,8 @@ class DbtAdapter(BaseAdapter):
         manifest = self.curr_manifest if base is False else self.base_manifest
         manifest_dict = manifest.to_dict()
 
-        if node_id in manifest_dict['parent_map']:
-            return manifest_dict['parent_map'][node_id]
+        if node_id in manifest_dict["parent_map"]:
+            return manifest_dict["parent_map"][node_id]
 
     def get_lineage(self, base: Optional[bool] = False):
         manifest = self.curr_manifest if base is False else self.base_manifest
@@ -590,12 +632,14 @@ class DbtAdapter(BaseAdapter):
         return self.get_lineage_cached(base, cache_key)
 
     def get_lineage_diff(self) -> LineageDiff:
-        cache_key = hash((
-            id(self.base_manifest),
-            id(self.base_catalog),
-            id(self.curr_manifest),
-            id(self.curr_catalog),
-        ))
+        cache_key = hash(
+            (
+                id(self.base_manifest),
+                id(self.base_catalog),
+                id(self.curr_manifest),
+                id(self.curr_catalog),
+            )
+        )
         return self._get_lineage_diff_cached(cache_key)
 
     @lru_cache(maxsize=2)
@@ -614,35 +658,35 @@ class DbtAdapter(BaseAdapter):
 
         nodes = {}
 
-        for node in manifest_dict['nodes'].values():
-            unique_id = node['unique_id']
-            resource_type = node['resource_type']
+        for node in manifest_dict["nodes"].values():
+            unique_id = node["unique_id"]
+            resource_type = node["resource_type"]
 
-            if resource_type not in ['model', 'seed', 'exposure', 'snapshot']:
+            if resource_type not in ["model", "seed", "exposure", "snapshot"]:
                 continue
 
             nodes[unique_id] = {
-                'id': node['unique_id'],
-                'name': node['name'],
-                'resource_type': node['resource_type'],
-                'package_name': node['package_name'],
-                'schema': node['schema'],
-                'config': node['config'],
-                'checksum': node['checksum'],
-                'raw_code': node['raw_code'],
+                "id": node["unique_id"],
+                "name": node["name"],
+                "resource_type": node["resource_type"],
+                "package_name": node["package_name"],
+                "schema": node["schema"],
+                "config": node["config"],
+                "checksum": node["checksum"],
+                "raw_code": node["raw_code"],
             }
 
             # List of <type>.<package_name>.<node_name>.<hash>
             # model.jaffle_shop.customer_segments
             # test.jaffle_shop.not_null_customers_customer_id.5c9bf9911d
             # test.jaffle_shop.unique_customers_customer_id.c5af1ff4b1
-            child_map: List[str] = manifest_dict['child_map'][unique_id]
+            child_map: List[str] = manifest_dict["child_map"][unique_id]
             cols_not_null = []
             cols_unique = []
 
             for child in child_map:
-                node_name = node['name']
-                comps = child.split('.')
+                node_name = node["name"]
+                comps = child.split(".")
                 if len(comps) < 2:
                     # only happens in unittest
                     continue
@@ -650,12 +694,12 @@ class DbtAdapter(BaseAdapter):
                 child_type = comps[0]
                 child_name = comps[2]
 
-                not_null_prefix = f'not_null_{node_name}_'
-                if child_type == 'test' and child_name.startswith(not_null_prefix):
-                    cols_not_null.append(child_name[len(not_null_prefix):])
-                unique_prefix = f'unique_{node_name}_'
-                if child_type == 'test' and child_name.startswith(unique_prefix):
-                    cols_unique.append(child_name[len(unique_prefix):])
+                not_null_prefix = f"not_null_{node_name}_"
+                if child_type == "test" and child_name.startswith(not_null_prefix):
+                    cols_not_null.append(child_name[len(not_null_prefix) :])
+                unique_prefix = f"unique_{node_name}_"
+                if child_type == "test" and child_name.startswith(unique_prefix):
+                    cols_unique.append(child_name[len(unique_prefix) :])
 
             if catalog is not None and unique_id in catalog.nodes:
                 columns = {}
@@ -663,61 +707,58 @@ class DbtAdapter(BaseAdapter):
                 for col_name, col_metadata in catalog.nodes[unique_id].columns.items():
                     col = dict(name=col_name, type=col_metadata.type)
                     if col_name in cols_not_null:
-                        col['not_null'] = True
+                        col["not_null"] = True
                     if col_name in cols_unique:
-                        col['unique'] = True
+                        col["unique"] = True
                    if not primary_key:
                         primary_key = col_name
                     columns[col_name] = col
-                nodes[unique_id]['columns'] = columns
+                nodes[unique_id]["columns"] = columns
                 if primary_key:
-                    nodes[unique_id]['primary_key'] = primary_key
+                    nodes[unique_id]["primary_key"] = primary_key
 
-        for source in manifest_dict['sources'].values():
-            unique_id = source['unique_id']
+        for source in manifest_dict["sources"].values():
+            unique_id = source["unique_id"]
 
             nodes[unique_id] = {
-                'id': source['unique_id'],
-                'name': source['name'],
-                'resource_type': source['resource_type'],
-                'package_name': source['package_name'],
-                'config': source['config'],
+                "id": source["unique_id"],
+                "name": source["name"],
+                "resource_type": source["resource_type"],
+                "package_name": source["package_name"],
+                "config": source["config"],
             }
 
             if catalog is not None and unique_id in catalog.sources:
-                nodes[unique_id]['columns'] = {
-                    col_name: {
-                        'name': col_name,
-                        'type': col_metadata.type
-                    }
+                nodes[unique_id]["columns"] = {
+                    col_name: {"name": col_name, "type": col_metadata.type}
                     for col_name, col_metadata in catalog.sources[unique_id].columns.items()
                 }
 
-        for exposure in manifest_dict['exposures'].values():
-            nodes[exposure['unique_id']] = {
-                'id': exposure['unique_id'],
-                'name': exposure['name'],
-                'resource_type': exposure['resource_type'],
-                'package_name': exposure['package_name'],
-                'config': exposure['config'],
+        for exposure in manifest_dict["exposures"].values():
+            nodes[exposure["unique_id"]] = {
+                "id": exposure["unique_id"],
+                "name": exposure["name"],
+                "resource_type": exposure["resource_type"],
+                "package_name": exposure["package_name"],
+                "config": exposure["config"],
             }
-        for metric in manifest_dict['metrics'].values():
-            nodes[metric['unique_id']] = {
-                'id': metric['unique_id'],
-                'name': metric['name'],
-                'resource_type': metric['resource_type'],
-                'package_name': metric['package_name'],
-                'config': metric['config'],
+        for metric in manifest_dict["metrics"].values():
+            nodes[metric["unique_id"]] = {
+                "id": metric["unique_id"],
+                "name": metric["name"],
+                "resource_type": metric["resource_type"],
+                "package_name": metric["package_name"],
+                "config": metric["config"],
             }
 
-        if 'semantic_models' in manifest_dict:
-            for semantic_models in manifest_dict['semantic_models'].values():
-                nodes[semantic_models['unique_id']] = {
-                    'id': semantic_models['unique_id'],
-                    'name': semantic_models['name'],
-                    'resource_type': semantic_models['resource_type'],
-                    'package_name': semantic_models['package_name'],
-                    'config': semantic_models['config'],
+        if "semantic_models" in manifest_dict:
+            for semantic_models in manifest_dict["semantic_models"].values():
+                nodes[semantic_models["unique_id"]] = {
+                    "id": semantic_models["unique_id"],
+                    "name": semantic_models["name"],
+                    "resource_type": semantic_models["resource_type"],
+                    "package_name": semantic_models["package_name"],
+                    "config": semantic_models["config"],
                 }
 
         parent_map = self.build_parent_map(nodes, base)
@@ -725,7 +766,7 @@ class DbtAdapter(BaseAdapter):
         if base is False:
             cll_tracker.end_lineage()
             cll_tracker.set_total_nodes(len(nodes))
-            log_performance('model lineage', cll_tracker.to_dict())
+            log_performance("model lineage", cll_tracker.to_dict())
             cll_tracker.reset()
 
         return dict(
@@ -739,10 +780,7 @@ class DbtAdapter(BaseAdapter):
     def _get_lineage_diff_cached(self, cache_key) -> LineageDiff:
         base = self.get_lineage(base=True)
         current = self.get_lineage(base=False)
-        keys = {
-            *base.get('nodes', {}).keys(),
-            *current.get('nodes', {}).keys()
-        }
+        keys = {*base.get("nodes", {}).keys(), *current.get("nodes", {}).keys()}
 
         # Start to diff
         perf_tracking = BreakingPerformanceTracking()
@@ -750,7 +788,7 @@ class DbtAdapter(BaseAdapter):
 
         base_manifest = as_manifest(self.get_manifest(True))
         curr_manifest = as_manifest(self.get_manifest(False))
-        perf_tracking.record_checkpoint('manifest')
+        perf_tracking.record_checkpoint("manifest")
 
         def ref_func(*args):
             if len(args) == 1:
@@ -762,7 +800,7 @@ class DbtAdapter(BaseAdapter):
             return node
 
         def source_func(source_name, table_name):
-            source_name = source_name.replace('-', '_')
+            source_name = source_name.replace("-", "_")
             return f"__{source_name}__{table_name}"
 
         jinja_context = dict(
@@ -773,49 +811,47 @@ class DbtAdapter(BaseAdapter):
         # for each node, compare the base and current lineage
         diff = {}
         for key in keys:
-            base_node = base.get('nodes', {}).get(key)
-            curr_node = current.get('nodes', {}).get(key)
+            base_node = base.get("nodes", {}).get(key)
+            curr_node = current.get("nodes", {}).get(key)
            if base_node and curr_node:
-                base_checksum = base_node.get('checksum', {}).get('checksum')
-                curr_checksum = curr_node.get('checksum', {}).get('checksum')
+                base_checksum = base_node.get("checksum", {}).get("checksum")
+                curr_checksum = curr_node.get("checksum", {}).get("checksum")
                 change = None
                 if base_checksum is None or curr_checksum is None or base_checksum == curr_checksum:
                     continue
 
-                if curr_node.get('resource_type') == 'model':
+                if curr_node.get("resource_type") == "model":
                     try:
                         perf_tracking.increment_modified_nodes()
 
                         def _get_schema(lineage):
                             schema = {}
-                            nodes = lineage['nodes']
-                            parent_list = lineage['parent_map'].get(key, [])
+                            nodes = lineage["nodes"]
+                            parent_list = lineage["parent_map"].get(key, [])
                             for parent_id in parent_list:
                                 parent_node = nodes.get(parent_id)
                                 if parent_node is None:
                                     continue
-                                columns = parent_node.get('columns') or {}
-                                name = parent_node.get('name')
-                                if parent_node.get('resource_type') == 'source':
-                                    parts = parent_id.split('.')
+                                columns = parent_node.get("columns") or {}
+                                name = parent_node.get("name")
+                                if parent_node.get("resource_type") == "source":
+                                    parts = parent_id.split(".")
                                     source = parts[2]
                                     table = parts[3]
-                                    source = source.replace('-', '_')
+                                    source = source.replace("-", "_")
                                     name = f"__{source}__{table}"
-                                schema[name] = {
-                                    name: column.get('type') for name, column in columns.items()
-                                }
+                                schema[name] = {name: column.get("type") for name, column in columns.items()}
                             return schema
 
                         base_sql = self.generate_sql(
-                            base_node.get('raw_code'),
+                            base_node.get("raw_code"),
                             context=jinja_context,
-                            provided_manifest=base_manifest
+                            provided_manifest=base_manifest,
                         )
                         curr_sql = self.generate_sql(
-                            curr_node.get('raw_code'),
+                            curr_node.get("raw_code"),
                             context=jinja_context,
-                            provided_manifest=curr_manifest
+                            provided_manifest=curr_manifest,
                         )
                         base_schema = _get_schema(base)
                         curr_schema = _get_schema(current)
@@ -834,14 +870,13 @@ class DbtAdapter(BaseAdapter):
 
                         # Make sure that the case of the column names are the same
                         changed_columns = {
-                            column.lower(): change_status
-                            for column, change_status in (change.columns or {}).items()
+                            column.lower(): change_status for column, change_status in (change.columns or {}).items()
                         }
                         changed_columns_names = set(changed_columns)
                         changed_columns_final = {}
 
-                        base_columns = base_node.get('columns') or {}
-                        curr_columns = curr_node.get('columns') or {}
+                        base_columns = base_node.get("columns") or {}
+                        curr_columns = curr_node.get("columns") or {}
                         columns_names = set(base_columns) | set(curr_columns)
 
                         for column_name in columns_names:
@@ -850,16 +885,16 @@ class DbtAdapter(BaseAdapter):
 
                         change.columns = changed_columns_final
                     except Exception:
-                        change = NodeChange(category='unknown')
+                        change = NodeChange(category="unknown")
 
-                diff[key] = NodeDiff(change_status='modified', change=change)
+                diff[key] = NodeDiff(change_status="modified", change=change)
             elif base_node:
-                diff[key] = NodeDiff(change_status='removed')
+                diff[key] = NodeDiff(change_status="removed")
             elif curr_node:
-                diff[key] = NodeDiff(change_status='added')
+                diff[key] = NodeDiff(change_status="added")
 
         perf_tracking.end_lineage_diff()
-        log_performance('model lineage diff', perf_tracking.to_dict())
+        log_performance("model lineage diff", perf_tracking.to_dict())
 
         return LineageDiff(
             base=base,
@@ -874,8 +909,8 @@ class DbtAdapter(BaseAdapter):
         manifest = self.curr_manifest if base is False else self.base_manifest
         manifest_dict = manifest.to_dict()
 
-        parent_ids = find_upstream(node_id, manifest_dict.get('parent_map'))
-        child_ids = find_downstream(node_id, manifest_dict.get('child_map'))
+        parent_ids = find_upstream(node_id, manifest_dict.get("parent_map"))
+        child_ids = find_downstream(node_id, manifest_dict.get("child_map"))
         cll_node_ids = parent_ids.union(child_ids)
         cll_node_ids.add(node_id)
 
@@ -888,7 +923,7 @@ class DbtAdapter(BaseAdapter):
 
         cll_tracker.end_column_lineage()
         cll_tracker.set_total_nodes(len(nodes))
-        log_performance('column level lineage', cll_tracker.to_dict())
+        log_performance("column level lineage", cll_tracker.to_dict())
         cll_tracker.reset()
 
         return dict(nodes=nodes)
@@ -900,8 +935,8 @@ class DbtAdapter(BaseAdapter):
         manifest = self.curr_manifest if base is False else self.base_manifest
         manifest_dict = manifest.to_dict()
         parent_list = []
-        if node_id in manifest_dict['parent_map']:
-            parent_list = manifest_dict['parent_map'][node_id]
+        if node_id in manifest_dict["parent_map"]:
+            parent_list = manifest_dict["parent_map"][node_id]
 
         node = deepcopy(nodes[node_id])
         self.append_column_lineage(node, parent_list, base)
@@ -909,49 +944,49 @@ class DbtAdapter(BaseAdapter):
 
     def append_column_lineage(self, node: Dict, parent_list: List, base: Optional[bool] = False):
         def _apply_all_columns(node, trans_type, depends_on):
-            for col in node.get('columns', {}).values():
-                col['transformation_type'] = trans_type
-                col['depends_on'] = depends_on
+            for col in node.get("columns", {}).values():
+                col["transformation_type"] = trans_type
+                col["depends_on"] = depends_on
 
         def _depend_node_to_id(column_lineage, nodes):
             for cl in column_lineage.values():
                 for depend_on in cl.depends_on:
-                    if depend_on.node.startswith('__'):
+                    if depend_on.node.startswith("__"):
                         for n in nodes.values():
-                            if n.get('resource_type') != 'source':
+                            if n.get("resource_type") != "source":
                                 continue
                             # __source__table -> source.table
                             source_table = depend_on.node.lstrip("_").replace("__", ".", 1).lower()
-                            if source_table in n.get('id'):
-                                depend_on.node = n.get('id')
+                            if source_table in n.get("id"):
+                                depend_on.node = n.get("id")
                                 break
                     else:
                         for n in nodes.values():
-                            if n.get('name') == depend_on.node.lower():
-                                depend_on.node = n.get('id')
+                            if n.get("name") == depend_on.node.lower():
+                                depend_on.node = n.get("id")
                                 break
 
         cll_tracker = CLLPerformanceTracking()
         nodes = self.get_lineage_nodes_metadata(base=base)
         manifest = as_manifest(self.get_manifest(base))
-        resource_type = node.get('resource_type')
-        if resource_type not in {'model', 'seed', 'source', 'snapshot'}:
+        resource_type = node.get("resource_type")
+        if resource_type not in {"model", "seed", "source", "snapshot"}:
             return
 
-        if resource_type == 'source' or resource_type == 'seed':
-            _apply_all_columns(node, 'source', [])
+        if resource_type == "source" or resource_type == "seed":
+            _apply_all_columns(node, "source", [])
             return
 
-        if node.get('raw_code') is None or self.is_python_model(node.get('id'), base=base):
-            _apply_all_columns(node, 'unknown', [])
+        if node.get("raw_code") is None or self.is_python_model(node.get("id"), base=base):
+            _apply_all_columns(node, "unknown", [])
            return
 
         # dbt <= 1.8, MetricFlow expects the time spine table to be named metricflow_time_spine
-        if node.get('name') == 'metricflow_time_spine':
-            _apply_all_columns(node, 'source', [])
+        if node.get("name") == "metricflow_time_spine":
+            _apply_all_columns(node, "source", [])
            return
 
-        if not node.get('columns', {}):
+        if not node.get("columns", {}):
             # no catalog
             return
 
@@ -967,7 +1002,7 @@ class DbtAdapter(BaseAdapter):
         def source_func(source_name, table_name):
             return f"__{source_name}__{table_name}"
 
-        raw_code = node.get('raw_code')
+        raw_code = node.get("raw_code")
         jinja_context = dict(
             ref=ref_func,
             source=source_func,
@@ -978,16 +1013,14 @@ class DbtAdapter(BaseAdapter):
             parent_node = nodes.get(parent_id)
            if parent_node is None:
                 continue
-            columns = parent_node.get('columns') or {}
-            name = parent_node.get('name')
-            if parent_node.get('resource_type') == 'source':
-                parts = parent_id.split('.')
+            columns = parent_node.get("columns") or {}
+            name = parent_node.get("name")
+            if parent_node.get("resource_type") == "source":
+                parts = parent_id.split(".")
                 source = parts[2]
                 table = parts[3]
                 name = f"__{source}__{table}"
-            schema[name] = {
-                name: column.get('type') for name, column in columns.items()
-            }
+            schema[name] = {name: column.get("type") for name, column in columns.items()}
 
         try:
             # provide a manifest to speedup and not pollute the manifest
@@ -999,20 +1032,20 @@ class DbtAdapter(BaseAdapter):
             column_lineage = cll(compiled_sql, schema=schema, dialect=dialect)
         except RecceException:
             # TODO: provide parsing error message if needed
-            _apply_all_columns(node, 'unknown', [])
+            _apply_all_columns(node, "unknown", [])
             cll_tracker.increment_sqlglot_error_nodes()
             return
         except Exception:
-            _apply_all_columns(node, 'unknown', [])
+            _apply_all_columns(node, "unknown", [])
            cll_tracker.increment_other_error_nodes()
            return
 
        _depend_node_to_id(column_lineage, nodes)
 
-        for name, column in node.get('columns', {}).items():
+        for name, column in node.get("columns", {}).items():
            if name in column_lineage:
-                column['depends_on'] = column_lineage[name].depends_on
-                column['transformation_type'] = column_lineage[name].type
+                column["depends_on"] = column_lineage[name].depends_on
+                column["transformation_type"] = column_lineage[name].type
 
     @lru_cache(maxsize=2)
     def get_lineage_nodes_metadata(self, base: Optional[bool] = False):
@@ -1021,18 +1054,18 @@ class DbtAdapter(BaseAdapter):
         manifest_dict = manifest.to_dict()
 
         nodes = {}
-        for node in manifest_dict['nodes'].values():
-            unique_id = node['unique_id']
-            resource_type = node['resource_type']
+        for node in manifest_dict["nodes"].values():
+            unique_id = node["unique_id"]
+            resource_type = node["resource_type"]
 
-            if resource_type not in ['model', 'seed', 'exposure', 'snapshot']:
+            if resource_type not in ["model", "seed", "exposure", "snapshot"]:
                 continue
 
             nodes[unique_id] = {
-                'id': node['unique_id'],
-                'name': node['name'],
-                'resource_type': node['resource_type'],
-                'raw_code': node['raw_code'],
+                "id": node["unique_id"],
+                "name": node["name"],
+                "resource_type": node["resource_type"],
+                "raw_code": node["raw_code"],
            }
 
            if catalog is not None and unique_id in catalog.nodes:
@@ -1040,23 +1073,20 @@ class DbtAdapter(BaseAdapter):
                for col_name, col_metadata in catalog.nodes[unique_id].columns.items():
                    col = dict(name=col_name, type=col_metadata.type)
                    columns[col_name] = col
-                nodes[unique_id]['columns'] = columns
+                nodes[unique_id]["columns"] = columns
 
-        for source in manifest_dict['sources'].values():
-            unique_id = source['unique_id']
+        for source in manifest_dict["sources"].values():
+            unique_id = source["unique_id"]
 
            nodes[unique_id] = {
-                'id': source['unique_id'],
-                'name': source['name'],
-                'resource_type': source['resource_type'],
+                "id": source["unique_id"],
+                "name": source["name"],
+                "resource_type": source["resource_type"],
            }
 
            if catalog is not None and unique_id in catalog.sources:
-                nodes[unique_id]['columns'] = {
-                    col_name: {
-                        'name': col_name,
-                        'type': col_metadata.type
-                    }
+                nodes[unique_id]["columns"] = {
+                    col_name: {"name": col_name, "type": col_metadata.type}
                    for col_name, col_metadata in catalog.sources[unique_id].columns.items()
                }
 
@@ -1067,8 +1097,8 @@ class DbtAdapter(BaseAdapter):
        base_manifest = self.get_manifest(base=True)
        if unique_id in curr_manifest.nodes.keys() or unique_id in base_manifest.nodes.keys():
            return {
-                'current': curr_manifest.nodes.get(unique_id),
-                'base': base_manifest.nodes.get(unique_id)
+                "current": curr_manifest.nodes.get(unique_id),
+                "base": base_manifest.nodes.get(unique_id),
            }
        return None
 
@@ -1091,39 +1121,40 @@ class DbtAdapter(BaseAdapter):
        if self.base_path:
            self.artifacts_observer.schedule(event_handler, self.base_path, recursive=False)
        self.artifacts_observer.start()
-        logger.info('Start monitoring dbt artifacts')
+        logger.info("Start monitoring dbt artifacts")
 
    def stop_monitor_artifacts(self):
        if self.artifacts_files:
            self.artifacts_observer.stop()
            self.artifacts_observer.join()
-        logger.info('Stop monitoring artifacts')
+        logger.info("Stop monitoring artifacts")
 
    def start_monitor_base_env(self, callback: Callable = None):
-        target_base_dir = os.path.join(self.runtime_config.project_root, 'target-base')
+        target_base_dir = os.path.join(self.runtime_config.project_root, "target-base")
        base_env_files = {
-            os.path.join(target_base_dir, 'manifest.json'),
-            os.path.join(target_base_dir, 'catalog.json'),
+            os.path.join(target_base_dir, "manifest.json"),
+            os.path.join(target_base_dir, "catalog.json"),
        }
        event_handler = EnvironmentEventHandler(self.base_env_observer, base_env_files, callback=callback)
        self.base_env_observer.schedule(event_handler, self.runtime_config.project_root, recursive=True)
        self.base_env_observer.start()
-        logger.info('Start monitoring base environment')
+        logger.info("Start monitoring base environment")
 
    def stop_monitor_base_env(self):
        if self.base_env_observer.is_alive():
            self.base_env_observer.stop()
            self.base_env_observer.join()
-        logger.info('Stop monitoring base environment')
-
-    def set_artifacts(self,
-                      base_manifest: WritableManifest,
-                      curr_manifest: WritableManifest,
-                      manifest: Manifest,
-                      previous_manifest: Manifest,
-                      base_catalog: CatalogArtifact,
-                      curr_catalog: CatalogArtifact,
-                      ):
+        logger.info("Stop monitoring base environment")
+
+    def set_artifacts(
+        self,
+        base_manifest: WritableManifest,
+        curr_manifest: WritableManifest,
+        manifest: Manifest,
+        previous_manifest: Manifest,
+        base_catalog: CatalogArtifact,
+        curr_catalog: CatalogArtifact,
+    ):
        self.curr_manifest = curr_manifest
        self.base_manifest = base_manifest
        self.manifest = manifest
@@ -1132,7 +1163,7 @@ class DbtAdapter(BaseAdapter):
        self.previous_state = previous_state(
            Path(self.base_path),
            Path(self.runtime_config.target_path),
-            Path(self.runtime_config.project_root)
+            Path(self.runtime_config.project_root),
        )
        self.previous_state.manifest = previous_manifest
 
@@ -1154,18 +1185,18 @@ class DbtAdapter(BaseAdapter):
        # we capture the original manifest as base and only update the current
        target_type = os.path.basename(os.path.dirname(refresh_file_path))
        if self.target_path and target_type == os.path.basename(self.target_path):
-            if refresh_file_path.endswith('manifest.json'):
+            if refresh_file_path.endswith("manifest.json"):
                self.curr_manifest = load_manifest(path=refresh_file_path)
                self.manifest = as_manifest(self.curr_manifest)
                self.get_cll_cached.cache_clear()
                self.get_lineage_nodes_metadata.cache_clear()
-            elif refresh_file_path.endswith('catalog.json'):
+            elif refresh_file_path.endswith("catalog.json"):
                self.curr_catalog = load_catalog(path=refresh_file_path)
                self.get_lineage_nodes_metadata.cache_clear()
        elif self.base_path and target_type == os.path.basename(self.base_path):
-            if refresh_file_path.endswith('manifest.json'):
+            if refresh_file_path.endswith("manifest.json"):
                self.base_manifest = load_manifest(path=refresh_file_path)
-            elif refresh_file_path.endswith('catalog.json'):
+            elif refresh_file_path.endswith("catalog.json"):
                self.base_catalog = load_catalog(path=refresh_file_path)
 
    def create_relation(self, model, base=False):
@@ -1180,18 +1211,22 @@ class DbtAdapter(BaseAdapter):
        select: Optional[str] = None,
        exclude: Optional[str] = None,
        packages: Optional[list[str]] = None,
-        view_mode: Optional[Literal['all', 'changed_models']] = None,
+        view_mode: Optional[Literal["all", "changed_models"]] = None,
    ) -> Set[str]:
-        from dbt.graph import NodeSelector
-        from dbt.compilation import Compiler
-        from dbt.graph import parse_difference, SelectionIntersection, SelectionUnion
        import dbt.compilation
+        from dbt.compilation import Compiler
+        from dbt.graph import (
+            NodeSelector,
+            SelectionIntersection,
+            SelectionUnion,
+            parse_difference,
+        )
 
        select_list = [select] if select else None
        exclude_list = [exclude] if exclude else None
 
        def _parse_difference(include, exclude):
-            if dbt_version < 'v1.8':
+            if dbt_version < "v1.8":
                return parse_difference(include, exclude, "eager")
            else:
                return parse_difference(include, exclude)
@@ -1199,10 +1234,10 @@ class DbtAdapter(BaseAdapter):
        specs = [_parse_difference(select_list, exclude_list)]
 
        if packages is not None:
-            package_spec = SelectionUnion([_parse_difference([f'package:{p}'], None) for p in packages])
+            package_spec = SelectionUnion([_parse_difference([f"package:{p}"], None) for p in packages])
            specs.append(package_spec)
-        if view_mode and view_mode == 'changed_models':
-            specs.append(_parse_difference(['1+state:modified+'], None))
+        if view_mode and view_mode == "changed_models":
+            specs.append(_parse_difference(["1+state:modified+"], None))
        spec = SelectionIntersection(specs)
 
        manifest = Manifest()
@@ -1215,8 +1250,8 @@ class DbtAdapter(BaseAdapter):
        for node_id, node in manifest_prev.nodes.items():
            if node_id not in manifest.nodes:
                node_dict = node.to_dict()
-                if 'raw_code' in node_dict:
-                    node_dict['raw_code'] = "__removed__"
+                if "raw_code" in node_dict:
+                    node_dict["raw_code"] = "__removed__"
                node_class = type(node)
                removed_node = node_class.from_dict(node_dict)
                manifest.nodes[node_id] = removed_node
@@ -1225,8 +1260,11 @@ class DbtAdapter(BaseAdapter):
        manifest.sources = {**manifest_prev.sources, **manifest_curr.sources}
        manifest.exposures = {**manifest_prev.exposures, **manifest_curr.exposures}
        manifest.metrics = {**manifest_prev.metrics, **manifest_curr.metrics}
-        if hasattr(manifest_prev, 'semantic_models'):
-            manifest.semantic_models = {**manifest_prev.semantic_models, **manifest_curr.semantic_models}
+        if hasattr(manifest_prev, "semantic_models"):
+            manifest.semantic_models = {
+                **manifest_prev.semantic_models,
+                **manifest_curr.semantic_models,
+            }
 
        compiler = Compiler(self.runtime_config)
        # disable to print compile states
@@ -1241,28 +1279,28 @@ class DbtAdapter(BaseAdapter):
        return selector.get_selected(spec)
 
    def export_artifacts(self) -> ArtifactsRoot:
-        '''
+        """
        Export the artifacts from the current state
-        '''
+        """
        artifacts = ArtifactsRoot()
 
        def _load_artifact(artifact):
            return artifact.to_dict() if artifact else None
 
        artifacts.base = {
-            'manifest': _load_artifact(self.base_manifest),
-            'catalog': _load_artifact(self.base_catalog),
+            "manifest": _load_artifact(self.base_manifest),
+            "catalog": _load_artifact(self.base_catalog),
        }
        artifacts.current = {
-            'manifest': _load_artifact(self.curr_manifest),
-            'catalog': _load_artifact(self.curr_catalog),
+            "manifest": _load_artifact(self.curr_manifest),
+            "catalog": _load_artifact(self.curr_catalog),
        }
        return artifacts
 
    def export_artifacts_from_file(self) -> ArtifactsRoot:
-        '''
+        """
        Export the artifacts from the state file. This is the old implementation
-        '''
+        """
        artifacts = ArtifactsRoot()
        target_path = self.runtime_config.target_path
        target_base_path = self.base_path
@@ -1271,18 +1309,18 @@ class DbtAdapter(BaseAdapter):
            if not os.path.isfile(path):
                return None
 
-            with open(path, 'r') as f:
+            with open(path, "r") as f:
                json_content = f.read()
                return json.loads(json_content)
 
        project_root = self.runtime_config.project_root
        artifacts.base = {
-            'manifest': _load_artifact(os.path.join(project_root, target_base_path, 'manifest.json')),
-            'catalog': _load_artifact(os.path.join(project_root, target_base_path, 'catalog.json')),
+            "manifest": _load_artifact(os.path.join(project_root, target_base_path, "manifest.json")),
+            "catalog": _load_artifact(os.path.join(project_root, target_base_path, "catalog.json")),
        }
        artifacts.current = {
-            'manifest': _load_artifact(os.path.join(project_root, target_path, 'manifest.json')),
-            'catalog': _load_artifact(os.path.join(project_root, target_path, 'catalog.json')),
+            "manifest": _load_artifact(os.path.join(project_root, target_path, "manifest.json")),
+            "catalog": _load_artifact(os.path.join(project_root, target_path, "catalog.json")),
        }
        return artifacts
 
@@ -1290,7 +1328,7 @@ class DbtAdapter(BaseAdapter):
        # Merge the artifacts from the state file or cloud
        def _select_artifact(
            original: Union[WritableManifest, CatalogArtifact],
-            new: Union[WritableManifest, CatalogArtifact]
+            new: Union[WritableManifest, CatalogArtifact],
        ):
            if merge:
                if not original:
@@ -1301,16 +1339,16 @@ class DbtAdapter(BaseAdapter):
            else:
                return new
 
-        self.base_manifest = _select_artifact(self.base_manifest, load_manifest(data=artifacts.base.get('manifest')))
-        self.curr_manifest = _select_artifact(self.curr_manifest, load_manifest(data=artifacts.current.get('manifest')))
-        self.base_catalog = _select_artifact(self.base_catalog, load_catalog(data=artifacts.base.get('catalog')))
-        self.curr_catalog = _select_artifact(self.curr_catalog, load_catalog(data=artifacts.current.get('catalog')))
+        self.base_manifest = _select_artifact(self.base_manifest, load_manifest(data=artifacts.base.get("manifest")))
+        self.curr_manifest = _select_artifact(self.curr_manifest, load_manifest(data=artifacts.current.get("manifest")))
+        self.base_catalog = _select_artifact(self.base_catalog, load_catalog(data=artifacts.base.get("catalog")))
+        self.curr_catalog = _select_artifact(self.curr_catalog, load_catalog(data=artifacts.current.get("catalog")))
 
        self.manifest = as_manifest(self.curr_manifest)
        self.previous_state = previous_state(
            Path(self.base_path),
            Path(self.runtime_config.target_path),
-            Path(self.runtime_config.project_root)
+            Path(self.runtime_config.project_root),
        )
        self.previous_state.manifest = as_manifest(self.base_manifest)
 
@@ -1326,7 +1364,8 @@ class DbtAdapter(BaseAdapter):
 
        if not self.curr_manifest or not self.base_manifest:
            raise Exception(
-                'No enough dbt artifacts in the state file. Please use the latest recce to generate the recce state')
+                "No enough dbt artifacts in the state file. Please use the latest recce to generate the recce state"
+            )
 
    @contextmanager
    def connection_named(self, name: str) -> Iterator[None]: