recce-nightly 1.3.0.20250507__py3-none-any.whl → 1.4.0.20250514__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of recce-nightly might be problematic.
- recce/VERSION +1 -1
- recce/__init__.py +22 -22
- recce/adapter/base.py +11 -14
- recce/adapter/dbt_adapter/__init__.py +355 -316
- recce/adapter/dbt_adapter/dbt_version.py +3 -0
- recce/adapter/sqlmesh_adapter.py +24 -35
- recce/apis/check_api.py +39 -28
- recce/apis/check_func.py +33 -27
- recce/apis/run_api.py +25 -19
- recce/apis/run_func.py +29 -23
- recce/artifact.py +44 -49
- recce/cli.py +484 -285
- recce/config.py +42 -33
- recce/core.py +52 -44
- recce/data/404.html +1 -1
- recce/data/_next/static/chunks/{368-7587b306577df275.js → 778-aef312bffb4c0312.js} +15 -15
- recce/data/_next/static/chunks/8d700b6a.ed11a130057c7a47.js +1 -0
- recce/data/_next/static/chunks/app/layout-c713a2829d3279e4.js +1 -0
- recce/data/_next/static/chunks/app/page-7086764277331fcb.js +1 -0
- recce/data/_next/static/chunks/{cd9f8d63-cf0d5a7b0f7a92e8.js → cd9f8d63-e020f408095ed77c.js} +3 -3
- recce/data/_next/static/chunks/webpack-b787cb1a4f2293de.js +1 -0
- recce/data/_next/static/css/88b8abc134cfd59a.css +3 -0
- recce/data/index.html +2 -2
- recce/data/index.txt +2 -2
- recce/diff.py +6 -12
- recce/event/__init__.py +74 -72
- recce/event/collector.py +27 -20
- recce/event/track.py +39 -27
- recce/exceptions.py +1 -1
- recce/git.py +7 -7
- recce/github.py +57 -53
- recce/models/__init__.py +1 -1
- recce/models/check.py +6 -7
- recce/models/run.py +1 -0
- recce/models/types.py +27 -27
- recce/pull_request.py +26 -24
- recce/run.py +148 -111
- recce/server.py +103 -89
- recce/state.py +209 -177
- recce/summary.py +168 -143
- recce/tasks/__init__.py +3 -3
- recce/tasks/core.py +11 -13
- recce/tasks/dataframe.py +19 -17
- recce/tasks/histogram.py +69 -34
- recce/tasks/lineage.py +2 -2
- recce/tasks/profile.py +147 -86
- recce/tasks/query.py +139 -87
- recce/tasks/rowcount.py +33 -30
- recce/tasks/schema.py +14 -14
- recce/tasks/top_k.py +35 -35
- recce/tasks/valuediff.py +216 -152
- recce/util/breaking.py +77 -84
- recce/util/cll.py +55 -51
- recce/util/io.py +19 -17
- recce/util/logger.py +1 -1
- recce/util/recce_cloud.py +70 -72
- recce/util/singleton.py +4 -4
- recce/yaml/__init__.py +7 -10
- {recce_nightly-1.3.0.20250507.dist-info → recce_nightly-1.4.0.20250514.dist-info}/METADATA +5 -2
- recce_nightly-1.4.0.20250514.dist-info/RECORD +143 -0
- {recce_nightly-1.3.0.20250507.dist-info → recce_nightly-1.4.0.20250514.dist-info}/WHEEL +1 -1
- tests/adapter/dbt_adapter/conftest.py +1 -0
- tests/adapter/dbt_adapter/dbt_test_helper.py +28 -18
- tests/adapter/dbt_adapter/test_dbt_adapter.py +0 -15
- tests/adapter/dbt_adapter/test_dbt_cll.py +39 -32
- tests/adapter/dbt_adapter/test_selector.py +22 -21
- tests/tasks/test_histogram.py +58 -66
- tests/tasks/test_lineage.py +36 -23
- tests/tasks/test_preset_checks.py +45 -31
- tests/tasks/test_profile.py +340 -15
- tests/tasks/test_query.py +40 -40
- tests/tasks/test_row_count.py +65 -46
- tests/tasks/test_schema.py +65 -42
- tests/tasks/test_top_k.py +22 -18
- tests/tasks/test_valuediff.py +43 -32
- tests/test_cli.py +71 -58
- tests/test_config.py +7 -9
- tests/test_core.py +5 -3
- tests/test_dbt.py +7 -7
- tests/test_pull_request.py +1 -1
- tests/test_server.py +19 -13
- tests/test_state.py +40 -27
- tests/test_summary.py +18 -14
- recce/data/_next/static/chunks/8d700b6a-f0b1f6b9e0d97ce2.js +0 -1
- recce/data/_next/static/chunks/app/layout-9102e22cb73f74d6.js +0 -1
- recce/data/_next/static/chunks/app/page-92f13c8fad9fae3d.js +0 -1
- recce/data/_next/static/chunks/webpack-567d72f0bc0820d5.js +0 -1
- recce_nightly-1.3.0.20250507.dist-info/RECORD +0 -142
- /recce/data/_next/static/{K5iKlCYhdcpq8Ea6ck9J_ → E_HPXsXdrqHg2YEHmU3mK}/_buildManifest.js +0 -0
- /recce/data/_next/static/{K5iKlCYhdcpq8Ea6ck9J_ → E_HPXsXdrqHg2YEHmU3mK}/_ssgManifest.js +0 -0
- {recce_nightly-1.3.0.20250507.dist-info → recce_nightly-1.4.0.20250514.dist-info}/entry_points.txt +0 -0
- {recce_nightly-1.3.0.20250507.dist-info → recce_nightly-1.4.0.20250514.dist-info}/licenses/LICENSE +0 -0
- {recce_nightly-1.3.0.20250507.dist-info → recce_nightly-1.4.0.20250514.dist-info}/top_level.txt +0 -0
recce/adapter/dbt_adapter/__init__.py

@@ -8,14 +8,27 @@ from dataclasses import dataclass, fields
 from errno import ENOENT
 from functools import lru_cache
 from pathlib import Path
-from typing import
+from typing import (
+    Any,
+    Callable,
+    Dict,
+    Iterator,
+    List,
+    Literal,
+    Optional,
+    Set,
+    Tuple,
+    Type,
+    Union,
+)

 from recce.event import log_performance
 from recce.exceptions import RecceException
-from recce.util.cll import
-from recce.util.lineage import
+from recce.util.cll import CLLPerformanceTracking, cll
+from recce.util.lineage import find_downstream, find_upstream
+
 from ...tasks.profile import ProfileTask
-from ...util.breaking import
+from ...util.breaking import BreakingPerformanceTracking, parse_change_category

 try:
     import agate
@@ -30,11 +43,23 @@ from watchdog.observers import Observer

 from recce.adapter.base import BaseAdapter
 from recce.state import ArtifactsRoot
-
+
 from ...models import RunType
-from ...models.types import LineageDiff,
-from ...tasks import
-
+from ...models.types import LineageDiff, NodeChange, NodeDiff
+from ...tasks import (
+    HistogramDiffTask,
+    ProfileDiffTask,
+    QueryBaseTask,
+    QueryDiffTask,
+    QueryTask,
+    RowCountDiffTask,
+    RowCountTask,
+    Task,
+    TopKDiffTask,
+    ValueDiffDetailTask,
+    ValueDiffTask,
+)
+from .dbt_version import DbtVersion

 dbt_supported_registry: Dict[RunType, Type[Task]] = {
     RunType.QUERY: QueryTask,
@@ -56,7 +81,7 @@ get_adapter_orig = dbt.adapters.factory.get_adapter


 def get_adapter(config):
-    if hasattr(config,
+    if hasattr(config, "adapter"):
         return config.adapter
     else:
         return get_adapter_orig(config)
@@ -69,7 +94,11 @@ from dbt.adapters.base import Column  # noqa: E402
 from dbt.adapters.factory import get_adapter_class_by_name  # noqa: E402
 from dbt.adapters.sql import SQLAdapter  # noqa: E402
 from dbt.config.runtime import RuntimeConfig  # noqa: E402
-from dbt.contracts.graph.manifest import
+from dbt.contracts.graph.manifest import (  # noqa: E402
+    MacroManifest,
+    Manifest,
+    WritableManifest,
+)
 from dbt.contracts.graph.nodes import ManifestNode  # noqa: E402
 from dbt.contracts.results import CatalogArtifact  # noqa: E402
 from dbt.flags import set_from_args  # noqa: E402
@@ -78,7 +107,7 @@ from dbt.parser.sql import SqlBlockParser  # noqa: E402

 dbt_version = DbtVersion()

-if dbt_version <
+if dbt_version < "v1.8":
     from dbt.contracts.connection import Connection
 else:
     from dbt.adapters.contracts.connection import Connection
@@ -86,19 +115,21 @@ else:

 @contextmanager
 def silence_no_nodes_warning():
-    if dbt_version >=
+    if dbt_version >= "v1.8":
         from dbt.events.types import NoNodesForSelectionCriteria
         from dbt_common.events.functions import WARN_ERROR_OPTIONS
+
         WARN_ERROR_OPTIONS.silence.append(NoNodesForSelectionCriteria.__name__)
     try:
         yield
     finally:
-        if dbt_version >=
+        if dbt_version >= "v1.8":
             from dbt_common.events.functions import WARN_ERROR_OPTIONS
+
             WARN_ERROR_OPTIONS.silence.pop()


-logger = logging.getLogger(
+logger = logging.getLogger("uvicorn")


 class ArtifactsEventHandler(FileSystemEventHandler):
@@ -147,16 +178,18 @@ class EnvironmentEventHandler(FileSystemEventHandler):


 def merge_tables(tables: List[agate.Table]) -> agate.Table:
-    if dbt_version <
+    if dbt_version < "v1.8":
         from dbt.clients.agate_helper import merge_tables
+
         return merge_tables(tables)
     else:
         from dbt_common.clients.agate_helper import merge_tables
+
         return merge_tables(tables)


 def as_manifest(m: WritableManifest) -> Manifest:
-    if dbt_version <
+    if dbt_version < "v1.8":
         data = m.__dict__
         all_fields = set([x.name for x in fields(Manifest)])
         new_data = {k: v for k, v in data.items() if k in all_fields}
@@ -184,12 +217,13 @@ def load_catalog(path: str = None, data: dict = None):


 def previous_state(state_path: Path, target_path: Path, project_root: Path) -> PreviousState:
-    if dbt_version <
+    if dbt_version < "v1.5.2":
         return PreviousState(state_path, target_path)
     else:
         try:
             # Overwrite the level_tag method temporarily to avoid the warning message
-            from dbt.events.types import
+            from dbt.events.types import EventLevel, WarnStateTargetEqual
+
             original_level_tag_func = WarnStateTargetEqual.level_tag
             WarnStateTargetEqual.level_tag = lambda x: EventLevel.DEBUG
         except ImportError:
@@ -209,12 +243,12 @@ def previous_state(state_path: Path, target_path: Path, project_root: Path) -> P
 def default_profiles_dir():
     # Precedence: DBT_PROFILES_DIR > current working directory > ~/.dbt/
     # https://docs.getdbt.com/docs/core/connect-data-platform/connection-profiles#advanced-customizing-a-profile-directory
-    if os.getenv(
-        return os.getenv(
-    elif os.path.exists(os.path.join(os.getcwd(),
+    if os.getenv("DBT_PROFILES_DIR"):
+        return os.getenv("DBT_PROFILES_DIR")
+    elif os.path.exists(os.path.join(os.getcwd(), "profiles.yml")):
         return os.getcwd()
     else:
-        return os.path.expanduser(
+        return os.path.expanduser("~/.dbt/")


 @dataclass()
@@ -222,12 +256,13 @@ class DbtArgs:
     """
     Used for RuntimeConfig.from_args
     """
-
-
-
-
-
-
+
+    threads: Optional[int] = (1,)
+    target: Optional[str] = (None,)
+    profiles_dir: Optional[str] = (None,)
+    project_dir: Optional[str] = (None,)
+    profile: Optional[str] = (None,)
+    target_path: Optional[str] = (None,)
     project_only_flags: Optional[Dict[str, Any]] = None
     which: Optional[str] = None
     state_modified_compare_more_unrendered_values: Optional[bool] = False  # new flag added since dbt v1.9
@@ -258,32 +293,18 @@ class DbtAdapter(BaseAdapter):

     def support_tasks(self):
         support_map = {run_type.value: True for run_type in dbt_supported_registry}
-        supported_dbt_packages = set([package.package_name for package in self.manifest.macros.values()])
-
-        if 'dbt_profiler' not in supported_dbt_packages:
-            support_map[RunType.PROFILE_DIFF.value] = False
-            support_map[RunType.PROFILE.value] = False
-
-        if 'audit_helper' not in supported_dbt_packages:
-            support_map[RunType.VALUE_DIFF.value] = False
-            support_map[RunType.VALUE_DIFF_DETAIL.value] = False
-            support_map['query_diff_with_primary_key'] = False

         return support_map

     @classmethod
-    def load(cls,
-
-
-
+    def load(cls, no_artifacts=False, review=False, **kwargs):
+        target = kwargs.get("target")
+        target_path = kwargs.get("target_path", "target")
+        target_base_path = kwargs.get("target_base_path", "target-base")

-
-
-
-
-        profile_name = kwargs.get('profile')
-        project_dir = kwargs.get('project_dir')
-        profiles_dir = kwargs.get('profiles_dir')
+        profile_name = kwargs.get("profile")
+        project_dir = kwargs.get("project_dir")
+        profiles_dir = kwargs.get("profiles_dir")

         if profiles_dir is None:
             profiles_dir = default_profiles_dir()
@@ -297,21 +318,25 @@ class DbtAdapter(BaseAdapter):
             profiles_dir=profiles_dir,
             profile=profile_name,
             project_only_flags={},
-            which=
+            which="list",
         )
         set_from_args(args, args)

         from dbt.exceptions import DbtProjectError
+
         try:
             # adapter
-            if dbt_version <
+            if dbt_version < "v1.8":
                 runtime_config = RuntimeConfig.from_args(args)
                 adapter_name = runtime_config.credentials.type
                 adapter_cls = get_adapter_class_by_name(adapter_name)
                 adapter: SQLAdapter = adapter_cls(runtime_config)
             else:
-                from dbt_common.context import set_invocation_context, get_invocation_context
                 from dbt.mp_context import get_mp_context
+                from dbt_common.context import (
+                    get_invocation_context,
+                    set_invocation_context,
+                )

                 set_invocation_context({})
                 get_invocation_context()._env = dict(os.environ)
@@ -320,6 +345,7 @@ class DbtAdapter(BaseAdapter):
                 adapter_cls = get_adapter_class_by_name(adapter_name)
                 adapter: SQLAdapter = adapter_cls(runtime_config, get_mp_context())
                 from dbt.adapters.factory import FACTORY
+
                 FACTORY.adapters[adapter_name] = adapter

             adapter.connections.set_connection_name()
@@ -329,7 +355,7 @@ class DbtAdapter(BaseAdapter):
                 runtime_config=runtime_config,
                 adapter=adapter,
                 review_mode=review,
-                base_path=target_base_path
+                base_path=target_base_path,
             )
         except DbtProjectError as e:
             raise e
@@ -350,27 +376,26 @@ class DbtAdapter(BaseAdapter):

     def get_columns(self, model: str, base=False) -> List[Column]:
         relation = self.create_relation(model, base)
-        get_columns_macro =
-        if self.adapter.connections.TYPE ==
-            get_columns_macro =
+        get_columns_macro = "get_columns_in_relation"
+        if self.adapter.connections.TYPE == "databricks":
+            get_columns_macro = "get_columns_comments"

-        if dbt_version <
+        if dbt_version < "v1.8":
             columns = self.adapter.execute_macro(
-                get_columns_macro,
-
-                manifest=self.manifest)
+                get_columns_macro, kwargs={"relation": relation}, manifest=self.manifest
+            )
         else:
             from dbt.context.providers import generate_runtime_macro_context
+
             macro_manifest = MacroManifest(self.manifest.macros)
             self.adapter.set_macro_resolver(macro_manifest)
             self.adapter.set_macro_context_generator(generate_runtime_macro_context)
-            columns = self.adapter.execute_macro(
-                get_columns_macro,
-                kwargs={"relation": relation})
+            columns = self.adapter.execute_macro(get_columns_macro, kwargs={"relation": relation})

-        if self.adapter.connections.TYPE ==
+        if self.adapter.connections.TYPE == "databricks":
             # reference: get_columns_in_relation (dbt/adapters/databricks/impl.py)
             from dbt.adapters.databricks import DatabricksColumn
+
             rows = columns
             columns = []
             for row in rows:
@@ -378,7 +403,9 @@ class DbtAdapter(BaseAdapter):
                     break
                 columns.append(
                     DatabricksColumn(
-                        column=row["col_name"],
+                        column=row["col_name"],
+                        dtype=row["data_type"],
+                        comment=row["comment"],
                     )
                 )
             return columns
@@ -389,29 +416,29 @@ class DbtAdapter(BaseAdapter):
         manifest = self.curr_manifest if base is False else self.base_manifest
         manifest_dict = manifest.to_dict()

-        node = manifest_dict[
+        node = manifest_dict["nodes"].get(model_id)
         if node is None:
             return {}

-        node_name = node[
-        with self.adapter.connection_named(
+        node_name = node["name"]
+        with self.adapter.connection_named("model"):
             columns = [column for column in self.get_columns(node_name, base=base)]

-        child_map: List[str] = manifest_dict[
+        child_map: List[str] = manifest_dict["child_map"][model_id]
         cols_not_null = []
         cols_unique = []

         for child in child_map:
-            comps = child.split(
+            comps = child.split(".")
             child_type = comps[0]
             child_name = comps[2]

-            not_null_prefix = f
-            if child_type ==
-                cols_not_null.append(child_name[len(not_null_prefix):])
-            unique_prefix = f
-            if child_type ==
-                cols_unique.append(child_name[len(unique_prefix):])
+            not_null_prefix = f"not_null_{node_name}_"
+            if child_type == "test" and child_name.startswith(not_null_prefix):
+                cols_not_null.append(child_name[len(not_null_prefix) :])
+            unique_prefix = f"unique_{node_name}_"
+            if child_type == "test" and child_name.startswith(unique_prefix):
+                cols_unique.append(child_name[len(unique_prefix) :])

         columns_info = {}
         primary_key = None
@@ -419,16 +446,16 @@ class DbtAdapter(BaseAdapter):
             col_name = c.column
             col = dict(name=col_name, type=c.dtype)
             if col_name in cols_not_null:
-                col[
+                col["not_null"] = True
             if col_name in cols_unique:
-                col[
+                col["unique"] = True
             if not primary_key:
                 primary_key = col_name
             columns_info[col_name] = col

         result = dict(columns=columns_info)
         if primary_key:
-            result[
+            result["primary_key"] = primary_key

         return result

@@ -437,7 +464,7 @@ class DbtAdapter(BaseAdapter):
         Load the artifacts from the 'target' and 'target-base' directory
         """
         if self.runtime_config is None:
-            raise Exception(
+            raise Exception("Cannot find the dbt project configuration")

         project_root = self.runtime_config.project_root
         target_path = self.runtime_config.target_path
@@ -446,17 +473,17 @@ class DbtAdapter(BaseAdapter):
         self.base_path = os.path.join(project_root, target_base_path)

         # load the artifacts
-        path = os.path.join(project_root, target_path,
+        path = os.path.join(project_root, target_path, "manifest.json")
         curr_manifest = load_manifest(path=path)
         if curr_manifest is None:
             raise FileNotFoundError(ENOENT, os.strerror(ENOENT), path)
-        path = os.path.join(project_root, target_base_path,
+        path = os.path.join(project_root, target_base_path, "manifest.json")
         base_manifest = load_manifest(path=path)
         if base_manifest is None:
             raise FileNotFoundError(ENOENT, os.strerror(ENOENT), path)

-        curr_catalog = load_catalog(path=os.path.join(project_root, target_path,
-        base_catalog = load_catalog(path=os.path.join(project_root, target_base_path,
+        curr_catalog = load_catalog(path=os.path.join(project_root, target_path, "catalog.json"))
+        base_catalog = load_catalog(path=os.path.join(project_root, target_base_path, "catalog.json"))

         # set the value if all the artifacts are loaded successfully
         self.curr_manifest = curr_manifest
@@ -474,22 +501,21 @@ class DbtAdapter(BaseAdapter):

         # set the file paths to watch
         self.artifacts_files = [
-            os.path.join(project_root, target_path,
-            os.path.join(project_root, target_path,
-            os.path.join(project_root, target_base_path,
-            os.path.join(project_root, target_base_path,
+            os.path.join(project_root, target_path, "manifest.json"),
+            os.path.join(project_root, target_path, "catalog.json"),
+            os.path.join(project_root, target_base_path, "manifest.json"),
+            os.path.join(project_root, target_base_path, "catalog.json"),
         ]

     def is_python_model(self, node_id: str, base: Optional[bool] = False):
         manifest = self.curr_manifest if base is False else self.base_manifest
         model = manifest.nodes.get(node_id)
-        if hasattr(model,
-            return model.language ==
+        if hasattr(model, "language"):
+            return model.language == "python"

         return False

     def find_node_by_name(self, node_name, base=False) -> Optional[ManifestNode]:
-
         manifest = self.curr_manifest if base is False else self.base_manifest

         for key, node in manifest.nodes.items():
@@ -499,22 +525,22 @@ class DbtAdapter(BaseAdapter):
         return None

     def get_node_name_by_id(self, unique_id):
-        if unique_id.startswith(
+        if unique_id.startswith("source."):
             if unique_id in self.curr_manifest.sources:
                 return self.curr_manifest.sources[unique_id].name
             elif unique_id in self.base_manifest.sources:
                 return self.base_manifest.sources[unique_id].name
-        elif unique_id.startswith(
+        elif unique_id.startswith("metric."):
             if unique_id in self.curr_manifest.metrics:
                 return self.curr_manifest.metrics[unique_id].name
             elif unique_id in self.base_manifest.metrics:
                 return self.base_manifest.metrics[unique_id].name
-        elif unique_id.startswith(
+        elif unique_id.startswith("exposure."):
             if unique_id in self.curr_manifest.exposures:
                 return self.curr_manifest.exposures[unique_id].name
             elif unique_id in self.base_manifest.exposures:
                 return self.base_manifest.exposures[unique_id].name
-        elif unique_id.startswith(
+        elif unique_id.startswith("semantic_model."):
             if unique_id in self.curr_manifest.semantic_models:
                 return self.curr_manifest.semantic_models[unique_id].name
             elif unique_id in self.base_manifest.semantic_models:
@@ -529,14 +555,24 @@ class DbtAdapter(BaseAdapter):
     def get_manifest(self, base: bool):
         return self.curr_manifest if base is False else self.base_manifest

-    def generate_sql(
+    def generate_sql(
+        self,
+        sql_template: str,
+        base: bool = False,
+        context=None,
+        provided_manifest=None,
+    ):
         if context is None:
             context = {}
         manifest = provided_manifest if provided_manifest is not None else as_manifest(self.get_manifest(base))
         parser = SqlBlockParser(self.runtime_config, manifest, self.runtime_config)

-        if dbt_version >= dbt_version.parse(
-            from dbt_common.context import
+        if dbt_version >= dbt_version.parse("v1.8"):
+            from dbt_common.context import (
+                get_invocation_context,
+                set_invocation_context,
+            )
+
             set_invocation_context({})
             get_invocation_context()._env = dict(os.environ)

@@ -544,21 +580,27 @@ class DbtAdapter(BaseAdapter):
         node = parser.parse_remote(sql_template, node_id)
         process_node(self.runtime_config, manifest, node)

-        if dbt_version < dbt_version.parse(
+        if dbt_version < dbt_version.parse("v1.8"):
             compiler = self.adapter.get_compiler()
             compiler.compile_node(node, manifest, context)
             return node.compiled_code
         else:
-            from dbt.context.providers import generate_runtime_model_context
             from dbt.clients import jinja
+            from dbt.context.providers import generate_runtime_model_context
+
             jinja_ctx = generate_runtime_model_context(node, self.runtime_config, manifest)
             jinja_ctx.update(context)
             compiled_code = jinja.get_rendered(sql_template, jinja_ctx, node)
             return compiled_code

-    def execute(
-
-
+    def execute(
+        self,
+        sql: str,
+        auto_begin: bool = False,
+        fetch: bool = False,
+        limit: Optional[int] = None,
+    ) -> Tuple[any, agate.Table]:
+        if dbt_version < dbt_version.parse("v1.6"):
             return self.adapter.execute(sql, auto_begin=auto_begin, fetch=fetch)

         return self.adapter.execute(sql, auto_begin=auto_begin, fetch=fetch, limit=limit)
@@ -569,7 +611,7 @@ class DbtAdapter(BaseAdapter):

         node_ids = nodes.keys()
         parent_map = {}
-        for k, parents in manifest_dict[
+        for k, parents in manifest_dict["parent_map"].items():
             if k not in node_ids:
                 continue
             parent_map[k] = [parent for parent in parents if parent in node_ids]
@@ -580,8 +622,8 @@ class DbtAdapter(BaseAdapter):
         manifest = self.curr_manifest if base is False else self.base_manifest
         manifest_dict = manifest.to_dict()

-        if node_id in manifest_dict[
-            return manifest_dict[
+        if node_id in manifest_dict["parent_map"]:
+            return manifest_dict["parent_map"][node_id]

     def get_lineage(self, base: Optional[bool] = False):
         manifest = self.curr_manifest if base is False else self.base_manifest
@@ -590,12 +632,14 @@ class DbtAdapter(BaseAdapter):
         return self.get_lineage_cached(base, cache_key)

     def get_lineage_diff(self) -> LineageDiff:
-        cache_key = hash(
-
-
-
-
-
+        cache_key = hash(
+            (
+                id(self.base_manifest),
+                id(self.base_catalog),
+                id(self.curr_manifest),
+                id(self.curr_catalog),
+            )
+        )
         return self._get_lineage_diff_cached(cache_key)

     @lru_cache(maxsize=2)
@@ -614,35 +658,35 @@ class DbtAdapter(BaseAdapter):

         nodes = {}

-        for node in manifest_dict[
-            unique_id = node[
-            resource_type = node[
+        for node in manifest_dict["nodes"].values():
+            unique_id = node["unique_id"]
+            resource_type = node["resource_type"]

-            if resource_type not in [
+            if resource_type not in ["model", "seed", "exposure", "snapshot"]:
                 continue

             nodes[unique_id] = {
-
-
-
-
-
-
-
-
+                "id": node["unique_id"],
+                "name": node["name"],
+                "resource_type": node["resource_type"],
+                "package_name": node["package_name"],
+                "schema": node["schema"],
+                "config": node["config"],
+                "checksum": node["checksum"],
+                "raw_code": node["raw_code"],
             }

             # List of <type>.<package_name>.<node_name>.<hash>
             # model.jaffle_shop.customer_segments
             # test.jaffle_shop.not_null_customers_customer_id.5c9bf9911d
             # test.jaffle_shop.unique_customers_customer_id.c5af1ff4b1
-            child_map: List[str] = manifest_dict[
+            child_map: List[str] = manifest_dict["child_map"][unique_id]
             cols_not_null = []
             cols_unique = []

             for child in child_map:
-                node_name = node[
-                comps = child.split(
+                node_name = node["name"]
+                comps = child.split(".")
                 if len(comps) < 2:
                     # only happens in unittest
                     continue
@@ -650,12 +694,12 @@ class DbtAdapter(BaseAdapter):
                 child_type = comps[0]
                 child_name = comps[2]

-                not_null_prefix = f
-                if child_type ==
-                    cols_not_null.append(child_name[len(not_null_prefix):])
-                unique_prefix = f
-                if child_type ==
-                    cols_unique.append(child_name[len(unique_prefix):])
+                not_null_prefix = f"not_null_{node_name}_"
+                if child_type == "test" and child_name.startswith(not_null_prefix):
+                    cols_not_null.append(child_name[len(not_null_prefix) :])
+                unique_prefix = f"unique_{node_name}_"
+                if child_type == "test" and child_name.startswith(unique_prefix):
+                    cols_unique.append(child_name[len(unique_prefix) :])

             if catalog is not None and unique_id in catalog.nodes:
                 columns = {}
@@ -663,61 +707,58 @@ class DbtAdapter(BaseAdapter):
                 for col_name, col_metadata in catalog.nodes[unique_id].columns.items():
                     col = dict(name=col_name, type=col_metadata.type)
                     if col_name in cols_not_null:
-                        col[
+                        col["not_null"] = True
                     if col_name in cols_unique:
-                        col[
+                        col["unique"] = True
                     if not primary_key:
                         primary_key = col_name
                     columns[col_name] = col
-                nodes[unique_id][
+                nodes[unique_id]["columns"] = columns
                 if primary_key:
-                    nodes[unique_id][
+                    nodes[unique_id]["primary_key"] = primary_key

-        for source in manifest_dict[
-            unique_id = source[
+        for source in manifest_dict["sources"].values():
+            unique_id = source["unique_id"]

             nodes[unique_id] = {
-
-
-
-
-
+                "id": source["unique_id"],
+                "name": source["name"],
+                "resource_type": source["resource_type"],
+                "package_name": source["package_name"],
+                "config": source["config"],
             }

             if catalog is not None and unique_id in catalog.sources:
-                nodes[unique_id][
-                    col_name: {
-                        'name': col_name,
-                        'type': col_metadata.type
-                    }
+                nodes[unique_id]["columns"] = {
+                    col_name: {"name": col_name, "type": col_metadata.type}
                     for col_name, col_metadata in catalog.sources[unique_id].columns.items()
                 }

-        for exposure in manifest_dict[
-            nodes[exposure[
-
-
-
-
-
+        for exposure in manifest_dict["exposures"].values():
+            nodes[exposure["unique_id"]] = {
+                "id": exposure["unique_id"],
+                "name": exposure["name"],
+                "resource_type": exposure["resource_type"],
+                "package_name": exposure["package_name"],
+                "config": exposure["config"],
             }
-        for metric in manifest_dict[
-            nodes[metric[
-
-
-
-
-
+        for metric in manifest_dict["metrics"].values():
+            nodes[metric["unique_id"]] = {
+                "id": metric["unique_id"],
+                "name": metric["name"],
+                "resource_type": metric["resource_type"],
+                "package_name": metric["package_name"],
+                "config": metric["config"],
             }

-        if
-            for semantic_models in manifest_dict[
-                nodes[semantic_models[
-
-
-
-
-
+        if "semantic_models" in manifest_dict:
+            for semantic_models in manifest_dict["semantic_models"].values():
+                nodes[semantic_models["unique_id"]] = {
+                    "id": semantic_models["unique_id"],
+                    "name": semantic_models["name"],
+                    "resource_type": semantic_models["resource_type"],
+                    "package_name": semantic_models["package_name"],
+                    "config": semantic_models["config"],
                 }

         parent_map = self.build_parent_map(nodes, base)
@@ -725,7 +766,7 @@ class DbtAdapter(BaseAdapter):
         if base is False:
             cll_tracker.end_lineage()
             cll_tracker.set_total_nodes(len(nodes))
-            log_performance(
+            log_performance("model lineage", cll_tracker.to_dict())
             cll_tracker.reset()

         return dict(
@@ -739,10 +780,7 @@ class DbtAdapter(BaseAdapter):
     def _get_lineage_diff_cached(self, cache_key) -> LineageDiff:
         base = self.get_lineage(base=True)
         current = self.get_lineage(base=False)
-        keys = {
-            *base.get('nodes', {}).keys(),
-            *current.get('nodes', {}).keys()
-        }
+        keys = {*base.get("nodes", {}).keys(), *current.get("nodes", {}).keys()}

         # Start to diff
         perf_tracking = BreakingPerformanceTracking()
@@ -750,7 +788,7 @@ class DbtAdapter(BaseAdapter):

         base_manifest = as_manifest(self.get_manifest(True))
         curr_manifest = as_manifest(self.get_manifest(False))
-        perf_tracking.record_checkpoint(
+        perf_tracking.record_checkpoint("manifest")

         def ref_func(*args):
             if len(args) == 1:
@@ -762,7 +800,7 @@ class DbtAdapter(BaseAdapter):
             return node

         def source_func(source_name, table_name):
-            source_name = source_name.replace(
+            source_name = source_name.replace("-", "_")
             return f"__{source_name}__{table_name}"

         jinja_context = dict(
@@ -773,49 +811,47 @@ class DbtAdapter(BaseAdapter):
         # for each node, compare the base and current lineage
         diff = {}
         for key in keys:
-            base_node = base.get(
-            curr_node = current.get(
+            base_node = base.get("nodes", {}).get(key)
+            curr_node = current.get("nodes", {}).get(key)
             if base_node and curr_node:
-                base_checksum = base_node.get(
-                curr_checksum = curr_node.get(
+                base_checksum = base_node.get("checksum", {}).get("checksum")
+                curr_checksum = curr_node.get("checksum", {}).get("checksum")
                 change = None
                 if base_checksum is None or curr_checksum is None or base_checksum == curr_checksum:
                     continue

-                if curr_node.get(
+                if curr_node.get("resource_type") == "model":
                     try:
                         perf_tracking.increment_modified_nodes()

                         def _get_schema(lineage):
                             schema = {}
-                            nodes = lineage[
-                            parent_list = lineage[
+                            nodes = lineage["nodes"]
+                            parent_list = lineage["parent_map"].get(key, [])
                             for parent_id in parent_list:
                                 parent_node = nodes.get(parent_id)
                                 if parent_node is None:
                                     continue
-                                columns = parent_node.get(
-                                name = parent_node.get(
-                                if parent_node.get(
-                                    parts = parent_id.split(
+                                columns = parent_node.get("columns") or {}
+                                name = parent_node.get("name")
+                                if parent_node.get("resource_type") == "source":
+                                    parts = parent_id.split(".")
                                     source = parts[2]
                                     table = parts[3]
-                                    source = source.replace(
+                                    source = source.replace("-", "_")
                                     name = f"__{source}__{table}"
-                                schema[name] = {
-                                    name: column.get('type') for name, column in columns.items()
-                                }
+                                schema[name] = {name: column.get("type") for name, column in columns.items()}
                             return schema

                         base_sql = self.generate_sql(
-                            base_node.get(
+                            base_node.get("raw_code"),
                             context=jinja_context,
-                            provided_manifest=base_manifest
+                            provided_manifest=base_manifest,
                         )
                         curr_sql = self.generate_sql(
-                            curr_node.get(
+                            curr_node.get("raw_code"),
                             context=jinja_context,
-                            provided_manifest=curr_manifest
+                            provided_manifest=curr_manifest,
                         )
                         base_schema = _get_schema(base)
                         curr_schema = _get_schema(current)
@@ -834,14 +870,13 @@ class DbtAdapter(BaseAdapter):

                         # Make sure that the case of the column names are the same
                         changed_columns = {
-                            column.lower(): change_status
-                            for column, change_status in (change.columns or {}).items()
+                            column.lower(): change_status for column, change_status in (change.columns or {}).items()
                         }
                         changed_columns_names = set(changed_columns)
                         changed_columns_final = {}

-                        base_columns = base_node.get(
-                        curr_columns = curr_node.get(
+                        base_columns = base_node.get("columns") or {}
+                        curr_columns = curr_node.get("columns") or {}
                         columns_names = set(base_columns) | set(curr_columns)

                         for column_name in columns_names:
@@ -850,16 +885,16 @@ class DbtAdapter(BaseAdapter):

                         change.columns = changed_columns_final
                     except Exception:
-                        change = NodeChange(category=
+                        change = NodeChange(category="unknown")

-                diff[key] = NodeDiff(change_status=
+                diff[key] = NodeDiff(change_status="modified", change=change)
             elif base_node:
-                diff[key] = NodeDiff(change_status=
+                diff[key] = NodeDiff(change_status="removed")
             elif curr_node:
-                diff[key] = NodeDiff(change_status=
+                diff[key] = NodeDiff(change_status="added")

         perf_tracking.end_lineage_diff()
-        log_performance(
+        log_performance("model lineage diff", perf_tracking.to_dict())

         return LineageDiff(
             base=base,
@@ -874,8 +909,8 @@ class DbtAdapter(BaseAdapter):
         manifest = self.curr_manifest if base is False else self.base_manifest
         manifest_dict = manifest.to_dict()

-        parent_ids = find_upstream(node_id, manifest_dict.get(
-        child_ids = find_downstream(node_id, manifest_dict.get(
+        parent_ids = find_upstream(node_id, manifest_dict.get("parent_map"))
+        child_ids = find_downstream(node_id, manifest_dict.get("child_map"))
         cll_node_ids = parent_ids.union(child_ids)
         cll_node_ids.add(node_id)

@@ -888,7 +923,7 @@ class DbtAdapter(BaseAdapter):

         cll_tracker.end_column_lineage()
         cll_tracker.set_total_nodes(len(nodes))
-        log_performance(
+        log_performance("column level lineage", cll_tracker.to_dict())
         cll_tracker.reset()

         return dict(nodes=nodes)
@@ -900,8 +935,8 @@ class DbtAdapter(BaseAdapter):
         manifest = self.curr_manifest if base is False else self.base_manifest
         manifest_dict = manifest.to_dict()
         parent_list = []
-        if node_id in manifest_dict[
-            parent_list = manifest_dict[
+        if node_id in manifest_dict["parent_map"]:
+            parent_list = manifest_dict["parent_map"][node_id]

         node = deepcopy(nodes[node_id])
         self.append_column_lineage(node, parent_list, base)
@@ -909,49 +944,49 @@ class DbtAdapter(BaseAdapter):

     def append_column_lineage(self, node: Dict, parent_list: List, base: Optional[bool] = False):
         def _apply_all_columns(node, trans_type, depends_on):
-            for col in node.get(
-                col[
-                col[
+            for col in node.get("columns", {}).values():
+                col["transformation_type"] = trans_type
+                col["depends_on"] = depends_on

         def _depend_node_to_id(column_lineage, nodes):
             for cl in column_lineage.values():
                 for depend_on in cl.depends_on:
-                    if depend_on.node.startswith(
+                    if depend_on.node.startswith("__"):
                         for n in nodes.values():
-                            if n.get(
+                            if n.get("resource_type") != "source":
                                 continue
                             # __source__table -> source.table
                             source_table = depend_on.node.lstrip("_").replace("__", ".", 1).lower()
-                            if source_table in n.get(
-                                depend_on.node = n.get(
+                            if source_table in n.get("id"):
+                                depend_on.node = n.get("id")
                                 break
                     else:
                         for n in nodes.values():
-                            if n.get(
-                                depend_on.node = n.get(
+                            if n.get("name") == depend_on.node.lower():
+                                depend_on.node = n.get("id")
                                 break

         cll_tracker = CLLPerformanceTracking()
         nodes = self.get_lineage_nodes_metadata(base=base)
         manifest = as_manifest(self.get_manifest(base))
-        resource_type = node.get(
-        if resource_type not in {
+        resource_type = node.get("resource_type")
+        if resource_type not in {"model", "seed", "source", "snapshot"}:
             return

-        if resource_type ==
-            _apply_all_columns(node,
+        if resource_type == "source" or resource_type == "seed":
+            _apply_all_columns(node, "source", [])
             return

-        if node.get(
-            _apply_all_columns(node,
+        if node.get("raw_code") is None or self.is_python_model(node.get("id"), base=base):
+            _apply_all_columns(node, "unknown", [])
             return

         # dbt <= 1.8, MetricFlow expects the time spine table to be named metricflow_time_spine
-        if node.get(
-            _apply_all_columns(node,
+        if node.get("name") == "metricflow_time_spine":
+            _apply_all_columns(node, "source", [])
             return

-        if not node.get(
+        if not node.get("columns", {}):
             # no catalog
             return

@@ -967,7 +1002,7 @@ class DbtAdapter(BaseAdapter):
         def source_func(source_name, table_name):
             return f"__{source_name}__{table_name}"

-        raw_code = node.get(
+        raw_code = node.get("raw_code")
         jinja_context = dict(
             ref=ref_func,
             source=source_func,
@@ -978,16 +1013,14 @@ class DbtAdapter(BaseAdapter):
             parent_node = nodes.get(parent_id)
             if parent_node is None:
                 continue
-            columns = parent_node.get(
-            name = parent_node.get(
-            if parent_node.get(
-                parts = parent_id.split(
+            columns = parent_node.get("columns") or {}
+            name = parent_node.get("name")
+            if parent_node.get("resource_type") == "source":
+                parts = parent_id.split(".")
                 source = parts[2]
                 table = parts[3]
                 name = f"__{source}__{table}"
-            schema[name] = {
-                name: column.get('type') for name, column in columns.items()
-            }
+            schema[name] = {name: column.get("type") for name, column in columns.items()}

         try:
             # provide a manifest to speedup and not pollute the manifest
@@ -999,20 +1032,20 @@ class DbtAdapter(BaseAdapter):
             column_lineage = cll(compiled_sql, schema=schema, dialect=dialect)
         except RecceException:
             # TODO: provide parsing error message if needed
-            _apply_all_columns(node,
+            _apply_all_columns(node, "unknown", [])
             cll_tracker.increment_sqlglot_error_nodes()
             return
         except Exception:
-            _apply_all_columns(node,
+            _apply_all_columns(node, "unknown", [])
             cll_tracker.increment_other_error_nodes()
             return

         _depend_node_to_id(column_lineage, nodes)

-        for name, column in node.get(
+        for name, column in node.get("columns", {}).items():
             if name in column_lineage:
-                column[
-                column[
+                column["depends_on"] = column_lineage[name].depends_on
+                column["transformation_type"] = column_lineage[name].type

     @lru_cache(maxsize=2)
     def get_lineage_nodes_metadata(self, base: Optional[bool] = False):
@@ -1021,18 +1054,18 @@ class DbtAdapter(BaseAdapter):
         manifest_dict = manifest.to_dict()

         nodes = {}
-        for node in manifest_dict[
-            unique_id = node[
-            resource_type = node[
+        for node in manifest_dict["nodes"].values():
+            unique_id = node["unique_id"]
+            resource_type = node["resource_type"]

-            if resource_type not in [
+            if resource_type not in ["model", "seed", "exposure", "snapshot"]:
                 continue

             nodes[unique_id] = {
-
-
-
-
+                "id": node["unique_id"],
+                "name": node["name"],
+                "resource_type": node["resource_type"],
+                "raw_code": node["raw_code"],
             }

             if catalog is not None and unique_id in catalog.nodes:
@@ -1040,23 +1073,20 @@ class DbtAdapter(BaseAdapter):
                 for col_name, col_metadata in catalog.nodes[unique_id].columns.items():
                     col = dict(name=col_name, type=col_metadata.type)
                     columns[col_name] = col
-                nodes[unique_id][
+                nodes[unique_id]["columns"] = columns

-        for source in manifest_dict[
-            unique_id = source[
+        for source in manifest_dict["sources"].values():
+            unique_id = source["unique_id"]

             nodes[unique_id] = {
-
-
-
+                "id": source["unique_id"],
+                "name": source["name"],
+                "resource_type": source["resource_type"],
             }

             if catalog is not None and unique_id in catalog.sources:
-                nodes[unique_id][
-                    col_name: {
-                        'name': col_name,
-                        'type': col_metadata.type
-                    }
+                nodes[unique_id]["columns"] = {
+                    col_name: {"name": col_name, "type": col_metadata.type}
                     for col_name, col_metadata in catalog.sources[unique_id].columns.items()
                 }

@@ -1067,8 +1097,8 @@ class DbtAdapter(BaseAdapter):
         base_manifest = self.get_manifest(base=True)
         if unique_id in curr_manifest.nodes.keys() or unique_id in base_manifest.nodes.keys():
             return {
-
-
+                "current": curr_manifest.nodes.get(unique_id),
+                "base": base_manifest.nodes.get(unique_id),
             }
         return None

@@ -1091,39 +1121,40 @@ class DbtAdapter(BaseAdapter):
         if self.base_path:
             self.artifacts_observer.schedule(event_handler, self.base_path, recursive=False)
         self.artifacts_observer.start()
-        logger.info(
+        logger.info("Start monitoring dbt artifacts")

     def stop_monitor_artifacts(self):
         if self.artifacts_files:
             self.artifacts_observer.stop()
             self.artifacts_observer.join()
-            logger.info(
+            logger.info("Stop monitoring artifacts")

     def start_monitor_base_env(self, callback: Callable = None):
-        target_base_dir = os.path.join(self.runtime_config.project_root,
+        target_base_dir = os.path.join(self.runtime_config.project_root, "target-base")
         base_env_files = {
-            os.path.join(target_base_dir,
-            os.path.join(target_base_dir,
+            os.path.join(target_base_dir, "manifest.json"),
+            os.path.join(target_base_dir, "catalog.json"),
         }
         event_handler = EnvironmentEventHandler(self.base_env_observer, base_env_files, callback=callback)
         self.base_env_observer.schedule(event_handler, self.runtime_config.project_root, recursive=True)
         self.base_env_observer.start()
-        logger.info(
+        logger.info("Start monitoring base environment")

     def stop_monitor_base_env(self):
         if self.base_env_observer.is_alive():
             self.base_env_observer.stop()
             self.base_env_observer.join()
-            logger.info(
-
-    def set_artifacts(
-
-
-
-
-
-
-
+            logger.info("Stop monitoring base environment")
+
+    def set_artifacts(
+        self,
+        base_manifest: WritableManifest,
+        curr_manifest: WritableManifest,
+        manifest: Manifest,
+        previous_manifest: Manifest,
+        base_catalog: CatalogArtifact,
+        curr_catalog: CatalogArtifact,
+    ):
         self.curr_manifest = curr_manifest
         self.base_manifest = base_manifest
         self.manifest = manifest
@@ -1132,7 +1163,7 @@ class DbtAdapter(BaseAdapter):
         self.previous_state = previous_state(
             Path(self.base_path),
             Path(self.runtime_config.target_path),
-            Path(self.runtime_config.project_root)
+            Path(self.runtime_config.project_root),
         )
         self.previous_state.manifest = previous_manifest

@@ -1154,18 +1185,18 @@ class DbtAdapter(BaseAdapter):
         # we capture the original manifest as base and only update the current
         target_type = os.path.basename(os.path.dirname(refresh_file_path))
         if self.target_path and target_type == os.path.basename(self.target_path):
-            if refresh_file_path.endswith(
+            if refresh_file_path.endswith("manifest.json"):
                 self.curr_manifest = load_manifest(path=refresh_file_path)
                 self.manifest = as_manifest(self.curr_manifest)
                 self.get_cll_cached.cache_clear()
                 self.get_lineage_nodes_metadata.cache_clear()
-            elif refresh_file_path.endswith(
+            elif refresh_file_path.endswith("catalog.json"):
                 self.curr_catalog = load_catalog(path=refresh_file_path)
                 self.get_lineage_nodes_metadata.cache_clear()
         elif self.base_path and target_type == os.path.basename(self.base_path):
-            if refresh_file_path.endswith(
+            if refresh_file_path.endswith("manifest.json"):
                 self.base_manifest = load_manifest(path=refresh_file_path)
-            elif refresh_file_path.endswith(
+            elif refresh_file_path.endswith("catalog.json"):
                 self.base_catalog = load_catalog(path=refresh_file_path)

     def create_relation(self, model, base=False):
@@ -1180,18 +1211,22 @@ class DbtAdapter(BaseAdapter):
         select: Optional[str] = None,
         exclude: Optional[str] = None,
         packages: Optional[list[str]] = None,
-        view_mode: Optional[Literal[
+        view_mode: Optional[Literal["all", "changed_models"]] = None,
     ) -> Set[str]:
-        from dbt.graph import NodeSelector
-        from dbt.compilation import Compiler
-        from dbt.graph import parse_difference, SelectionIntersection, SelectionUnion
         import dbt.compilation
+        from dbt.compilation import Compiler
+        from dbt.graph import (
+            NodeSelector,
+            SelectionIntersection,
+            SelectionUnion,
+            parse_difference,
+        )

         select_list = [select] if select else None
         exclude_list = [exclude] if exclude else None

         def _parse_difference(include, exclude):
-            if dbt_version <
+            if dbt_version < "v1.8":
                 return parse_difference(include, exclude, "eager")
             else:
                 return parse_difference(include, exclude)
@@ -1199,10 +1234,10 @@ class DbtAdapter(BaseAdapter):
         specs = [_parse_difference(select_list, exclude_list)]

         if packages is not None:
-            package_spec = SelectionUnion([_parse_difference([f
+            package_spec = SelectionUnion([_parse_difference([f"package:{p}"], None) for p in packages])
             specs.append(package_spec)
-        if view_mode and view_mode ==
-            specs.append(_parse_difference([
+        if view_mode and view_mode == "changed_models":
+            specs.append(_parse_difference(["1+state:modified+"], None))
         spec = SelectionIntersection(specs)

         manifest = Manifest()
@@ -1215,8 +1250,8 @@ class DbtAdapter(BaseAdapter):
         for node_id, node in manifest_prev.nodes.items():
             if node_id not in manifest.nodes:
                 node_dict = node.to_dict()
-                if
-                    node_dict[
+                if "raw_code" in node_dict:
+                    node_dict["raw_code"] = "__removed__"
                 node_class = type(node)
                 removed_node = node_class.from_dict(node_dict)
                 manifest.nodes[node_id] = removed_node
@@ -1225,8 +1260,11 @@ class DbtAdapter(BaseAdapter):
         manifest.sources = {**manifest_prev.sources, **manifest_curr.sources}
         manifest.exposures = {**manifest_prev.exposures, **manifest_curr.exposures}
         manifest.metrics = {**manifest_prev.metrics, **manifest_curr.metrics}
-        if hasattr(manifest_prev,
-            manifest.semantic_models = {
+        if hasattr(manifest_prev, "semantic_models"):
+            manifest.semantic_models = {
+                **manifest_prev.semantic_models,
+                **manifest_curr.semantic_models,
+            }

         compiler = Compiler(self.runtime_config)
         # disable to print compile states
@@ -1241,28 +1279,28 @@ class DbtAdapter(BaseAdapter):
         return selector.get_selected(spec)

     def export_artifacts(self) -> ArtifactsRoot:
-
+        """
         Export the artifacts from the current state
-
+        """
         artifacts = ArtifactsRoot()

         def _load_artifact(artifact):
             return artifact.to_dict() if artifact else None

         artifacts.base = {
-
-
+            "manifest": _load_artifact(self.base_manifest),
+            "catalog": _load_artifact(self.base_catalog),
         }
         artifacts.current = {
-
-
+            "manifest": _load_artifact(self.curr_manifest),
+            "catalog": _load_artifact(self.curr_catalog),
         }
         return artifacts

     def export_artifacts_from_file(self) -> ArtifactsRoot:
-
+        """
         Export the artifacts from the state file. This is the old implementation
-
+        """
         artifacts = ArtifactsRoot()
         target_path = self.runtime_config.target_path
         target_base_path = self.base_path
@@ -1271,18 +1309,18 @@ class DbtAdapter(BaseAdapter):
             if not os.path.isfile(path):
                 return None

-            with open(path,
+            with open(path, "r") as f:
                 json_content = f.read()
                 return json.loads(json_content)

         project_root = self.runtime_config.project_root
         artifacts.base = {
-
-
+            "manifest": _load_artifact(os.path.join(project_root, target_base_path, "manifest.json")),
+            "catalog": _load_artifact(os.path.join(project_root, target_base_path, "catalog.json")),
         }
         artifacts.current = {
-
-
+            "manifest": _load_artifact(os.path.join(project_root, target_path, "manifest.json")),
+            "catalog": _load_artifact(os.path.join(project_root, target_path, "catalog.json")),
         }
         return artifacts

@@ -1290,7 +1328,7 @@ class DbtAdapter(BaseAdapter):
         # Merge the artifacts from the state file or cloud
         def _select_artifact(
             original: Union[WritableManifest, CatalogArtifact],
-            new: Union[WritableManifest, CatalogArtifact]
+            new: Union[WritableManifest, CatalogArtifact],
         ):
             if merge:
                 if not original:
@@ -1301,16 +1339,16 @@ class DbtAdapter(BaseAdapter):
             else:
                 return new

-        self.base_manifest = _select_artifact(self.base_manifest, load_manifest(data=artifacts.base.get(
-        self.curr_manifest = _select_artifact(self.curr_manifest, load_manifest(data=artifacts.current.get(
-        self.base_catalog = _select_artifact(self.base_catalog, load_catalog(data=artifacts.base.get(
-        self.curr_catalog = _select_artifact(self.curr_catalog, load_catalog(data=artifacts.current.get(
+        self.base_manifest = _select_artifact(self.base_manifest, load_manifest(data=artifacts.base.get("manifest")))
+        self.curr_manifest = _select_artifact(self.curr_manifest, load_manifest(data=artifacts.current.get("manifest")))
+        self.base_catalog = _select_artifact(self.base_catalog, load_catalog(data=artifacts.base.get("catalog")))
+        self.curr_catalog = _select_artifact(self.curr_catalog, load_catalog(data=artifacts.current.get("catalog")))

         self.manifest = as_manifest(self.curr_manifest)
         self.previous_state = previous_state(
             Path(self.base_path),
             Path(self.runtime_config.target_path),
-            Path(self.runtime_config.project_root)
+            Path(self.runtime_config.project_root),
         )
         self.previous_state.manifest = as_manifest(self.base_manifest)

@@ -1326,7 +1364,8 @@ class DbtAdapter(BaseAdapter):

         if not self.curr_manifest or not self.base_manifest:
             raise Exception(
-
+                "No enough dbt artifacts in the state file. Please use the latest recce to generate the recce state"
+            )

     @contextmanager
     def connection_named(self, name: str) -> Iterator[None]: