altimate-datapilot-cli 0.0.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- altimate_datapilot_cli-0.0.8.dist-info/AUTHORS.rst +5 -0
- altimate_datapilot_cli-0.0.8.dist-info/LICENSE +9 -0
- altimate_datapilot_cli-0.0.8.dist-info/METADATA +102 -0
- altimate_datapilot_cli-0.0.8.dist-info/RECORD +139 -0
- altimate_datapilot_cli-0.0.8.dist-info/WHEEL +5 -0
- altimate_datapilot_cli-0.0.8.dist-info/entry_points.txt +4 -0
- altimate_datapilot_cli-0.0.8.dist-info/top_level.txt +1 -0
- datapilot/__init__.py +1 -0
- datapilot/__main__.py +14 -0
- datapilot/cli/__init__.py +0 -0
- datapilot/cli/main.py +11 -0
- datapilot/clients/__init__.py +0 -0
- datapilot/clients/altimate/__init__.py +0 -0
- datapilot/clients/altimate/client.py +85 -0
- datapilot/clients/altimate/utils.py +75 -0
- datapilot/config/__init__.py +0 -0
- datapilot/config/config.py +16 -0
- datapilot/config/utils.py +32 -0
- datapilot/core/__init__.py +0 -0
- datapilot/core/insights/__init__.py +2 -0
- datapilot/core/insights/base/__init__.py +0 -0
- datapilot/core/insights/base/insight.py +34 -0
- datapilot/core/insights/report.py +16 -0
- datapilot/core/insights/schema.py +24 -0
- datapilot/core/insights/sql/__init__.py +0 -0
- datapilot/core/insights/sql/base/__init__.py +0 -0
- datapilot/core/insights/sql/base/insight.py +18 -0
- datapilot/core/insights/sql/runtime/__init__.py +0 -0
- datapilot/core/insights/sql/static/__init__.py +0 -0
- datapilot/core/insights/utils.py +20 -0
- datapilot/core/platforms/__init__.py +0 -0
- datapilot/core/platforms/dbt/__init__.py +0 -0
- datapilot/core/platforms/dbt/cli/__init__.py +0 -0
- datapilot/core/platforms/dbt/cli/cli.py +112 -0
- datapilot/core/platforms/dbt/constants.py +34 -0
- datapilot/core/platforms/dbt/exceptions.py +6 -0
- datapilot/core/platforms/dbt/executor.py +157 -0
- datapilot/core/platforms/dbt/factory.py +22 -0
- datapilot/core/platforms/dbt/formatting.py +45 -0
- datapilot/core/platforms/dbt/hooks/__init__.py +0 -0
- datapilot/core/platforms/dbt/hooks/executor_hook.py +86 -0
- datapilot/core/platforms/dbt/insights/__init__.py +115 -0
- datapilot/core/platforms/dbt/insights/base.py +133 -0
- datapilot/core/platforms/dbt/insights/checks/__init__.py +0 -0
- datapilot/core/platforms/dbt/insights/checks/base.py +26 -0
- datapilot/core/platforms/dbt/insights/checks/check_column_desc_are_same.py +105 -0
- datapilot/core/platforms/dbt/insights/checks/check_column_name_contract.py +154 -0
- datapilot/core/platforms/dbt/insights/checks/check_macro_args_have_desc.py +75 -0
- datapilot/core/platforms/dbt/insights/checks/check_macro_has_desc.py +63 -0
- datapilot/core/platforms/dbt/insights/checks/check_model_has_all_columns.py +96 -0
- datapilot/core/platforms/dbt/insights/checks/check_model_has_labels_keys.py +112 -0
- datapilot/core/platforms/dbt/insights/checks/check_model_has_meta_keys.py +108 -0
- datapilot/core/platforms/dbt/insights/checks/check_model_has_properties_file.py +64 -0
- datapilot/core/platforms/dbt/insights/checks/check_model_has_tests_by_group.py +118 -0
- datapilot/core/platforms/dbt/insights/checks/check_model_has_tests_by_name.py +114 -0
- datapilot/core/platforms/dbt/insights/checks/check_model_has_tests_by_type.py +119 -0
- datapilot/core/platforms/dbt/insights/checks/check_model_materialization_by_childs.py +129 -0
- datapilot/core/platforms/dbt/insights/checks/check_model_name_contract.py +132 -0
- datapilot/core/platforms/dbt/insights/checks/check_model_parents_and_childs.py +135 -0
- datapilot/core/platforms/dbt/insights/checks/check_model_parents_database.py +109 -0
- datapilot/core/platforms/dbt/insights/checks/check_model_parents_schema.py +109 -0
- datapilot/core/platforms/dbt/insights/checks/check_model_tags.py +87 -0
- datapilot/core/platforms/dbt/insights/checks/check_source_childs.py +97 -0
- datapilot/core/platforms/dbt/insights/checks/check_source_columns_have_desc.py +96 -0
- datapilot/core/platforms/dbt/insights/checks/check_source_has_all_columns.py +103 -0
- datapilot/core/platforms/dbt/insights/checks/check_source_has_freshness.py +94 -0
- datapilot/core/platforms/dbt/insights/checks/check_source_has_labels_keys.py +110 -0
- datapilot/core/platforms/dbt/insights/checks/check_source_has_loader.py +62 -0
- datapilot/core/platforms/dbt/insights/checks/check_source_has_meta_keys.py +117 -0
- datapilot/core/platforms/dbt/insights/checks/check_source_has_tests.py +82 -0
- datapilot/core/platforms/dbt/insights/checks/check_source_has_tests_by_group.py +117 -0
- datapilot/core/platforms/dbt/insights/checks/check_source_has_tests_by_name.py +113 -0
- datapilot/core/platforms/dbt/insights/checks/check_source_has_tests_by_type.py +119 -0
- datapilot/core/platforms/dbt/insights/checks/check_source_table_has_description.py +62 -0
- datapilot/core/platforms/dbt/insights/checks/check_source_tags.py +76 -0
- datapilot/core/platforms/dbt/insights/dbt_test/__init__.py +0 -0
- datapilot/core/platforms/dbt/insights/dbt_test/base.py +23 -0
- datapilot/core/platforms/dbt/insights/dbt_test/missing_primary_key_tests.py +130 -0
- datapilot/core/platforms/dbt/insights/dbt_test/test_coverage.py +118 -0
- datapilot/core/platforms/dbt/insights/governance/__init__.py +0 -0
- datapilot/core/platforms/dbt/insights/governance/base.py +23 -0
- datapilot/core/platforms/dbt/insights/governance/documentation_on_stale_columns.py +130 -0
- datapilot/core/platforms/dbt/insights/governance/exposures_dependent_on_private_models.py +90 -0
- datapilot/core/platforms/dbt/insights/governance/public_models_without_contracts.py +89 -0
- datapilot/core/platforms/dbt/insights/governance/undocumented_columns.py +148 -0
- datapilot/core/platforms/dbt/insights/governance/undocumented_public_models.py +110 -0
- datapilot/core/platforms/dbt/insights/modelling/README.md +15 -0
- datapilot/core/platforms/dbt/insights/modelling/__init__.py +0 -0
- datapilot/core/platforms/dbt/insights/modelling/base.py +31 -0
- datapilot/core/platforms/dbt/insights/modelling/direct_join_to_source.py +125 -0
- datapilot/core/platforms/dbt/insights/modelling/downstream_models_dependent_on_source.py +113 -0
- datapilot/core/platforms/dbt/insights/modelling/duplicate_sources.py +85 -0
- datapilot/core/platforms/dbt/insights/modelling/hard_coded_references.py +80 -0
- datapilot/core/platforms/dbt/insights/modelling/joining_of_upstream_concepts.py +79 -0
- datapilot/core/platforms/dbt/insights/modelling/model_fanout.py +126 -0
- datapilot/core/platforms/dbt/insights/modelling/multiple_sources_joined.py +83 -0
- datapilot/core/platforms/dbt/insights/modelling/root_model.py +82 -0
- datapilot/core/platforms/dbt/insights/modelling/source_fanout.py +102 -0
- datapilot/core/platforms/dbt/insights/modelling/staging_model_dependent_on_downstream_models.py +103 -0
- datapilot/core/platforms/dbt/insights/modelling/staging_model_dependent_on_staging_models.py +89 -0
- datapilot/core/platforms/dbt/insights/modelling/unused_sources.py +59 -0
- datapilot/core/platforms/dbt/insights/performance/__init__.py +0 -0
- datapilot/core/platforms/dbt/insights/performance/base.py +26 -0
- datapilot/core/platforms/dbt/insights/performance/chain_view_linking.py +92 -0
- datapilot/core/platforms/dbt/insights/performance/exposure_parent_materializations.py +104 -0
- datapilot/core/platforms/dbt/insights/schema.py +72 -0
- datapilot/core/platforms/dbt/insights/structure/__init__.py +0 -0
- datapilot/core/platforms/dbt/insights/structure/base.py +33 -0
- datapilot/core/platforms/dbt/insights/structure/model_directories_structure.py +92 -0
- datapilot/core/platforms/dbt/insights/structure/model_naming_conventions.py +97 -0
- datapilot/core/platforms/dbt/insights/structure/source_directories_structure.py +80 -0
- datapilot/core/platforms/dbt/insights/structure/test_directory_structure.py +74 -0
- datapilot/core/platforms/dbt/insights/utils.py +9 -0
- datapilot/core/platforms/dbt/schemas/__init__.py +0 -0
- datapilot/core/platforms/dbt/schemas/catalog.py +73 -0
- datapilot/core/platforms/dbt/schemas/manifest.py +462 -0
- datapilot/core/platforms/dbt/utils.py +525 -0
- datapilot/core/platforms/dbt/wrappers/__init__.py +0 -0
- datapilot/core/platforms/dbt/wrappers/catalog/__init__.py +0 -0
- datapilot/core/platforms/dbt/wrappers/catalog/v1/__init__.py +0 -0
- datapilot/core/platforms/dbt/wrappers/catalog/v1/wrapper.py +18 -0
- datapilot/core/platforms/dbt/wrappers/catalog/wrapper.py +9 -0
- datapilot/core/platforms/dbt/wrappers/manifest/__init__.py +0 -0
- datapilot/core/platforms/dbt/wrappers/manifest/v11/__init__.py +0 -0
- datapilot/core/platforms/dbt/wrappers/manifest/v11/schemas.py +47 -0
- datapilot/core/platforms/dbt/wrappers/manifest/v11/wrapper.py +396 -0
- datapilot/core/platforms/dbt/wrappers/manifest/wrapper.py +35 -0
- datapilot/core/platforms/dbt/wrappers/run_results/__init__.py +0 -0
- datapilot/core/platforms/dbt/wrappers/run_results/run_results.py +39 -0
- datapilot/exceptions/__init__.py +0 -0
- datapilot/exceptions/exceptions.py +10 -0
- datapilot/schemas/__init__.py +0 -0
- datapilot/schemas/constants.py +5 -0
- datapilot/schemas/nodes.py +19 -0
- datapilot/schemas/sql.py +10 -0
- datapilot/utils/__init__.py +0 -0
- datapilot/utils/formatting/__init__.py +0 -0
- datapilot/utils/formatting/utils.py +59 -0
- datapilot/utils/utils.py +317 -0
datapilot/utils/utils.py
ADDED
@@ -0,0 +1,317 @@
|
|
1
|
+
import json
|
2
|
+
import os
|
3
|
+
import re
|
4
|
+
import subprocess
|
5
|
+
import tempfile
|
6
|
+
import uuid
|
7
|
+
from pathlib import Path
|
8
|
+
from typing import Dict
|
9
|
+
from typing import List
|
10
|
+
from typing import Union
|
11
|
+
|
12
|
+
from dbt_artifacts_parser.parser import parse_catalog
|
13
|
+
from dbt_artifacts_parser.parser import parse_manifest
|
14
|
+
|
15
|
+
from datapilot.config.config import load_config
|
16
|
+
from datapilot.schemas.nodes import ModelNode
|
17
|
+
from datapilot.schemas.nodes import SourceNode
|
18
|
+
|
19
|
+
|
20
|
+
def load_json(file_path: str) -> Dict:
    """Load and return the JSON document at *file_path*.

    :param file_path: path to a JSON file
    :return: the parsed JSON content
    :raises FileNotFoundError: if the file does not exist (propagated as-is)
    :raises ValueError: if the path is a directory or the content is not valid JSON
    """
    try:
        with Path(file_path).open() as f:
            return json.load(f)
    except json.decoder.JSONDecodeError as e:
        raise ValueError(f"Invalid JSON file: {file_path}") from e
    except IsADirectoryError as e:
        # Fixed typo in the original message ("a A valid").
        raise ValueError(f"Please provide a valid manifest file path. {file_path} is a directory") from e
|
30
|
+
|
31
|
+
|
32
|
+
def extract_dir_name_from_file_path(path: str) -> str:
    """Return the name of the directory that directly contains *path*.

    Uses pathlib, so both POSIX and Windows-style separators that pathlib
    understands on the current platform are handled.
    """
    containing_dir = Path(path).parent
    return containing_dir.name
|
36
|
+
|
37
|
+
|
38
|
+
def extract_folders_in_path(path: str) -> list:
    """Return the directory components of *path* as a list.

    The final component is treated as a file name (and dropped) when it
    contains a dot; empty components produced by leading or repeated
    separators are removed.
    """
    segments = path.split(os.path.sep)
    # A trailing component with an extension is a file, not a folder.
    if "." in segments[-1]:
        segments = segments[:-1]
    return [segment for segment in segments if segment]
|
47
|
+
|
48
|
+
|
49
|
+
def get_dir_path(path: str) -> Path:
    """Return the directory portion of a file path.

    For example, if the path is ``/a/b/c/d.txt``, the directory path is
    ``/a/b/c``.

    Note: the original annotation claimed ``str``, but a ``pathlib.Path``
    has always been returned; the annotation is corrected here.

    :param path: a file path
    :return: the parent directory as a ``Path``
    """
    return Path(path).parent
|
58
|
+
|
59
|
+
|
60
|
+
def is_superset_path(superset_path: str, path: str) -> bool:
    """Check if the path is a sub-path of the superset path.

    Purely lexical: ``Path.relative_to`` raises ``ValueError`` when *path*
    does not start with *superset_path*; no filesystem access happens.

    :param superset_path: The superset path
    :param path: The path to be checked
    :return: True if the path is a sub-path of the superset path, False otherwise
    """
    try:
        Path(path).relative_to(superset_path)
        return True
    except ValueError:
        return False
|
74
|
+
|
75
|
+
|
76
|
+
def get_changed_files(include_untracked=True):
    """Return paths of files reported changed by ``git status --porcelain``.

    :param include_untracked: also include untracked files (``??`` entries)
    :return: list of file paths relative to the repository root

    Fixes over the original: the path is taken positionally (porcelain lines
    are ``XY<space>path``), so paths containing spaces are no longer
    truncated by ``line.split()[1]``; and for renames (``R old -> new``)
    the NEW path is returned instead of the old one.
    """
    command = ["git", "status", "--porcelain"]
    if include_untracked:
        command.append("-uall")
    result = subprocess.run(command, capture_output=True, text=True)  # noqa
    changed_files = []
    for line in result.stdout.splitlines():
        # Porcelain v1: two status characters, one space, then the path.
        status, path = line[:2], line[3:]
        if status == "??":
            if include_untracked:
                changed_files.append(path)
        elif status.strip() and status.strip()[0] in {"M", "A", "D", "R"}:
            # Renames are reported as "old -> new"; keep the new path.
            if " -> " in path:
                path = path.split(" -> ", 1)[1]
            changed_files.append(path)
    # NOTE(review): git quotes unusual paths with surrounding double quotes;
    # those are not unquoted here — confirm whether callers need that.
    return changed_files
|
88
|
+
|
89
|
+
|
90
|
+
def get_tmp_dir_path():
    """Create and return a fresh, uniquely named temporary directory.

    The directory lives under the system temp root and is named with a
    random UUID, so successive calls never collide.
    """
    unique_name = str(uuid.uuid4())
    tmp_dir = Path(tempfile.gettempdir()) / unique_name
    tmp_dir.mkdir(parents=True, exist_ok=True)
    return tmp_dir
|
94
|
+
|
95
|
+
|
96
|
+
def get_column_type(dtype: str) -> str:
    """Map a database column type name to a coarse canonical type.

    Matching is case-insensitive substring matching; unknown types fall
    back to ``"TEXT"``.

    Bug fix: the original checked ``time`` before ``timestamp``, so any
    ``timestamp`` type was misclassified as ``TIME`` and the TIMESTAMP
    branch was unreachable. More specific fragments are now checked first.

    :param dtype: raw column type name as reported by the adapter
    :return: one of INTEGER, FLOAT, BOOLEAN, DATE, TIMESTAMP, TIME, TEXT,
        NUMERIC, DECIMAL, DOUBLE, REAL
    """
    dtype = dtype.lower()
    # (fragment, canonical type) pairs; order matters — "timestamp" must
    # precede "time". "char" covers both "char" and "varchar".
    mappings = [
        ("int", "INTEGER"),
        ("float", "FLOAT"),
        ("bool", "BOOLEAN"),
        ("date", "DATE"),
        ("timestamp", "TIMESTAMP"),
        ("time", "TIME"),
        ("text", "TEXT"),
        ("char", "TEXT"),
        ("numeric", "NUMERIC"),
        ("decimal", "DECIMAL"),
        ("double", "DOUBLE"),
        ("real", "REAL"),
    ]
    for fragment, canonical in mappings:
        if fragment in dtype:
            return canonical
    return "TEXT"
|
126
|
+
|
127
|
+
|
128
|
+
def get_manifest_model_nodes(manifest: Dict, models: List) -> List[ModelNode]:
    """Build ModelNode entries for the named models found in *manifest*.

    Only entries whose resource_type is "model" and whose materialization
    is "table" or "view" are included.
    """
    selected = []
    for entry in manifest["nodes"].values():
        if entry["name"] not in models:
            continue
        is_model = entry["resource_type"] == "model"
        materialization = entry["config"]["materialized"]
        if is_model and materialization in ("table", "view"):
            selected.append(
                ModelNode(
                    unique_id=entry["unique_id"],
                    name=entry["name"],
                    resource_type=entry["resource_type"],
                    database=entry["database"],
                    alias=entry["alias"],
                    table_schema=entry["schema"],
                )
            )
    return selected
|
144
|
+
|
145
|
+
|
146
|
+
def get_manifest_source_nodes(manifest: Dict, sources: List) -> List[SourceNode]:
    """Build SourceNode entries for the named sources found in *manifest*.

    Matching is done on the manifest entry's "source_name"; the node's
    table is taken from its "identifier".
    """
    collected = []
    for entry in manifest["sources"].values():
        if entry["source_name"] not in sources:
            continue
        collected.append(
            SourceNode(
                unique_id=entry["unique_id"],
                name=entry["source_name"],
                resource_type=entry["resource_type"],
                table=entry["identifier"],
                database=entry["database"],
                table_schema=entry["schema"],
            )
        )
    return collected
|
161
|
+
|
162
|
+
|
163
|
+
def get_model_tables(models: List[ModelNode]) -> List[str]:
    """Return fully qualified "database.schema.alias" names for *models*."""
    return [f"{model.database}.{model.table_schema}.{model.alias}" for model in models]
|
168
|
+
|
169
|
+
|
170
|
+
def get_source_tables(sources: List[SourceNode]) -> List[str]:
    """Return fully qualified "database.schema.name" names for *sources*."""
    return [f"{source.database}.{source.table_schema}.{source.name}" for source in sources]
|
175
|
+
|
176
|
+
|
177
|
+
def get_table_name(node: Union[ModelNode, SourceNode], node_type: str) -> str:
    """Return the fully qualified table name for *node*.

    Model nodes (node_type "nodes") are addressed by their alias; source
    nodes by their name.
    """
    leaf = node.alias if node_type == "nodes" else node.name
    return f"{node.database}.{node.table_schema}.{leaf}"
|
181
|
+
|
182
|
+
|
183
|
+
def fill_catalog(table_columns_map: Dict, manifest: Dict, catalog: Dict, nodes: List[Union[ModelNode, SourceNode]], node_type: str) -> Dict:
    """Populate ``catalog[node_type]`` with a dbt-catalog-shaped entry per node.

    :param table_columns_map: map of unique_id -> list of {"column", "dtype"} dicts
    :param manifest: parsed manifest dict (used for schema/database lookups)
    :param catalog: catalog dict to mutate and return
    :param nodes: model or source nodes to add
    :param node_type: "nodes" for models, "sources" for sources
    :return: the mutated *catalog*

    Bug fix: the original rebuilt ``catalog[node_type]`` from scratch inside
    the per-node loop, so only the LAST node survived; entries are now
    accumulated across all nodes.
    """
    if not nodes:
        catalog[node_type] = {}
        return catalog

    entries = {}
    for node in nodes:
        columns = {}
        for column in table_columns_map[node.unique_id]:
            columns[column["column"]] = {
                "type": get_column_type(column["dtype"]),
                "index": len(columns) + 1,  # 1-based column position
                "name": column["column"],
                "comment": None,
            }
        entries[node.unique_id] = {
            "metadata": {
                "type": "BASE TABLE",
                "schema": manifest[node_type][node.unique_id]["schema"],
                # Models are addressed by alias, sources by name.
                "name": node.alias if node_type == "nodes" else node.name,
                "database": manifest[node_type][node.unique_id]["database"],
                "comment": None,
                "owner": None,
            },
            "columns": columns,
            "stats": {},
            "unique_id": node.unique_id,
        }

    catalog[node_type] = entries
    return catalog
|
216
|
+
|
217
|
+
|
218
|
+
def run_macro(macro: str, base_path: str) -> str:
    """Compile *macro* as an inline dbt node and return dbt's stdout.

    Runs ``dbt compile --inline`` in *base_path* and captures the output
    as text; the caller is responsible for parsing the compiled result.
    """
    command = ["dbt", "compile", "--inline", macro]  # noqa
    completed = subprocess.run(command, capture_output=True, cwd=base_path, text=True)
    return completed.stdout
|
226
|
+
|
227
|
+
|
228
|
+
def generate_partial_manifest_catalog(changed_files, base_path: str = "./"):
    """Build a partial dbt manifest and catalog covering only *changed_files*.

    Runs ``dbt parse`` in *base_path* to refresh ``target/manifest.json``,
    resolves which models/sources the changed files refer to, then compiles
    an inline Jinja macro that asks the warehouse adapter for the columns of
    each affected relation and assembles a minimal catalog from the result.

    :param changed_files: iterable of changed file paths (e.g. from git)
    :param base_path: root directory of the dbt project to run dbt in
    :return: tuple of (selected node unique_ids, parsed manifest, parsed catalog)
    :raises Exception: wraps any underlying failure
    """
    try:
        # Schema YAML files may declare models/sources; project and profile
        # configuration files are excluded.
        yaml_files = [
            f for f in changed_files if Path(f).suffix in [".yml", ".yaml"] and Path(f).name not in ["dbt_project.yml", "profiles.yml"]
        ]
        # A changed .sql file's stem is assumed to be its model name.
        model_stem = [Path(f).stem for f in changed_files if Path(f).suffix in [".sql"]]
        model_set = set()
        source_set = set()

        # Collect model and source names declared in the changed YAML files.
        for file in yaml_files:
            parsed_file = load_config(file)
            if "models" in parsed_file:
                for model in parsed_file["models"]:
                    model_set.add(model.get("name", ""))
            if "sources" in parsed_file:
                for source in parsed_file["sources"]:
                    source_set.add(source.get("name", ""))

        for model in model_stem:
            model_set.add(model)

        models = list(model_set)
        source_list = list(source_set)

        # Refresh target/manifest.json so the node lookups below see the
        # current project state.
        subprocess.run(["dbt", "parse"], cwd=base_path, stdout=subprocess.PIPE)  # noqa

        manifest_file = Path(Path(base_path) / "target/manifest.json")
        with manifest_file.open() as f:
            manifest = json.load(f)

        nodes = get_manifest_model_nodes(manifest, models)
        sources = get_manifest_source_nodes(manifest, source_list)

        # Flatten nodes/sources into plain dicts that can be embedded as
        # JSON literals inside the inline Jinja macro below.
        nodes_data = [{"name": node.name, "resource_type": node.resource_type, "unique_id": node.unique_id, "table": ""} for node in nodes]

        sources_data = [
            {"name": source.name, "resource_type": source.resource_type, "unique_id": source.unique_id, "table": source.table}
            for source in sources
        ]

        nodes_str = ",\n".join(json.dumps(data) for data in nodes_data + sources_data)

        # Inline Jinja macro: for each node, ask the adapter for the columns
        # of the corresponding relation (source() or ref()) and emit a JSON
        # map of unique_id -> [{"column": ..., "dtype": ...}, ...].
        query = (
            "{% set result = {} %}{% set nodes = ["
            + nodes_str
            + '] %}{% for n in nodes %}{% if n["resource_type"] == "source" %}{% set columns = adapter.get_columns_in_relation(source(n["name"], n["table"])) %}{% else %}{% set columns = adapter.get_columns_in_relation(ref(n["name"])) %}{% endif %}{% set new_columns = [] %}{% for column in columns %}{% do new_columns.append({"column": column.name, "dtype": column.dtype}) %}{% endfor %}{% do result.update({n["unique_id"]:new_columns}) %}{% endfor %}{{ tojson(result) }}'
        )

        dbt_compile_output = run_macro(query, base_path)

        # dbt prints the rendered macro result after this marker line; strip
        # stray quotes before parsing the JSON payload.
        compiled_inline_node = dbt_compile_output.split("Compiled inline node is:")[1].strip().replace("'", "").strip()

        table_columns_map = json.loads(compiled_inline_node)

        # Assemble a minimal catalog document (dbt catalog schema v1) and
        # fill in one entry per selected model and source.
        catalog = {
            "metadata": {
                "dbt_schema_version": "https://schemas.getdbt.com/dbt/catalog/v1.json",
                "dbt_version": "1.7.2",
                "generated_at": "2024-03-04T11:13:52.284167Z",
                "invocation_id": "e2970ef7-c397-404b-ac5d-63a71a45b628",
                "env": {},
            },
            "errors": None,
        }

        catalog = fill_catalog(table_columns_map, manifest, catalog, nodes, "nodes")
        catalog = fill_catalog(table_columns_map, manifest, catalog, sources, "sources")

        selected_models = [node.unique_id for node in nodes + sources]
        return selected_models, parse_manifest(manifest), parse_catalog(catalog)
    except Exception as e:
        raise Exception("Unable to generate partial manifest and catalog") from e
|
312
|
+
|
313
|
+
|
314
|
+
if __name__ == "__main__":
|
315
|
+
print("Running main")
|
316
|
+
print(generate_partial_manifest_catalog([], "/Users/gaurp/Desktop/manifest.json", ""))
|
317
|
+
print("Done running main")
|