mdbt 0.4.27__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mdbt/cmdline.py ADDED
@@ -0,0 +1,368 @@
1
+ import re
2
+
3
+ import click
4
+
5
+ from mdbt.build_dbt_docs_ai import BuildDBTDocs
6
+ from mdbt.build_unit_test_data_ai import BuildUnitTestDataAI
7
+ from mdbt.expectations_output_builder import ExpectationsOutputBuilder
8
+ from mdbt.lightdash import Lightdash
9
+ from mdbt.main import MDBT
10
+ from mdbt.precommit_format import PrecommitFormat
11
+ from mdbt.recce import Recce
12
+ from mdbt.sort_yaml_fields import SortYAML
13
+ from mdbt.sql_sorter import ColumnSorter
14
+
15
+ mdbt_class = MDBT()
16
+
17
+
18
+ # Create a Click group
19
class CustomCmdLoader(click.Group):
    """Click group that accepts dbt-style ``+`` graph operators around a command name.

    ``sbuild+`` / ``sbuild+3`` request children and ``3+sbuild`` requests
    parents. The operators are stripped from the name and recorded in
    ``ctx.obj`` before the real command is looked up.
    """

    def get_command(self, ctx, cmd_name):
        """Strip any ``+`` operators from *cmd_name*, record them in ``ctx.obj``, and resolve the command."""
        ctx.ensure_object(dict)
        state = ctx.obj

        # Trailing "+" with an optional depth, e.g. "sbuild+" or "sbuild+3".
        # When no depth is given the stored count is None.
        trailing = re.match(r"(.+)\+(\d*)$", cmd_name)
        if trailing:
            cmd_name, depth = trailing.groups()
            state["build_children"] = True
            state["build_children_count"] = int(depth) if depth else None

        # Leading "<N>+", e.g. "3+sbuild". The pattern requires at least one
        # digit, so a depth is always present here.
        leading = re.match(r"(\d+)\+(.+)", cmd_name)
        if leading:
            depth, cmd_name = leading.groups()
            state["build_parents"] = True
            state["build_parents_count"] = int(depth)

        return super().get_command(ctx, cmd_name)

    def list_commands(self, ctx):
        """Return the fixed, hand-maintained list of command names in display order."""
        command_names = [
            "help",
            "build",
            "trun",
            "run",
            "test",
            "compile",
            "clip-compile",
            "unittest",
            "sbuild",
            "pbuild",
            "gbuild",
            "build-docs",
            "build-unit",
            "ld-preview",
            "clean-stg",
            "pre-commit",
            "sort-yaml",
            "sort-sql",  # Sort SQL from clipboard
            "recce",
            "exp",
            "format",
        ]
        return command_names
69
+
70
+
71
+ mdbt = CustomCmdLoader()
72
+
73
+
74
# `build` command: a thin click wrapper that hands its options to MDBT.build.
@mdbt.command()
@click.option(
    "--full-refresh",
    "-f",
    is_flag=True,
    help="Run a full refresh on all models.",
)
@click.option("--select", "-s", type=str, help="DBT style select string")
@click.option("--fail-fast", is_flag=True, help="Fail fast on errors.")
@click.option(
    "--threads",
    "-t",
    type=int,
    help="Number of threads to use during DBT operations.",
)
@click.pass_context
def build(ctx, full_refresh, select, fail_fast, threads):
    """Execute a DBT build command passthrough."""
    # The click context is forwarded first, followed by the options in declaration order.
    forwarded = (ctx, full_refresh, select, fail_fast, threads)
    mdbt_class.build(*forwarded)
87
+
88
+
89
# `trun` command: a thin click wrapper that hands its options to MDBT.trun.
@mdbt.command()
@click.option(
    "--full-refresh",
    "-f",
    is_flag=True,
    help="Run a full refresh on all models.",
)
@click.option("--select", "-s", type=str, help="DBT style select string")
@click.option("--fail-fast", is_flag=True, help="Fail fast on errors.")
@click.option(
    "--threads",
    "-t",
    type=int,
    help="Number of threads to use during DBT operations.",
)
@click.pass_context
def trun(ctx, full_refresh, select, fail_fast, threads):
    """Execute a DBT run, then test command."""
    # The click context is forwarded first, followed by the options in declaration order.
    forwarded = (ctx, full_refresh, select, fail_fast, threads)
    mdbt_class.trun(*forwarded)
102
+
103
+
104
# `run` command: a thin click wrapper that hands its options to MDBT.run.
@mdbt.command()
@click.option(
    "--full-refresh",
    "-f",
    is_flag=True,
    help="Run a full refresh on all models.",
)
@click.option("--select", "-s", type=str, help="DBT style select string")
@click.option("--fail-fast", is_flag=True, help="Fail fast on errors.")
@click.option(
    "--threads",
    "-t",
    type=int,
    help="Number of threads to use during DBT operations.",
)
@click.pass_context
def run(ctx, full_refresh, select, fail_fast, threads):
    """Pass through to DBT run command."""
    # The click context is forwarded first, followed by the options in declaration order.
    forwarded = (ctx, full_refresh, select, fail_fast, threads)
    mdbt_class.run(*forwarded)
117
+
118
+
119
# `test` command: a thin click wrapper that hands its options to MDBT.test.
@mdbt.command()
@click.option("--select", "-s", type=str, help="DBT style select string")
@click.option("--fail-fast", is_flag=True, help="Fail fast on errors.")
@click.option(
    "--threads",
    "-t",
    type=int,
    help="Number of threads to use during DBT operations.",
)
@click.pass_context
def test(ctx, select, fail_fast, threads):
    """Pass through to DBT test command."""
    forwarded = (ctx, select, fail_fast, threads)
    mdbt_class.test(*forwarded)
129
+
130
+
131
# `unittest` command: a thin click wrapper that hands its options to MDBT.unittest.
@mdbt.command()
@click.option(
    "--select",
    "-s",
    type=str,
    help="DBT style select string",
)
@click.option(
    "--fail-fast",
    is_flag=True,
    help="Fail fast on errors.",
)
@click.pass_context
def unittest(ctx, select, fail_fast):
    """Run unit tests on models."""
    forwarded = (ctx, select, fail_fast)
    mdbt_class.unittest(*forwarded)
138
+
139
+
140
# `compile` command. The function name shadows the builtin `compile`, but it
# must stay as-is because click derives the command name from it.
@mdbt.command()
@click.option(
    "--select",
    "-s",
    type=str,
    help="Name of the model(s) to compile.",
)
@click.pass_context
def compile(ctx, select):
    """Pass through to DBT compile."""
    forwarded = (ctx, select)
    mdbt_class.compile(*forwarded)
146
+
147
+
148
# `clip-compile` command: delegates to MDBT.clip_compile.
@mdbt.command()
@click.option("--select", "-s", type=str, help="Name of the model to compile. Recommend only running one.")
@click.pass_context
def clip_compile(ctx, select):
    """Pass through to DBT compile."""
    # NOTE(review): the help text above is identical to `compile`; confirm
    # whether it should describe what clip_compile does differently.
    forwarded = (ctx, select)
    mdbt_class.clip_compile(*forwarded)
159
+
160
+
161
# `recce` command: builds a fresh Recce helper per invocation and hands it the click context.
@mdbt.command()
@click.pass_context
def recce(ctx):
    """Run a recce of the current state of the project."""
    runner = Recce()
    runner.recce(ctx)
166
+
167
+
168
# `sbuild` command: a thin click wrapper that hands its options to MDBT.sbuild.
@mdbt.command()
@click.option("--full-refresh", "-f", is_flag=True, help="Force a full refresh on all models in build scope.")
@click.option("--threads", "-t", type=int, help="Number of threads to use during DBT operations.")
@click.pass_context
def sbuild(ctx, full_refresh, threads):
    """Build models based on changes in current state since last build."""
    forwarded = (ctx, full_refresh, threads)
    mdbt_class.sbuild(*forwarded)
182
+
183
+
184
# `pbuild` command: a thin click wrapper that hands its options to MDBT.pbuild.
@mdbt.command()
@click.option("--full-refresh", "-f", is_flag=True, help="Force a full refresh on all models in build scope.")
@click.option("--threads", "-t", type=int, help="Number of threads to use during DBT operations.")
@click.option(
    "--skip-dl",
    "--sd",  # second long-form spelling of the same flag
    is_flag=True,
    help="Skip downloading the manifest file from Snowflake. Use the one that was already downloaded.",
)
@click.pass_context
def pbuild(ctx, full_refresh, threads, skip_dl):
    """Build models based on changes from production to current branch."""
    forwarded = (ctx, full_refresh, threads, skip_dl)
    mdbt_class.pbuild(*forwarded)
204
+
205
+
206
# `gbuild` command: a thin click wrapper that hands its options to MDBT.gbuild.
@mdbt.command()
@click.option(
    "--main",
    "-m",
    is_flag=True,
    # NOTE(review): the two adjacent literals concatenate to
    # "... so its up-to-date." -- probably meant "it's".
    help="Build all models vs diff to the main branch. Make sure to pull main so it"
    "s up-to-date.",
)
@click.option("--full-refresh", "-f", is_flag=True, help="Force a full refresh on all models in build scope.")
@click.option("--threads", "-t", type=int, help="Number of threads to use during DBT operations.")
@click.pass_context
def gbuild(ctx, main, full_refresh, threads):
    """Build models based on Git changes from production to current branch."""
    forwarded = (ctx, main, full_refresh, threads)
    mdbt_class.gbuild(*forwarded)
227
+
228
+
229
# `build-docs` command: generates dbt YML docs for one model via BuildDBTDocs.
@mdbt.command()
@click.option(
    "--select",
    "-s",
    type=str,
    required=True,
    # Previously copy-pasted from `build-unit` ("build unit test data for").
    help="Name of the model to build docs for.",
)
@click.option(
    "--sys_context",
    type=str,
    help="Add helpful info so the AI understands the context of the model it's documenting. ",
)
@click.option(
    "--is_new",
    "-n",
    is_flag=True,
    help="Passing this flag will bypass the questions such as 'is this a new model,' and 'add to git.'",
)
@click.pass_context
def build_docs(ctx, select, sys_context, is_new):
    """Build dbt YML model docs for a model. This command will sample the database."""
    # ctx is accepted because of @click.pass_context but is not used here.
    dbt_docs = BuildDBTDocs()
    dbt_docs.main(select, sys_context, is_new)
253
+
254
# `build-unit` command: generates unit-test data for one model via BuildUnitTestDataAI.
@mdbt.command()
@click.option("--select", "-s", type=str, required=True, help="Name of the model to build unit test data for.")
@click.pass_context
def build_unit(ctx, select):
    """Build unit test mock and expect data for a model. This command will sample the database."""
    # ctx is accepted because of @click.pass_context but is not used here.
    BuildUnitTestDataAI().main(select)
267
+
268
+
269
# `ld-preview` command: starts a Lightdash preview through the Lightdash helper.
@mdbt.command()
@click.option(
    "--select",
    "-s",
    type=str,
    help="Name of the model to start a lightdash preview for. If not provided, all models will be previewed.",
)
@click.option(
    "--name",
    "-n",
    type=str,
    help="Name of the lightdash preview. If no name given, the preview will take the name of the current branch.",
)
@click.option("--l43", is_flag=True, help="Include L3 and L4 models in the preview. Default is False.")
@click.pass_context
def ld_preview(ctx, select, name, l43):
    """Start a lightdash preview for a model."""
    # `name` is passed straight through as the preview name.
    lightdash = Lightdash()
    lightdash.lightdash_start_preview(ctx, select, name, l43)
292
+
293
+
294
# `clean-stg` command: runs SQLModelCleaner over the selected staging models.
#
# The callback takes no `ctx` parameter, so it must not be wrapped in
# @click.pass_context: that decorator passes the context as the first
# positional argument, which collided with the `select` keyword and made every
# invocation fail with a TypeError.
@mdbt.command()
@click.option("--select", "-s", type=str, help="Names of the model(s) to clean.")
@click.option(
    "--split-names", is_flag=True, help="Split names like isupdated into is_updated."
)
@click.option(
    "--remove-airbyte",
    is_flag=True,
    # NOTE(review): as a plain click flag this is False unless passed, which
    # contradicts "Default is True" -- confirm the intended default.
    help="Whether to remove Airbyte specific lines. Default is True.",
)
@click.option(
    "--overwrite",
    is_flag=True,
    help="Will overwrite the files. If not set, files will be saved to a folder.",
)
def clean_stg(select, split_names, remove_airbyte, overwrite):
    """Designed to clean files in the L1_stg folders only"""
    # NOTE(review): SQLModelCleaner is not among the imports visible at the top
    # of this module; unless it is brought into scope elsewhere, this line
    # raises NameError. Add the import once its module is confirmed.
    sql_model_cleaner = SQLModelCleaner()
    sql_model_cleaner.main(select, split_names, remove_airbyte, overwrite)
314
+
315
+
316
# `sort-yaml` command: delegates to SortYAML. The docstring is what click shows
# as the command description in `--help`; it was missing before.
@mdbt.command()
@click.option("--select", "-s", type=str, help="Name of model to sort YML columns for.")
@click.option("--all-files", is_flag=True, help="Sort all YML files in the project.")
@click.option("--overwrite", is_flag=True, help="Overwrite the existing YML file.")
def sort_yaml(select, all_files, overwrite):
    """Sort the columns in dbt YML files."""
    sy = SortYAML()
    sy.main(select, all_files, overwrite)
323
+
324
# `sort-sql` command: delegates to ColumnSorter. The docstring is what click
# shows as the command description in `--help`; it was missing before. The
# wording follows the "Sort SQL from clipboard" note in the command list.
@mdbt.command()
def sort_sql():
    """Sort SQL from the clipboard."""
    c = ColumnSorter()
    c.main()
328
+
329
# `pre-commit` command: builds a fresh PrecommitFormat helper and hands it the click context.
@mdbt.command()
@click.pass_context
def pre_commit(ctx):
    """Run pre-commit hooks."""
    formatter = PrecommitFormat()
    formatter.pre_commit(ctx)
334
+
335
+
336
# `format` command. The names `format` and `all` shadow builtins, but they must
# stay as-is because click derives the command name and the keyword argument
# names from them.
@mdbt.command()
@click.option("--select", "-s", type=str, help="Name of the model(s) to format. Takes precidence over --all and --main.")
@click.option(
    "--all",
    "-a",
    is_flag=True,
    help="Format all models.",
)
@click.option(
    "--main",
    "-m",
    is_flag=True,
    # NOTE(review): the two adjacent literals concatenate to
    # "... so its up-to-date." -- probably meant "it's".
    help="Format all models vs diff to the main branch. Make sure to pull main so it"
    "s up-to-date.",
)
@click.pass_context
def format(ctx, select, all, main):
    """Format models using sqlfluff."""
    formatter = PrecommitFormat()
    formatter.format(ctx, select, all, main)
355
+
356
+
357
# `exp` command: runs ExpectationsOutputBuilder for the selected models.
@mdbt.command()
@click.option(
    "--select",
    "-s",
    type=str,
    # Previously copy-pasted from `format`; it referred to --all/--main, which
    # this command does not define.
    help="Name of the model(s) to build expectations for.",
)
@click.pass_context
def exp(ctx, select):
    """Build expectations for models."""
    # ctx is accepted because of @click.pass_context but is not used here.
    expectations_output_builder = ExpectationsOutputBuilder()
    expectations_output_builder.main(select)
mdbt/core.py ADDED
@@ -0,0 +1,113 @@
1
+ import json
2
+ import os
3
+ import re
4
+ import subprocess
5
+ import sys
6
+ import typing as t
7
+
8
+ import snowflake.connector as snow
9
+ from dotenv import find_dotenv
10
+ from dotenv import load_dotenv
11
+ BASE_DIR = os.path.dirname(os.path.abspath(__file__))
12
+ load_dotenv(find_dotenv("../.env"))
13
+ load_dotenv(find_dotenv(".env"))
14
+
15
+
16
class Core:
    """Base class for mdbt helpers: opens a Snowflake connection and wraps dbt CLI calls.

    Constructing an instance connects to Snowflake immediately using the
    ``SNOWFLAKE_MAIN_*`` environment variables.
    """

    def __init__(self, test_mode=False):
        """Connect to Snowflake and initialise the test-mode hooks.

        Args:
            test_mode: When true, ``dbt_ls_to_json`` returns the parsed
                contents of ``dbt_ls_test_mode_output`` instead of running dbt.
        """
        self._conn = None
        self._cur = None
        self._create_snowflake_connection()
        self.test_mode = test_mode
        # Canned ``dbt ls`` output consumed by dbt_ls_to_json in test mode.
        self.dbt_ls_test_mode_output = None
        self.dbt_test_mode_command_check_value = None
        self.exclude_seed_snapshot = "resource_type:snapshot resource_type:seed"

        self.dbt_execute_command_output = ""

    def _create_snowflake_connection(self):
        """Open the Snowflake connection and cursor from ``SNOWFLAKE_MAIN_*`` variables.

        Raises:
            ValueError: If ``SNOWFLAKE_MAIN_ACCOUNT`` is unset or empty.
        """
        if not os.environ.get("SNOWFLAKE_MAIN_ACCOUNT"):
            raise ValueError(
                "SNOWFLAKE_MAIN_ACCOUNT environment variable is not set"
            )
        self._conn = snow.connect(
            account=os.environ.get("SNOWFLAKE_MAIN_ACCOUNT"),
            password=os.environ.get("SNOWFLAKE_MAIN_PASSWORD"),
            schema=os.environ.get("SNOWFLAKE_MAIN_SCHEMA"),
            user=os.environ.get("SNOWFLAKE_MAIN_USER"),
            warehouse=os.environ.get("SNOWFLAKE_MAIN_WAREHOUSE"),
            database=os.environ.get("SNOWFLAKE_MAIN_DATABASE"),
            role=os.environ.get("SNOWFLAKE_MAIN_ROLE"),
        )

        self._cur = self._conn.cursor()

    def dbt_ls_to_json(self, args):
        """Run ``dbt ls --output json`` with *args* and return the parsed JSON lines.

        Args:
            args: Extra command-line arguments appended to the dbt call.

        Returns:
            A list with one decoded JSON object per output line that starts
            with ``{``. On a dbt failure the output is printed and the process
            exits with dbt's return code.
        """
        cmd = ["dbt", "ls", "--output", "json"]
        cmd = cmd + args
        try:
            if self.test_mode:
                output = self.dbt_ls_test_mode_output
            else:
                output = subprocess.run(
                    cmd, check=True, text=True, capture_output=True
                ).stdout
        except subprocess.CalledProcessError as e:
            print(e.stderr)
            print(e.stdout)
            print(" ".join(cmd))
            sys.exit(e.returncode)
        # The results come back with a few header lines that need to be removed, then a series of JSON string with a
        # format like: {"name": "active_patient_metrics", "resource_type": "model", "config":
        # {"materialized": "incremental"}} RE removes the header stuff and finds the json lines.
        json_lines = re.findall(r"^{.*$", output, re.MULTILINE)
        return [json.loads(line) for line in json_lines]

    @staticmethod
    def execute_dbt_command_capture(command: str, args: t.List[str]) -> str:
        """
        Executes a DBT command and captures the output without streaming to the stdout.
        Args:
            command: The DBT command to run.
            args: A list of args to pass into the command.

        Returns:
            A string containing the results of the command.
        """
        cmd = ["dbt", command] + args
        try:
            output = subprocess.run(
                cmd, check=True, text=True, capture_output=True
            ).stdout
        except subprocess.CalledProcessError as e:
            print(f'Failure while running command: {" ".join(cmd)}')
            print(e.stderr)
            print(e.stdout)
            sys.exit(e.returncode)
        return output

    def get_file_path(self, model_name):
        """Return ``original_file_path`` of the first dbt node selected by *model_name*.

        Raises:
            IndexError: If dbt returns no node for the selector. The exception
                type is unchanged from before; it now names the selector.
        """
        # Unit tests show up as models in `dbt ls`, so they are excluded by
        # folder and by resource type.
        args = [
            "--select",
            model_name,
            "--exclude",
            "path:tests/* resource_type:test",
            "--output-keys",
            "original_file_path",
        ]
        model_ls_json = self.dbt_ls_to_json(args)
        if not model_ls_json:
            raise IndexError(
                f"dbt ls returned no results for model selector '{model_name}'"
            )
        return model_ls_json[0]["original_file_path"]

    @staticmethod
    def handle_cmd_line_error(e):
        """Print the details of a failed subprocess call and re-raise it as ``Exception``.

        Args:
            e: The ``subprocess.CalledProcessError`` to report.

        Raises:
            Exception: Always, chained to *e* so the original traceback is kept.
        """
        # CalledProcessError.cmd mirrors what was given to subprocess.run: an
        # argv sequence or one shell string. Joining a string would put a
        # space between every character, so pass strings through untouched.
        cmd_text = e.cmd if isinstance(e.cmd, str) else " ".join(map(str, e.cmd))
        message = f"Failure while running command: {cmd_text}"
        print(message)
        print(e.stderr)
        print(e.stdout)
        raise Exception(message) from e
@@ -0,0 +1,74 @@
1
+ import os
2
+
3
+ import yaml
4
+
5
+ from mdbt.core import Core
6
+
7
+
8
class ExpectationsOutputBuilder(Core):
    """Evaluate ``expect_column_sum_to_be_between`` tests against the dev tables and print the results."""

    # The only dbt_expectations test this builder evaluates.
    # fmt: off
    _SUM_BETWEEN_TEST = "dbt_expectations.expect_column_sum_to_be_between"
    # fmt: on

    def __init__(self, test_mode=False):
        super().__init__(test_mode=test_mode)

    def main(self, select):
        """Process the YML file of every model returned by ``dbt ls``.

        Args:
            select: Optional dbt selector; when falsy, no ``--select`` is passed.
        """
        args = ["--output-keys", "name resource_type original_file_path"]
        if select:
            args += ["--select", select]
        database = os.environ.get("DEV_DATABASE")
        schema = os.environ.get("DEV_SCHEMA")
        for model in self.dbt_ls_to_json(args):
            if model.get("resource_type") != "model":
                continue
            # The YML file is looked up next to the model file, same base name.
            # splitext avoids assuming the extension is exactly four characters.
            base_path, _ = os.path.splitext(model.get("original_file_path"))
            self.process_yaml(base_path + ".yml", database, schema, model.get("name"))

    def process_yaml(self, yaml_file_path, database, schema, model_name):
        """Run every sum-between expectation declared in *yaml_file_path*.

        Only the first entry under ``models`` is inspected. Files with no
        models, and columns with no ``data_tests``, are skipped.

        Args:
            yaml_file_path: Path of the model's YML file.
            database: Database holding the built model.
            schema: Schema holding the built model.
            model_name: Table/view name to query.
        """
        with open(yaml_file_path, "r") as f:
            # An empty YML file loads as None.
            yaml_content = yaml.safe_load(f) or {}

        models = yaml_content.get("models") or []
        if not models:
            print(f"No models found in {yaml_file_path}; skipping {model_name}.")
            return

        print(f"*********\nStarting model: {model_name}\n*********")
        # `or []` also covers keys that are present but null in the YML.
        for column in models[0].get("columns") or []:
            column_name = column.get("name")
            for data_test in column.get("data_tests") or []:
                # Tests written as bare strings carry no parameters.
                if not isinstance(data_test, dict):
                    continue
                for expectation_name, expectation_params in data_test.items():
                    if expectation_name != self._SUM_BETWEEN_TEST:
                        continue
                    self._run_sum_between_check(
                        database, schema, model_name, column_name, expectation_params
                    )

    def _run_sum_between_check(
        self, database, schema, model_name, column_name, expectation_params
    ):
        """Query the column sum for one expectation and print a pass/fail row."""
        min_value = expectation_params.get("min_value")
        max_value = expectation_params.get("max_value")
        row_condition = expectation_params.get("row_condition", "")

        # Identifiers and bounds come from the project's own YML, so they are
        # interpolated directly. The escape codes colour Pass/Fail in a terminal.
        sql = f"""
        SELECT SUM({column_name}) AS current_value
        , {min_value} AS expected_lower
        , {max_value} AS expected_higher
        , iff(current_value between expected_lower and expected_higher, '\033[92m Pass\033[0m', '\033[91m Fail\033[0m') AS result
        FROM {database}.{schema}.{model_name}
        """

        if row_condition:
            sql += f" WHERE {row_condition}"

        self._cur.execute(sql)
        results_df = self._cur.fetch_pandas_all()

        print(f"Model: {model_name}")
        print(f"Column: {column_name}")
        print(f"Condition: {row_condition}")
        print(results_df.to_string(index=False))
        print("\n")
71
+
72
if __name__ == "__main__":
    # Ad-hoc manual run against a single hard-coded model.
    ExpectationsOutputBuilder().main(select="appointment_revenue_mrpv_metrics")
mdbt/lightdash.py ADDED
@@ -0,0 +1,84 @@
1
+ import json
2
+ import os
3
+ import subprocess
4
+ import sys
5
+
6
+ from click.core import Context
7
+
8
+ from mdbt.core import Core
9
+
10
+
11
class Lightdash(Core):
    """Helper that starts Lightdash previews and keeps the local Lightdash CLI version in step with the API."""

    def __init__(self, test_mode=False):
        super().__init__(test_mode=test_mode)

    def lightdash_start_preview(
        self, ctx: Context, select: str, preview_name: str, l43: bool
    ):
        """Run ``lightdash start-preview`` for the current project.

        Args:
            ctx: Click context of the calling command (not read here).
            select: Optional model selector passed as ``--select``.
            preview_name: Preview name; when falsy, the current git branch name is used.
            l43: When true, adds ``-s "tag:l3 tag:l4"`` to the command.
        """
        # Guard: the target project must be configured before anything runs.
        if not os.getenv("LIGHTDASH_PROJECT"):
            print(
                "LIGHTDASH_PROJECT environment variable not set. Set this key to the ID of the project you will "
                "promote charts to."
            )
            sys.exit(1)
        print(f"Building for LIGHTDASH_PROJECT: {os.getenv('LIGHTDASH_PROJECT')}")

        self._check_lightdash_for_updates()

        if not preview_name:
            # Fall back to the name of the checked-out git branch.
            branch_lookup = subprocess.run(
                ["git", "branch", "--show-current"], stdout=subprocess.PIPE, text=True
            )
            preview_name = branch_lookup.stdout.strip()

        command = ["lightdash", "start-preview", "--name", preview_name]
        if l43:
            command.extend(["-s", "tag:l3 tag:l4"])
        if select:
            command.extend(["--select", select])

        try:
            print(f'Running command: {" ".join(command)}')
            subprocess.run(command, check=True)
        except subprocess.CalledProcessError as e:
            self.handle_cmd_line_error(e)

    @staticmethod
    def _check_lightdash_for_updates():
        """Compare the Lightdash API version with the local CLI and install the API's version on mismatch."""
        api_str = 'curl -s "https://app.lightdash.cloud/api/v1/health"'

        try:
            health_call = subprocess.run(
                api_str, shell=True, check=True, text=True, capture_output=True
            )
            # The health endpoint's response is parsed as JSON.
            result_json = json.loads(health_call.stdout)
        except subprocess.CalledProcessError as e:
            print(f"Failure while running command: {api_str}")
            print(e.stderr)
            print(e.stdout)
            sys.exit(e.returncode)

        api_version = result_json["results"]["version"]

        version_call = subprocess.run(
            ["lightdash", "--version"], check=True, text=True, capture_output=True
        )
        current_version = version_call.stdout.strip()

        if api_version == current_version:
            print(
                f"API version {api_version} matches current version {current_version}."
            )
            return

        print(
            f"API version {api_version} does not match current version {current_version}. Upgrading."
        )
        # Install the CLI release matching the API's version, globally via npm.
        subprocess.run(
            ["npm", "install", "-g", f"@lightdash/cli@{api_version}"], check=True
        )