PyPI - mdbt - Versions diffs - 0.4.27__py3-none-any.whl - Mend

mdbt 0.4.27__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (20) hide show

mdbt/__init__.py +0 -0
mdbt/ai_core.py +116 -0
mdbt/build_dbt_docs_ai.py +147 -0
mdbt/build_unit_test_data_ai.py +129 -0
mdbt/cmdline.py +368 -0
mdbt/core.py +113 -0
mdbt/expectations_output_builder.py +74 -0
mdbt/lightdash.py +84 -0
mdbt/main.py +474 -0
mdbt/precommit_format.py +84 -0
mdbt/prompts.py +244 -0
mdbt/recce.py +66 -0
mdbt/sort_yaml_fields.py +148 -0
mdbt/sql_sorter.py +165 -0
mdbt-0.4.27.dist-info/METADATA +28 -0
mdbt-0.4.27.dist-info/RECORD +20 -0
mdbt-0.4.27.dist-info/WHEEL +5 -0
mdbt-0.4.27.dist-info/entry_points.txt +2 -0
mdbt-0.4.27.dist-info/licenses/LICENSE +21 -0
mdbt-0.4.27.dist-info/top_level.txt +1 -0

mdbt/cmdline.py ADDED Viewed

@@ -0,0 +1,368 @@
+import re
+import click
+from mdbt.build_dbt_docs_ai import BuildDBTDocs
+from mdbt.build_unit_test_data_ai import BuildUnitTestDataAI
+from mdbt.expectations_output_builder import ExpectationsOutputBuilder
+from mdbt.lightdash import Lightdash
+from mdbt.main import MDBT
+from mdbt.precommit_format import PrecommitFormat
+from mdbt.recce import Recce
+from mdbt.sort_yaml_fields import SortYAML
+from mdbt.sql_sorter import ColumnSorter
+# Single MDBT instance created at import time; the dbt passthrough and build commands
+# defined below delegate to its methods.
+mdbt_class = MDBT()
+# Create a Click group
+class CustomCmdLoader(click.Group):
+    def get_command(self, ctx, cmd_name):
+        ctx.ensure_object(dict)
+        # Match commands ending with + optionally followed by a number, such as 'sbuild+' or 'sbuild+3'
+        suffix_match = re.match(r"(.+)\+(\d*)$", cmd_name)
+        if suffix_match:
+            cmd_name, count = suffix_match.groups()
+            ctx.obj["build_children"] = True
+            ctx.obj["build_children_count"] = (
+                int(count) if count else None
+            )  # Default to 1 if no number is specified
+        # Match commands starting with a number followed by +, such as '3+sbuild'
+        prefix_match = re.match(r"(\d+)\+(.+)", cmd_name)
+        if prefix_match:
+            count, cmd_name = prefix_match.groups()
+            ctx.obj["build_parents"] = True
+            ctx.obj["build_parents_count"] = (
+                int(count) if count else None
+            )  # Default to 1 if no number is specified
+        return click.Group.get_command(self, ctx, cmd_name)
+    def list_commands(self, ctx):
+        # List of all commands
+        return [
+            "help",
+            "build",
+            "trun",
+            "run",
+            "test",
+            "compile",
+            "clip-compile",
+            "unittest",
+            "sbuild",
+            "pbuild",
+            "gbuild",
+            "build-docs",
+            "build-unit",
+            "ld-preview",
+            "clean-stg",
+            "pre-commit",
+            "sort-yaml",
+            "sort-sql",  # Sort SQL from clipboard
+            "recce",
+            "exp",
+            "format",
+        ]
+# Root command group; every @mdbt.command() below registers a sub-command on it.
+mdbt = CustomCmdLoader()
+# CLI command `build`: forwards the context and its options unchanged to mdbt_class.build.
+@mdbt.command()
+@click.option(
+    "--full-refresh", "-f", is_flag=True, help="Run a full refresh on all models."
+)
+@click.option("--select", "-s", type=str, help="DBT style select string")
+@click.option("--fail-fast", is_flag=True, help="Fail fast on errors.")
+@click.option(
+    "--threads", "-t", type=int, help="Number of threads to use during DBT operations."
+)
+@click.pass_context
+def build(ctx, full_refresh, select, fail_fast, threads):
+    """Execute a DBT build command passthrough."""
+    mdbt_class.build(ctx, full_refresh, select, fail_fast, threads)
+# CLI command `trun`: same options as `build`, forwarded unchanged to mdbt_class.trun.
+@mdbt.command()
+@click.option(
+    "--full-refresh", "-f", is_flag=True, help="Run a full refresh on all models."
+)
+@click.option("--select", "-s", type=str, help="DBT style select string")
+@click.option("--fail-fast", is_flag=True, help="Fail fast on errors.")
+@click.option(
+    "--threads", "-t", type=int, help="Number of threads to use during DBT operations."
+)
+@click.pass_context
+def trun(ctx, full_refresh, select, fail_fast, threads):
+    """Execute a DBT run, then test command."""
+    mdbt_class.trun(ctx, full_refresh, select, fail_fast, threads)
+# CLI command `run`: same options as `build`, forwarded unchanged to mdbt_class.run.
+@mdbt.command()
+@click.option(
+    "--full-refresh", "-f", is_flag=True, help="Run a full refresh on all models."
+)
+@click.option("--select", "-s", type=str, help="DBT style select string")
+@click.option("--fail-fast", is_flag=True, help="Fail fast on errors.")
+@click.option(
+    "--threads", "-t", type=int, help="Number of threads to use during DBT operations."
+)
+@click.pass_context
+def run(ctx, full_refresh, select, fail_fast, threads):
+    """Pass through to DBT run command."""
+    mdbt_class.run(ctx, full_refresh, select, fail_fast, threads)
+# CLI command `test`: no --full-refresh option here; the rest is forwarded to mdbt_class.test.
+@mdbt.command()
+@click.option("--select", "-s", type=str, help="DBT style select string")
+@click.option("--fail-fast", is_flag=True, help="Fail fast on errors.")
+@click.option(
+    "--threads", "-t", type=int, help="Number of threads to use during DBT operations."
+)
+@click.pass_context
+def test(ctx, select, fail_fast, threads):
+    """Pass through to DBT test command."""
+    mdbt_class.test(ctx, select, fail_fast, threads)
+# CLI command `unittest`: forwards the selection and fail-fast flag to mdbt_class.unittest.
+@mdbt.command()
+@click.option("--select", "-s", type=str, help="DBT style select string")
+@click.option("--fail-fast", is_flag=True, help="Fail fast on errors.")
+@click.pass_context
+def unittest(ctx, select, fail_fast):
+    """Run unit tests on models."""
+    mdbt_class.unittest(ctx, select, fail_fast)
+# CLI command `compile`: forwards the selection to mdbt_class.compile.
+# NOTE(review): the function name shadows the builtin `compile` within this module.
+@mdbt.command()
+@click.option("--select", "-s", type=str, help="Name of the model(s) to compile.")
+@click.pass_context
+def compile(ctx, select):
+    """Pass through to DBT compile."""
+    mdbt_class.compile(ctx, select)
+# CLI command `clip-compile`: forwards the selection to mdbt_class.clip_compile.
+# NOTE(review): the help docstring is identical to `compile`; presumably this variant also
+# involves the clipboard, as the name suggests -- confirm against MDBT.clip_compile.
+@mdbt.command()
+@click.option(
+    "--select",
+    "-s",
+    type=str,
+    help="Name of the model to compile. Recommend only running one.",
+)
+@click.pass_context
+def clip_compile(ctx, select):
+    """Pass through to DBT compile."""
+    mdbt_class.clip_compile(ctx, select)
+# CLI command `recce`: builds a fresh Recce helper on each invocation and hands it the context.
+@mdbt.command()
+@click.pass_context
+def recce(ctx):
+    """Run a recce of the current state of the project."""
+    Recce().recce(ctx)
+# CLI command `sbuild`: forwards the context and options to mdbt_class.sbuild. The context is
+# where CustomCmdLoader stores the build_children / build_parents markers ('sbuild+', '3+sbuild').
+@mdbt.command()
+@click.option(
+    "--full-refresh",
+    "-f",
+    is_flag=True,
+    help="Force a full refresh on all models in build scope.",
+)
+@click.option(
+    "--threads", "-t", type=int, help="Number of threads to use during DBT operations."
+)
+@click.pass_context
+def sbuild(ctx, full_refresh, threads):
+    """Build models based on changes in current state since last build."""
+    mdbt_class.sbuild(ctx, full_refresh, threads)
+# CLI command `pbuild`: forwards the context and options to mdbt_class.pbuild.
+# --skip-dl (alias --sd) is passed through as the skip_dl flag.
+@mdbt.command()
+@click.option(
+    "--full-refresh",
+    "-f",
+    is_flag=True,
+    help="Force a full refresh on all models in build scope.",
+)
+@click.option(
+    "--threads", "-t", type=int, help="Number of threads to use during DBT operations."
+)
+@click.option(
+    "--skip-dl",
+    "--sd",
+    is_flag=True,
+    help="Skip downloading the manifest file from Snowflake. Use the one that was already downloaded.",
+)
+@click.pass_context
+def pbuild(ctx, full_refresh, threads, skip_dl):
+    """Build models based on changes from production to current branch."""
+    mdbt_class.pbuild(ctx, full_refresh, threads, skip_dl)
+# CLI command `gbuild`: forwards the context and options to mdbt_class.gbuild.
+# NOTE(review): the two adjacent string literals in the --main help concatenate to
+# "...so its up-to-date." -- the apostrophe of "it's" is missing from the rendered help.
+@mdbt.command()
+@click.option(
+    "--main",
+    "-m",
+    is_flag=True,
+    help="Build all models vs diff to the main branch. Make sure to pull main so it"
+    "s up-to-date.",
+)
+@click.option(
+    "--full-refresh",
+    "-f",
+    is_flag=True,
+    help="Force a full refresh on all models in build scope.",
+)
+@click.option(
+    "--threads", "-t", type=int, help="Number of threads to use during DBT operations."
+)
+@click.pass_context
+def gbuild(ctx, main, full_refresh, threads):
+    """Build models based on Git changes from production to current branch."""
+    mdbt_class.gbuild(ctx, main, full_refresh, threads)
+@mdbt.command()
+@click.option(
+    "--select",
+    "-s",
+    type=str,
+    required=True,
+    help="Name of the model to build unit test data for.",
+)
+@click.option(
+    "--sys_context",
+    type=str,
+    help="Add helpful info so the AI understs the context of the model it's documenting. ",
+)
+@click.option(
+    "--is_new",
+    "-n",
+    is_flag=True,
+    help="Passing this flag will bypass the questions such as 'is this a new model,' and 'add to git"
+)
+@click.pass_context
+def build_docs(ctx, select, sys_context, is_new):
+    """Build dbt YML model docs for a model. This command will sample the database."""
+    dbt_docs = BuildDBTDocs()
+    dbt_docs.main(select, sys_context, is_new)
+# CLI command `build-unit`: builds a BuildUnitTestDataAI helper and runs it for the selected model.
+@mdbt.command()
+@click.option(
+    "--select",
+    "-s",
+    type=str,
+    required=True,
+    help="Name of the model to build unit test data for.",
+)
+@click.pass_context
+def build_unit(ctx, select):
+    """Build unit test mock and expect data for a model. This command will sample the database."""
+    # ctx is supplied by @click.pass_context but is not used here.
+    build_unit_test_data = BuildUnitTestDataAI()
+    build_unit_test_data.main(select)
+@mdbt.command()
+@click.option(
+    "--select",
+    "-s",
+    type=str,
+    help="Name of the model to start a lightdash preview for. If not provided, all models will be previewed.",
+)
+@click.option(
+    "--name",
+    "-n",
+    type=str,
+    help="Name of the lightdash preview. If no name given, the preview will take the name of the current branch.",
+)
+@click.option(
+    "--l43",
+    is_flag=True,
+    help="Include L3 and L4 models in the preview. Default is False.",
+)
+@click.pass_context
+def ld_preview(ctx, select, name, l43):
+    """Start a lightdash preview for a model."""
+    preview_name = name
+    Lightdash().lightdash_start_preview(ctx, select, preview_name, l43)
+@mdbt.command()
+@click.option("--select", "-s", type=str, help="Names of the model(s) to clean.")
+@click.option(
+    "--split-names", is_flag=True, help="Split names like isupdated into is_updated."
+)
+@click.option(
+    "--remove-airbyte",
+    is_flag=True,
+    help="Whether to remove Airbyte specific lines. Default is True.",
+)
+@click.option(
+    "--overwrite",
+    is_flag=True,
+    help="Will overwrite the files. If not set, files will be saved to a folder.",
+)
+@click.pass_context
+def clean_stg(select, split_names, remove_airbyte, overwrite):
+    """Designed to clean files in the L1_stg folders only"""
+    sql_model_cleaner = SQLModelCleaner()
+    sql_model_cleaner.main(select, split_names, remove_airbyte, overwrite)
+# CLI command `sort-yaml`: builds a SortYAML helper and passes the three options to its main().
+# The function has no docstring, so the command has no help text of its own.
+@mdbt.command()
+@click.option("--select", "-s", type=str, help="Name of model to sort YML columns for.")
+@click.option("--all-files", is_flag=True, help="Sort all YML files in the project.")
+@click.option("--overwrite", is_flag=True, help="Overwrite the existing YML file.")
+def sort_yaml(select, all_files, overwrite):
+    sy = SortYAML()
+    sy.main(select, all_files, overwrite)
+# CLI command `sort-sql`: takes no options; builds a ColumnSorter and runs its main().
+# The command list describes it as "Sort SQL from clipboard".
+@mdbt.command()
+def sort_sql():
+    c = ColumnSorter()
+    c.main()
+# CLI command `pre-commit`: builds a PrecommitFormat helper and hands it the context.
+@mdbt.command()
+@click.pass_context
+def pre_commit(ctx):
+    """Run pre-commit hooks."""
+    PrecommitFormat().pre_commit(ctx)
+# CLI command `format`: forwards the context and the three selection options to
+# PrecommitFormat.format.
+# NOTE(review): the function name and the `all` parameter shadow Python builtins, and the
+# adjacent string literals in the --main help concatenate to "...so its up-to-date."
+# (missing apostrophe). "precidence" in the --select help is a typo for "precedence".
+@mdbt.command()
+@click.option(
+    "--select",
+    "-s",
+    type=str,
+    help="Name of the model(s) to format. Takes precidence over --all and --main.",
+)
+@click.option("--all", "-a", is_flag=True, help="Format all models.")
+@click.option(
+    "--main",
+    "-m",
+    is_flag=True,
+    help="Format all models vs diff to the main branch. Make sure to pull main so it"
+    "s up-to-date.",
+)
+@click.pass_context
+def format(ctx, select, all, main):
+    """Format models using sqlfluff."""
+    PrecommitFormat().format(ctx, select, all, main)
+@mdbt.command()
+@click.option(
+    "--select",
+    "-s",
+    type=str,
+    help="Name of the model(s) to format. Takes precidence over --all and --main.",
+)
+@click.pass_context
+def exp(ctx, select):
+    """Build expectations for models."""
+    expectations_output_builder = ExpectationsOutputBuilder()
+    expectations_output_builder.main(select)

mdbt/core.py ADDED Viewed

@@ -0,0 +1,113 @@
+import json
+import os
+import re
+import subprocess
+import sys
+import typing as t
+import snowflake.connector as snow
+from dotenv import find_dotenv
+from dotenv import load_dotenv
+# Absolute path of the directory that contains this module.
+BASE_DIR = os.path.dirname(os.path.abspath(__file__))
+# Load environment variables from .env files at import time, first looking for "../.env"
+# and then ".env". NOTE(review): load_dotenv does not override already-set variables by
+# default, so values from the first file found presumably win -- confirm this is intended.
+load_dotenv(find_dotenv("../.env"))
+load_dotenv(find_dotenv(".env"))
+class Core:
+    def __init__(self, test_mode=False):
+        self._conn = None
+        self._cur = None
+        self._create_snowflake_connection()
+        self.test_mode = test_mode
+        self.dbt_ls_test_mode_output = None
+        self.dbt_test_mode_command_check_value = None
+        self.exclude_seed_snapshot = "resource_type:snapshot resource_type:seed"
+        self.dbt_execute_command_output = ""
+    def _create_snowflake_connection(self):
+        if not os.environ.get("SNOWFLAKE_MAIN_ACCOUNT"):
+            raise ValueError(
+                "SNOWFLAKE_MAIN_ACCOUNT environment variable is not set"
+            )
+        self._conn = snow.connect(
+            account=os.environ.get("SNOWFLAKE_MAIN_ACCOUNT"),
+            password=os.environ.get("SNOWFLAKE_MAIN_PASSWORD"),
+            schema=os.environ.get("SNOWFLAKE_MAIN_SCHEMA"),
+            user=os.environ.get("SNOWFLAKE_MAIN_USER"),
+            warehouse=os.environ.get("SNOWFLAKE_MAIN_WAREHOUSE"),
+            database=os.environ.get("SNOWFLAKE_MAIN_DATABASE"),
+            role=os.environ.get("SNOWFLAKE_MAIN_ROLE"),
+        )
+        self._cur = self._conn.cursor()
+    def dbt_ls_to_json(self, args):
+        cmd = ["dbt", "ls", "--output", "json"]
+        cmd = cmd + args
+        try:
+            if self.test_mode:
+                output = self.dbt_ls_test_mode_output
+            else:
+                output = subprocess.run(
+                    cmd, check=True, text=True, capture_output=True
+                ).stdout
+        except subprocess.CalledProcessError as e:
+            print(e.stderr)
+            print(e.stdout)
+            print(" ".join(cmd))
+            sys.exit(e.returncode)
+        # The results come back with a few header lines that need to be removed, then a series of JSON string with a
+        # format like: {"name": "active_patient_metrics", "resource_type": "model", "config":
+        # {"materialized": "incremental"}} RE removes the header stuff and finds the json lines.
+        json_lines = re.findall(r"^{.*$", output, re.MULTILINE)
+        # Split lines and filter to get only JSON strings
+        models_json = [json.loads(line) for line in json_lines]
+        return models_json
+    @staticmethod
+    def execute_dbt_command_capture(command: str, args: t.List[str]) -> str:
+        """
+        Executes a DBT command and captures the output without streaming to the stdout.
+        Args:
+            command: The DBT command to run.
+            args: A list of args to pass into the command.
+        Returns:
+            A string containing the results of the command.
+        """
+        cmd = ["dbt", command] + args
+        try:
+            output = subprocess.run(
+                cmd, check=True, text=True, capture_output=True
+            ).stdout
+        except subprocess.CalledProcessError as e:
+            print(f'Failure while running command: {" ".join(cmd)}')
+            print(e.stderr)
+            print(e.stdout)
+            sys.exit(e.returncode)
+        return output
+    def get_file_path(self, model_name):
+        # This will get the path of the model. note, that unit tests show up as models, so must be excluded via the folder.
+        #
+        args = [
+            "--select",
+            model_name,
+            "--exclude",
+            "path:tests/* resource_type:test",
+            "--output-keys",
+            "original_file_path",
+        ]
+        model_ls_json = self.dbt_ls_to_json(args)
+        file_path = model_ls_json[0]["original_file_path"]
+        return file_path
+    @staticmethod
+    def handle_cmd_line_error(e):
+        print(f'Failure while running command: {" ".join(e.cmd)}')
+        print(e.stderr)
+        print(e.stdout)
+        raise Exception(f"Failure while running command: {' '.join(e.cmd)}")
+        # sys.exit(e.returncode)

mdbt/expectations_output_builder.py ADDED Viewed

@@ -0,0 +1,74 @@
+import os
+import yaml
+from mdbt.core import Core
+class ExpectationsOutputBuilder(Core):
+    def __init__(self, test_mode=False):
+        super().__init__(test_mode=test_mode)
+    def main(self, select):
+        args = ["--output-keys", "name resource_type original_file_path"]
+        if select:
+            args += ["--select", select]
+        model_data = self.dbt_ls_to_json(args)
+        for model in model_data:
+            if model.get("resource_type") == "model":
+                yaml_file_path = model.get("original_file_path")[:-4] + ".yml"
+                database = os.environ.get("DEV_DATABASE")
+                schema = os.environ.get("DEV_SCHEMA")
+                model_name = model.get("name")
+                self.process_yaml(yaml_file_path, database, schema, model_name)
+    def process_yaml(self, yaml_file_path, database, schema, model_name):
+        with open(yaml_file_path, "r") as f:
+            yaml_content = yaml.safe_load(f)
+        model = yaml_content.get("models", [])[0]
+        columns = model.get("columns", [])
+        print(f"*********\nStarting model: {model_name}\n*********")
+        for column in columns:
+            column_name = column.get("name")
+            data_tests = column.get("data_tests", [])
+            for data_test in data_tests:
+                if isinstance(data_test, dict):
+                    for expectation_name, expectation_params in data_test.items():
+                        # fmt: off
+                        expectation_pattern = "dbt_expectations.expect_column_sum_to_be_between"
+                        # fmt: on
+                        if expectation_name == expectation_pattern:
+                            min_value = expectation_params.get("min_value")
+                            max_value = expectation_params.get("max_value")
+                            row_condition = expectation_params.get("row_condition", "")
+                            # Build SQL query
+                            sql = f"""
+                            SELECT SUM({column_name}) AS current_value
+                                 , {min_value} AS expected_lower
+                                 , {max_value} AS expected_higher
+                                 , iff(current_value between expected_lower and expected_higher, '\033[92m Pass\033[0m', '\033[91m Fail\033[0m') AS result
+                            FROM {database}.{schema}.{model_name}
+                            """
+                            if row_condition:
+                                sql += f" WHERE {row_condition}"
+                            # Execute the query
+                            self._cur.execute(sql)
+                            results_df = self._cur.fetch_pandas_all()
+                            # Print the results
+                            print(f"Model: {model_name}")
+                            print(f"Column: {column_name}")
+                            print(f"Condition: {row_condition}")
+                            print(results_df.to_string(index=False))
+                            print("\n")
+# Manual entry point: runs the builder for one hard-coded model when the module is executed
+# directly. Constructing the builder opens a Snowflake connection (see Core.__init__).
+if __name__ == "__main__":
+    builder = ExpectationsOutputBuilder()
+    builder.main(select="appointment_revenue_mrpv_metrics")

mdbt/lightdash.py ADDED Viewed

@@ -0,0 +1,84 @@
+import json
+import os
+import subprocess
+import sys
+from click.core import Context
+from mdbt.core import Core
+class Lightdash(Core):
+    def __init__(self, test_mode=False):
+        super().__init__(test_mode=test_mode)
+    def lightdash_start_preview(
+        self, ctx: Context, select: str, preview_name: str, l43: bool
+    ):
+        # Check to make sure the LIGHTDASH_PROJECT env variable is set
+        if not os.getenv("LIGHTDASH_PROJECT"):
+            print(
+                "LIGHTDASH_PROJECT environment variable not set. Set this key to the ID of the project you will "
+                "promote charts to."
+            )
+            sys.exit(1)
+        else:
+            print(f"Building for LIGHTDASH_PROJECT: {os.getenv('LIGHTDASH_PROJECT')}")
+        self._check_lightdash_for_updates()
+        if not preview_name:
+            # If no preview name, use the current name of the git branch
+            result = subprocess.run(
+                ["git", "branch", "--show-current"], stdout=subprocess.PIPE, text=True
+            )
+            preview_name = result.stdout.strip()
+        args = ["lightdash", "start-preview", "--name", preview_name]
+        if l43:
+            args = args + ["-s", "tag:l3 tag:l4"]
+        if select:
+            args = args + ["--select", select]
+        try:
+            print(f'Running command: {" ".join(args)}')
+            subprocess.run(args, check=True)
+        except subprocess.CalledProcessError as e:
+            self.handle_cmd_line_error(e)
+    @staticmethod
+    def _check_lightdash_for_updates():
+        api_str = 'curl -s "https://app.lightdash.cloud/api/v1/health"'
+        try:
+            result = subprocess.run(
+                api_str, shell=True, check=True, text=True, capture_output=True
+            )
+            # Convert to JSON
+            result_json = json.loads(result.stdout)
+        except subprocess.CalledProcessError as e:
+            print(f"Failure while running command: {api_str}")
+            print(e.stderr)
+            print(e.stdout)
+            sys.exit(e.returncode)
+        api_version = result_json["results"]["version"]
+        result = subprocess.run(
+            ["lightdash", "--version"], check=True, text=True, capture_output=True
+        )
+        current_version = result.stdout.strip()
+        if api_version != current_version:
+            print(
+                f"API version {api_version} does not match current version {current_version}. Upgrading."
+            )
+            args = ["npm", "install", "-g", f"@lightdash/cli@{api_version}"]
+            subprocess.run(args, check=True)
+        else:
+            print(
+                f"API version {api_version} matches current version {current_version}."
+            )