altimate-datapilot-cli 0.0.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- altimate_datapilot_cli-0.0.8.dist-info/AUTHORS.rst +5 -0
- altimate_datapilot_cli-0.0.8.dist-info/LICENSE +9 -0
- altimate_datapilot_cli-0.0.8.dist-info/METADATA +102 -0
- altimate_datapilot_cli-0.0.8.dist-info/RECORD +139 -0
- altimate_datapilot_cli-0.0.8.dist-info/WHEEL +5 -0
- altimate_datapilot_cli-0.0.8.dist-info/entry_points.txt +4 -0
- altimate_datapilot_cli-0.0.8.dist-info/top_level.txt +1 -0
- datapilot/__init__.py +1 -0
- datapilot/__main__.py +14 -0
- datapilot/cli/__init__.py +0 -0
- datapilot/cli/main.py +11 -0
- datapilot/clients/__init__.py +0 -0
- datapilot/clients/altimate/__init__.py +0 -0
- datapilot/clients/altimate/client.py +85 -0
- datapilot/clients/altimate/utils.py +75 -0
- datapilot/config/__init__.py +0 -0
- datapilot/config/config.py +16 -0
- datapilot/config/utils.py +32 -0
- datapilot/core/__init__.py +0 -0
- datapilot/core/insights/__init__.py +2 -0
- datapilot/core/insights/base/__init__.py +0 -0
- datapilot/core/insights/base/insight.py +34 -0
- datapilot/core/insights/report.py +16 -0
- datapilot/core/insights/schema.py +24 -0
- datapilot/core/insights/sql/__init__.py +0 -0
- datapilot/core/insights/sql/base/__init__.py +0 -0
- datapilot/core/insights/sql/base/insight.py +18 -0
- datapilot/core/insights/sql/runtime/__init__.py +0 -0
- datapilot/core/insights/sql/static/__init__.py +0 -0
- datapilot/core/insights/utils.py +20 -0
- datapilot/core/platforms/__init__.py +0 -0
- datapilot/core/platforms/dbt/__init__.py +0 -0
- datapilot/core/platforms/dbt/cli/__init__.py +0 -0
- datapilot/core/platforms/dbt/cli/cli.py +112 -0
- datapilot/core/platforms/dbt/constants.py +34 -0
- datapilot/core/platforms/dbt/exceptions.py +6 -0
- datapilot/core/platforms/dbt/executor.py +157 -0
- datapilot/core/platforms/dbt/factory.py +22 -0
- datapilot/core/platforms/dbt/formatting.py +45 -0
- datapilot/core/platforms/dbt/hooks/__init__.py +0 -0
- datapilot/core/platforms/dbt/hooks/executor_hook.py +86 -0
- datapilot/core/platforms/dbt/insights/__init__.py +115 -0
- datapilot/core/platforms/dbt/insights/base.py +133 -0
- datapilot/core/platforms/dbt/insights/checks/__init__.py +0 -0
- datapilot/core/platforms/dbt/insights/checks/base.py +26 -0
- datapilot/core/platforms/dbt/insights/checks/check_column_desc_are_same.py +105 -0
- datapilot/core/platforms/dbt/insights/checks/check_column_name_contract.py +154 -0
- datapilot/core/platforms/dbt/insights/checks/check_macro_args_have_desc.py +75 -0
- datapilot/core/platforms/dbt/insights/checks/check_macro_has_desc.py +63 -0
- datapilot/core/platforms/dbt/insights/checks/check_model_has_all_columns.py +96 -0
- datapilot/core/platforms/dbt/insights/checks/check_model_has_labels_keys.py +112 -0
- datapilot/core/platforms/dbt/insights/checks/check_model_has_meta_keys.py +108 -0
- datapilot/core/platforms/dbt/insights/checks/check_model_has_properties_file.py +64 -0
- datapilot/core/platforms/dbt/insights/checks/check_model_has_tests_by_group.py +118 -0
- datapilot/core/platforms/dbt/insights/checks/check_model_has_tests_by_name.py +114 -0
- datapilot/core/platforms/dbt/insights/checks/check_model_has_tests_by_type.py +119 -0
- datapilot/core/platforms/dbt/insights/checks/check_model_materialization_by_childs.py +129 -0
- datapilot/core/platforms/dbt/insights/checks/check_model_name_contract.py +132 -0
- datapilot/core/platforms/dbt/insights/checks/check_model_parents_and_childs.py +135 -0
- datapilot/core/platforms/dbt/insights/checks/check_model_parents_database.py +109 -0
- datapilot/core/platforms/dbt/insights/checks/check_model_parents_schema.py +109 -0
- datapilot/core/platforms/dbt/insights/checks/check_model_tags.py +87 -0
- datapilot/core/platforms/dbt/insights/checks/check_source_childs.py +97 -0
- datapilot/core/platforms/dbt/insights/checks/check_source_columns_have_desc.py +96 -0
- datapilot/core/platforms/dbt/insights/checks/check_source_has_all_columns.py +103 -0
- datapilot/core/platforms/dbt/insights/checks/check_source_has_freshness.py +94 -0
- datapilot/core/platforms/dbt/insights/checks/check_source_has_labels_keys.py +110 -0
- datapilot/core/platforms/dbt/insights/checks/check_source_has_loader.py +62 -0
- datapilot/core/platforms/dbt/insights/checks/check_source_has_meta_keys.py +117 -0
- datapilot/core/platforms/dbt/insights/checks/check_source_has_tests.py +82 -0
- datapilot/core/platforms/dbt/insights/checks/check_source_has_tests_by_group.py +117 -0
- datapilot/core/platforms/dbt/insights/checks/check_source_has_tests_by_name.py +113 -0
- datapilot/core/platforms/dbt/insights/checks/check_source_has_tests_by_type.py +119 -0
- datapilot/core/platforms/dbt/insights/checks/check_source_table_has_description.py +62 -0
- datapilot/core/platforms/dbt/insights/checks/check_source_tags.py +76 -0
- datapilot/core/platforms/dbt/insights/dbt_test/__init__.py +0 -0
- datapilot/core/platforms/dbt/insights/dbt_test/base.py +23 -0
- datapilot/core/platforms/dbt/insights/dbt_test/missing_primary_key_tests.py +130 -0
- datapilot/core/platforms/dbt/insights/dbt_test/test_coverage.py +118 -0
- datapilot/core/platforms/dbt/insights/governance/__init__.py +0 -0
- datapilot/core/platforms/dbt/insights/governance/base.py +23 -0
- datapilot/core/platforms/dbt/insights/governance/documentation_on_stale_columns.py +130 -0
- datapilot/core/platforms/dbt/insights/governance/exposures_dependent_on_private_models.py +90 -0
- datapilot/core/platforms/dbt/insights/governance/public_models_without_contracts.py +89 -0
- datapilot/core/platforms/dbt/insights/governance/undocumented_columns.py +148 -0
- datapilot/core/platforms/dbt/insights/governance/undocumented_public_models.py +110 -0
- datapilot/core/platforms/dbt/insights/modelling/README.md +15 -0
- datapilot/core/platforms/dbt/insights/modelling/__init__.py +0 -0
- datapilot/core/platforms/dbt/insights/modelling/base.py +31 -0
- datapilot/core/platforms/dbt/insights/modelling/direct_join_to_source.py +125 -0
- datapilot/core/platforms/dbt/insights/modelling/downstream_models_dependent_on_source.py +113 -0
- datapilot/core/platforms/dbt/insights/modelling/duplicate_sources.py +85 -0
- datapilot/core/platforms/dbt/insights/modelling/hard_coded_references.py +80 -0
- datapilot/core/platforms/dbt/insights/modelling/joining_of_upstream_concepts.py +79 -0
- datapilot/core/platforms/dbt/insights/modelling/model_fanout.py +126 -0
- datapilot/core/platforms/dbt/insights/modelling/multiple_sources_joined.py +83 -0
- datapilot/core/platforms/dbt/insights/modelling/root_model.py +82 -0
- datapilot/core/platforms/dbt/insights/modelling/source_fanout.py +102 -0
- datapilot/core/platforms/dbt/insights/modelling/staging_model_dependent_on_downstream_models.py +103 -0
- datapilot/core/platforms/dbt/insights/modelling/staging_model_dependent_on_staging_models.py +89 -0
- datapilot/core/platforms/dbt/insights/modelling/unused_sources.py +59 -0
- datapilot/core/platforms/dbt/insights/performance/__init__.py +0 -0
- datapilot/core/platforms/dbt/insights/performance/base.py +26 -0
- datapilot/core/platforms/dbt/insights/performance/chain_view_linking.py +92 -0
- datapilot/core/platforms/dbt/insights/performance/exposure_parent_materializations.py +104 -0
- datapilot/core/platforms/dbt/insights/schema.py +72 -0
- datapilot/core/platforms/dbt/insights/structure/__init__.py +0 -0
- datapilot/core/platforms/dbt/insights/structure/base.py +33 -0
- datapilot/core/platforms/dbt/insights/structure/model_directories_structure.py +92 -0
- datapilot/core/platforms/dbt/insights/structure/model_naming_conventions.py +97 -0
- datapilot/core/platforms/dbt/insights/structure/source_directories_structure.py +80 -0
- datapilot/core/platforms/dbt/insights/structure/test_directory_structure.py +74 -0
- datapilot/core/platforms/dbt/insights/utils.py +9 -0
- datapilot/core/platforms/dbt/schemas/__init__.py +0 -0
- datapilot/core/platforms/dbt/schemas/catalog.py +73 -0
- datapilot/core/platforms/dbt/schemas/manifest.py +462 -0
- datapilot/core/platforms/dbt/utils.py +525 -0
- datapilot/core/platforms/dbt/wrappers/__init__.py +0 -0
- datapilot/core/platforms/dbt/wrappers/catalog/__init__.py +0 -0
- datapilot/core/platforms/dbt/wrappers/catalog/v1/__init__.py +0 -0
- datapilot/core/platforms/dbt/wrappers/catalog/v1/wrapper.py +18 -0
- datapilot/core/platforms/dbt/wrappers/catalog/wrapper.py +9 -0
- datapilot/core/platforms/dbt/wrappers/manifest/__init__.py +0 -0
- datapilot/core/platforms/dbt/wrappers/manifest/v11/__init__.py +0 -0
- datapilot/core/platforms/dbt/wrappers/manifest/v11/schemas.py +47 -0
- datapilot/core/platforms/dbt/wrappers/manifest/v11/wrapper.py +396 -0
- datapilot/core/platforms/dbt/wrappers/manifest/wrapper.py +35 -0
- datapilot/core/platforms/dbt/wrappers/run_results/__init__.py +0 -0
- datapilot/core/platforms/dbt/wrappers/run_results/run_results.py +39 -0
- datapilot/exceptions/__init__.py +0 -0
- datapilot/exceptions/exceptions.py +10 -0
- datapilot/schemas/__init__.py +0 -0
- datapilot/schemas/constants.py +5 -0
- datapilot/schemas/nodes.py +19 -0
- datapilot/schemas/sql.py +10 -0
- datapilot/utils/__init__.py +0 -0
- datapilot/utils/formatting/__init__.py +0 -0
- datapilot/utils/formatting/utils.py +59 -0
- datapilot/utils/utils.py +317 -0
@@ -0,0 +1,20 @@
|
|
1
|
+
from typing import Dict
|
2
|
+
from typing import Optional
|
3
|
+
|
4
|
+
from datapilot.core.insights.schema import Severity
|
5
|
+
from datapilot.schemas.constants import CONFIG_METRICS
|
6
|
+
from datapilot.schemas.constants import CONFIG_SEVERITY
|
7
|
+
|
8
|
+
|
9
|
+
def get_severity(
    config: Optional[Dict],
    alias: str,
    default_severity: Severity,
):
    """
    Resolve the severity configured for an insight, falling back to a default.

    The value is read from ``config[CONFIG_METRICS][alias][CONFIG_SEVERITY]``.

    :param config: Configuration mapping, or ``None`` when no config was supplied.
    :param alias: Key identifying the insight inside the ``CONFIG_METRICS`` section.
    :param default_severity: Value returned when the config does not override it.
    :return: The configured value when present, otherwise ``default_severity``.
        The configured value is returned as stored; it is not converted here.
    """
    if config is None:
        return default_severity

    # A section that exists but is mapped to ``None`` (or another empty value)
    # is treated like a missing section instead of raising AttributeError on
    # the chained ``.get`` calls.
    insights = config.get(CONFIG_METRICS) or {}
    metric = insights.get(alias) or {}
    return metric.get(CONFIG_SEVERITY, default_severity)
|
File without changes
|
File without changes
|
File without changes
|
@@ -0,0 +1,112 @@
|
|
1
|
+
import logging
|
2
|
+
|
3
|
+
import click
|
4
|
+
|
5
|
+
from datapilot.clients.altimate.utils import check_token_and_instance
|
6
|
+
from datapilot.clients.altimate.utils import onboard_manifest
|
7
|
+
from datapilot.clients.altimate.utils import validate_credentials
|
8
|
+
from datapilot.config.config import load_config
|
9
|
+
from datapilot.core.platforms.dbt.constants import MODEL
|
10
|
+
from datapilot.core.platforms.dbt.constants import PROJECT
|
11
|
+
from datapilot.core.platforms.dbt.executor import DBTInsightGenerator
|
12
|
+
from datapilot.core.platforms.dbt.formatting import generate_model_insights_table
|
13
|
+
from datapilot.core.platforms.dbt.formatting import generate_project_insights_table
|
14
|
+
from datapilot.core.platforms.dbt.utils import load_catalog
|
15
|
+
from datapilot.core.platforms.dbt.utils import load_manifest
|
16
|
+
from datapilot.utils.formatting.utils import tabulate_data
|
17
|
+
|
18
|
+
# Configure root logging once, at import time, so INFO records are emitted.
logging.basicConfig(level=logging.INFO)


# Parent click group; the dbt sub-commands in this module are registered on it
# through ``@dbt.command(...)``. The docstring doubles as the CLI help text.
@click.group()
def dbt():
    """DBT specific commands."""
|
25
|
+
|
26
|
+
|
27
|
+
@dbt.command("project-health")
@click.option(
    "--manifest-path",
    required=True,
    help="Path to the DBT manifest file",
)
@click.option(
    "--catalog-path",
    required=False,
    help="Path to the DBT catalog file",
)
@click.option(
    "--config-path",
    required=False,
    help="Path to the DBT config file",
)
@click.option(
    "--select",
    required=False,
    default=None,
    help="Selective model testing. Specify one or more models to run tests on.",
)
def project_health(manifest_path, catalog_path, config_path=None, select=None):
    """
    Validate the DBT project's configuration and structure.
    :param manifest_path: Path to the DBT manifest file.
    """
    # NOTE: the docstring above is what click prints as the command help, so it
    # is left untouched; the remaining parameters are documented here instead.
    #   catalog_path: optional catalog file; when omitted no catalog is loaded.
    #   config_path:  optional config file handed to load_config.
    #   select:       whitespace-separated model selectors.
    config = load_config(config_path) if config_path else None

    # split() without an argument collapses runs of whitespace and ignores
    # leading/trailing blanks, so stray spaces never become empty selectors
    # (split(" ") would yield "" entries for them).
    selected_models = select.split() if select else []

    manifest = load_manifest(manifest_path)
    catalog = load_catalog(catalog_path) if catalog_path else None
    insight_generator = DBTInsightGenerator(
        manifest=manifest,
        catalog=catalog,
        config=config,
        selected_models=selected_models,
    )
    reports = insight_generator.run()

    package_insights = reports[PROJECT]
    model_insights = reports[MODEL]
    model_report = generate_model_insights_table(model_insights)

    separator = "--" * 50

    # One table per model that has at least one insight.
    if model_report:
        click.echo(separator)
        click.echo("Model Insights")
        click.echo(separator)
        for model_id, report in model_report.items():
            click.echo(f"Model: {model_id}")
            click.echo(f"File path: {report['path']}")
            click.echo(tabulate_data(report["table"], headers="keys"))
            click.echo("\n")

    # A single table for all project-level insights.
    if package_insights:
        project_report = generate_project_insights_table(package_insights)
        click.echo(separator)
        click.echo("Project Insights")
        click.echo(separator)
        click.echo(tabulate_data(project_report, headers="keys"))
|
84
|
+
|
85
|
+
|
86
|
+
@dbt.command("onboard")
@click.option(
    "--token",
    prompt="API Token",
    help="Your API token for authentication.",
)
@click.option(
    "--instance-name",
    prompt="Instance Name",
    help="Your tenant ID.",
)
@click.option(
    "--dbt_core_integration_id",
    prompt="DBT Core Integration ID",
    help="DBT Core Integration ID",
)
@click.option(
    "--manifest-path",
    required=True,
    prompt="Manifest Path",
    help="Path to the manifest file.",
)
@click.option(
    "--backend-url",
    required=False,
    help="Altimate's Backend URL",
    default="https://api.myaltimate.com",
)
def onboard(token, instance_name, dbt_core_integration_id, manifest_path, backend_url="https://api.myaltimate.com", env=None):
    """Onboard a manifest file to DBT."""
    # Steps: check token/instance, validate credentials against the backend,
    # make sure the manifest loads locally, then upload it. Each failure is
    # reported with click.echo and the command returns without raising.
    # ``env`` is accepted for compatibility but is not read in this function.
    check_token_and_instance(token, instance_name)

    credentials_ok = validate_credentials(token, backend_url, instance_name)
    if not credentials_ok:
        click.echo("Error: Invalid credentials.")
        return

    # Loading is done purely as validation: an unreadable or malformed
    # manifest raises here, before anything is sent to the backend.
    try:
        load_manifest(manifest_path)
    except Exception as e:
        click.echo(f"Error: {e}")
        return

    response = onboard_manifest(token, instance_name, dbt_core_integration_id, manifest_path, backend_url)

    outcome = "Manifest onboarded successfully!" if response["ok"] else f"{response['message']}"
    click.echo(outcome)
|
@@ -0,0 +1,34 @@
|
|
1
|
+
# Resource / node kinds
SEED = "seed"
MACRO = "macro"
TEST = "test"
MODEL = "model"
SOURCE = "source"

# Additional kind labels (PROJECT and MODEL are used as report keys by the
# insight generator)
PROJECT = "project"
SQL = "sql"

# Model types
MART: str = "mart"
STAGING = "staging"
INTERMEDIATE = "intermediate"
BASE = "base"
OTHER = "other"

# Materialization strategies
TABLE = "table"
INCREMENTAL = "incremental"
VIEW = "view"
EPHEMERAL = "ephemeral"

# Materializations grouped into two lists
MATERIALIZED = [TABLE, INCREMENTAL]
NON_MATERIALIZED = [VIEW, EPHEMERAL]

# Test node categories
GENERIC = "generic"
SINGULAR = "singular"
OTHER_TEST_NODE = "other_test_node"

FOLDER = "folder"
|
@@ -0,0 +1,157 @@
|
|
1
|
+
import logging
|
2
|
+
|
3
|
+
# from src.utils.formatting.utils import generate_model_insights_table
|
4
|
+
from typing import Dict
|
5
|
+
from typing import List
|
6
|
+
from typing import Optional
|
7
|
+
|
8
|
+
from datapilot.core.platforms.dbt.constants import MODEL
|
9
|
+
from datapilot.core.platforms.dbt.constants import PROJECT
|
10
|
+
from datapilot.core.platforms.dbt.exceptions import AltimateCLIArgumentError
|
11
|
+
from datapilot.core.platforms.dbt.factory import DBTFactory
|
12
|
+
from datapilot.core.platforms.dbt.insights import INSIGHTS
|
13
|
+
from datapilot.core.platforms.dbt.schemas.manifest import Catalog
|
14
|
+
from datapilot.core.platforms.dbt.schemas.manifest import Manifest
|
15
|
+
from datapilot.core.platforms.dbt.utils import get_models
|
16
|
+
from datapilot.utils.formatting.utils import RED
|
17
|
+
from datapilot.utils.formatting.utils import YELLOW
|
18
|
+
from datapilot.utils.formatting.utils import color_text
|
19
|
+
|
20
|
+
|
21
|
+
class DBTInsightGenerator:
    """
    Run every insight registered in ``INSIGHTS`` against a dbt manifest.

    The constructor wraps the manifest (and the catalog, when given), caches the
    entity collections the insights consume and resolves the model selection.
    ``run`` then instantiates each insight and gathers its results.
    """

    def __init__(
        self,
        manifest: Manifest,
        catalog: Optional[Catalog] = None,
        run_results_path: Optional[str] = None,
        env: Optional[str] = None,
        config: Optional[Dict] = None,
        target: str = "dev",
        selected_models: Optional[List[str]] = None,
        selected_model_ids: Optional[List[str]] = None,
    ):
        """
        :param manifest: Parsed manifest; wrapped through ``DBTFactory``.
        :param catalog: Optional parsed catalog; wrapped through ``DBTFactory`` when truthy.
        :param run_results_path: Stored on the instance; not read elsewhere in this class.
        :param env: Stored on the instance; not read elsewhere in this class.
        :param config: Configuration mapping; ``None`` is replaced by an empty dict.
        :param target: Stored on the instance; not read elsewhere in this class.
        :param selected_models: Selection patterns resolved to models with ``get_models``.
        :param selected_model_ids: Already-resolved model ids; takes precedence over
            ``selected_models``.
        :raises AltimateCLIArgumentError: if ``selected_models`` resolves to no models.
        """
        self.run_results_path = run_results_path
        self.target = target
        self.env = env
        self.config = config or {}

        self.manifest_wrapper = DBTFactory.get_manifest_wrapper(manifest)
        self.manifest_present = True
        self.catalog_present = False
        self.catalog_wrapper = None

        if catalog:
            self.catalog_wrapper = DBTFactory.get_catalog_wrapper(catalog)
            self.catalog_present = True

        # Run results are never loaded by this class, so the flag stays False.
        self.run_results_present = False
        self.logger = logging.getLogger("dbt-insight-generator")

        self.nodes = self.manifest_wrapper.get_nodes()
        self.macros = self.manifest_wrapper.get_macros()
        self.sources = self.manifest_wrapper.get_sources()
        self.exposures = self.manifest_wrapper.get_exposures()
        self.seeds = self.manifest_wrapper.get_seeds()
        self.children_map = self.manifest_wrapper.parent_to_child_map(self.nodes)
        self.tests = self.manifest_wrapper.get_tests()
        self.project_name = self.manifest_wrapper.get_package()
        self.selected_models = None
        self.selected_models_flag = False
        entities = {
            "nodes": self.nodes,
            "sources": self.sources,
            "exposures": self.exposures,
            "tests": self.tests,
        }
        if selected_model_ids:
            # Explicit ids win and are used as-is, without pattern resolution.
            self.selected_models_flag = True
            self.selected_models = selected_model_ids
        elif selected_models:
            self.selected_models_flag = True
            self.selected_models = get_models(
                selected_models,
                entities=entities,
            )
            # A selection that matches nothing is reported as a usage error
            # rather than silently producing an empty run.
            if not self.selected_models:
                raise AltimateCLIArgumentError(
                    f"Invalid values provided in the --select argument. Could not find models associated with pattern: --select {' '.join(selected_models)}"
                )
        self.excluded_models = None
        self.excluded_models_flag = False

    def _check_if_skipped(self, insight):
        """
        Tell whether ``insight`` is listed under ``disabled_insights`` in the config.

        :param insight: Object exposing an ``ALIAS`` attribute.
        :return: ``True`` when the alias is disabled, ``False`` otherwise.
        """
        disabled = self.config.get("disabled_insights", False)
        return bool(disabled) and insight.ALIAS in disabled

    def run(self):
        """
        Execute all registered insights and collect their results.

        An insight is skipped (with a log line) when its required artifacts are
        unavailable or when it is disabled in the config. An exception raised
        while generating one insight is logged and does not stop the others.

        :return: Dict with two keys: ``MODEL`` maps a model's ``unique_id`` to the
            list of its model-level results, ``PROJECT`` is a list of
            project-level results.
        """
        reports = {
            MODEL: {},
            PROJECT: [],
        }
        for insight_class in INSIGHTS:
            run_insight, message = insight_class.has_all_required_data(
                has_manifest=self.manifest_present,
                has_catalog=self.catalog_present,
                has_run_results=self.run_results_present,
            )

            if not run_insight:
                self.logger.info(color_text(f"Skipping insight {insight_class.NAME} as {message}", YELLOW))
                continue

            self.logger.info(f"Running insight {insight_class.NAME}")
            insight = insight_class(
                manifest_wrapper=self.manifest_wrapper,
                catalog_wrapper=self.catalog_wrapper,
                nodes=self.nodes,
                macros=self.macros,
                sources=self.sources,
                seeds=self.seeds,
                exposures=self.exposures,
                children_map=self.children_map,
                tests=self.tests,
                project_name=self.project_name,
                config=self.config,
                selected_models=self.selected_models,
                excluded_models=self.excluded_models,
            )

            if self._check_if_skipped(insight):
                self.logger.info(
                    color_text(
                        f"Skipping insight {insight_class.NAME} as it is not enabled in config",
                        YELLOW,
                    )
                )
                continue

            try:
                results = insight.generate()
                num_insights = len(results)
                text = f"Found {num_insights} insights for {insight_class.NAME}"
                if num_insights > 0:
                    self.logger.info(color_text(text, RED))
                else:
                    self.logger.info(f"No insights found for {insight_class.NAME}")

                # Results are routed by level only; no filtering by selected
                # models happens here (the selection is passed to the insight).
                for result in results:
                    if result.insight_level == MODEL:
                        reports[MODEL].setdefault(result.unique_id, []).append(result)
                    elif result.insight_level == PROJECT:
                        reports[PROJECT].append(result)

            except Exception as e:
                # Best effort: one failing insight must not abort the whole run.
                self.logger.info(
                    color_text(
                        f"Error running insight {insight_class.NAME}: {e}. Skipping insight. {message}",
                        RED,
                    )
                )

        return reports
|
@@ -0,0 +1,22 @@
|
|
1
|
+
from dbt_artifacts_parser.parsers.catalog.catalog_v1 import CatalogV1
|
2
|
+
from dbt_artifacts_parser.parsers.manifest.manifest_v11 import ManifestV11
|
3
|
+
|
4
|
+
from datapilot.core.platforms.dbt.schemas.manifest import Catalog
|
5
|
+
from datapilot.core.platforms.dbt.schemas.manifest import Manifest
|
6
|
+
from datapilot.core.platforms.dbt.wrappers.catalog.v1.wrapper import CatalogV1Wrapper
|
7
|
+
from datapilot.core.platforms.dbt.wrappers.manifest.v11.wrapper import ManifestV11Wrapper
|
8
|
+
from datapilot.exceptions.exceptions import AltimateNotSupportedError
|
9
|
+
|
10
|
+
|
11
|
+
class DBTFactory:
    """Factory that picks the wrapper class matching a parsed dbt artifact."""

    @classmethod
    def get_manifest_wrapper(cls, manifest: Manifest):
        """
        Wrap a parsed manifest in its version-specific wrapper.

        :param manifest: Parsed manifest object.
        :return: ``ManifestV11Wrapper`` for a ``ManifestV11`` instance.
        :raises AltimateNotSupportedError: for any other manifest type.
        """
        if not isinstance(manifest, ManifestV11):
            raise AltimateNotSupportedError(f"Manifest version {manifest.metadata.dbt_schema_version} not supported")
        return ManifestV11Wrapper(manifest)

    @classmethod
    def get_catalog_wrapper(cls, catalog: Catalog):
        """
        Wrap a parsed catalog in its version-specific wrapper.

        :param catalog: Parsed catalog object.
        :return: ``CatalogV1Wrapper`` for a ``CatalogV1`` instance.
        :raises AltimateNotSupportedError: for any other catalog type.
        """
        if not isinstance(catalog, CatalogV1):
            raise AltimateNotSupportedError(f"Catalog version {catalog.metadata.dbt_schema_version} not supported")
        return CatalogV1Wrapper(catalog)
|
@@ -0,0 +1,45 @@
|
|
1
|
+
from typing import Dict
|
2
|
+
from typing import List
|
3
|
+
|
4
|
+
from datapilot.core.insights.schema import InsightResult
|
5
|
+
from datapilot.core.insights.schema import Severity
|
6
|
+
from datapilot.core.platforms.dbt.insights.schema import DBTModelInsightResponse
|
7
|
+
from datapilot.core.platforms.dbt.insights.schema import DBTProjectInsightResponse
|
8
|
+
from datapilot.utils.formatting.utils import color_based_on_severity
|
9
|
+
|
10
|
+
|
11
|
+
def gen_table(insight: InsightResult, severity: Severity) -> Dict[str, str]:
    """
    Flatten one insight into a table row.

    :param insight: Insight whose fields populate the row.
    :param severity: Severity passed to ``color_based_on_severity`` for the ``level`` cell.
    :return: Row dict; key order defines the column order when tabulated by keys.
    """
    row = dict(
        name=insight.name,
        type=insight.type,
        level=color_based_on_severity(severity),
        message=insight.message,
        recommendation=insight.recommendation,
        reason_to_flag=insight.reason_to_flag,
    )
    return row
|
20
|
+
|
21
|
+
|
22
|
+
def generate_model_insights_table(model_insights: Dict[str, List[DBTModelInsightResponse]]):
    """
    Group model-level insights into one printable report per model.

    :param model_insights: Mapping of model id to the list of its insight responses.
    :return: Mapping of model id to a dict with ``package_name``, ``unique_id``,
        ``path`` (all taken from the model's first response) and ``table``
        (one row per response). Models with no responses are omitted.
    """
    results = {}

    for model_id, responses in model_insights.items():
        if not responses:
            continue

        # The model's metadata is identical for the whole group, so it is read
        # from the first response only.
        head = responses[0]
        results[model_id] = {
            "package_name": head.package_name,
            "unique_id": head.unique_id,
            "path": head.original_file_path,
            "table": [gen_table(response.insight, response.severity) for response in responses],
        }

    return results
|
37
|
+
|
38
|
+
|
39
|
+
def generate_project_insights_table(project_insights: List[DBTProjectInsightResponse]):
    """
    Flatten project-level insight responses into a single list of table rows.

    :param project_insights: Responses, each carrying ``insights`` and a ``severity``.
    :return: One row per contained insight, in input order; every row uses the
        severity of the response it came from.
    """
    return [
        gen_table(entry, response.severity)
        for response in project_insights
        for entry in response.insights
    ]
|
File without changes
|
@@ -0,0 +1,86 @@
|
|
1
|
+
import argparse
|
2
|
+
import time
|
3
|
+
from typing import Optional
|
4
|
+
from typing import Sequence
|
5
|
+
|
6
|
+
from datapilot.config.config import load_config
|
7
|
+
from datapilot.core.platforms.dbt.constants import MODEL
|
8
|
+
from datapilot.core.platforms.dbt.constants import PROJECT
|
9
|
+
from datapilot.core.platforms.dbt.executor import DBTInsightGenerator
|
10
|
+
from datapilot.core.platforms.dbt.formatting import generate_model_insights_table
|
11
|
+
from datapilot.core.platforms.dbt.formatting import generate_project_insights_table
|
12
|
+
from datapilot.utils.formatting.utils import tabulate_data
|
13
|
+
from datapilot.utils.utils import generate_partial_manifest_catalog
|
14
|
+
|
15
|
+
|
16
|
+
def main(argv: Optional[Sequence[str]] = None):
    """
    Hook entry point: generate insights for the files passed on the command line.

    Recognised options are ``--config-path`` and ``--base-path``; every argument
    the parser does not recognise is treated as a changed file.

    :param argv: Argument list; ``None`` lets argparse read ``sys.argv``.
    :return: ``None``. Returns early when no changed files were given.
    :raises SystemExit: with status 1 after the reports have been printed.
    """
    start_time = time.time()
    parser = argparse.ArgumentParser()
    # NOTE(review): with nargs="*" argparse collects every following
    # non-option argument into the option's list, so file names placed directly
    # after "--config-path X" or "--base-path Y" end up in that list instead of
    # the leftover arguments. Only the first element of each list is used.
    parser.add_argument(
        "--config-path",
        nargs="*",
        help="Path of the config file to be used for the insight generation",
    )

    parser.add_argument(
        "--base-path",
        nargs="*",
        help="Base path of the dbt project",
    )

    # parse_known_args returns (namespace, leftovers); the namespace always has
    # both attributes (None when the option is absent), so no hasattr is needed.
    known_args, changed_files = parser.parse_known_args(argv)

    config = {}
    if known_args.config_path:
        config = load_config(known_args.config_path[0])

    base_path = "./"
    if known_args.base_path:
        base_path = known_args.base_path[0]

    if not changed_files:
        return

    selected_models, manifest, catalog = generate_partial_manifest_catalog(changed_files, base_path=base_path)
    insight_generator = DBTInsightGenerator(
        manifest=manifest,
        catalog=catalog,
        config=config,
        selected_model_ids=selected_models,
    )
    reports = insight_generator.run()
    if reports:
        separator = "--" * 50

        model_report = generate_model_insights_table(reports[MODEL])
        if model_report:
            print(separator)
            print("Model Insights")
            print(separator)
            for model_id, report in model_report.items():
                print(f"Model: {model_id}")
                print(f"File path: {report['path']}")
                print(tabulate_data(report["table"], headers="keys"))
                print("\n")

        project_report = generate_project_insights_table(reports[PROJECT])
        if project_report:
            print(separator)
            print("Project Insights")
            print(separator)
            print(tabulate_data(project_report, headers="keys"))

        # NOTE(review): exits non-zero whenever a reports dict was produced,
        # even if both tables are empty - confirm this is the intended hook
        # behaviour. SystemExit is raised directly because the ``exit`` helper
        # is injected by the ``site`` module and is meant for interactive use.
        raise SystemExit(1)

    end_time = time.time()
    total_time = end_time - start_time
    print(f"Total time taken: {round(total_time, 2)} seconds")
|
83
|
+
|
84
|
+
|
85
|
+
# Script entry point. SystemExit is raised directly instead of calling the
# ``exit`` helper, which is provided by the ``site`` module for interactive
# sessions and may be absent (e.g. ``python -S``). A ``None`` return value from
# main() still maps to exit status 0.
if __name__ == "__main__":
    raise SystemExit(main())
|
@@ -0,0 +1,115 @@
|
|
1
|
+
from datapilot.core.platforms.dbt.insights.checks.check_column_desc_are_same import CheckColumnDescAreSame
|
2
|
+
from datapilot.core.platforms.dbt.insights.checks.check_column_name_contract import CheckColumnNameContract
|
3
|
+
from datapilot.core.platforms.dbt.insights.checks.check_macro_args_have_desc import CheckMacroArgsHaveDesc
|
4
|
+
from datapilot.core.platforms.dbt.insights.checks.check_macro_has_desc import CheckMacroHasDesc
|
5
|
+
from datapilot.core.platforms.dbt.insights.checks.check_model_has_all_columns import CheckModelHasAllColumns
|
6
|
+
from datapilot.core.platforms.dbt.insights.checks.check_model_has_meta_keys import CheckModelHasMetaKeys
|
7
|
+
from datapilot.core.platforms.dbt.insights.checks.check_model_has_properties_file import CheckModelHasPropertiesFile
|
8
|
+
from datapilot.core.platforms.dbt.insights.checks.check_model_has_tests_by_group import CheckModelHasTestsByGroup
|
9
|
+
from datapilot.core.platforms.dbt.insights.checks.check_model_has_tests_by_name import CheckModelHasTestsByName
|
10
|
+
from datapilot.core.platforms.dbt.insights.checks.check_model_has_tests_by_type import CheckModelHasTestsByType
|
11
|
+
from datapilot.core.platforms.dbt.insights.checks.check_model_materialization_by_childs import CheckModelMaterializationByChilds
|
12
|
+
from datapilot.core.platforms.dbt.insights.checks.check_model_name_contract import CheckModelNameContract
|
13
|
+
from datapilot.core.platforms.dbt.insights.checks.check_model_parents_and_childs import CheckModelParentsAndChilds
|
14
|
+
from datapilot.core.platforms.dbt.insights.checks.check_model_parents_database import CheckModelParentsDatabase
|
15
|
+
from datapilot.core.platforms.dbt.insights.checks.check_model_parents_schema import CheckModelParentsSchema
|
16
|
+
from datapilot.core.platforms.dbt.insights.checks.check_model_tags import CheckModelTags
|
17
|
+
from datapilot.core.platforms.dbt.insights.checks.check_source_childs import CheckSourceChilds
|
18
|
+
from datapilot.core.platforms.dbt.insights.checks.check_source_columns_have_desc import CheckSourceColumnsHaveDescriptions
|
19
|
+
from datapilot.core.platforms.dbt.insights.checks.check_source_has_all_columns import CheckSourceHasAllColumns
|
20
|
+
from datapilot.core.platforms.dbt.insights.checks.check_source_has_freshness import CheckSourceHasFreshness
|
21
|
+
from datapilot.core.platforms.dbt.insights.checks.check_source_has_loader import CheckSourceHasLoader
|
22
|
+
from datapilot.core.platforms.dbt.insights.checks.check_source_has_meta_keys import CheckSourceHasMetaKeys
|
23
|
+
from datapilot.core.platforms.dbt.insights.checks.check_source_has_tests import CheckSourceHasTests
|
24
|
+
from datapilot.core.platforms.dbt.insights.checks.check_source_has_tests_by_group import CheckSourceHasTestsByGroup
|
25
|
+
from datapilot.core.platforms.dbt.insights.checks.check_source_has_tests_by_name import CheckSourceHasTestsByName
|
26
|
+
from datapilot.core.platforms.dbt.insights.checks.check_source_has_tests_by_type import CheckSourceHasTestsByType
|
27
|
+
from datapilot.core.platforms.dbt.insights.checks.check_source_table_has_description import CheckSourceTableHasDescription
|
28
|
+
from datapilot.core.platforms.dbt.insights.checks.check_source_tags import CheckSourceTags
|
29
|
+
from datapilot.core.platforms.dbt.insights.dbt_test.missing_primary_key_tests import MissingPrimaryKeyTests
|
30
|
+
from datapilot.core.platforms.dbt.insights.dbt_test.test_coverage import DBTTestCoverage
|
31
|
+
from datapilot.core.platforms.dbt.insights.governance.documentation_on_stale_columns import DBTDocumentationStaleColumns
|
32
|
+
from datapilot.core.platforms.dbt.insights.governance.exposures_dependent_on_private_models import DBTExposureDependentOnPrivateModels
|
33
|
+
from datapilot.core.platforms.dbt.insights.governance.public_models_without_contracts import DBTPublicModelWithoutContracts
|
34
|
+
from datapilot.core.platforms.dbt.insights.governance.undocumented_columns import DBTMissingDocumentation
|
35
|
+
from datapilot.core.platforms.dbt.insights.governance.undocumented_public_models import DBTUndocumentedPublicModels
|
36
|
+
from datapilot.core.platforms.dbt.insights.modelling.direct_join_to_source import DBTDirectJoinSource
|
37
|
+
from datapilot.core.platforms.dbt.insights.modelling.downstream_models_dependent_on_source import DBTDownstreamModelsDependentOnSource
|
38
|
+
from datapilot.core.platforms.dbt.insights.modelling.duplicate_sources import DBTDuplicateSources
|
39
|
+
from datapilot.core.platforms.dbt.insights.modelling.hard_coded_references import DBTHardCodedReferences
|
40
|
+
from datapilot.core.platforms.dbt.insights.modelling.joining_of_upstream_concepts import DBTRejoiningOfUpstreamConcepts
|
41
|
+
from datapilot.core.platforms.dbt.insights.modelling.model_fanout import DBTModelFanout
|
42
|
+
from datapilot.core.platforms.dbt.insights.modelling.multiple_sources_joined import DBTModelsMultipleSourcesJoined
|
43
|
+
from datapilot.core.platforms.dbt.insights.modelling.root_model import DBTRootModel
|
44
|
+
from datapilot.core.platforms.dbt.insights.modelling.source_fanout import DBTSourceFanout
|
45
|
+
from datapilot.core.platforms.dbt.insights.modelling.staging_model_dependent_on_downstream_models import (
|
46
|
+
DBTStagingModelsDependentOnDownstreamModels,
|
47
|
+
)
|
48
|
+
from datapilot.core.platforms.dbt.insights.modelling.staging_model_dependent_on_staging_models import (
|
49
|
+
DBTStagingModelsDependentOnStagingModels,
|
50
|
+
)
|
51
|
+
from datapilot.core.platforms.dbt.insights.modelling.unused_sources import DBTUnusedSources
|
52
|
+
from datapilot.core.platforms.dbt.insights.performance.chain_view_linking import DBTChainViewLinking
|
53
|
+
from datapilot.core.platforms.dbt.insights.performance.exposure_parent_materializations import DBTExposureParentMaterialization
|
54
|
+
from datapilot.core.platforms.dbt.insights.structure.model_directories_structure import DBTModelDirectoryStructure
|
55
|
+
from datapilot.core.platforms.dbt.insights.structure.model_naming_conventions import DBTModelNamingConvention
|
56
|
+
from datapilot.core.platforms.dbt.insights.structure.source_directories_structure import DBTSourceDirectoryStructure
|
57
|
+
from datapilot.core.platforms.dbt.insights.structure.test_directory_structure import DBTTestDirectoryStructure
|
58
|
+
|
59
|
+
# Registry of the insight classes exposed by this module.
# Element order is kept exactly as originally declared, in case any consumer
# depends on it (not verifiable from this module alone).
INSIGHTS = [
    # --- modelling insights ---
    DBTDirectJoinSource,
    DBTDownstreamModelsDependentOnSource,
    DBTDuplicateSources,
    DBTModelFanout,
    DBTRootModel,
    DBTSourceFanout,
    DBTStagingModelsDependentOnDownstreamModels,
    DBTStagingModelsDependentOnStagingModels,
    DBTUnusedSources,
    DBTModelsMultipleSourcesJoined,
    DBTHardCodedReferences,
    DBTRejoiningOfUpstreamConcepts,
    # --- governance insights (exposures / public models) ---
    DBTExposureDependentOnPrivateModels,
    DBTUndocumentedPublicModels,
    DBTPublicModelWithoutContracts,
    # --- performance insights ---
    DBTChainViewLinking,
    DBTExposureParentMaterialization,
    # --- governance insights (documentation) ---
    DBTMissingDocumentation,
    DBTDocumentationStaleColumns,
    # --- dbt test insights ---
    MissingPrimaryKeyTests,
    DBTTestCoverage,
    # --- project structure insights ---
    DBTModelDirectoryStructure,
    DBTModelNamingConvention,
    DBTSourceDirectoryStructure,
    DBTTestDirectoryStructure,
    # --- column and macro checks ---
    CheckColumnDescAreSame,
    CheckColumnNameContract,
    CheckMacroArgsHaveDesc,
    CheckMacroHasDesc,
    # --- model checks ---
    CheckModelHasAllColumns,
    # CheckModelHasLabelsKeys,  # NOTE(review): currently not registered
    CheckModelHasMetaKeys,
    CheckModelHasPropertiesFile,
    CheckModelHasTestsByName,
    CheckModelHasTestsByType,
    CheckModelHasTestsByGroup,
    CheckModelMaterializationByChilds,
    CheckModelNameContract,
    CheckModelParentsAndChilds,
    CheckModelParentsDatabase,
    CheckModelParentsSchema,
    CheckModelTags,
    # --- source checks ---
    CheckSourceChilds,
    CheckSourceColumnsHaveDescriptions,
    CheckSourceHasAllColumns,
    CheckSourceHasFreshness,
    # CheckSourceHasLabelsKeys,  # NOTE(review): currently not registered
    CheckSourceHasLoader,
    CheckSourceHasMetaKeys,
    CheckSourceHasTestsByName,
    CheckSourceHasTestsByType,
    CheckSourceHasTestsByGroup,
    CheckSourceHasTests,
    CheckSourceTableHasDescription,
    CheckSourceTags,
]
|