altimate-datapilot-cli 0.0.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (139)
  1. altimate_datapilot_cli-0.0.8.dist-info/AUTHORS.rst +5 -0
  2. altimate_datapilot_cli-0.0.8.dist-info/LICENSE +9 -0
  3. altimate_datapilot_cli-0.0.8.dist-info/METADATA +102 -0
  4. altimate_datapilot_cli-0.0.8.dist-info/RECORD +139 -0
  5. altimate_datapilot_cli-0.0.8.dist-info/WHEEL +5 -0
  6. altimate_datapilot_cli-0.0.8.dist-info/entry_points.txt +4 -0
  7. altimate_datapilot_cli-0.0.8.dist-info/top_level.txt +1 -0
  8. datapilot/__init__.py +1 -0
  9. datapilot/__main__.py +14 -0
  10. datapilot/cli/__init__.py +0 -0
  11. datapilot/cli/main.py +11 -0
  12. datapilot/clients/__init__.py +0 -0
  13. datapilot/clients/altimate/__init__.py +0 -0
  14. datapilot/clients/altimate/client.py +85 -0
  15. datapilot/clients/altimate/utils.py +75 -0
  16. datapilot/config/__init__.py +0 -0
  17. datapilot/config/config.py +16 -0
  18. datapilot/config/utils.py +32 -0
  19. datapilot/core/__init__.py +0 -0
  20. datapilot/core/insights/__init__.py +2 -0
  21. datapilot/core/insights/base/__init__.py +0 -0
  22. datapilot/core/insights/base/insight.py +34 -0
  23. datapilot/core/insights/report.py +16 -0
  24. datapilot/core/insights/schema.py +24 -0
  25. datapilot/core/insights/sql/__init__.py +0 -0
  26. datapilot/core/insights/sql/base/__init__.py +0 -0
  27. datapilot/core/insights/sql/base/insight.py +18 -0
  28. datapilot/core/insights/sql/runtime/__init__.py +0 -0
  29. datapilot/core/insights/sql/static/__init__.py +0 -0
  30. datapilot/core/insights/utils.py +20 -0
  31. datapilot/core/platforms/__init__.py +0 -0
  32. datapilot/core/platforms/dbt/__init__.py +0 -0
  33. datapilot/core/platforms/dbt/cli/__init__.py +0 -0
  34. datapilot/core/platforms/dbt/cli/cli.py +112 -0
  35. datapilot/core/platforms/dbt/constants.py +34 -0
  36. datapilot/core/platforms/dbt/exceptions.py +6 -0
  37. datapilot/core/platforms/dbt/executor.py +157 -0
  38. datapilot/core/platforms/dbt/factory.py +22 -0
  39. datapilot/core/platforms/dbt/formatting.py +45 -0
  40. datapilot/core/platforms/dbt/hooks/__init__.py +0 -0
  41. datapilot/core/platforms/dbt/hooks/executor_hook.py +86 -0
  42. datapilot/core/platforms/dbt/insights/__init__.py +115 -0
  43. datapilot/core/platforms/dbt/insights/base.py +133 -0
  44. datapilot/core/platforms/dbt/insights/checks/__init__.py +0 -0
  45. datapilot/core/platforms/dbt/insights/checks/base.py +26 -0
  46. datapilot/core/platforms/dbt/insights/checks/check_column_desc_are_same.py +105 -0
  47. datapilot/core/platforms/dbt/insights/checks/check_column_name_contract.py +154 -0
  48. datapilot/core/platforms/dbt/insights/checks/check_macro_args_have_desc.py +75 -0
  49. datapilot/core/platforms/dbt/insights/checks/check_macro_has_desc.py +63 -0
  50. datapilot/core/platforms/dbt/insights/checks/check_model_has_all_columns.py +96 -0
  51. datapilot/core/platforms/dbt/insights/checks/check_model_has_labels_keys.py +112 -0
  52. datapilot/core/platforms/dbt/insights/checks/check_model_has_meta_keys.py +108 -0
  53. datapilot/core/platforms/dbt/insights/checks/check_model_has_properties_file.py +64 -0
  54. datapilot/core/platforms/dbt/insights/checks/check_model_has_tests_by_group.py +118 -0
  55. datapilot/core/platforms/dbt/insights/checks/check_model_has_tests_by_name.py +114 -0
  56. datapilot/core/platforms/dbt/insights/checks/check_model_has_tests_by_type.py +119 -0
  57. datapilot/core/platforms/dbt/insights/checks/check_model_materialization_by_childs.py +129 -0
  58. datapilot/core/platforms/dbt/insights/checks/check_model_name_contract.py +132 -0
  59. datapilot/core/platforms/dbt/insights/checks/check_model_parents_and_childs.py +135 -0
  60. datapilot/core/platforms/dbt/insights/checks/check_model_parents_database.py +109 -0
  61. datapilot/core/platforms/dbt/insights/checks/check_model_parents_schema.py +109 -0
  62. datapilot/core/platforms/dbt/insights/checks/check_model_tags.py +87 -0
  63. datapilot/core/platforms/dbt/insights/checks/check_source_childs.py +97 -0
  64. datapilot/core/platforms/dbt/insights/checks/check_source_columns_have_desc.py +96 -0
  65. datapilot/core/platforms/dbt/insights/checks/check_source_has_all_columns.py +103 -0
  66. datapilot/core/platforms/dbt/insights/checks/check_source_has_freshness.py +94 -0
  67. datapilot/core/platforms/dbt/insights/checks/check_source_has_labels_keys.py +110 -0
  68. datapilot/core/platforms/dbt/insights/checks/check_source_has_loader.py +62 -0
  69. datapilot/core/platforms/dbt/insights/checks/check_source_has_meta_keys.py +117 -0
  70. datapilot/core/platforms/dbt/insights/checks/check_source_has_tests.py +82 -0
  71. datapilot/core/platforms/dbt/insights/checks/check_source_has_tests_by_group.py +117 -0
  72. datapilot/core/platforms/dbt/insights/checks/check_source_has_tests_by_name.py +113 -0
  73. datapilot/core/platforms/dbt/insights/checks/check_source_has_tests_by_type.py +119 -0
  74. datapilot/core/platforms/dbt/insights/checks/check_source_table_has_description.py +62 -0
  75. datapilot/core/platforms/dbt/insights/checks/check_source_tags.py +76 -0
  76. datapilot/core/platforms/dbt/insights/dbt_test/__init__.py +0 -0
  77. datapilot/core/platforms/dbt/insights/dbt_test/base.py +23 -0
  78. datapilot/core/platforms/dbt/insights/dbt_test/missing_primary_key_tests.py +130 -0
  79. datapilot/core/platforms/dbt/insights/dbt_test/test_coverage.py +118 -0
  80. datapilot/core/platforms/dbt/insights/governance/__init__.py +0 -0
  81. datapilot/core/platforms/dbt/insights/governance/base.py +23 -0
  82. datapilot/core/platforms/dbt/insights/governance/documentation_on_stale_columns.py +130 -0
  83. datapilot/core/platforms/dbt/insights/governance/exposures_dependent_on_private_models.py +90 -0
  84. datapilot/core/platforms/dbt/insights/governance/public_models_without_contracts.py +89 -0
  85. datapilot/core/platforms/dbt/insights/governance/undocumented_columns.py +148 -0
  86. datapilot/core/platforms/dbt/insights/governance/undocumented_public_models.py +110 -0
  87. datapilot/core/platforms/dbt/insights/modelling/README.md +15 -0
  88. datapilot/core/platforms/dbt/insights/modelling/__init__.py +0 -0
  89. datapilot/core/platforms/dbt/insights/modelling/base.py +31 -0
  90. datapilot/core/platforms/dbt/insights/modelling/direct_join_to_source.py +125 -0
  91. datapilot/core/platforms/dbt/insights/modelling/downstream_models_dependent_on_source.py +113 -0
  92. datapilot/core/platforms/dbt/insights/modelling/duplicate_sources.py +85 -0
  93. datapilot/core/platforms/dbt/insights/modelling/hard_coded_references.py +80 -0
  94. datapilot/core/platforms/dbt/insights/modelling/joining_of_upstream_concepts.py +79 -0
  95. datapilot/core/platforms/dbt/insights/modelling/model_fanout.py +126 -0
  96. datapilot/core/platforms/dbt/insights/modelling/multiple_sources_joined.py +83 -0
  97. datapilot/core/platforms/dbt/insights/modelling/root_model.py +82 -0
  98. datapilot/core/platforms/dbt/insights/modelling/source_fanout.py +102 -0
  99. datapilot/core/platforms/dbt/insights/modelling/staging_model_dependent_on_downstream_models.py +103 -0
  100. datapilot/core/platforms/dbt/insights/modelling/staging_model_dependent_on_staging_models.py +89 -0
  101. datapilot/core/platforms/dbt/insights/modelling/unused_sources.py +59 -0
  102. datapilot/core/platforms/dbt/insights/performance/__init__.py +0 -0
  103. datapilot/core/platforms/dbt/insights/performance/base.py +26 -0
  104. datapilot/core/platforms/dbt/insights/performance/chain_view_linking.py +92 -0
  105. datapilot/core/platforms/dbt/insights/performance/exposure_parent_materializations.py +104 -0
  106. datapilot/core/platforms/dbt/insights/schema.py +72 -0
  107. datapilot/core/platforms/dbt/insights/structure/__init__.py +0 -0
  108. datapilot/core/platforms/dbt/insights/structure/base.py +33 -0
  109. datapilot/core/platforms/dbt/insights/structure/model_directories_structure.py +92 -0
  110. datapilot/core/platforms/dbt/insights/structure/model_naming_conventions.py +97 -0
  111. datapilot/core/platforms/dbt/insights/structure/source_directories_structure.py +80 -0
  112. datapilot/core/platforms/dbt/insights/structure/test_directory_structure.py +74 -0
  113. datapilot/core/platforms/dbt/insights/utils.py +9 -0
  114. datapilot/core/platforms/dbt/schemas/__init__.py +0 -0
  115. datapilot/core/platforms/dbt/schemas/catalog.py +73 -0
  116. datapilot/core/platforms/dbt/schemas/manifest.py +462 -0
  117. datapilot/core/platforms/dbt/utils.py +525 -0
  118. datapilot/core/platforms/dbt/wrappers/__init__.py +0 -0
  119. datapilot/core/platforms/dbt/wrappers/catalog/__init__.py +0 -0
  120. datapilot/core/platforms/dbt/wrappers/catalog/v1/__init__.py +0 -0
  121. datapilot/core/platforms/dbt/wrappers/catalog/v1/wrapper.py +18 -0
  122. datapilot/core/platforms/dbt/wrappers/catalog/wrapper.py +9 -0
  123. datapilot/core/platforms/dbt/wrappers/manifest/__init__.py +0 -0
  124. datapilot/core/platforms/dbt/wrappers/manifest/v11/__init__.py +0 -0
  125. datapilot/core/platforms/dbt/wrappers/manifest/v11/schemas.py +47 -0
  126. datapilot/core/platforms/dbt/wrappers/manifest/v11/wrapper.py +396 -0
  127. datapilot/core/platforms/dbt/wrappers/manifest/wrapper.py +35 -0
  128. datapilot/core/platforms/dbt/wrappers/run_results/__init__.py +0 -0
  129. datapilot/core/platforms/dbt/wrappers/run_results/run_results.py +39 -0
  130. datapilot/exceptions/__init__.py +0 -0
  131. datapilot/exceptions/exceptions.py +10 -0
  132. datapilot/schemas/__init__.py +0 -0
  133. datapilot/schemas/constants.py +5 -0
  134. datapilot/schemas/nodes.py +19 -0
  135. datapilot/schemas/sql.py +10 -0
  136. datapilot/utils/__init__.py +0 -0
  137. datapilot/utils/formatting/__init__.py +0 -0
  138. datapilot/utils/formatting/utils.py +59 -0
  139. datapilot/utils/utils.py +317 -0
@@ -0,0 +1,20 @@
1
+ from typing import Dict
2
+ from typing import Optional
3
+
4
+ from datapilot.core.insights.schema import Severity
5
+ from datapilot.schemas.constants import CONFIG_METRICS
6
+ from datapilot.schemas.constants import CONFIG_SEVERITY
7
+
8
+
9
def get_severity(
    config: Optional[Dict],
    alias: str,
    default_severity: Severity,
):
    """Return the severity configured for the insight ``alias``.

    The value is read from ``config[CONFIG_METRICS][alias][CONFIG_SEVERITY]``.
    ``default_severity`` is returned when ``config`` is ``None`` or when any
    of those keys is absent. The configured value is returned exactly as it
    is stored in ``config``; no conversion is applied here.
    """
    if config is not None:
        # Missing levels fall back to empty dicts so the chain never raises
        # for an absent key.
        per_insight = config.get(CONFIG_METRICS, {}).get(alias, {})
        return per_insight.get(CONFIG_SEVERITY, default_severity)
    return default_severity
File without changes
File without changes
File without changes
@@ -0,0 +1,112 @@
1
+ import logging
2
+
3
+ import click
4
+
5
+ from datapilot.clients.altimate.utils import check_token_and_instance
6
+ from datapilot.clients.altimate.utils import onboard_manifest
7
+ from datapilot.clients.altimate.utils import validate_credentials
8
+ from datapilot.config.config import load_config
9
+ from datapilot.core.platforms.dbt.constants import MODEL
10
+ from datapilot.core.platforms.dbt.constants import PROJECT
11
+ from datapilot.core.platforms.dbt.executor import DBTInsightGenerator
12
+ from datapilot.core.platforms.dbt.formatting import generate_model_insights_table
13
+ from datapilot.core.platforms.dbt.formatting import generate_project_insights_table
14
+ from datapilot.core.platforms.dbt.utils import load_catalog
15
+ from datapilot.core.platforms.dbt.utils import load_manifest
16
+ from datapilot.utils.formatting.utils import tabulate_data
17
+
18
+ logging.basicConfig(level=logging.INFO)
19
+
20
+
21
+ # New dbt group
22
# Command group that the dbt sub-commands in this module attach to via
# ``@dbt.command(...)``. The docstring is kept verbatim because click uses it
# as the group's help text.
@click.group(name="dbt")
def dbt():
    """DBT specific commands."""
25
+
26
+
27
@dbt.command("project-health")
@click.option(
    "--manifest-path",
    required=True,
    help="Path to the DBT manifest file",
)
@click.option(
    "--catalog-path",
    required=False,
    help="Path to the DBT catalog file",
)
@click.option(
    "--config-path",
    required=False,
    help="Path to the DBT config file",
)
@click.option(
    "--select",
    required=False,
    default=None,
    help="Selective model testing. Specify one or more models to run tests on.",
)
def project_health(manifest_path, catalog_path, config_path=None, select=None):
    """
    Validate the DBT project's configuration and structure.
    :param manifest_path: Path to the DBT manifest file.
    """
    # NOTE: the docstring above is left untouched because click prints it as
    # the command's help text.

    # The config is loaded only when a path was supplied.
    config = load_config(config_path) if config_path else None

    # ``--select`` arrives as one whitespace-separated string. ``split()``
    # without an argument collapses repeated, leading and trailing whitespace,
    # so no empty-string selectors are handed to the generator.
    selected_models = select.split() if select else []

    manifest = load_manifest(manifest_path)
    catalog = load_catalog(catalog_path) if catalog_path else None

    insight_generator = DBTInsightGenerator(
        manifest=manifest,
        catalog=catalog,
        config=config,
        selected_models=selected_models,
    )
    reports = insight_generator.run()

    package_insights = reports[PROJECT]
    model_insights = reports[MODEL]

    # One table per model, each preceded by the model id and its file path.
    model_report = generate_model_insights_table(model_insights)
    if model_report:
        click.echo("--" * 50)
        click.echo("Model Insights")
        click.echo("--" * 50)
        for model_id, report in model_report.items():
            click.echo(f"Model: {model_id}")
            click.echo(f"File path: {report['path']}")
            click.echo(tabulate_data(report["table"], headers="keys"))
            click.echo("\n")

    # A single table that covers all project-level insights.
    if package_insights:
        project_report = generate_project_insights_table(package_insights)
        click.echo("--" * 50)
        click.echo("Project Insights")
        click.echo("--" * 50)
        click.echo(tabulate_data(project_report, headers="keys"))
84
+
85
+
86
@dbt.command("onboard")
# hide_input keeps the API token from being echoed to the terminal when it is
# entered at the interactive prompt.
@click.option("--token", prompt="API Token", hide_input=True, help="Your API token for authentication.")
@click.option("--instance-name", prompt="Instance Name", help="Your tenant ID.")
@click.option("--dbt_core_integration_id", prompt="DBT Core Integration ID", help="DBT Core Integration ID")
@click.option("--manifest-path", required=True, prompt="Manifest Path", help="Path to the manifest file.")
@click.option("--backend-url", required=False, help="Altimate's Backend URL", default="https://api.myaltimate.com")
def onboard(token, instance_name, dbt_core_integration_id, manifest_path, backend_url="https://api.myaltimate.com", env=None):
    """Onboard a manifest file to DBT."""
    # NOTE: the docstring above is click's help text and is kept verbatim.
    # ``env`` has no matching click option and is not used in this function.
    check_token_and_instance(token, instance_name)

    if not validate_credentials(token, backend_url, instance_name):
        click.echo("Error: Invalid credentials.")
        return

    # Parse the manifest locally first so that an unreadable or invalid file
    # is reported here instead of being uploaded. Any failure is shown to the
    # user and the command stops.
    try:
        load_manifest(manifest_path)
    except Exception as e:
        click.echo(f"Error: {e}")
        return

    response = onboard_manifest(token, instance_name, dbt_core_integration_id, manifest_path, backend_url)

    # The response is read as a mapping with an "ok" flag and, on failure, a
    # "message" entry.
    if response["ok"]:
        click.echo("Manifest onboarded successfully!")
    else:
        click.echo(f"{response['message']}")
@@ -0,0 +1,34 @@
1
# String constants shared by the dbt platform code.

# Resource kinds (presumably dbt resource types; confirm against the callers).
SEED = "seed"
MACRO = "macro"
TEST = "test"
MODEL = "model"
SOURCE = "source"

PROJECT = "project"
SQL = "sql"

# Model Types
MART: str = "mart"
STAGING = "staging"
INTERMEDIATE = "intermediate"
BASE = "base"
OTHER = "other"

# Materialization
TABLE = "table"
INCREMENTAL = "incremental"
VIEW = "view"
EPHEMERAL = "ephemeral"

# Materializations grouped into the two lists below.
MATERIALIZED = [TABLE, INCREMENTAL]
NON_MATERIALIZED = [VIEW, EPHEMERAL]

# Presumably kinds of test node; verify against the code that consumes them.
GENERIC = "generic"
SINGULAR = "singular"
OTHER_TEST_NODE = "other_test_node"

FOLDER = "folder"
@@ -0,0 +1,6 @@
1
class AltimateInvalidManifestError(Exception):
    """Plain ``Exception`` subclass; by its name, it signals an invalid manifest."""
3
+
4
+
5
class AltimateCLIArgumentError(Exception):
    """Plain ``Exception`` subclass; by its name, it signals an invalid CLI argument."""
@@ -0,0 +1,157 @@
1
+ import logging
2
+
3
+ # from src.utils.formatting.utils import generate_model_insights_table
4
+ from typing import Dict
5
+ from typing import List
6
+ from typing import Optional
7
+
8
+ from datapilot.core.platforms.dbt.constants import MODEL
9
+ from datapilot.core.platforms.dbt.constants import PROJECT
10
+ from datapilot.core.platforms.dbt.exceptions import AltimateCLIArgumentError
11
+ from datapilot.core.platforms.dbt.factory import DBTFactory
12
+ from datapilot.core.platforms.dbt.insights import INSIGHTS
13
+ from datapilot.core.platforms.dbt.schemas.manifest import Catalog
14
+ from datapilot.core.platforms.dbt.schemas.manifest import Manifest
15
+ from datapilot.core.platforms.dbt.utils import get_models
16
+ from datapilot.utils.formatting.utils import RED
17
+ from datapilot.utils.formatting.utils import YELLOW
18
+ from datapilot.utils.formatting.utils import color_text
19
+
20
+
21
+ class DBTInsightGenerator:
22
+ def __init__(
23
+ self,
24
+ manifest: Manifest,
25
+ catalog: Optional[Catalog] = None,
26
+ run_results_path: Optional[str] = None,
27
+ env: Optional[str] = None,
28
+ config: Optional[Dict] = None,
29
+ target: str = "dev",
30
+ selected_models: Optional[str] = None,
31
+ selected_model_ids: Optional[List[str]] = None,
32
+ ):
33
+ self.run_results_path = run_results_path
34
+ self.target = target
35
+ self.env = env
36
+ self.config = config or {}
37
+
38
+ self.manifest_wrapper = DBTFactory.get_manifest_wrapper(manifest)
39
+ self.manifest_present = True
40
+ self.catalog_present = False
41
+ self.catalog_wrapper = None
42
+
43
+ if catalog:
44
+ self.catalog_wrapper = DBTFactory.get_catalog_wrapper(catalog)
45
+ self.catalog_present = True
46
+
47
+ self.run_results_present = False
48
+ self.logger = logging.getLogger("dbt-insight-generator")
49
+
50
+ self.nodes = self.manifest_wrapper.get_nodes()
51
+ self.macros = self.manifest_wrapper.get_macros()
52
+ self.sources = self.manifest_wrapper.get_sources()
53
+ self.exposures = self.manifest_wrapper.get_exposures()
54
+ self.seeds = self.manifest_wrapper.get_seeds()
55
+ self.children_map = self.manifest_wrapper.parent_to_child_map(self.nodes)
56
+ self.tests = self.manifest_wrapper.get_tests()
57
+ self.project_name = self.manifest_wrapper.get_package()
58
+ self.selected_models = None
59
+ self.selected_models_flag = False
60
+ entities = {
61
+ "nodes": self.nodes,
62
+ "sources": self.sources,
63
+ "exposures": self.exposures,
64
+ "tests": self.tests,
65
+ }
66
+ if selected_model_ids:
67
+ self.selected_models_flag = True
68
+ self.selected_models = selected_model_ids
69
+ elif selected_models:
70
+ self.selected_models_flag = True
71
+ self.selected_models = get_models(
72
+ selected_models,
73
+ entities=entities,
74
+ )
75
+ if not self.selected_models:
76
+ raise AltimateCLIArgumentError(
77
+ f"Invalid values provided in the --select argument. Could not find models associated with pattern: --select {' '.join(selected_models)}"
78
+ )
79
+ self.excluded_models = None
80
+ self.excluded_models_flag = False
81
+
82
+ def _check_if_skipped(self, insight):
83
+ if self.config.get("disabled_insights", False):
84
+ if insight.ALIAS in self.config.get("disabled_insights", []):
85
+ return True
86
+ return False
87
+
88
+ def run(self):
89
+ reports = {
90
+ MODEL: {},
91
+ PROJECT: [],
92
+ }
93
+ for insight_class in INSIGHTS:
94
+ # TODO: Skip insight based on config
95
+
96
+ run_insight, message = insight_class.has_all_required_data(
97
+ has_manifest=self.manifest_present,
98
+ has_catalog=self.catalog_present,
99
+ has_run_results=self.run_results_present,
100
+ )
101
+
102
+ if run_insight:
103
+ self.logger.info(f"Running insight {insight_class.NAME}")
104
+ insight = insight_class(
105
+ manifest_wrapper=self.manifest_wrapper,
106
+ catalog_wrapper=self.catalog_wrapper,
107
+ nodes=self.nodes,
108
+ macros=self.macros,
109
+ sources=self.sources,
110
+ seeds=self.seeds,
111
+ exposures=self.exposures,
112
+ children_map=self.children_map,
113
+ tests=self.tests,
114
+ project_name=self.project_name,
115
+ config=self.config,
116
+ selected_models=self.selected_models,
117
+ excluded_models=self.excluded_models,
118
+ )
119
+
120
+ if self._check_if_skipped(insight):
121
+ self.logger.info(
122
+ color_text(
123
+ f"Skipping insight {insight_class.NAME} as it is not enabled in config",
124
+ YELLOW,
125
+ )
126
+ )
127
+ continue
128
+ try:
129
+ insights = insight.generate()
130
+ num_insights = len(insights)
131
+ text = f"Found {num_insights} insights for {insight_class.NAME}"
132
+ if num_insights > 0:
133
+ self.logger.info(color_text(text, RED))
134
+ else:
135
+ self.logger.info(f"No insights found for {insight_class.NAME}")
136
+
137
+ for insight in insights:
138
+ # Handle MODEL level insights
139
+ if insight.insight_level == MODEL:
140
+ # Add the insight if the model is selected or if all models are selected
141
+ # if self.selected_models_flag and insight.unique_id in self.selected_models or not self.selected_models_flag:
142
+ reports[MODEL].setdefault(insight.unique_id, []).append(insight)
143
+ # Handle PROJECT level insights, only if all models are selected
144
+ elif insight.insight_level == PROJECT:
145
+ reports[PROJECT].append(insight)
146
+
147
+ except Exception as e:
148
+ self.logger.info(
149
+ color_text(
150
+ f"Error running insight {insight_class.NAME}: {e}. Skipping insight. {message}",
151
+ RED,
152
+ )
153
+ )
154
+ else:
155
+ self.logger.info(color_text(f"Skipping insight {insight_class.NAME} as {message}", YELLOW))
156
+
157
+ return reports
@@ -0,0 +1,22 @@
1
+ from dbt_artifacts_parser.parsers.catalog.catalog_v1 import CatalogV1
2
+ from dbt_artifacts_parser.parsers.manifest.manifest_v11 import ManifestV11
3
+
4
+ from datapilot.core.platforms.dbt.schemas.manifest import Catalog
5
+ from datapilot.core.platforms.dbt.schemas.manifest import Manifest
6
+ from datapilot.core.platforms.dbt.wrappers.catalog.v1.wrapper import CatalogV1Wrapper
7
+ from datapilot.core.platforms.dbt.wrappers.manifest.v11.wrapper import ManifestV11Wrapper
8
+ from datapilot.exceptions.exceptions import AltimateNotSupportedError
9
+
10
+
11
class DBTFactory:
    """Pick the wrapper class that matches the version of a parsed dbt artifact."""

    @classmethod
    def get_manifest_wrapper(cls, manifest: Manifest):
        """Return a ``ManifestV11Wrapper`` for a ``ManifestV11`` instance.

        :raises AltimateNotSupportedError: for any other manifest type.
        """
        if not isinstance(manifest, ManifestV11):
            raise AltimateNotSupportedError(f"Manifest version {manifest.metadata.dbt_schema_version} not supported")
        return ManifestV11Wrapper(manifest)

    @classmethod
    def get_catalog_wrapper(cls, catalog: Catalog):
        """Return a ``CatalogV1Wrapper`` for a ``CatalogV1`` instance.

        :raises AltimateNotSupportedError: for any other catalog type.
        """
        if not isinstance(catalog, CatalogV1):
            raise AltimateNotSupportedError(f"Catalog version {catalog.metadata.dbt_schema_version} not supported")
        return CatalogV1Wrapper(catalog)
@@ -0,0 +1,45 @@
1
+ from typing import Dict
2
+ from typing import List
3
+
4
+ from datapilot.core.insights.schema import InsightResult
5
+ from datapilot.core.insights.schema import Severity
6
+ from datapilot.core.platforms.dbt.insights.schema import DBTModelInsightResponse
7
+ from datapilot.core.platforms.dbt.insights.schema import DBTProjectInsightResponse
8
+ from datapilot.utils.formatting.utils import color_based_on_severity
9
+
10
+
11
+ def gen_table(insight: InsightResult, severity: Severity) -> Dict[str, str]:
12
+ return {
13
+ "name": insight.name,
14
+ "type": insight.type,
15
+ "level": color_based_on_severity(severity),
16
+ "message": insight.message,
17
+ "recommendation": insight.recommendation,
18
+ "reason_to_flag": insight.reason_to_flag,
19
+ }
20
+
21
+
22
+ def generate_model_insights_table(model_insights: Dict[str, List[DBTModelInsightResponse]]):
23
+ results = {}
24
+
25
+ for model_id, insights in model_insights.items():
26
+ for insight in insights:
27
+ if model_id not in results:
28
+ results[model_id] = {
29
+ "package_name": insight.package_name,
30
+ "unique_id": insight.unique_id,
31
+ "path": insight.original_file_path,
32
+ "table": [],
33
+ }
34
+
35
+ results[model_id]["table"].append(gen_table(insight.insight, insight.severity))
36
+ return results
37
+
38
+
39
+ def generate_project_insights_table(project_insights: List[DBTProjectInsightResponse]):
40
+ results = []
41
+
42
+ for project_insight in project_insights:
43
+ for insight in project_insight.insights:
44
+ results.append(gen_table(insight, project_insight.severity))
45
+ return results
File without changes
@@ -0,0 +1,86 @@
1
+ import argparse
2
+ import time
3
+ from typing import Optional
4
+ from typing import Sequence
5
+
6
+ from datapilot.config.config import load_config
7
+ from datapilot.core.platforms.dbt.constants import MODEL
8
+ from datapilot.core.platforms.dbt.constants import PROJECT
9
+ from datapilot.core.platforms.dbt.executor import DBTInsightGenerator
10
+ from datapilot.core.platforms.dbt.formatting import generate_model_insights_table
11
+ from datapilot.core.platforms.dbt.formatting import generate_project_insights_table
12
+ from datapilot.utils.formatting.utils import tabulate_data
13
+ from datapilot.utils.utils import generate_partial_manifest_catalog
14
+
15
+
16
+ def main(argv: Optional[Sequence[str]] = None):
17
+ start_time = time.time()
18
+ parser = argparse.ArgumentParser()
19
+ parser.add_argument(
20
+ "--config-path",
21
+ nargs="*",
22
+ help="Path of the config file to be used for the insight generation",
23
+ )
24
+
25
+ parser.add_argument(
26
+ "--base-path",
27
+ nargs="*",
28
+ help="Base path of the dbt project",
29
+ )
30
+
31
+ args = parser.parse_known_args(argv)
32
+ # print(f"args: {args}", file=sys.__stdout__)
33
+ config = {}
34
+ if hasattr(args[0], "config_path") and args[0].config_path:
35
+ # print(f"Using config file: {args[0].config_path[0]}")
36
+ config = load_config(args[0].config_path[0])
37
+
38
+ base_path = "./"
39
+ if hasattr(args[0], "base_path") and args[0].base_path:
40
+ base_path = args[0].base_path[0]
41
+
42
+ changed_files = args[1]
43
+ # print(f"Changed files: {changed_files}")
44
+
45
+ if not changed_files:
46
+ # print("No changed files detected - test. Exiting...")
47
+ return
48
+
49
+ # print(f"Changed files: {changed_files}", file=sys.__stdout__)
50
+ selected_models, manifest, catalog = generate_partial_manifest_catalog(changed_files, base_path=base_path)
51
+ # print("se1ected models", selected_models, file=sys.__stdout__)
52
+ insight_generator = DBTInsightGenerator(
53
+ manifest=manifest,
54
+ catalog=catalog,
55
+ config=config,
56
+ selected_model_ids=selected_models,
57
+ )
58
+ reports = insight_generator.run()
59
+ if reports:
60
+ model_report = generate_model_insights_table(reports[MODEL])
61
+ if len(model_report) > 0:
62
+ print("--" * 50)
63
+ print("Model Insights")
64
+ print("--" * 50)
65
+ for model_id, report in model_report.items():
66
+ print(f"Model: {model_id}")
67
+ print(f"File path: {report['path']}")
68
+ print(tabulate_data(report["table"], headers="keys"))
69
+ print("\n")
70
+
71
+ project_report = generate_project_insights_table(reports[PROJECT])
72
+ if len(project_report) > 0:
73
+ print("--" * 50)
74
+ print("Project Insights")
75
+ print("--" * 50)
76
+ print(tabulate_data(project_report, headers="keys"))
77
+
78
+ exit(1)
79
+
80
+ end_time = time.time()
81
+ total_time = end_time - start_time
82
+ print(f"Total time taken: {round(total_time, 2)} seconds")
83
+
84
+
85
+ if __name__ == "__main__":
86
+ exit(main())
@@ -0,0 +1,115 @@
1
+ from datapilot.core.platforms.dbt.insights.checks.check_column_desc_are_same import CheckColumnDescAreSame
2
+ from datapilot.core.platforms.dbt.insights.checks.check_column_name_contract import CheckColumnNameContract
3
+ from datapilot.core.platforms.dbt.insights.checks.check_macro_args_have_desc import CheckMacroArgsHaveDesc
4
+ from datapilot.core.platforms.dbt.insights.checks.check_macro_has_desc import CheckMacroHasDesc
5
+ from datapilot.core.platforms.dbt.insights.checks.check_model_has_all_columns import CheckModelHasAllColumns
6
+ from datapilot.core.platforms.dbt.insights.checks.check_model_has_meta_keys import CheckModelHasMetaKeys
7
+ from datapilot.core.platforms.dbt.insights.checks.check_model_has_properties_file import CheckModelHasPropertiesFile
8
+ from datapilot.core.platforms.dbt.insights.checks.check_model_has_tests_by_group import CheckModelHasTestsByGroup
9
+ from datapilot.core.platforms.dbt.insights.checks.check_model_has_tests_by_name import CheckModelHasTestsByName
10
+ from datapilot.core.platforms.dbt.insights.checks.check_model_has_tests_by_type import CheckModelHasTestsByType
11
+ from datapilot.core.platforms.dbt.insights.checks.check_model_materialization_by_childs import CheckModelMaterializationByChilds
12
+ from datapilot.core.platforms.dbt.insights.checks.check_model_name_contract import CheckModelNameContract
13
+ from datapilot.core.platforms.dbt.insights.checks.check_model_parents_and_childs import CheckModelParentsAndChilds
14
+ from datapilot.core.platforms.dbt.insights.checks.check_model_parents_database import CheckModelParentsDatabase
15
+ from datapilot.core.platforms.dbt.insights.checks.check_model_parents_schema import CheckModelParentsSchema
16
+ from datapilot.core.platforms.dbt.insights.checks.check_model_tags import CheckModelTags
17
+ from datapilot.core.platforms.dbt.insights.checks.check_source_childs import CheckSourceChilds
18
+ from datapilot.core.platforms.dbt.insights.checks.check_source_columns_have_desc import CheckSourceColumnsHaveDescriptions
19
+ from datapilot.core.platforms.dbt.insights.checks.check_source_has_all_columns import CheckSourceHasAllColumns
20
+ from datapilot.core.platforms.dbt.insights.checks.check_source_has_freshness import CheckSourceHasFreshness
21
+ from datapilot.core.platforms.dbt.insights.checks.check_source_has_loader import CheckSourceHasLoader
22
+ from datapilot.core.platforms.dbt.insights.checks.check_source_has_meta_keys import CheckSourceHasMetaKeys
23
+ from datapilot.core.platforms.dbt.insights.checks.check_source_has_tests import CheckSourceHasTests
24
+ from datapilot.core.platforms.dbt.insights.checks.check_source_has_tests_by_group import CheckSourceHasTestsByGroup
25
+ from datapilot.core.platforms.dbt.insights.checks.check_source_has_tests_by_name import CheckSourceHasTestsByName
26
+ from datapilot.core.platforms.dbt.insights.checks.check_source_has_tests_by_type import CheckSourceHasTestsByType
27
+ from datapilot.core.platforms.dbt.insights.checks.check_source_table_has_description import CheckSourceTableHasDescription
28
+ from datapilot.core.platforms.dbt.insights.checks.check_source_tags import CheckSourceTags
29
+ from datapilot.core.platforms.dbt.insights.dbt_test.missing_primary_key_tests import MissingPrimaryKeyTests
30
+ from datapilot.core.platforms.dbt.insights.dbt_test.test_coverage import DBTTestCoverage
31
+ from datapilot.core.platforms.dbt.insights.governance.documentation_on_stale_columns import DBTDocumentationStaleColumns
32
+ from datapilot.core.platforms.dbt.insights.governance.exposures_dependent_on_private_models import DBTExposureDependentOnPrivateModels
33
+ from datapilot.core.platforms.dbt.insights.governance.public_models_without_contracts import DBTPublicModelWithoutContracts
34
+ from datapilot.core.platforms.dbt.insights.governance.undocumented_columns import DBTMissingDocumentation
35
+ from datapilot.core.platforms.dbt.insights.governance.undocumented_public_models import DBTUndocumentedPublicModels
36
+ from datapilot.core.platforms.dbt.insights.modelling.direct_join_to_source import DBTDirectJoinSource
37
+ from datapilot.core.platforms.dbt.insights.modelling.downstream_models_dependent_on_source import DBTDownstreamModelsDependentOnSource
38
+ from datapilot.core.platforms.dbt.insights.modelling.duplicate_sources import DBTDuplicateSources
39
+ from datapilot.core.platforms.dbt.insights.modelling.hard_coded_references import DBTHardCodedReferences
40
+ from datapilot.core.platforms.dbt.insights.modelling.joining_of_upstream_concepts import DBTRejoiningOfUpstreamConcepts
41
+ from datapilot.core.platforms.dbt.insights.modelling.model_fanout import DBTModelFanout
42
+ from datapilot.core.platforms.dbt.insights.modelling.multiple_sources_joined import DBTModelsMultipleSourcesJoined
43
+ from datapilot.core.platforms.dbt.insights.modelling.root_model import DBTRootModel
44
+ from datapilot.core.platforms.dbt.insights.modelling.source_fanout import DBTSourceFanout
45
+ from datapilot.core.platforms.dbt.insights.modelling.staging_model_dependent_on_downstream_models import (
46
+ DBTStagingModelsDependentOnDownstreamModels,
47
+ )
48
+ from datapilot.core.platforms.dbt.insights.modelling.staging_model_dependent_on_staging_models import (
49
+ DBTStagingModelsDependentOnStagingModels,
50
+ )
51
+ from datapilot.core.platforms.dbt.insights.modelling.unused_sources import DBTUnusedSources
52
+ from datapilot.core.platforms.dbt.insights.performance.chain_view_linking import DBTChainViewLinking
53
+ from datapilot.core.platforms.dbt.insights.performance.exposure_parent_materializations import DBTExposureParentMaterialization
54
+ from datapilot.core.platforms.dbt.insights.structure.model_directories_structure import DBTModelDirectoryStructure
55
+ from datapilot.core.platforms.dbt.insights.structure.model_naming_conventions import DBTModelNamingConvention
56
+ from datapilot.core.platforms.dbt.insights.structure.source_directories_structure import DBTSourceDirectoryStructure
57
+ from datapilot.core.platforms.dbt.insights.structure.test_directory_structure import DBTTestDirectoryStructure
58
+
59
# Registry of insight classes. The section comments name the sub-package each
# class is imported from.
INSIGHTS = [
    # modelling
    DBTDirectJoinSource,
    DBTDownstreamModelsDependentOnSource,
    DBTDuplicateSources,
    DBTModelFanout,
    DBTRootModel,
    DBTSourceFanout,
    DBTStagingModelsDependentOnDownstreamModels,
    DBTStagingModelsDependentOnStagingModels,
    DBTUnusedSources,
    DBTModelsMultipleSourcesJoined,
    DBTHardCodedReferences,
    DBTRejoiningOfUpstreamConcepts,
    # governance
    DBTExposureDependentOnPrivateModels,
    DBTUndocumentedPublicModels,
    DBTPublicModelWithoutContracts,
    # performance
    DBTChainViewLinking,
    DBTExposureParentMaterialization,
    # governance (continued)
    DBTMissingDocumentation,
    DBTDocumentationStaleColumns,
    # dbt_test
    MissingPrimaryKeyTests,
    DBTTestCoverage,
    # structure
    DBTModelDirectoryStructure,
    DBTModelNamingConvention,
    DBTSourceDirectoryStructure,
    DBTTestDirectoryStructure,
    # checks
    CheckColumnDescAreSame,
    CheckColumnNameContract,
    CheckMacroArgsHaveDesc,
    CheckMacroHasDesc,
    CheckModelHasAllColumns,
    # CheckModelHasLabelsKeys,  (disabled; this module does not import it)
    CheckModelHasMetaKeys,
    CheckModelHasPropertiesFile,
    CheckModelHasTestsByName,
    CheckModelHasTestsByType,
    CheckModelHasTestsByGroup,
    CheckModelMaterializationByChilds,
    CheckModelNameContract,
    CheckModelParentsAndChilds,
    CheckModelParentsDatabase,
    CheckModelParentsSchema,
    CheckModelTags,
    CheckSourceChilds,
    CheckSourceColumnsHaveDescriptions,
    CheckSourceHasAllColumns,
    CheckSourceHasFreshness,
    # CheckSourceHasLabelsKeys,  (disabled; this module does not import it)
    CheckSourceHasLoader,
    CheckSourceHasMetaKeys,
    CheckSourceHasTestsByName,
    CheckSourceHasTestsByType,
    CheckSourceHasTestsByGroup,
    CheckSourceHasTests,
    CheckSourceTableHasDescription,
    CheckSourceTags,
]