altimate-datapilot-cli 0.0.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- altimate_datapilot_cli-0.0.8.dist-info/AUTHORS.rst +5 -0
- altimate_datapilot_cli-0.0.8.dist-info/LICENSE +9 -0
- altimate_datapilot_cli-0.0.8.dist-info/METADATA +102 -0
- altimate_datapilot_cli-0.0.8.dist-info/RECORD +139 -0
- altimate_datapilot_cli-0.0.8.dist-info/WHEEL +5 -0
- altimate_datapilot_cli-0.0.8.dist-info/entry_points.txt +4 -0
- altimate_datapilot_cli-0.0.8.dist-info/top_level.txt +1 -0
- datapilot/__init__.py +1 -0
- datapilot/__main__.py +14 -0
- datapilot/cli/__init__.py +0 -0
- datapilot/cli/main.py +11 -0
- datapilot/clients/__init__.py +0 -0
- datapilot/clients/altimate/__init__.py +0 -0
- datapilot/clients/altimate/client.py +85 -0
- datapilot/clients/altimate/utils.py +75 -0
- datapilot/config/__init__.py +0 -0
- datapilot/config/config.py +16 -0
- datapilot/config/utils.py +32 -0
- datapilot/core/__init__.py +0 -0
- datapilot/core/insights/__init__.py +2 -0
- datapilot/core/insights/base/__init__.py +0 -0
- datapilot/core/insights/base/insight.py +34 -0
- datapilot/core/insights/report.py +16 -0
- datapilot/core/insights/schema.py +24 -0
- datapilot/core/insights/sql/__init__.py +0 -0
- datapilot/core/insights/sql/base/__init__.py +0 -0
- datapilot/core/insights/sql/base/insight.py +18 -0
- datapilot/core/insights/sql/runtime/__init__.py +0 -0
- datapilot/core/insights/sql/static/__init__.py +0 -0
- datapilot/core/insights/utils.py +20 -0
- datapilot/core/platforms/__init__.py +0 -0
- datapilot/core/platforms/dbt/__init__.py +0 -0
- datapilot/core/platforms/dbt/cli/__init__.py +0 -0
- datapilot/core/platforms/dbt/cli/cli.py +112 -0
- datapilot/core/platforms/dbt/constants.py +34 -0
- datapilot/core/platforms/dbt/exceptions.py +6 -0
- datapilot/core/platforms/dbt/executor.py +157 -0
- datapilot/core/platforms/dbt/factory.py +22 -0
- datapilot/core/platforms/dbt/formatting.py +45 -0
- datapilot/core/platforms/dbt/hooks/__init__.py +0 -0
- datapilot/core/platforms/dbt/hooks/executor_hook.py +86 -0
- datapilot/core/platforms/dbt/insights/__init__.py +115 -0
- datapilot/core/platforms/dbt/insights/base.py +133 -0
- datapilot/core/platforms/dbt/insights/checks/__init__.py +0 -0
- datapilot/core/platforms/dbt/insights/checks/base.py +26 -0
- datapilot/core/platforms/dbt/insights/checks/check_column_desc_are_same.py +105 -0
- datapilot/core/platforms/dbt/insights/checks/check_column_name_contract.py +154 -0
- datapilot/core/platforms/dbt/insights/checks/check_macro_args_have_desc.py +75 -0
- datapilot/core/platforms/dbt/insights/checks/check_macro_has_desc.py +63 -0
- datapilot/core/platforms/dbt/insights/checks/check_model_has_all_columns.py +96 -0
- datapilot/core/platforms/dbt/insights/checks/check_model_has_labels_keys.py +112 -0
- datapilot/core/platforms/dbt/insights/checks/check_model_has_meta_keys.py +108 -0
- datapilot/core/platforms/dbt/insights/checks/check_model_has_properties_file.py +64 -0
- datapilot/core/platforms/dbt/insights/checks/check_model_has_tests_by_group.py +118 -0
- datapilot/core/platforms/dbt/insights/checks/check_model_has_tests_by_name.py +114 -0
- datapilot/core/platforms/dbt/insights/checks/check_model_has_tests_by_type.py +119 -0
- datapilot/core/platforms/dbt/insights/checks/check_model_materialization_by_childs.py +129 -0
- datapilot/core/platforms/dbt/insights/checks/check_model_name_contract.py +132 -0
- datapilot/core/platforms/dbt/insights/checks/check_model_parents_and_childs.py +135 -0
- datapilot/core/platforms/dbt/insights/checks/check_model_parents_database.py +109 -0
- datapilot/core/platforms/dbt/insights/checks/check_model_parents_schema.py +109 -0
- datapilot/core/platforms/dbt/insights/checks/check_model_tags.py +87 -0
- datapilot/core/platforms/dbt/insights/checks/check_source_childs.py +97 -0
- datapilot/core/platforms/dbt/insights/checks/check_source_columns_have_desc.py +96 -0
- datapilot/core/platforms/dbt/insights/checks/check_source_has_all_columns.py +103 -0
- datapilot/core/platforms/dbt/insights/checks/check_source_has_freshness.py +94 -0
- datapilot/core/platforms/dbt/insights/checks/check_source_has_labels_keys.py +110 -0
- datapilot/core/platforms/dbt/insights/checks/check_source_has_loader.py +62 -0
- datapilot/core/platforms/dbt/insights/checks/check_source_has_meta_keys.py +117 -0
- datapilot/core/platforms/dbt/insights/checks/check_source_has_tests.py +82 -0
- datapilot/core/platforms/dbt/insights/checks/check_source_has_tests_by_group.py +117 -0
- datapilot/core/platforms/dbt/insights/checks/check_source_has_tests_by_name.py +113 -0
- datapilot/core/platforms/dbt/insights/checks/check_source_has_tests_by_type.py +119 -0
- datapilot/core/platforms/dbt/insights/checks/check_source_table_has_description.py +62 -0
- datapilot/core/platforms/dbt/insights/checks/check_source_tags.py +76 -0
- datapilot/core/platforms/dbt/insights/dbt_test/__init__.py +0 -0
- datapilot/core/platforms/dbt/insights/dbt_test/base.py +23 -0
- datapilot/core/platforms/dbt/insights/dbt_test/missing_primary_key_tests.py +130 -0
- datapilot/core/platforms/dbt/insights/dbt_test/test_coverage.py +118 -0
- datapilot/core/platforms/dbt/insights/governance/__init__.py +0 -0
- datapilot/core/platforms/dbt/insights/governance/base.py +23 -0
- datapilot/core/platforms/dbt/insights/governance/documentation_on_stale_columns.py +130 -0
- datapilot/core/platforms/dbt/insights/governance/exposures_dependent_on_private_models.py +90 -0
- datapilot/core/platforms/dbt/insights/governance/public_models_without_contracts.py +89 -0
- datapilot/core/platforms/dbt/insights/governance/undocumented_columns.py +148 -0
- datapilot/core/platforms/dbt/insights/governance/undocumented_public_models.py +110 -0
- datapilot/core/platforms/dbt/insights/modelling/README.md +15 -0
- datapilot/core/platforms/dbt/insights/modelling/__init__.py +0 -0
- datapilot/core/platforms/dbt/insights/modelling/base.py +31 -0
- datapilot/core/platforms/dbt/insights/modelling/direct_join_to_source.py +125 -0
- datapilot/core/platforms/dbt/insights/modelling/downstream_models_dependent_on_source.py +113 -0
- datapilot/core/platforms/dbt/insights/modelling/duplicate_sources.py +85 -0
- datapilot/core/platforms/dbt/insights/modelling/hard_coded_references.py +80 -0
- datapilot/core/platforms/dbt/insights/modelling/joining_of_upstream_concepts.py +79 -0
- datapilot/core/platforms/dbt/insights/modelling/model_fanout.py +126 -0
- datapilot/core/platforms/dbt/insights/modelling/multiple_sources_joined.py +83 -0
- datapilot/core/platforms/dbt/insights/modelling/root_model.py +82 -0
- datapilot/core/platforms/dbt/insights/modelling/source_fanout.py +102 -0
- datapilot/core/platforms/dbt/insights/modelling/staging_model_dependent_on_downstream_models.py +103 -0
- datapilot/core/platforms/dbt/insights/modelling/staging_model_dependent_on_staging_models.py +89 -0
- datapilot/core/platforms/dbt/insights/modelling/unused_sources.py +59 -0
- datapilot/core/platforms/dbt/insights/performance/__init__.py +0 -0
- datapilot/core/platforms/dbt/insights/performance/base.py +26 -0
- datapilot/core/platforms/dbt/insights/performance/chain_view_linking.py +92 -0
- datapilot/core/platforms/dbt/insights/performance/exposure_parent_materializations.py +104 -0
- datapilot/core/platforms/dbt/insights/schema.py +72 -0
- datapilot/core/platforms/dbt/insights/structure/__init__.py +0 -0
- datapilot/core/platforms/dbt/insights/structure/base.py +33 -0
- datapilot/core/platforms/dbt/insights/structure/model_directories_structure.py +92 -0
- datapilot/core/platforms/dbt/insights/structure/model_naming_conventions.py +97 -0
- datapilot/core/platforms/dbt/insights/structure/source_directories_structure.py +80 -0
- datapilot/core/platforms/dbt/insights/structure/test_directory_structure.py +74 -0
- datapilot/core/platforms/dbt/insights/utils.py +9 -0
- datapilot/core/platforms/dbt/schemas/__init__.py +0 -0
- datapilot/core/platforms/dbt/schemas/catalog.py +73 -0
- datapilot/core/platforms/dbt/schemas/manifest.py +462 -0
- datapilot/core/platforms/dbt/utils.py +525 -0
- datapilot/core/platforms/dbt/wrappers/__init__.py +0 -0
- datapilot/core/platforms/dbt/wrappers/catalog/__init__.py +0 -0
- datapilot/core/platforms/dbt/wrappers/catalog/v1/__init__.py +0 -0
- datapilot/core/platforms/dbt/wrappers/catalog/v1/wrapper.py +18 -0
- datapilot/core/platforms/dbt/wrappers/catalog/wrapper.py +9 -0
- datapilot/core/platforms/dbt/wrappers/manifest/__init__.py +0 -0
- datapilot/core/platforms/dbt/wrappers/manifest/v11/__init__.py +0 -0
- datapilot/core/platforms/dbt/wrappers/manifest/v11/schemas.py +47 -0
- datapilot/core/platforms/dbt/wrappers/manifest/v11/wrapper.py +396 -0
- datapilot/core/platforms/dbt/wrappers/manifest/wrapper.py +35 -0
- datapilot/core/platforms/dbt/wrappers/run_results/__init__.py +0 -0
- datapilot/core/platforms/dbt/wrappers/run_results/run_results.py +39 -0
- datapilot/exceptions/__init__.py +0 -0
- datapilot/exceptions/exceptions.py +10 -0
- datapilot/schemas/__init__.py +0 -0
- datapilot/schemas/constants.py +5 -0
- datapilot/schemas/nodes.py +19 -0
- datapilot/schemas/sql.py +10 -0
- datapilot/utils/__init__.py +0 -0
- datapilot/utils/formatting/__init__.py +0 -0
- datapilot/utils/formatting/utils.py +59 -0
- datapilot/utils/utils.py +317 -0
@@ -0,0 +1,117 @@
|
|
1
|
+
from typing import List
|
2
|
+
from typing import Set
|
3
|
+
|
4
|
+
from datapilot.config.utils import get_insight_configuration
|
5
|
+
from datapilot.core.insights.utils import get_severity
|
6
|
+
from datapilot.core.platforms.dbt.insights.checks.base import ChecksInsight
|
7
|
+
from datapilot.core.platforms.dbt.insights.schema import DBTInsightResult
|
8
|
+
from datapilot.core.platforms.dbt.insights.schema import DBTModelInsightResponse
|
9
|
+
from datapilot.core.platforms.dbt.schemas.manifest import AltimateResourceType
|
10
|
+
from datapilot.utils.formatting.utils import numbered_list
|
11
|
+
|
12
|
+
|
13
|
+
class CheckSourceHasMetaKeys(ChecksInsight):
    """Flag dbt sources whose ``meta`` does not match the configured meta keys.

    The required keys are read from the ``meta_keys`` check option. Unless
    ``allow_extra_keys`` is truthy, keys present on the source but absent from
    the configuration are reported as well.
    """

    NAME = "Source has required metadata keys"
    ALIAS = "check_source_has_meta_keys"
    DESCRIPTION = "Check if the source has required metadata keys"
    REASON_TO_FLAG = "Missing meta keys in the source can lead to inconsistency in metadata management and understanding of the source. It's important to ensure that the source includes all the required meta keys as per the configuration."
    META_KEYS_STR = "meta_keys"
    ALLOW_EXTRA_KEYS_STR = "allow_extra_keys"

    def _build_failure_result(
        self,
        source_id: str,
        missing: Set[str],
        extra: Set[str],
    ) -> DBTInsightResult:
        """
        Build the failure result for a source whose meta keys do not match the configuration.

        :param source_id: unique id of the offending source.
        :param missing: required meta keys that the source does not define.
        :param extra: meta keys defined on the source that are not in the configuration.
        :return: the insight result describing the mismatch.
        """
        failure_message = ""
        # Keys are sorted so that the rendered message is deterministic.
        if missing:
            failure_message += f"The source:{source_id} does not have the following meta keys defined: {numbered_list(sorted(missing))}\n"
        if extra:
            failure_message += f"The source:{source_id} has the following extra meta keys defined: {numbered_list(sorted(extra))}\n"

        recommendation = "Define the meta keys for the source to ensure consistency in analysis."

        return DBTInsightResult(
            type=self.TYPE,
            name=self.NAME,
            message=failure_message,
            recommendation=recommendation,
            reason_to_flag=self.REASON_TO_FLAG,
            metadata={"source_id": source_id},
        )

    def generate(self, *args, **kwargs) -> List[DBTModelInsightResponse]:
        """
        Generate the insight response for the check. This method is called by the insight runner.

        Ensures that every source defines the meta keys listed in the configuration file
        (and, unless extra keys are allowed, nothing else).

        :return: one response per source whose meta keys do not match the configuration.
        """
        insights = []
        self.insight_config = get_insight_configuration(self.config)
        self.meta_keys = self.get_check_config(self.META_KEYS_STR) or []
        self.allow_extra_keys = self.get_check_config(self.ALLOW_EXTRA_KEYS_STR)
        if not self.meta_keys and not self.allow_extra_keys:
            self.logger.error(f"Meta keys are not provided in the configuration file for the insight: {self.ALIAS}")
            return insights

        for node_id, node in self.sources.items():
            if self.should_skip_model(node_id):
                self.logger.debug(f"Skipping source {node_id} as it is not enabled for selected models")
                continue
            if node.resource_type != AltimateResourceType.source:
                continue
            status_code, missing, extra = self._check_source_has_meta_keys(node_id)
            if status_code:
                insights.append(
                    DBTModelInsightResponse(
                        unique_id=node_id,
                        package_name=node.package_name,
                        original_file_path=node.original_file_path,
                        path=node.original_file_path,
                        insight=self._build_failure_result(node_id, missing, extra),
                        severity=get_severity(self.config, self.ALIAS, self.DEFAULT_SEVERITY),
                    )
                )
        return insights

    def _check_source_has_meta_keys(self, source_unique_id: str):
        """
        Compare the meta keys of a source against the configured ``meta_keys``.

        Must be called after ``generate`` has populated ``self.meta_keys`` and
        ``self.allow_extra_keys``.

        :param source_unique_id: unique id of the source to inspect.
        :return: a ``(status_code, missing_keys, extra_keys)`` tuple; ``status_code`` is 1
            when there is anything to report and 0 otherwise. Both key collections are sets.
        """
        source = self.get_node(source_unique_id)
        raw_meta = source.meta
        # The original code called both ``.dict()`` and ``.keys()`` on ``meta``;
        # accept a plain mapping as well as an object exposing ``.dict()``.
        if not raw_meta:
            present_keys = set()
        elif isinstance(raw_meta, dict):
            present_keys = set(raw_meta)
        else:
            present_keys = set(raw_meta.dict())

        required_keys = set(self.meta_keys)
        # A source without any meta is missing every required key.
        missing_keys = required_keys - present_keys
        extra_keys = set() if self.allow_extra_keys else present_keys - required_keys
        status_code = 1 if (missing_keys or extra_keys) else 0
        return status_code, missing_keys, extra_keys

    @classmethod
    def get_config_schema(cls):
        """Return the base config schema extended with this check's JSON-schema options."""
        config_schema = super().get_config_schema()
        config_schema["config"] = {
            "$schema": "http://json-schema.org/draft-07/schema#",
            "type": "object",
            "properties": {
                cls.META_KEYS_STR: {
                    "type": "array",
                    "items": {
                        "type": "string",
                    },
                    "description": "A list of metadata keys that should be present in the sources properties.",
                },
                cls.ALLOW_EXTRA_KEYS_STR: {
                    "type": "boolean",
                    "default": False,
                },
            },
            "required": [cls.META_KEYS_STR],
        }
        return config_schema
|
@@ -0,0 +1,82 @@
|
|
1
|
+
from typing import List
|
2
|
+
|
3
|
+
from datapilot.core.insights.utils import get_severity
|
4
|
+
from datapilot.core.platforms.dbt.insights.checks.base import ChecksInsight
|
5
|
+
from datapilot.core.platforms.dbt.insights.schema import DBTInsightResult
|
6
|
+
from datapilot.core.platforms.dbt.insights.schema import DBTModelInsightResponse
|
7
|
+
from datapilot.core.platforms.dbt.schemas.manifest import AltimateResourceType
|
8
|
+
|
9
|
+
|
10
|
+
class CheckSourceHasTests(ChecksInsight):
    """Flag dbt sources that have fewer test children than the configured minimum."""

    NAME = "Source has tests"
    ALIAS = "check_source_has_tests"
    DESCRIPTION = "Check if the source has tests"
    REASON_TO_FLAG = "The source table is missing tests. Ensure that the source table has tests."
    TESTS_STR = "tests"

    def generate(self, *args, **kwargs) -> List[DBTModelInsightResponse]:
        """
        Generate one insight per source that has fewer tests than the threshold.

        The threshold is read from the ``tests`` check option; when it is missing
        (or falsy) a minimum of one test is required.
        """
        insights = []
        source_threshold = self.get_check_config(self.TESTS_STR) or 1
        for node_id, node in self.sources.items():
            if self.should_skip_model(node_id):
                self.logger.debug(f"Skipping source {node_id} as it is not enabled for selected models")
                continue
            if node.resource_type != AltimateResourceType.source:
                continue
            source_test_count = self.get_source_test_count(node_id)
            if source_test_count < source_threshold:
                insights.append(
                    DBTModelInsightResponse(
                        unique_id=node_id,
                        package_name=node.package_name,
                        path=node.original_file_path,
                        original_file_path=node.original_file_path,
                        insight=self._build_failure_result(node_id, source_test_count, source_threshold),
                        severity=get_severity(self.config, self.ALIAS, self.DEFAULT_SEVERITY),
                    )
                )
        return insights

    def _build_failure_result(self, source_unique_id: str, source_test_count: int, source_test_count_threshold: int) -> DBTInsightResult:
        """
        Build the failure result for a single source that does not have enough tests.

        :param source_unique_id: unique id of the source being reported.
        :param source_test_count: number of tests found for the source.
        :param source_test_count_threshold: minimum number of tests required.
        """
        # The result is produced per source, so name the source and the count
        # that was actually found instead of referring to an unnamed list.
        failure_message = (
            f"The source `{source_unique_id}` does not have enough tests. "
            f"Ensure that the source has at least {source_test_count_threshold} tests (found {source_test_count})."
        )
        recommendation = "Add tests for each source listed above. Having tests ensures proper validation and data integrity."

        return DBTInsightResult(
            type=self.TYPE,
            name=self.NAME,
            message=failure_message,
            recommendation=recommendation,
            reason_to_flag=self.REASON_TO_FLAG,
            metadata={"source_test_count": source_test_count, "source_unique_id": source_unique_id},
        )

    def get_source_test_count(self, node_id: str) -> int:
        """
        Count the children of the given source whose resource type is ``test``.
        """
        return sum(
            1
            for child_id in self.children_map.get(node_id, [])
            if self.get_node(child_id).resource_type == AltimateResourceType.test
        )

    @classmethod
    def get_config_schema(cls):
        """Return the base config schema extended with this check's JSON-schema options."""
        config_schema = super().get_config_schema()
        config_schema["config"] = {
            "$schema": "http://json-schema.org/draft-07/schema#",
            "type": "object",
            "properties": {
                cls.TESTS_STR: {
                    "type": "integer",
                    "description": "Minimum number of tests required for each source",
                    # Kept in sync with ``generate``, which falls back to 1.
                    "default": 1,
                },
            },
            "required": [cls.TESTS_STR],
        }
        return config_schema
|
@@ -0,0 +1,117 @@
|
|
1
|
+
from typing import Dict
|
2
|
+
from typing import List
|
3
|
+
|
4
|
+
from datapilot.core.insights.utils import get_severity
|
5
|
+
from datapilot.core.platforms.dbt.insights.checks.base import ChecksInsight
|
6
|
+
from datapilot.core.platforms.dbt.insights.schema import DBTInsightResult
|
7
|
+
from datapilot.core.platforms.dbt.insights.schema import DBTModelInsightResponse
|
8
|
+
from datapilot.core.platforms.dbt.schemas.manifest import AltimateResourceType
|
9
|
+
|
10
|
+
|
11
|
+
class CheckSourceHasTestsByGroup(ChecksInsight):
    """Flag dbt sources that lack the configured minimum number of tests per test group.

    A test group is a list of test names; a test child of the source counts
    towards a group when its ``name`` is one of the names in that group.
    """

    NAME = "Source has tests by group"
    ALIAS = "check_source_has_tests_by_group"
    DESCRIPTION = "Check if sources have a number of tests for specific test groups."
    REASON_TO_FLAG = "Sources should have tests with specific groups for proper validation."
    TESTS_LIST_STR = "tests"
    TEST_GROUP_STR = "test_group"
    TEST_COUNT_STR = "min_count"

    def generate(self, *args, **kwargs) -> List[DBTModelInsightResponse]:
        """
        Generate one insight per source that does not satisfy every configured test group.

        Configuration entries without a ``test_group`` are ignored. An entry without
        ``min_count`` requires one test, matching the default declared in the schema.
        """
        insights = []
        self.test_list = self.get_check_config(self.TESTS_LIST_STR) or []
        # Groups are stored as tuples so they can be used as dictionary keys.
        self.test_groups = {
            tuple(test.get(self.TEST_GROUP_STR, [])): test.get(self.TEST_COUNT_STR, 1)
            for test in self.test_list
            if test.get(self.TEST_GROUP_STR)
        }
        if not self.test_groups:
            self.logger.warning(f"No tests found in the configuration for {self.ALIAS}. Skipping the insight.")
            return insights

        for node_id, node in self.sources.items():
            if self.should_skip_model(node_id):
                self.logger.debug(f"Skipping source {node_id} as it is not enabled for selected models")
                continue
            if node.resource_type != AltimateResourceType.source:
                continue
            missing_test_groups = self._source_has_tests_by_group(node_id)
            if missing_test_groups:
                insights.append(
                    DBTModelInsightResponse(
                        unique_id=node_id,
                        package_name=node.package_name,
                        path=node.original_file_path,
                        original_file_path=node.original_file_path,
                        insight=self._build_failure_result(node_id, missing_test_groups),
                        severity=get_severity(self.config, self.ALIAS, self.DEFAULT_SEVERITY),
                    )
                )
        return insights

    def _build_failure_result(self, source_unique_id: str, missing_test_groups: List[Dict]) -> DBTInsightResult:
        """
        Build the failure result listing every test group the source falls short on.

        :param source_unique_id: unique id of the source being reported.
        :param missing_test_groups: entries produced by ``_source_has_tests_by_group``.
        """
        missing_test_group_str = "".join(
            f"Test Group: {test.get(self.TEST_GROUP_STR)}, Min Count: {test.get(self.TEST_COUNT_STR)}, Actual Count: {test.get('actual_count')}\n"
            for test in missing_test_groups
        )

        failure_message = (
            f"The source `{source_unique_id}` does not have enough tests for the following groups:\n{missing_test_group_str}. "
        )
        recommendation = (
            "Add tests with the specified groups for each source listed above. "
            "Having tests with specific groups ensures proper validation and data integrity."
        )

        return DBTInsightResult(
            type=self.TYPE,
            name=self.NAME,
            message=failure_message,
            recommendation=recommendation,
            reason_to_flag=self.REASON_TO_FLAG,
            metadata={"source_unique_id": source_unique_id, "missing_test_groups": missing_test_groups},
        )

    def _source_has_tests_by_group(self, node_id) -> List[Dict]:
        """
        Count the test children of a source per configured group and report the shortfalls.

        :param node_id: unique id of the source to inspect.
        :return: one dict per group whose count is below its minimum, holding the group,
            the required minimum and the ``actual_count``; empty when every group is satisfied.
        """
        test_group_count = {}
        for child_id in self.children_map.get(node_id, []):
            child = self.get_node(child_id)
            if child.resource_type != AltimateResourceType.test:
                continue
            # A test may belong to several groups and is counted once in each of them.
            for group in self.test_groups:
                if child.name in group:
                    test_group_count[group] = test_group_count.get(group, 0) + 1

        missing_test_groups = []
        for group, min_count in self.test_groups.items():
            actual_count = test_group_count.get(group, 0)
            if actual_count < min_count:
                missing_test_groups.append(
                    {self.TEST_GROUP_STR: group, self.TEST_COUNT_STR: min_count, "actual_count": actual_count}
                )

        return missing_test_groups

    @classmethod
    def get_config_schema(cls):
        """Return the base config schema extended with this check's JSON-schema options."""
        config_schema = super().get_config_schema()
        config_schema["config"] = {
            "$schema": "http://json-schema.org/draft-07/schema#",
            "type": "object",
            "properties": {
                cls.TESTS_LIST_STR: {
                    "type": "array",
                    "items": {
                        "type": "object",
                        "properties": {
                            cls.TEST_GROUP_STR: {
                                "type": "array",
                                "items": {"type": "string"},
                                "description": "List of tests part of a group. If a test is part of any of the groups, it will be counted.",
                            },
                            # The count option is ``min_count``, which is what ``generate`` reads.
                            cls.TEST_COUNT_STR: {"type": "integer", "description": "The minimum number of tests required", "default": 1},
                        },
                        # ``required`` is a sibling of ``properties``, not one of the properties.
                        "required": [cls.TEST_GROUP_STR, cls.TEST_COUNT_STR],
                    },
                    "description": "A list of tests with names and minimum counts required.",
                    "default": [],
                },
            },
            "required": [cls.TESTS_LIST_STR],
        }
        return config_schema
|
@@ -0,0 +1,113 @@
|
|
1
|
+
from typing import Dict
|
2
|
+
from typing import List
|
3
|
+
|
4
|
+
from datapilot.core.insights.utils import get_severity
|
5
|
+
from datapilot.core.platforms.dbt.insights.checks.base import ChecksInsight
|
6
|
+
from datapilot.core.platforms.dbt.insights.schema import DBTInsightResult
|
7
|
+
from datapilot.core.platforms.dbt.insights.schema import DBTModelInsightResponse
|
8
|
+
from datapilot.core.platforms.dbt.schemas.manifest import AltimateResourceType
|
9
|
+
|
10
|
+
|
11
|
+
class CheckSourceHasTestsByName(ChecksInsight):
    """Flag dbt sources that lack the configured minimum number of tests with given names.

    A test child of the source is counted under its ``name`` attribute.
    """

    NAME = "Source has tests by name"
    ALIAS = "check_source_has_tests_by_name"
    DESCRIPTION = "Checks that the source has tests with specific names."
    REASON_TO_FLAG = "Sources should have tests with specific names for proper validation."
    TESTS_LIST_STR = "tests"
    TEST_NAME_STR = "test"
    TEST_COUNT_STR = "min_count"

    def generate(self, *args, **kwargs) -> List[DBTModelInsightResponse]:
        """
        Generate one insight per source that does not have enough tests of every configured name.

        Configuration entries without a test name are ignored. An entry without
        ``min_count`` requires one test, matching the default declared in the schema.
        """
        self.test_list = self.get_check_config(self.TESTS_LIST_STR) or []
        self.tests = {
            test.get(self.TEST_NAME_STR): test.get(self.TEST_COUNT_STR, 1) for test in self.test_list if test.get(self.TEST_NAME_STR)
        }
        if not self.tests:
            self.logger.warning(f"No tests found in the configuration for {self.ALIAS}. Skipping the insight.")
            return []

        insights = []
        for node_id, node in self.sources.items():
            if self.should_skip_model(node_id):
                self.logger.debug(f"Skipping source {node_id} as it is not enabled for selected models")
                continue
            if node.resource_type != AltimateResourceType.source:
                continue
            # The helper returns a (has_all_tests, missing_tests) pair; testing the
            # pair itself for truthiness would flag every source.
            has_all_tests, missing_tests = self._source_has_tests_by_name(node_id)
            if not has_all_tests:
                insights.append(
                    DBTModelInsightResponse(
                        unique_id=node_id,
                        package_name=node.package_name,
                        path=node.original_file_path,
                        original_file_path=node.original_file_path,
                        insight=self._build_failure_result(node_id, missing_tests),
                        severity=get_severity(self.config, self.ALIAS, self.DEFAULT_SEVERITY),
                    )
                )
        return insights

    def _build_failure_result(self, source_unique_id: str, missing_tests: List[Dict]) -> DBTInsightResult:
        """
        Build the failure result listing every test name the source falls short on.

        :param source_unique_id: unique id of the source being reported.
        :param missing_tests: entries produced by ``_source_has_tests_by_name``.
        """
        tests_str = "".join(
            f"Test Name: {test.get(self.TEST_NAME_STR)}, Min Count: {test.get(self.TEST_COUNT_STR)}, Actual Count: {test.get('actual_count')}\n"
            for test in missing_tests
        )

        failure_message = f"The source `{source_unique_id}` does not have enough tests:\n{tests_str}. "
        recommendation = (
            "Add tests with the specified names for each source listed above. "
            "Having tests with specific names ensures proper validation and data integrity."
        )

        return DBTInsightResult(
            type=self.TYPE,
            name=self.NAME,
            message=failure_message,
            recommendation=recommendation,
            reason_to_flag=self.REASON_TO_FLAG,
            metadata={"source_unique_id": source_unique_id},
        )

    def _source_has_tests_by_name(self, node_id) -> tuple:
        """
        Count the test children of a source by name and report the shortfalls.

        :param node_id: unique id of the source to inspect.
        :return: ``(True, None)`` when every configured test name reaches its minimum count,
            otherwise ``(False, missing_tests)`` where ``missing_tests`` is a list of dicts
            holding the test name, the required minimum and the ``actual_count``.
        """
        test_count = {}
        for child_id in self.children_map.get(node_id, []):
            child = self.get_node(child_id)
            if child.resource_type == AltimateResourceType.test:
                test_count[child.name] = test_count.get(child.name, 0) + 1

        missing_tests = []
        for test_name, min_count in self.tests.items():
            actual_count = test_count.get(test_name, 0)
            if actual_count < min_count:
                # Use the same keys that ``_build_failure_result`` reads back.
                missing_tests.append(
                    {self.TEST_NAME_STR: test_name, self.TEST_COUNT_STR: min_count, "actual_count": actual_count}
                )

        if missing_tests:
            return False, missing_tests

        return True, None

    @classmethod
    def get_config_schema(cls):
        """Return the base config schema extended with this check's JSON-schema options."""
        config_schema = super().get_config_schema()
        config_schema["config"] = {
            "$schema": "http://json-schema.org/draft-07/schema#",
            "type": "object",
            "properties": {
                cls.TESTS_LIST_STR: {
                    "type": "array",
                    "items": {
                        "type": "object",
                        "properties": {
                            cls.TEST_NAME_STR: {"type": "string", "description": "The name of the test"},
                            # The count option is ``min_count``, which is what ``generate`` reads.
                            cls.TEST_COUNT_STR: {"type": "integer", "description": "The minimum number of tests required", "default": 1},
                        },
                        # ``required`` is a sibling of ``properties``, not one of the properties.
                        "required": [cls.TEST_NAME_STR, cls.TEST_COUNT_STR],
                    },
                    "description": "A list of tests with names and minimum counts required.",
                    "default": [],
                },
            },
            "required": [cls.TESTS_LIST_STR],
        }
        return config_schema
|
@@ -0,0 +1,119 @@
|
|
1
|
+
from typing import List
|
2
|
+
|
3
|
+
from datapilot.core.insights.utils import get_severity
|
4
|
+
from datapilot.core.platforms.dbt.insights.checks.base import ChecksInsight
|
5
|
+
from datapilot.core.platforms.dbt.insights.schema import DBTInsightResult
|
6
|
+
from datapilot.core.platforms.dbt.insights.schema import DBTModelInsightResponse
|
7
|
+
from datapilot.core.platforms.dbt.schemas.manifest import AltimateResourceType
|
8
|
+
|
9
|
+
|
10
|
+
class CheckSourceHasTestsByType(ChecksInsight):
|
11
|
+
NAME = "Source has tests by type"
|
12
|
+
ALIAS = "check_source_has_tests_by_type"
|
13
|
+
DESCRIPTION = "Checks that the source has tests with specific types."
|
14
|
+
REASON_TO_FLAG = "Sources should have tests with specific types for proper validation."
|
15
|
+
TESTS_LIST_STR = "tests"
|
16
|
+
TEST_TYPE_STR = "test"
|
17
|
+
TEST_COUNT_STR = "min_count"
|
18
|
+
|
19
|
+
def generate(self, *args, **kwargs) -> List[DBTModelInsightResponse]:
|
20
|
+
self.test_list = self.get_check_config(self.TESTS_LIST_STR) or []
|
21
|
+
self.tests = {
|
22
|
+
test.get(self.TEST_TYPE_STR): test.get(self.TEST_COUNT_STR, 0) for test in self.test_list if test.get(self.TEST_NAME_STR)
|
23
|
+
}
|
24
|
+
if not self.tests:
|
25
|
+
self.logger.warning(f"No tests found in the configuration for {self.ALIAS}. Skipping the insight.")
|
26
|
+
return []
|
27
|
+
insights = []
|
28
|
+
for node_id, node in self.sources.items():
|
29
|
+
if self.should_skip_model(node_id):
|
30
|
+
self.logger.debug(f"Skipping source {node_id} as it is not enabled for selected models")
|
31
|
+
continue
|
32
|
+
if node.resource_type == AltimateResourceType.source:
|
33
|
+
missing_tests = self._source_has_tests_by_type(node_id)
|
34
|
+
if missing_tests:
|
35
|
+
insights.append(
|
36
|
+
DBTModelInsightResponse(
|
37
|
+
unique_id=node_id,
|
38
|
+
package_name=node.package_name,
|
39
|
+
path=node.original_file_path,
|
40
|
+
original_file_path=node.original_file_path,
|
41
|
+
insight=self._build_failure_result(node_id, missing_tests),
|
42
|
+
severity=get_severity(self.config, self.ALIAS, self.DEFAULT_SEVERITY),
|
43
|
+
)
|
44
|
+
)
|
45
|
+
return insights
|
46
|
+
|
47
|
+
def _build_failure_result(self, source_unique_id: str, missing_tests) -> DBTInsightResult:
|
48
|
+
missing_test_type_str = ""
|
49
|
+
for test in missing_tests:
|
50
|
+
missing_test_type_str += f"Test type: {test.get(self.TEST_TYPE_STR)}, Min Count: {test.get(self.TEST_COUNT_STR)}, Actual Count: {test.get('actual_count')}\n"
|
51
|
+
|
52
|
+
failure_message = f"The source `{source_unique_id}` does not have enough tests for the following types:\n{missing_test_type_str}. "
|
53
|
+
recommendation = (
|
54
|
+
"Add tests with the specified types for each source listed above. "
|
55
|
+
"Having tests with specific types ensures proper validation and data integrity."
|
56
|
+
)
|
57
|
+
|
58
|
+
return DBTInsightResult(
|
59
|
+
type=self.TYPE,
|
60
|
+
name=self.NAME,
|
61
|
+
message=failure_message,
|
62
|
+
recommendation=recommendation,
|
63
|
+
reason_to_flag=self.REASON_TO_FLAG,
|
64
|
+
metadata={"source_unique_id": source_unique_id},
|
65
|
+
)
|
66
|
+
|
67
|
+
def _source_has_tests_by_type(self, node_id) -> bool:
|
68
|
+
"""
|
69
|
+
For model, check all dependencies and if node type is test, check if it has the required types.
|
70
|
+
Only return true if all child.type in test_types
|
71
|
+
"""
|
72
|
+
test_count = {}
|
73
|
+
|
74
|
+
for child_id in self.children_map.get(node_id, []):
|
75
|
+
child = self.get_node(child_id)
|
76
|
+
if child.resource_type == AltimateResourceType.test:
|
77
|
+
child_tags = child.tags or []
|
78
|
+
test_type = "data" if "data" in child_tags else "schema"
|
79
|
+
test_count[test_type] = test_count.get(test_type, 0) + 1
|
80
|
+
missing_tests = []
|
81
|
+
for test_type in self.tests.keys():
|
82
|
+
if test_count.get(test_type, 0) < self.tests.get(test_type, 0):
|
83
|
+
missing_tests.append(
|
84
|
+
{
|
85
|
+
self.TEST_TYPE_STR: test_type,
|
86
|
+
self.TEST_COUNT_STR: self.tests.get(test_type, 0),
|
87
|
+
"actual_count": test_count.get(test_type, 0),
|
88
|
+
}
|
89
|
+
)
|
90
|
+
return missing_tests
|
91
|
+
|
92
|
+
@classmethod
def get_config_schema(cls):
    """
    Return the inherited config schema with this check's ``"config"`` entry filled in.

    The ``"config"`` entry is a draft-07 JSON schema requiring a list under
    ``TESTS_LIST_STR``; each list item must give a test type ("schema" or "data")
    and an integer minimum count.
    """
    schema = super().get_config_schema()

    # Schema of a single entry in the tests list.
    test_entry = {
        "type": "object",
        "properties": {
            cls.TEST_TYPE_STR: {
                "type": "string",
                "enum": ["schema", "data"],
                "description": "The type of the test",
            },
            cls.TEST_COUNT_STR: {
                "type": "integer",
                "description": "The minimum number of tests required",
                "default": 1,
            },
        },
        "required": [cls.TEST_TYPE_STR, cls.TEST_COUNT_STR],
    }

    # Schema of the list that holds those entries.
    tests_list = {
        "type": "array",
        "items": test_entry,
        "description": "A list of tests with names and minimum counts required.",
        "default": [],
    }

    schema["config"] = {
        "$schema": "http://json-schema.org/draft-07/schema#",
        "type": "object",
        "properties": {cls.TESTS_LIST_STR: tests_list},
        "required": [cls.TESTS_LIST_STR],
    }
    return schema
|
@@ -0,0 +1,62 @@
|
|
1
|
+
from typing import List
|
2
|
+
|
3
|
+
from datapilot.core.insights.utils import get_severity
|
4
|
+
from datapilot.core.platforms.dbt.insights.checks.base import ChecksInsight
|
5
|
+
from datapilot.core.platforms.dbt.insights.schema import DBTInsightResult
|
6
|
+
from datapilot.core.platforms.dbt.insights.schema import DBTModelInsightResponse
|
7
|
+
from datapilot.core.platforms.dbt.schemas.manifest import AltimateResourceType
|
8
|
+
|
9
|
+
|
10
|
+
class CheckSourceTableHasDescription(ChecksInsight):
    """
    Check that reports every source table lacking a description.
    """

    NAME = "Source table has description"
    ALIAS = "check_source_table_has_desc"
    DESCRIPTION = "Ensures that the source table has a description"
    REASON_TO_FLAG = "Missing description for the source table can lead to confusion and inconsistency in analysis. "

    def _build_failure_result(self, source_id: str) -> DBTInsightResult:
        """
        Build the failure result for a source that has no description.

        :param source_id: Unique id of the source being reported.
        :return: A ``DBTInsightResult`` holding the message, recommendation and metadata.
        """
        failure_message = f"The source:{source_id} does not have a description defined.\n"

        recommendation = "Define the description for the source table to ensure consistency in analysis."

        return DBTInsightResult(
            type=self.TYPE,
            name=self.NAME,
            message=failure_message,
            recommendation=recommendation,
            reason_to_flag=self.REASON_TO_FLAG,
            metadata={"source_id": source_id},
        )

    def generate(self, *args, **kwargs) -> List[DBTModelInsightResponse]:
        """
        Generate the insight responses for the check. This method is called by the insight
        runner.

        Walks ``self.sources`` and emits one response for every source node that is not
        skipped and has no description.
        """
        insights = []
        for node_id, node in self.sources.items():
            if self.should_skip_model(node_id):
                self.logger.debug(f"Skipping source {node_id} as it is not enabled for selected models")
                continue
            if node.resource_type != AltimateResourceType.source:
                continue
            if self._check_source_table_desc(node_id):
                continue
            insights.append(
                DBTModelInsightResponse(
                    unique_id=node_id,
                    package_name=node.package_name,
                    original_file_path=node.original_file_path,
                    path=node.original_file_path,
                    insight=self._build_failure_result(node_id),
                    severity=get_severity(self.config, self.ALIAS, self.DEFAULT_SEVERITY),
                )
            )
        return insights

    def _check_source_table_desc(self, source_unique_id: str) -> bool:
        """
        Tell whether the source has a non-empty description.

        :param source_unique_id: Unique id of the source to look up via ``self.get_node``.
        :return: ``True`` when the description contains non-whitespace text, else ``False``.
        """
        description = self.get_node(source_unique_id).description
        # An undocumented source may surface as an empty string rather than None
        # (the dbt manifest schema defaults the field to ""), so a bare None check
        # would let such sources pass.
        return bool(description and description.strip())
|