altimate-datapilot-cli 0.0.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (139)
  1. altimate_datapilot_cli-0.0.8.dist-info/AUTHORS.rst +5 -0
  2. altimate_datapilot_cli-0.0.8.dist-info/LICENSE +9 -0
  3. altimate_datapilot_cli-0.0.8.dist-info/METADATA +102 -0
  4. altimate_datapilot_cli-0.0.8.dist-info/RECORD +139 -0
  5. altimate_datapilot_cli-0.0.8.dist-info/WHEEL +5 -0
  6. altimate_datapilot_cli-0.0.8.dist-info/entry_points.txt +4 -0
  7. altimate_datapilot_cli-0.0.8.dist-info/top_level.txt +1 -0
  8. datapilot/__init__.py +1 -0
  9. datapilot/__main__.py +14 -0
  10. datapilot/cli/__init__.py +0 -0
  11. datapilot/cli/main.py +11 -0
  12. datapilot/clients/__init__.py +0 -0
  13. datapilot/clients/altimate/__init__.py +0 -0
  14. datapilot/clients/altimate/client.py +85 -0
  15. datapilot/clients/altimate/utils.py +75 -0
  16. datapilot/config/__init__.py +0 -0
  17. datapilot/config/config.py +16 -0
  18. datapilot/config/utils.py +32 -0
  19. datapilot/core/__init__.py +0 -0
  20. datapilot/core/insights/__init__.py +2 -0
  21. datapilot/core/insights/base/__init__.py +0 -0
  22. datapilot/core/insights/base/insight.py +34 -0
  23. datapilot/core/insights/report.py +16 -0
  24. datapilot/core/insights/schema.py +24 -0
  25. datapilot/core/insights/sql/__init__.py +0 -0
  26. datapilot/core/insights/sql/base/__init__.py +0 -0
  27. datapilot/core/insights/sql/base/insight.py +18 -0
  28. datapilot/core/insights/sql/runtime/__init__.py +0 -0
  29. datapilot/core/insights/sql/static/__init__.py +0 -0
  30. datapilot/core/insights/utils.py +20 -0
  31. datapilot/core/platforms/__init__.py +0 -0
  32. datapilot/core/platforms/dbt/__init__.py +0 -0
  33. datapilot/core/platforms/dbt/cli/__init__.py +0 -0
  34. datapilot/core/platforms/dbt/cli/cli.py +112 -0
  35. datapilot/core/platforms/dbt/constants.py +34 -0
  36. datapilot/core/platforms/dbt/exceptions.py +6 -0
  37. datapilot/core/platforms/dbt/executor.py +157 -0
  38. datapilot/core/platforms/dbt/factory.py +22 -0
  39. datapilot/core/platforms/dbt/formatting.py +45 -0
  40. datapilot/core/platforms/dbt/hooks/__init__.py +0 -0
  41. datapilot/core/platforms/dbt/hooks/executor_hook.py +86 -0
  42. datapilot/core/platforms/dbt/insights/__init__.py +115 -0
  43. datapilot/core/platforms/dbt/insights/base.py +133 -0
  44. datapilot/core/platforms/dbt/insights/checks/__init__.py +0 -0
  45. datapilot/core/platforms/dbt/insights/checks/base.py +26 -0
  46. datapilot/core/platforms/dbt/insights/checks/check_column_desc_are_same.py +105 -0
  47. datapilot/core/platforms/dbt/insights/checks/check_column_name_contract.py +154 -0
  48. datapilot/core/platforms/dbt/insights/checks/check_macro_args_have_desc.py +75 -0
  49. datapilot/core/platforms/dbt/insights/checks/check_macro_has_desc.py +63 -0
  50. datapilot/core/platforms/dbt/insights/checks/check_model_has_all_columns.py +96 -0
  51. datapilot/core/platforms/dbt/insights/checks/check_model_has_labels_keys.py +112 -0
  52. datapilot/core/platforms/dbt/insights/checks/check_model_has_meta_keys.py +108 -0
  53. datapilot/core/platforms/dbt/insights/checks/check_model_has_properties_file.py +64 -0
  54. datapilot/core/platforms/dbt/insights/checks/check_model_has_tests_by_group.py +118 -0
  55. datapilot/core/platforms/dbt/insights/checks/check_model_has_tests_by_name.py +114 -0
  56. datapilot/core/platforms/dbt/insights/checks/check_model_has_tests_by_type.py +119 -0
  57. datapilot/core/platforms/dbt/insights/checks/check_model_materialization_by_childs.py +129 -0
  58. datapilot/core/platforms/dbt/insights/checks/check_model_name_contract.py +132 -0
  59. datapilot/core/platforms/dbt/insights/checks/check_model_parents_and_childs.py +135 -0
  60. datapilot/core/platforms/dbt/insights/checks/check_model_parents_database.py +109 -0
  61. datapilot/core/platforms/dbt/insights/checks/check_model_parents_schema.py +109 -0
  62. datapilot/core/platforms/dbt/insights/checks/check_model_tags.py +87 -0
  63. datapilot/core/platforms/dbt/insights/checks/check_source_childs.py +97 -0
  64. datapilot/core/platforms/dbt/insights/checks/check_source_columns_have_desc.py +96 -0
  65. datapilot/core/platforms/dbt/insights/checks/check_source_has_all_columns.py +103 -0
  66. datapilot/core/platforms/dbt/insights/checks/check_source_has_freshness.py +94 -0
  67. datapilot/core/platforms/dbt/insights/checks/check_source_has_labels_keys.py +110 -0
  68. datapilot/core/platforms/dbt/insights/checks/check_source_has_loader.py +62 -0
  69. datapilot/core/platforms/dbt/insights/checks/check_source_has_meta_keys.py +117 -0
  70. datapilot/core/platforms/dbt/insights/checks/check_source_has_tests.py +82 -0
  71. datapilot/core/platforms/dbt/insights/checks/check_source_has_tests_by_group.py +117 -0
  72. datapilot/core/platforms/dbt/insights/checks/check_source_has_tests_by_name.py +113 -0
  73. datapilot/core/platforms/dbt/insights/checks/check_source_has_tests_by_type.py +119 -0
  74. datapilot/core/platforms/dbt/insights/checks/check_source_table_has_description.py +62 -0
  75. datapilot/core/platforms/dbt/insights/checks/check_source_tags.py +76 -0
  76. datapilot/core/platforms/dbt/insights/dbt_test/__init__.py +0 -0
  77. datapilot/core/platforms/dbt/insights/dbt_test/base.py +23 -0
  78. datapilot/core/platforms/dbt/insights/dbt_test/missing_primary_key_tests.py +130 -0
  79. datapilot/core/platforms/dbt/insights/dbt_test/test_coverage.py +118 -0
  80. datapilot/core/platforms/dbt/insights/governance/__init__.py +0 -0
  81. datapilot/core/platforms/dbt/insights/governance/base.py +23 -0
  82. datapilot/core/platforms/dbt/insights/governance/documentation_on_stale_columns.py +130 -0
  83. datapilot/core/platforms/dbt/insights/governance/exposures_dependent_on_private_models.py +90 -0
  84. datapilot/core/platforms/dbt/insights/governance/public_models_without_contracts.py +89 -0
  85. datapilot/core/platforms/dbt/insights/governance/undocumented_columns.py +148 -0
  86. datapilot/core/platforms/dbt/insights/governance/undocumented_public_models.py +110 -0
  87. datapilot/core/platforms/dbt/insights/modelling/README.md +15 -0
  88. datapilot/core/platforms/dbt/insights/modelling/__init__.py +0 -0
  89. datapilot/core/platforms/dbt/insights/modelling/base.py +31 -0
  90. datapilot/core/platforms/dbt/insights/modelling/direct_join_to_source.py +125 -0
  91. datapilot/core/platforms/dbt/insights/modelling/downstream_models_dependent_on_source.py +113 -0
  92. datapilot/core/platforms/dbt/insights/modelling/duplicate_sources.py +85 -0
  93. datapilot/core/platforms/dbt/insights/modelling/hard_coded_references.py +80 -0
  94. datapilot/core/platforms/dbt/insights/modelling/joining_of_upstream_concepts.py +79 -0
  95. datapilot/core/platforms/dbt/insights/modelling/model_fanout.py +126 -0
  96. datapilot/core/platforms/dbt/insights/modelling/multiple_sources_joined.py +83 -0
  97. datapilot/core/platforms/dbt/insights/modelling/root_model.py +82 -0
  98. datapilot/core/platforms/dbt/insights/modelling/source_fanout.py +102 -0
  99. datapilot/core/platforms/dbt/insights/modelling/staging_model_dependent_on_downstream_models.py +103 -0
  100. datapilot/core/platforms/dbt/insights/modelling/staging_model_dependent_on_staging_models.py +89 -0
  101. datapilot/core/platforms/dbt/insights/modelling/unused_sources.py +59 -0
  102. datapilot/core/platforms/dbt/insights/performance/__init__.py +0 -0
  103. datapilot/core/platforms/dbt/insights/performance/base.py +26 -0
  104. datapilot/core/platforms/dbt/insights/performance/chain_view_linking.py +92 -0
  105. datapilot/core/platforms/dbt/insights/performance/exposure_parent_materializations.py +104 -0
  106. datapilot/core/platforms/dbt/insights/schema.py +72 -0
  107. datapilot/core/platforms/dbt/insights/structure/__init__.py +0 -0
  108. datapilot/core/platforms/dbt/insights/structure/base.py +33 -0
  109. datapilot/core/platforms/dbt/insights/structure/model_directories_structure.py +92 -0
  110. datapilot/core/platforms/dbt/insights/structure/model_naming_conventions.py +97 -0
  111. datapilot/core/platforms/dbt/insights/structure/source_directories_structure.py +80 -0
  112. datapilot/core/platforms/dbt/insights/structure/test_directory_structure.py +74 -0
  113. datapilot/core/platforms/dbt/insights/utils.py +9 -0
  114. datapilot/core/platforms/dbt/schemas/__init__.py +0 -0
  115. datapilot/core/platforms/dbt/schemas/catalog.py +73 -0
  116. datapilot/core/platforms/dbt/schemas/manifest.py +462 -0
  117. datapilot/core/platforms/dbt/utils.py +525 -0
  118. datapilot/core/platforms/dbt/wrappers/__init__.py +0 -0
  119. datapilot/core/platforms/dbt/wrappers/catalog/__init__.py +0 -0
  120. datapilot/core/platforms/dbt/wrappers/catalog/v1/__init__.py +0 -0
  121. datapilot/core/platforms/dbt/wrappers/catalog/v1/wrapper.py +18 -0
  122. datapilot/core/platforms/dbt/wrappers/catalog/wrapper.py +9 -0
  123. datapilot/core/platforms/dbt/wrappers/manifest/__init__.py +0 -0
  124. datapilot/core/platforms/dbt/wrappers/manifest/v11/__init__.py +0 -0
  125. datapilot/core/platforms/dbt/wrappers/manifest/v11/schemas.py +47 -0
  126. datapilot/core/platforms/dbt/wrappers/manifest/v11/wrapper.py +396 -0
  127. datapilot/core/platforms/dbt/wrappers/manifest/wrapper.py +35 -0
  128. datapilot/core/platforms/dbt/wrappers/run_results/__init__.py +0 -0
  129. datapilot/core/platforms/dbt/wrappers/run_results/run_results.py +39 -0
  130. datapilot/exceptions/__init__.py +0 -0
  131. datapilot/exceptions/exceptions.py +10 -0
  132. datapilot/schemas/__init__.py +0 -0
  133. datapilot/schemas/constants.py +5 -0
  134. datapilot/schemas/nodes.py +19 -0
  135. datapilot/schemas/sql.py +10 -0
  136. datapilot/utils/__init__.py +0 -0
  137. datapilot/utils/formatting/__init__.py +0 -0
  138. datapilot/utils/formatting/utils.py +59 -0
  139. datapilot/utils/utils.py +317 -0
datapilot/core/platforms/dbt/insights/checks/check_source_has_meta_keys.py
@@ -0,0 +1,117 @@
+ from typing import List
+ from typing import Set
+
+ from datapilot.config.utils import get_insight_configuration
+ from datapilot.core.insights.utils import get_severity
+ from datapilot.core.platforms.dbt.insights.checks.base import ChecksInsight
+ from datapilot.core.platforms.dbt.insights.schema import DBTInsightResult
+ from datapilot.core.platforms.dbt.insights.schema import DBTModelInsightResponse
+ from datapilot.core.platforms.dbt.schemas.manifest import AltimateResourceType
+ from datapilot.utils.formatting.utils import numbered_list
+
+
+ class CheckSourceHasMetaKeys(ChecksInsight):
+     NAME = "Source has required metadata keys"
+     ALIAS = "check_source_has_meta_keys"
+     DESCRIPTION = "Check if the source has required metadata keys"
+     REASON_TO_FLAG = "Missing meta keys in the source can lead to inconsistency in metadata management and understanding of the source. It's important to ensure that the source includes all the required meta keys as per the configuration."
+     META_KEYS_STR = "meta_keys"
+     ALLOW_EXTRA_KEYS_STR = "allow_extra_keys"
+
+     def _build_failure_result(
+         self,
+         source_id: str,
+         missing: Set[str],
+         extra: Set[str],
+     ) -> DBTInsightResult:
+         """
+         Build the failure result for the insight when a source is missing required meta keys or defines extra ones.
+         """
+         failure_message = ""
+         if missing:
+             failure_message += f"The source:{source_id} does not have the following meta keys defined: {numbered_list(missing)}\n"
+         if extra:
+             failure_message += f"The source:{source_id} has the following extra meta keys defined: {numbered_list(extra)}\n"
+
+         recommendation = "Define the meta keys for the source to ensure consistency in analysis."
+
+         return DBTInsightResult(
+             type=self.TYPE,
+             name=self.NAME,
+             message=failure_message,
+             recommendation=recommendation,
+             reason_to_flag=self.REASON_TO_FLAG,
+             metadata={"source_id": source_id},
+         )
+
+     def generate(self, *args, **kwargs) -> List[DBTModelInsightResponse]:
+         """
+         Generate the insight response for the check. This method is called by the insight runner to generate the insight
+         response for the check.
+         Ensures that the source has a list of valid meta keys.
+         meta_keys are provided in the configuration file.
+         """
+         insights = []
+         self.insight_config = get_insight_configuration(self.config)
+         self.meta_keys = self.get_check_config(self.META_KEYS_STR) or []
+         self.allow_extra_keys = self.get_check_config(self.ALLOW_EXTRA_KEYS_STR)
+         if not self.meta_keys and not self.allow_extra_keys:
+             self.logger.error(f"Meta keys are not provided in the configuration file for the insight: {self.ALIAS}")
+             return insights
+
+         for node_id, node in self.sources.items():
+             if self.should_skip_model(node_id):
+                 self.logger.debug(f"Skipping source {node_id} as it is not enabled for selected models")
+                 continue
+             if node.resource_type == AltimateResourceType.source:
+                 status_code, missing, extra = self._check_source_has_meta_keys(node_id)
+                 if status_code:
+                     insights.append(
+                         DBTModelInsightResponse(
+                             unique_id=node_id,
+                             package_name=node.package_name,
+                             original_file_path=node.original_file_path,
+                             path=node.original_file_path,
+                             insight=self._build_failure_result(node_id, missing, extra),
+                             severity=get_severity(self.config, self.ALIAS, self.DEFAULT_SEVERITY),
+                         )
+                     )
+         return insights
+
+     def _check_source_has_meta_keys(self, source_unique_id: str):
+         status_code = 0
+         model = self.get_node(source_unique_id)
+         meta = model.meta.dict() if model.meta else {}
+         model_meta_keys = set(meta.keys())
+         required_keys = set(self.meta_keys)
+         missing_keys = required_keys - model_meta_keys
+         if missing_keys:
+             status_code = 1
+         extra_keys = None
+         if not self.allow_extra_keys:
+             extra_keys = model_meta_keys - required_keys
+             if extra_keys:
+                 status_code = 1
+         return status_code, missing_keys, extra_keys
+
+     @classmethod
+     def get_config_schema(cls):
+         config_schema = super().get_config_schema()
+         config_schema["config"] = {
+             "$schema": "http://json-schema.org/draft-07/schema#",
+             "type": "object",
+             "properties": {
+                 cls.META_KEYS_STR: {
+                     "type": "array",
+                     "items": {
+                         "type": "string",
+                     },
+                     "description": "A list of metadata keys that should be present in the source's properties.",
+                 },
+                 cls.ALLOW_EXTRA_KEYS_STR: {
+                     "type": "boolean",
+                     "default": False,
+                 },
+             },
+             "required": [cls.META_KEYS_STR],
+         }
+         return config_schema
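For reference, a minimal sketch of a configuration instance that satisfies the check_source_has_meta_keys schema above, validated with the jsonschema package. The dict layout and the key values are illustrative assumptions; the actual datapilot configuration file format is not shown in this diff.

    # Hypothetical config for the meta-keys check; only the property names come from the schema above.
    from jsonschema import validate

    example_config = {
        "meta_keys": ["owner", "domain"],  # keys every source is expected to define
        "allow_extra_keys": False,         # also flag keys outside the list above
    }

    schema = {
        "$schema": "http://json-schema.org/draft-07/schema#",
        "type": "object",
        "properties": {
            "meta_keys": {"type": "array", "items": {"type": "string"}},
            "allow_extra_keys": {"type": "boolean", "default": False},
        },
        "required": ["meta_keys"],
    }

    validate(instance=example_config, schema=schema)  # raises ValidationError if the config is invalid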
datapilot/core/platforms/dbt/insights/checks/check_source_has_tests.py
@@ -0,0 +1,82 @@
+ from typing import List
+
+ from datapilot.core.insights.utils import get_severity
+ from datapilot.core.platforms.dbt.insights.checks.base import ChecksInsight
+ from datapilot.core.platforms.dbt.insights.schema import DBTInsightResult
+ from datapilot.core.platforms.dbt.insights.schema import DBTModelInsightResponse
+ from datapilot.core.platforms.dbt.schemas.manifest import AltimateResourceType
+
+
+ class CheckSourceHasTests(ChecksInsight):
+     NAME = "Source has tests"
+     ALIAS = "check_source_has_tests"
+     DESCRIPTION = "Check if the source has tests"
+     REASON_TO_FLAG = "The source table is missing tests. Ensure that the source table has tests."
+     TESTS_STR = "tests"
+
+     def generate(self, *args, **kwargs) -> List[DBTModelInsightResponse]:
+         insights = []
+         source_threshold = self.get_check_config(self.TESTS_STR) or 1
+         for node_id, node in self.sources.items():
+             if self.should_skip_model(node_id):
+                 self.logger.debug(f"Skipping source {node_id} as it is not enabled for selected models")
+                 continue
+             if node.resource_type == AltimateResourceType.source:
+                 source_test_count = self.get_source_test_count(node_id)
+                 if source_test_count < source_threshold:
+                     insights.append(
+                         DBTModelInsightResponse(
+                             unique_id=node_id,
+                             package_name=node.package_name,
+                             path=node.original_file_path,
+                             original_file_path=node.original_file_path,
+                             insight=self._build_failure_result(node_id, source_test_count, source_threshold),
+                             severity=get_severity(self.config, self.ALIAS, self.DEFAULT_SEVERITY),
+                         )
+                     )
+         return insights
+
+     def _build_failure_result(self, source_unique_id: str, source_test_count: int, source_test_count_threshold: int) -> DBTInsightResult:
+         failure_message = (
+             "The source does not have enough tests. Ensure that the source has at least {source_test_count_threshold} tests."
+         )
+         recommendation = "Add tests for each source listed above. Having tests ensures proper validation and data integrity."
+
+         return DBTInsightResult(
+             type=self.TYPE,
+             name=self.NAME,
+             message=failure_message.format(
+                 source_test_count_threshold=source_test_count_threshold,
+             ),
+             recommendation=recommendation,
+             reason_to_flag=self.REASON_TO_FLAG,
+             metadata={"source_test_count": source_test_count, "source_unique_id": source_unique_id},
+         )
+
+     def get_source_test_count(self, node_id: str) -> int:
+         """
+         Get the test count for a source by counting child nodes of the source that have the resource type test.
+         """
+         count = 0
+         for child_id in self.children_map.get(node_id, []):
+             child = self.get_node(child_id)
+             if child.resource_type == AltimateResourceType.test:
+                 count += 1
+         return count
+
+     @classmethod
+     def get_config_schema(cls):
+         config_schema = super().get_config_schema()
+         config_schema["config"] = {
+             "$schema": "http://json-schema.org/draft-07/schema#",
+             "type": "object",
+             "properties": {
+                 cls.TESTS_STR: {
+                     "type": "integer",
+                     "description": "Minimum number of tests required for each source",
+                     "default": 0,
+                 },
+             },
+             "required": [cls.TESTS_STR],
+         }
+         return config_schema
datapilot/core/platforms/dbt/insights/checks/check_source_has_tests_by_group.py
@@ -0,0 +1,117 @@
+ from typing import Dict
+ from typing import List
+
+ from datapilot.core.insights.utils import get_severity
+ from datapilot.core.platforms.dbt.insights.checks.base import ChecksInsight
+ from datapilot.core.platforms.dbt.insights.schema import DBTInsightResult
+ from datapilot.core.platforms.dbt.insights.schema import DBTModelInsightResponse
+ from datapilot.core.platforms.dbt.schemas.manifest import AltimateResourceType
+
+
+ class CheckSourceHasTestsByGroup(ChecksInsight):
+     NAME = "Source has tests by group"
+     ALIAS = "check_source_has_tests_by_group"
+     DESCRIPTION = "Check if sources have a number of tests for specific test groups."
+     REASON_TO_FLAG = "Sources should have tests with specific groups for proper validation."
+     TESTS_LIST_STR = "tests"
+     TEST_GROUP_STR = "test_group"
+     TEST_COUNT_STR = "min_count"
+
+     def generate(self, *args, **kwargs) -> List[DBTModelInsightResponse]:
+         insights = []
+         self.test_list = self.get_check_config(self.TESTS_LIST_STR) or []
+         self.test_groups = {
+             tuple(test.get(self.TEST_GROUP_STR, [])): test.get(self.TEST_COUNT_STR, 0)
+             for test in self.test_list
+             if test.get(self.TEST_GROUP_STR)
+         }
+         for node_id, node in self.sources.items():
+             if self.should_skip_model(node_id):
+                 self.logger.debug(f"Skipping source {node_id} as it is not enabled for selected models")
+                 continue
+             if node.resource_type == AltimateResourceType.source:
+                 missing_test_groups = self._source_has_tests_by_group(node_id)
+
+                 if missing_test_groups:
+                     insights.append(
+                         DBTModelInsightResponse(
+                             unique_id=node_id,
+                             package_name=node.package_name,
+                             path=node.original_file_path,
+                             original_file_path=node.original_file_path,
+                             insight=self._build_failure_result(node_id, missing_test_groups),
+                             severity=get_severity(self.config, self.ALIAS, self.DEFAULT_SEVERITY),
+                         )
+                     )
+         return insights
+
+     def _build_failure_result(self, source_unique_id: str, missing_test_groups: List[Dict]) -> DBTInsightResult:
+         missing_test_group_str = ""
+         for test in missing_test_groups:
+             missing_test_group_str += f"Test Group: {test.get(self.TEST_GROUP_STR)}, Min Count: {test.get(self.TEST_COUNT_STR)}, Actual Count: {test.get('actual_count')}\n"
+
+         failure_message = (
+             f"The source `{source_unique_id}` does not have enough tests for the following groups:\n{missing_test_group_str}"
+         )
+         recommendation = (
+             "Add tests with the specified groups for each source listed above. "
+             "Having tests with specific groups ensures proper validation and data integrity."
+         )
+
+         return DBTInsightResult(
+             type=self.TYPE,
+             name=self.NAME,
+             message=failure_message,
+             recommendation=recommendation,
+             reason_to_flag=self.REASON_TO_FLAG,
+             metadata={"source_unique_id": source_unique_id, "missing_test_groups": missing_test_groups},
+         )
+
+     def _source_has_tests_by_group(self, node_id) -> List[Dict]:
+         """
+         Check all children of the source; for every child of resource type test, count it towards each
+         configured test group whose name list contains the test's name, then return the groups whose
+         counts fall below the required minimum.
+         """
+         test_group_count = {}
+         for child_id in self.children_map.get(node_id, []):
+             child = self.get_node(child_id)
+             if child.resource_type == AltimateResourceType.test:
+                 for group in self.test_groups:
+                     if child.name in group:
+                         test_group_count[group] = test_group_count.get(group, 0) + 1
+         missing_test_groups = []
+         for group, count in self.test_groups.items():
+             if test_group_count.get(group, 0) < count:
+                 missing_test_groups.append(
+                     {self.TEST_GROUP_STR: group, self.TEST_COUNT_STR: count, "actual_count": test_group_count.get(group, 0)}
+                 )
+
+         return missing_test_groups
+
+     @classmethod
+     def get_config_schema(cls):
+         config_schema = super().get_config_schema()
+         config_schema["config"] = {
+             "$schema": "http://json-schema.org/draft-07/schema#",
+             "type": "object",
+             "properties": {
+                 cls.TESTS_LIST_STR: {
+                     "type": "array",
+                     "items": {
+                         "type": "object",
+                         "properties": {
+                             cls.TEST_GROUP_STR: {
+                                 "type": "array",
+                                 "items": {"type": "string"},
+                                 "description": "List of tests part of a group. If a test is part of any of the groups, it will be counted.",
+                             },
+                             cls.TEST_COUNT_STR: {"type": "integer", "description": "The minimum number of tests required", "default": 1},
+                         },
+                         "required": [cls.TEST_GROUP_STR, cls.TEST_COUNT_STR],
+                     },
+                     "description": "A list of test groups with minimum counts required.",
+                     "default": [],
+                 },
+             },
+             "required": [cls.TESTS_LIST_STR],
+         }
+         return config_schema
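An illustrative sketch of a tests entry that satisfies the grouped schema above, assuming the check reads it via get_check_config("tests"); the group names and counts are made-up values, and how this block nests inside the overall datapilot configuration file is an assumption rather than something shown in this diff.

    # Hypothetical configuration for check_source_has_tests_by_group.
    # A child test counts towards a group when its name appears in that group's "test_group" list;
    # each source must reach "min_count" matching tests for every configured group.
    example_tests_by_group = {
        "tests": [
            {"test_group": ["unique", "not_null"], "min_count": 2},
            {"test_group": ["relationships"], "min_count": 1},
        ]
    }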
datapilot/core/platforms/dbt/insights/checks/check_source_has_tests_by_name.py
@@ -0,0 +1,113 @@
+ from typing import Dict
+ from typing import List
+
+ from datapilot.core.insights.utils import get_severity
+ from datapilot.core.platforms.dbt.insights.checks.base import ChecksInsight
+ from datapilot.core.platforms.dbt.insights.schema import DBTInsightResult
+ from datapilot.core.platforms.dbt.insights.schema import DBTModelInsightResponse
+ from datapilot.core.platforms.dbt.schemas.manifest import AltimateResourceType
+
+
+ class CheckSourceHasTestsByName(ChecksInsight):
+     NAME = "Source has tests by name"
+     ALIAS = "check_source_has_tests_by_name"
+     DESCRIPTION = "Checks that the source has tests with specific names."
+     REASON_TO_FLAG = "Sources should have tests with specific names for proper validation."
+     TESTS_LIST_STR = "tests"
+     TEST_NAME_STR = "test"
+     TEST_COUNT_STR = "min_count"
+
+     def generate(self, *args, **kwargs) -> List[DBTModelInsightResponse]:
+         self.test_list = self.get_check_config(self.TESTS_LIST_STR) or []
+         self.tests = {
+             test.get(self.TEST_NAME_STR): test.get(self.TEST_COUNT_STR, 0) for test in self.test_list if test.get(self.TEST_NAME_STR)
+         }
+         if not self.tests:
+             self.logger.warning(f"No tests found in the configuration for {self.ALIAS}. Skipping the insight.")
+             return []
+         insights = []
+         for node_id, node in self.sources.items():
+             if self.should_skip_model(node_id):
+                 self.logger.debug(f"Skipping source {node_id} as it is not enabled for selected models")
+                 continue
+             if node.resource_type == AltimateResourceType.source:
+                 missing_tests = self._source_has_tests_by_name(node_id)
+                 if missing_tests:
+                     insights.append(
+                         DBTModelInsightResponse(
+                             unique_id=node_id,
+                             package_name=node.package_name,
+                             path=node.original_file_path,
+                             original_file_path=node.original_file_path,
+                             insight=self._build_failure_result(node_id, missing_tests),
+                             severity=get_severity(self.config, self.ALIAS, self.DEFAULT_SEVERITY),
+                         )
+                     )
+         return insights
+
+     def _build_failure_result(self, source_unique_id: str, missing_tests: List[Dict]) -> DBTInsightResult:
+         tests_str = ""
+         for test in missing_tests:
+             tests_str += f"Test Name: {test.get(self.TEST_NAME_STR)}, Min Count: {test.get(self.TEST_COUNT_STR)}, Actual Count: {test.get('actual_count')}\n"
+
+         failure_message = f"The source `{source_unique_id}` does not have enough tests:\n{tests_str}"
+         recommendation = (
+             "Add tests with the specified names for each source listed above. "
+             "Having tests with specific names ensures proper validation and data integrity."
+         )
+
+         return DBTInsightResult(
+             type=self.TYPE,
+             name=self.NAME,
+             message=failure_message,
+             recommendation=recommendation,
+             reason_to_flag=self.REASON_TO_FLAG,
+             metadata={"source_unique_id": source_unique_id},
+         )
+
+     def _source_has_tests_by_name(self, node_id) -> List[Dict]:
+         """
+         Check all children of the source; for every child of resource type test, count occurrences by test name,
+         then return the configured test names whose counts fall below the required minimum.
+         """
+         test_count = {}
+
+         for child_id in self.children_map.get(node_id, []):
+             child = self.get_node(child_id)
+             if child.resource_type == AltimateResourceType.test:
+                 test_name = child.name
+                 test_count[test_name] = test_count.get(test_name, 0) + 1
+
+         missing_tests = []
+         for test_name, min_count in self.tests.items():
+             if test_count.get(test_name, 0) < min_count:
+                 missing_tests.append({self.TEST_NAME_STR: test_name, self.TEST_COUNT_STR: min_count, "actual_count": test_count.get(test_name, 0)})
+
+         return missing_tests
+
+     @classmethod
+     def get_config_schema(cls):
+         config_schema = super().get_config_schema()
+         config_schema["config"] = {
+             "$schema": "http://json-schema.org/draft-07/schema#",
+             "type": "object",
+             "properties": {
+                 cls.TESTS_LIST_STR: {
+                     "type": "array",
+                     "items": {
+                         "type": "object",
+                         "properties": {
+                             cls.TEST_NAME_STR: {"type": "string", "description": "The name of the test"},
+                             cls.TEST_COUNT_STR: {"type": "integer", "description": "The minimum number of tests required", "default": 1},
+                         },
+                         "required": [cls.TEST_NAME_STR, cls.TEST_COUNT_STR],
+                     },
+                     "description": "A list of tests with names and minimum counts required.",
+                     "default": [],
+                 },
+             },
+             "required": [cls.TESTS_LIST_STR],
+         }
+         return config_schema
datapilot/core/platforms/dbt/insights/checks/check_source_has_tests_by_type.py
@@ -0,0 +1,119 @@
+ from typing import Dict
+ from typing import List
+
+ from datapilot.core.insights.utils import get_severity
+ from datapilot.core.platforms.dbt.insights.checks.base import ChecksInsight
+ from datapilot.core.platforms.dbt.insights.schema import DBTInsightResult
+ from datapilot.core.platforms.dbt.insights.schema import DBTModelInsightResponse
+ from datapilot.core.platforms.dbt.schemas.manifest import AltimateResourceType
+
+
+ class CheckSourceHasTestsByType(ChecksInsight):
+     NAME = "Source has tests by type"
+     ALIAS = "check_source_has_tests_by_type"
+     DESCRIPTION = "Checks that the source has tests with specific types."
+     REASON_TO_FLAG = "Sources should have tests with specific types for proper validation."
+     TESTS_LIST_STR = "tests"
+     TEST_TYPE_STR = "test"
+     TEST_COUNT_STR = "min_count"
+
+     def generate(self, *args, **kwargs) -> List[DBTModelInsightResponse]:
+         self.test_list = self.get_check_config(self.TESTS_LIST_STR) or []
+         self.tests = {
+             test.get(self.TEST_TYPE_STR): test.get(self.TEST_COUNT_STR, 0) for test in self.test_list if test.get(self.TEST_TYPE_STR)
+         }
+         if not self.tests:
+             self.logger.warning(f"No tests found in the configuration for {self.ALIAS}. Skipping the insight.")
+             return []
+         insights = []
+         for node_id, node in self.sources.items():
+             if self.should_skip_model(node_id):
+                 self.logger.debug(f"Skipping source {node_id} as it is not enabled for selected models")
+                 continue
+             if node.resource_type == AltimateResourceType.source:
+                 missing_tests = self._source_has_tests_by_type(node_id)
+                 if missing_tests:
+                     insights.append(
+                         DBTModelInsightResponse(
+                             unique_id=node_id,
+                             package_name=node.package_name,
+                             path=node.original_file_path,
+                             original_file_path=node.original_file_path,
+                             insight=self._build_failure_result(node_id, missing_tests),
+                             severity=get_severity(self.config, self.ALIAS, self.DEFAULT_SEVERITY),
+                         )
+                     )
+         return insights
+
+     def _build_failure_result(self, source_unique_id: str, missing_tests) -> DBTInsightResult:
+         missing_test_type_str = ""
+         for test in missing_tests:
+             missing_test_type_str += f"Test type: {test.get(self.TEST_TYPE_STR)}, Min Count: {test.get(self.TEST_COUNT_STR)}, Actual Count: {test.get('actual_count')}\n"
+
+         failure_message = f"The source `{source_unique_id}` does not have enough tests for the following types:\n{missing_test_type_str}"
+         recommendation = (
+             "Add tests with the specified types for each source listed above. "
+             "Having tests with specific types ensures proper validation and data integrity."
+         )
+
+         return DBTInsightResult(
+             type=self.TYPE,
+             name=self.NAME,
+             message=failure_message,
+             recommendation=recommendation,
+             reason_to_flag=self.REASON_TO_FLAG,
+             metadata={"source_unique_id": source_unique_id},
+         )
+
+     def _source_has_tests_by_type(self, node_id) -> List[Dict]:
+         """
+         Check all children of the source; for every child of resource type test, classify it as a data test
+         (when its tags include "data") or a schema test, then return the configured test types whose counts
+         fall below the required minimum.
+         """
+         test_count = {}
+
+         for child_id in self.children_map.get(node_id, []):
+             child = self.get_node(child_id)
+             if child.resource_type == AltimateResourceType.test:
+                 child_tags = child.tags or []
+                 test_type = "data" if "data" in child_tags else "schema"
+                 test_count[test_type] = test_count.get(test_type, 0) + 1
+         missing_tests = []
+         for test_type in self.tests.keys():
+             if test_count.get(test_type, 0) < self.tests.get(test_type, 0):
+                 missing_tests.append(
+                     {
+                         self.TEST_TYPE_STR: test_type,
+                         self.TEST_COUNT_STR: self.tests.get(test_type, 0),
+                         "actual_count": test_count.get(test_type, 0),
+                     }
+                 )
+         return missing_tests
+
+     @classmethod
+     def get_config_schema(cls):
+         config_schema = super().get_config_schema()
+         config_schema["config"] = {
+             "$schema": "http://json-schema.org/draft-07/schema#",
+             "type": "object",
+             "properties": {
+                 cls.TESTS_LIST_STR: {
+                     "type": "array",
+                     "items": {
+                         "type": "object",
+                         "properties": {
+                             cls.TEST_TYPE_STR: {
+                                 "type": "string",
+                                 "enum": ["schema", "data"],
+                                 "description": "The type of the test",
+                             },
+                             cls.TEST_COUNT_STR: {"type": "integer", "description": "The minimum number of tests required", "default": 1},
+                         },
+                         "required": [cls.TEST_TYPE_STR, cls.TEST_COUNT_STR],
+                     },
+                     "description": "A list of test types with minimum counts required.",
+                     "default": [],
+                 },
+             },
+             "required": [cls.TESTS_LIST_STR],
+         }
+         return config_schema
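Under the same assumption about where such a block lives in the configuration file, a short sketch for the type-based check above; note that the code classifies a child test as "data" only when its tags contain "data" and treats every other test as a "schema" test.

    # Hypothetical configuration for check_source_has_tests_by_type.
    example_tests_by_type = {
        "tests": [
            {"test": "schema", "min_count": 2},  # at least two schema tests per source
            {"test": "data", "min_count": 1},    # at least one data test per source
        ]
    }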
datapilot/core/platforms/dbt/insights/checks/check_source_table_has_description.py
@@ -0,0 +1,62 @@
+ from typing import List
+
+ from datapilot.core.insights.utils import get_severity
+ from datapilot.core.platforms.dbt.insights.checks.base import ChecksInsight
+ from datapilot.core.platforms.dbt.insights.schema import DBTInsightResult
+ from datapilot.core.platforms.dbt.insights.schema import DBTModelInsightResponse
+ from datapilot.core.platforms.dbt.schemas.manifest import AltimateResourceType
+
+
+ class CheckSourceTableHasDescription(ChecksInsight):
+     NAME = "Source table has description"
+     ALIAS = "check_source_table_has_desc"
+     DESCRIPTION = "Ensures that the source table has a description"
+     REASON_TO_FLAG = "Missing description for the source table can lead to confusion and inconsistency in analysis."
+
+     def _build_failure_result(self, source_id: str) -> DBTInsightResult:
+         """
+         Build the failure result for the insight when a source table does not have a description.
+         """
+         failure_message = f"The source:{source_id} does not have a description defined.\n"
+
+         recommendation = "Define the description for the source table to ensure consistency in analysis."
+
+         return DBTInsightResult(
+             type=self.TYPE,
+             name=self.NAME,
+             message=failure_message,
+             recommendation=recommendation,
+             reason_to_flag=self.REASON_TO_FLAG,
+             metadata={"source_id": source_id},
+         )
+
+     def generate(self, *args, **kwargs) -> List[DBTModelInsightResponse]:
+         """
+         Generate the insight response for the check. This method is called by the insight runner to generate the insight
+         response for the check.
+         Ensures that the source table has a description.
+         """
+         insights = []
+         for node_id, node in self.sources.items():
+             if self.should_skip_model(node_id):
+                 self.logger.debug(f"Skipping source {node_id} as it is not enabled for selected models")
+                 continue
+             if node.resource_type == AltimateResourceType.source:
+                 if not self._check_source_table_desc(node_id):
+                     insights.append(
+                         DBTModelInsightResponse(
+                             unique_id=node_id,
+                             package_name=node.package_name,
+                             original_file_path=node.original_file_path,
+                             path=node.original_file_path,
+                             insight=self._build_failure_result(node_id),
+                             severity=get_severity(self.config, self.ALIAS, self.DEFAULT_SEVERITY),
+                         )
+                     )
+         return insights
+
+     def _check_source_table_desc(self, source_unique_id: str) -> bool:
+         source = self.get_node(source_unique_id)
+         if source.description is None:
+             return False
+         return True