altimate-datapilot-cli 0.0.8 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (139)
  1. altimate_datapilot_cli-0.0.8.dist-info/AUTHORS.rst +5 -0
  2. altimate_datapilot_cli-0.0.8.dist-info/LICENSE +9 -0
  3. altimate_datapilot_cli-0.0.8.dist-info/METADATA +102 -0
  4. altimate_datapilot_cli-0.0.8.dist-info/RECORD +139 -0
  5. altimate_datapilot_cli-0.0.8.dist-info/WHEEL +5 -0
  6. altimate_datapilot_cli-0.0.8.dist-info/entry_points.txt +4 -0
  7. altimate_datapilot_cli-0.0.8.dist-info/top_level.txt +1 -0
  8. datapilot/__init__.py +1 -0
  9. datapilot/__main__.py +14 -0
  10. datapilot/cli/__init__.py +0 -0
  11. datapilot/cli/main.py +11 -0
  12. datapilot/clients/__init__.py +0 -0
  13. datapilot/clients/altimate/__init__.py +0 -0
  14. datapilot/clients/altimate/client.py +85 -0
  15. datapilot/clients/altimate/utils.py +75 -0
  16. datapilot/config/__init__.py +0 -0
  17. datapilot/config/config.py +16 -0
  18. datapilot/config/utils.py +32 -0
  19. datapilot/core/__init__.py +0 -0
  20. datapilot/core/insights/__init__.py +2 -0
  21. datapilot/core/insights/base/__init__.py +0 -0
  22. datapilot/core/insights/base/insight.py +34 -0
  23. datapilot/core/insights/report.py +16 -0
  24. datapilot/core/insights/schema.py +24 -0
  25. datapilot/core/insights/sql/__init__.py +0 -0
  26. datapilot/core/insights/sql/base/__init__.py +0 -0
  27. datapilot/core/insights/sql/base/insight.py +18 -0
  28. datapilot/core/insights/sql/runtime/__init__.py +0 -0
  29. datapilot/core/insights/sql/static/__init__.py +0 -0
  30. datapilot/core/insights/utils.py +20 -0
  31. datapilot/core/platforms/__init__.py +0 -0
  32. datapilot/core/platforms/dbt/__init__.py +0 -0
  33. datapilot/core/platforms/dbt/cli/__init__.py +0 -0
  34. datapilot/core/platforms/dbt/cli/cli.py +112 -0
  35. datapilot/core/platforms/dbt/constants.py +34 -0
  36. datapilot/core/platforms/dbt/exceptions.py +6 -0
  37. datapilot/core/platforms/dbt/executor.py +157 -0
  38. datapilot/core/platforms/dbt/factory.py +22 -0
  39. datapilot/core/platforms/dbt/formatting.py +45 -0
  40. datapilot/core/platforms/dbt/hooks/__init__.py +0 -0
  41. datapilot/core/platforms/dbt/hooks/executor_hook.py +86 -0
  42. datapilot/core/platforms/dbt/insights/__init__.py +115 -0
  43. datapilot/core/platforms/dbt/insights/base.py +133 -0
  44. datapilot/core/platforms/dbt/insights/checks/__init__.py +0 -0
  45. datapilot/core/platforms/dbt/insights/checks/base.py +26 -0
  46. datapilot/core/platforms/dbt/insights/checks/check_column_desc_are_same.py +105 -0
  47. datapilot/core/platforms/dbt/insights/checks/check_column_name_contract.py +154 -0
  48. datapilot/core/platforms/dbt/insights/checks/check_macro_args_have_desc.py +75 -0
  49. datapilot/core/platforms/dbt/insights/checks/check_macro_has_desc.py +63 -0
  50. datapilot/core/platforms/dbt/insights/checks/check_model_has_all_columns.py +96 -0
  51. datapilot/core/platforms/dbt/insights/checks/check_model_has_labels_keys.py +112 -0
  52. datapilot/core/platforms/dbt/insights/checks/check_model_has_meta_keys.py +108 -0
  53. datapilot/core/platforms/dbt/insights/checks/check_model_has_properties_file.py +64 -0
  54. datapilot/core/platforms/dbt/insights/checks/check_model_has_tests_by_group.py +118 -0
  55. datapilot/core/platforms/dbt/insights/checks/check_model_has_tests_by_name.py +114 -0
  56. datapilot/core/platforms/dbt/insights/checks/check_model_has_tests_by_type.py +119 -0
  57. datapilot/core/platforms/dbt/insights/checks/check_model_materialization_by_childs.py +129 -0
  58. datapilot/core/platforms/dbt/insights/checks/check_model_name_contract.py +132 -0
  59. datapilot/core/platforms/dbt/insights/checks/check_model_parents_and_childs.py +135 -0
  60. datapilot/core/platforms/dbt/insights/checks/check_model_parents_database.py +109 -0
  61. datapilot/core/platforms/dbt/insights/checks/check_model_parents_schema.py +109 -0
  62. datapilot/core/platforms/dbt/insights/checks/check_model_tags.py +87 -0
  63. datapilot/core/platforms/dbt/insights/checks/check_source_childs.py +97 -0
  64. datapilot/core/platforms/dbt/insights/checks/check_source_columns_have_desc.py +96 -0
  65. datapilot/core/platforms/dbt/insights/checks/check_source_has_all_columns.py +103 -0
  66. datapilot/core/platforms/dbt/insights/checks/check_source_has_freshness.py +94 -0
  67. datapilot/core/platforms/dbt/insights/checks/check_source_has_labels_keys.py +110 -0
  68. datapilot/core/platforms/dbt/insights/checks/check_source_has_loader.py +62 -0
  69. datapilot/core/platforms/dbt/insights/checks/check_source_has_meta_keys.py +117 -0
  70. datapilot/core/platforms/dbt/insights/checks/check_source_has_tests.py +82 -0
  71. datapilot/core/platforms/dbt/insights/checks/check_source_has_tests_by_group.py +117 -0
  72. datapilot/core/platforms/dbt/insights/checks/check_source_has_tests_by_name.py +113 -0
  73. datapilot/core/platforms/dbt/insights/checks/check_source_has_tests_by_type.py +119 -0
  74. datapilot/core/platforms/dbt/insights/checks/check_source_table_has_description.py +62 -0
  75. datapilot/core/platforms/dbt/insights/checks/check_source_tags.py +76 -0
  76. datapilot/core/platforms/dbt/insights/dbt_test/__init__.py +0 -0
  77. datapilot/core/platforms/dbt/insights/dbt_test/base.py +23 -0
  78. datapilot/core/platforms/dbt/insights/dbt_test/missing_primary_key_tests.py +130 -0
  79. datapilot/core/platforms/dbt/insights/dbt_test/test_coverage.py +118 -0
  80. datapilot/core/platforms/dbt/insights/governance/__init__.py +0 -0
  81. datapilot/core/platforms/dbt/insights/governance/base.py +23 -0
  82. datapilot/core/platforms/dbt/insights/governance/documentation_on_stale_columns.py +130 -0
  83. datapilot/core/platforms/dbt/insights/governance/exposures_dependent_on_private_models.py +90 -0
  84. datapilot/core/platforms/dbt/insights/governance/public_models_without_contracts.py +89 -0
  85. datapilot/core/platforms/dbt/insights/governance/undocumented_columns.py +148 -0
  86. datapilot/core/platforms/dbt/insights/governance/undocumented_public_models.py +110 -0
  87. datapilot/core/platforms/dbt/insights/modelling/README.md +15 -0
  88. datapilot/core/platforms/dbt/insights/modelling/__init__.py +0 -0
  89. datapilot/core/platforms/dbt/insights/modelling/base.py +31 -0
  90. datapilot/core/platforms/dbt/insights/modelling/direct_join_to_source.py +125 -0
  91. datapilot/core/platforms/dbt/insights/modelling/downstream_models_dependent_on_source.py +113 -0
  92. datapilot/core/platforms/dbt/insights/modelling/duplicate_sources.py +85 -0
  93. datapilot/core/platforms/dbt/insights/modelling/hard_coded_references.py +80 -0
  94. datapilot/core/platforms/dbt/insights/modelling/joining_of_upstream_concepts.py +79 -0
  95. datapilot/core/platforms/dbt/insights/modelling/model_fanout.py +126 -0
  96. datapilot/core/platforms/dbt/insights/modelling/multiple_sources_joined.py +83 -0
  97. datapilot/core/platforms/dbt/insights/modelling/root_model.py +82 -0
  98. datapilot/core/platforms/dbt/insights/modelling/source_fanout.py +102 -0
  99. datapilot/core/platforms/dbt/insights/modelling/staging_model_dependent_on_downstream_models.py +103 -0
  100. datapilot/core/platforms/dbt/insights/modelling/staging_model_dependent_on_staging_models.py +89 -0
  101. datapilot/core/platforms/dbt/insights/modelling/unused_sources.py +59 -0
  102. datapilot/core/platforms/dbt/insights/performance/__init__.py +0 -0
  103. datapilot/core/platforms/dbt/insights/performance/base.py +26 -0
  104. datapilot/core/platforms/dbt/insights/performance/chain_view_linking.py +92 -0
  105. datapilot/core/platforms/dbt/insights/performance/exposure_parent_materializations.py +104 -0
  106. datapilot/core/platforms/dbt/insights/schema.py +72 -0
  107. datapilot/core/platforms/dbt/insights/structure/__init__.py +0 -0
  108. datapilot/core/platforms/dbt/insights/structure/base.py +33 -0
  109. datapilot/core/platforms/dbt/insights/structure/model_directories_structure.py +92 -0
  110. datapilot/core/platforms/dbt/insights/structure/model_naming_conventions.py +97 -0
  111. datapilot/core/platforms/dbt/insights/structure/source_directories_structure.py +80 -0
  112. datapilot/core/platforms/dbt/insights/structure/test_directory_structure.py +74 -0
  113. datapilot/core/platforms/dbt/insights/utils.py +9 -0
  114. datapilot/core/platforms/dbt/schemas/__init__.py +0 -0
  115. datapilot/core/platforms/dbt/schemas/catalog.py +73 -0
  116. datapilot/core/platforms/dbt/schemas/manifest.py +462 -0
  117. datapilot/core/platforms/dbt/utils.py +525 -0
  118. datapilot/core/platforms/dbt/wrappers/__init__.py +0 -0
  119. datapilot/core/platforms/dbt/wrappers/catalog/__init__.py +0 -0
  120. datapilot/core/platforms/dbt/wrappers/catalog/v1/__init__.py +0 -0
  121. datapilot/core/platforms/dbt/wrappers/catalog/v1/wrapper.py +18 -0
  122. datapilot/core/platforms/dbt/wrappers/catalog/wrapper.py +9 -0
  123. datapilot/core/platforms/dbt/wrappers/manifest/__init__.py +0 -0
  124. datapilot/core/platforms/dbt/wrappers/manifest/v11/__init__.py +0 -0
  125. datapilot/core/platforms/dbt/wrappers/manifest/v11/schemas.py +47 -0
  126. datapilot/core/platforms/dbt/wrappers/manifest/v11/wrapper.py +396 -0
  127. datapilot/core/platforms/dbt/wrappers/manifest/wrapper.py +35 -0
  128. datapilot/core/platforms/dbt/wrappers/run_results/__init__.py +0 -0
  129. datapilot/core/platforms/dbt/wrappers/run_results/run_results.py +39 -0
  130. datapilot/exceptions/__init__.py +0 -0
  131. datapilot/exceptions/exceptions.py +10 -0
  132. datapilot/schemas/__init__.py +0 -0
  133. datapilot/schemas/constants.py +5 -0
  134. datapilot/schemas/nodes.py +19 -0
  135. datapilot/schemas/sql.py +10 -0
  136. datapilot/utils/__init__.py +0 -0
  137. datapilot/utils/formatting/__init__.py +0 -0
  138. datapilot/utils/formatting/utils.py +59 -0
  139. datapilot/utils/utils.py +317 -0
datapilot/core/platforms/dbt/insights/checks/check_model_has_labels_keys.py
@@ -0,0 +1,112 @@
+from typing import List
+from typing import Sequence
+from typing import Set
+from typing import Tuple
+
+from datapilot.core.insights.utils import get_severity
+from datapilot.core.platforms.dbt.insights.checks.base import ChecksInsight
+from datapilot.core.platforms.dbt.insights.schema import DBTInsightResult
+from datapilot.core.platforms.dbt.insights.schema import DBTModelInsightResponse
+from datapilot.core.platforms.dbt.schemas.manifest import AltimateResourceType
+from datapilot.utils.formatting.utils import numbered_list
+
+
+class CheckModelHasLabelsKeys(ChecksInsight):
+    NAME = "Model has labels keys"
+    ALIAS = "check_model_has_labels_keys"
+    DESCRIPTION = "Models should have all the labels keys as per the configuration."
+    REASON_TO_FLAG = (
+        "Missing labels keys in the model can lead to inconsistency in metadata management and understanding of the model. "
+        "It's important to ensure that the model includes all the required labels keys as per the configuration."
+    )
+    LABEL_KEYS_STR = "labels_keys"
+    ALLOW_EXTRA_KEYS_STR = "allow_extra_keys"
+
+    def generate(self, *args, **kwargs) -> List[DBTModelInsightResponse]:
+        self.labels_keys = self.get_check_config(self.LABEL_KEYS_STR)
+        self.allow_extra_keys = self.get_check_config(self.ALLOW_EXTRA_KEYS_STR)
+
+        insights = []
+        for node_id, node in self.nodes.items():
+            if self.should_skip_model(node_id):
+                self.logger.debug(f"Skipping model {node_id} as it is not enabled for selected models")
+                continue
+            if node.resource_type == AltimateResourceType.model:
+                status_code, missing_labels, extra_labels = self._check_labels_keys(node_id)
+                if status_code == 1:
+                    insights.append(
+                        DBTModelInsightResponse(
+                            unique_id=node_id,
+                            package_name=node.package_name,
+                            path=node.original_file_path,
+                            original_file_path=node.original_file_path,
+                            insight=self._build_failure_result(node_id, missing_labels, extra_labels),
+                            severity=get_severity(self.config, self.ALIAS, self.DEFAULT_SEVERITY),
+                        )
+                    )
+        return insights
+
+    def _build_failure_result(self, model_unique_id: str, missing_keys: Sequence[str], extra_labels: Sequence[str]) -> DBTInsightResult:
+        failure_message = (
+            f"The following labels keys are missing in the model `{model_unique_id}`:\n{numbered_list(missing_keys)}. "
+            "Ensure that the model includes all the required labels keys. "
+        )
+        if not self.allow_extra_keys:
+            failure_message += (
+                f"The following extra labels keys are present in the model `{model_unique_id}`:\n{numbered_list(extra_labels)}. "
+                "Ensure that the model does not include any extra labels keys."
+            )
+        recommendation = (
+            f"Add the missing labels keys listed above in the model `{model_unique_id}`. "
+            "Ensuring that the model has all the required labels keys helps in maintaining metadata consistency and understanding."
+        )
+
+        return DBTInsightResult(
+            type=self.TYPE,
+            name=self.NAME,
+            message=failure_message,
+            recommendation=recommendation,
+            reason_to_flag=self.REASON_TO_FLAG,
+            metadata={"missing_keys": missing_keys, "model_unique_id": model_unique_id, "extra_keys": extra_labels},
+        )
+
+    def _check_labels_keys(self, node_id) -> Tuple[int, Set[str], Set[str]]:
+        status_code = 0
+        config = self.get_node(node_id).config.dict() if self.get_node(node_id).config else {}
+        labels = config.get("labels", {})
+        label_keys = set(labels.keys())
+        missing_keys = set(self.labels_keys) - label_keys
+        extra_keys = set()
+        if missing_keys:
+            status_code = 1
+        if not self.allow_extra_keys:
+            extra_keys = label_keys - set(self.labels_keys)
+            if extra_keys:
+                status_code = 1
+        return status_code, missing_keys, extra_keys
+
+    @classmethod
+    def get_config_schema(cls):
+        config_schema = super().get_config_schema()
+        config_schema["config"] = {
+            "$schema": "http://json-schema.org/draft-07/schema#",
+            "type": "object",
+            "properties": {
+                cls.LABEL_KEYS_STR: {
+                    "type": "array",
+                    "items": {
+                        "type": "string",
+                    },
+                    "description": "A list of labels keys that should be present in the model.",
+                },
+                cls.ALLOW_EXTRA_KEYS_STR: {
+                    "type": "boolean",
+                    "default": False,
+                },
+            },
+            "required": [cls.LABEL_KEYS_STR],
+        }
+        return config_schema
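
For reference, the key comparison in _check_labels_keys above is plain set arithmetic against the node's config.labels dict. A minimal standalone sketch of that logic, using hypothetical key names that are not part of the package:

# Hypothetical required keys (the check's labels_keys config) and node labels.
required_keys = {"team", "domain"}
node_labels = {"team": "finance", "env": "prod"}   # stand-in for node.config["labels"]

missing_keys = required_keys - set(node_labels)    # {"domain"} -> flagged
extra_keys = set(node_labels) - required_keys      # {"env"} -> flagged when allow_extra_keys is False
status_code = 1 if missing_keys or extra_keys else 0
print(status_code, missing_keys, extra_keys)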
datapilot/core/platforms/dbt/insights/checks/check_model_has_meta_keys.py
@@ -0,0 +1,108 @@
+from typing import List
+from typing import Sequence
+from typing import Set
+from typing import Tuple
+
+from datapilot.core.insights.utils import get_severity
+from datapilot.core.platforms.dbt.insights.checks.base import ChecksInsight
+from datapilot.core.platforms.dbt.insights.schema import DBTInsightResult
+from datapilot.core.platforms.dbt.insights.schema import DBTModelInsightResponse
+from datapilot.core.platforms.dbt.schemas.manifest import AltimateResourceType
+from datapilot.utils.formatting.utils import numbered_list
+
+
+class CheckModelHasMetaKeys(ChecksInsight):
+    NAME = "Model has all valid keys in metadata"
+    ALIAS = "check_model_has_valid_meta_keys"
+    DESCRIPTION = "Models should have all the required meta keys as per the configuration."
+    REASON_TO_FLAG = (
+        "Missing meta keys in the model can lead to inconsistency in metadata management and understanding of the model. "
+        "It's important to ensure that the model includes all the required meta keys as per the configuration."
+    )
+    META_KEYS_STR = "meta_keys"
+    ALLOW_EXTRA_KEYS_STR = "allow_extra_keys"
+
+    def generate(self, *args, **kwargs) -> List[DBTModelInsightResponse]:
+        self.meta_keys = self.get_check_config(self.META_KEYS_STR)
+        self.allow_extra_keys = self.get_check_config(self.ALLOW_EXTRA_KEYS_STR)
+        insights = []
+        for node_id, node in self.nodes.items():
+            if self.should_skip_model(node_id):
+                self.logger.debug(f"Skipping model {node_id} as it is not enabled for selected models")
+                continue
+            if node.resource_type == AltimateResourceType.model:
+                status_code, missing_keys, extra_keys = self._check_meta_keys(node_id)
+                if status_code == 1:
+                    insights.append(
+                        DBTModelInsightResponse(
+                            unique_id=node_id,
+                            package_name=node.package_name,
+                            path=node.original_file_path,
+                            original_file_path=node.original_file_path,
+                            insight=self._build_failure_result(node_id, missing_keys, extra_keys),
+                            severity=get_severity(self.config, self.ALIAS, self.DEFAULT_SEVERITY),
+                        )
+                    )
+        return insights
+
+    def _build_failure_result(self, model_unique_id: str, missing_keys: Sequence[str], extra_keys: Set[str]) -> DBTInsightResult:
+        failure_message = (
+            f"The following meta keys are missing in the model `{model_unique_id}`:\n{numbered_list(missing_keys)}. "
+            "Ensure that the model includes all the required meta keys. "
+        )
+        if not self.allow_extra_keys:
+            failure_message += (
+                f"The following extra meta keys are present in the model `{model_unique_id}`:\n{numbered_list(extra_keys)}. "
+                "Ensure that the model does not include any extra meta keys."
+            )
+        recommendation = (
+            f"Add the missing meta keys listed above in the model `{model_unique_id}`. "
+            "Ensuring that the model has all the required meta keys helps in maintaining metadata consistency and understanding."
+        )
+
+        return DBTInsightResult(
+            type=self.TYPE,
+            name=self.NAME,
+            message=failure_message,
+            recommendation=recommendation,
+            reason_to_flag=self.REASON_TO_FLAG,
+            metadata={"missing_keys": missing_keys, "model_unique_id": model_unique_id, "extra_keys": extra_keys},
+        )
+
+    def _check_meta_keys(self, node_id) -> Tuple[int, Set[str], Set[str]]:
+        status_code = 0
+        model = self.get_node(node_id)
+        meta = model.meta.dict() if model.meta else {}
+        model_meta_keys = set(meta.keys())
+        missing_keys = set(self.meta_keys) - model_meta_keys
+        extra_keys = set()
+        if missing_keys:
+            status_code = 1
+        if not self.allow_extra_keys:
+            extra_keys = model_meta_keys - set(self.meta_keys)
+            if extra_keys:
+                status_code = 1
+        return status_code, missing_keys, extra_keys
+
+    @classmethod
+    def get_config_schema(cls):
+        config_schema = super().get_config_schema()
+        config_schema["config"] = {
+            "$schema": "http://json-schema.org/draft-07/schema#",
+            "type": "object",
+            "properties": {
+                cls.META_KEYS_STR: {
+                    "type": "array",
+                    "items": {
+                        "type": "string",
+                    },
+                    "description": "A list of metadata keys that should be present in the model properties files.",
+                },
+                cls.ALLOW_EXTRA_KEYS_STR: {
+                    "type": "boolean",
+                    "default": False,
+                },
+            },
+            "required": [cls.META_KEYS_STR],
+        }
+        return config_schema
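
The "config" fragment returned by get_config_schema above is standard JSON Schema (draft-07). How datapilot loads and enforces per-check configuration is not shown in this diff; purely as an illustration, the fragment can be exercised against a hypothetical config with the third-party jsonschema package:

# Illustrative only: validate a made-up per-check config against the
# "config" fragment defined in get_config_schema above, using jsonschema.
from jsonschema import validate

config_fragment = {
    "$schema": "http://json-schema.org/draft-07/schema#",
    "type": "object",
    "properties": {
        "meta_keys": {"type": "array", "items": {"type": "string"}},
        "allow_extra_keys": {"type": "boolean", "default": False},
    },
    "required": ["meta_keys"],
}

sample_config = {"meta_keys": ["owner", "pii"], "allow_extra_keys": False}  # hypothetical keys
validate(instance=sample_config, schema=config_fragment)  # raises ValidationError if the config is invalid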
datapilot/core/platforms/dbt/insights/checks/check_model_has_properties_file.py
@@ -0,0 +1,64 @@
+from typing import List
+
+from datapilot.core.insights.utils import get_severity
+from datapilot.core.platforms.dbt.insights.checks.base import ChecksInsight
+from datapilot.core.platforms.dbt.insights.schema import DBTInsightResult
+from datapilot.core.platforms.dbt.insights.schema import DBTModelInsightResponse
+from datapilot.core.platforms.dbt.schemas.manifest import AltimateResourceType
+
+
+class CheckModelHasPropertiesFile(ChecksInsight):
+    NAME = "Model has properties file"
+    ALIAS = "check_model_has_properties_file"
+    DESCRIPTION = "Models should have a properties/schema file (.yml) defined."
+    REASON_TO_FLAG = (
+        "Missing properties file for a model can lead to inadequate configuration and documentation, "
+        "resulting in potential issues in data processing and understanding."
+    )
+
+    def generate(self, *args, **kwargs) -> List[DBTModelInsightResponse]:
+        insights = []
+        for node_id, node in self.nodes.items():
+            if self.should_skip_model(node_id):
+                self.logger.debug(f"Skipping model {node_id} as it is not enabled for selected models")
+                continue
+            if node.resource_type == AltimateResourceType.model:
+                status_code = self._check_properties_file(node_id)
+                if status_code == 1:
+                    insights.append(
+                        DBTModelInsightResponse(
+                            unique_id=node_id,
+                            package_name=node.package_name,
+                            path=node.original_file_path,
+                            original_file_path=node.original_file_path,
+                            insight=self._build_failure_result(node_id),
+                            severity=get_severity(self.config, self.ALIAS, self.DEFAULT_SEVERITY),
+                        )
+                    )
+        return insights
+
+    def _build_failure_result(self, model_unique_id: str) -> DBTInsightResult:
+        failure_message = (
+            f"The model {model_unique_id} does not have a properties file (.yml) defined. "
+            "Ensure that each model has a corresponding .yml file for additional configuration and documentation."
+        )
+        recommendation = (
+            "Add a properties file (.yml) for each model listed above. "
+            "Having a properties file helps in providing additional configuration and documentation for the model."
+        )
+
+        return DBTInsightResult(
+            type=self.TYPE,
+            name=self.NAME,
+            message=failure_message,
+            recommendation=recommendation,
+            reason_to_flag=self.REASON_TO_FLAG,
+            metadata={"model_unique_id": model_unique_id},
+        )
+
+    def _check_properties_file(self, node_id) -> int:
+        status_code = 0
+        node = self.get_node(node_id)
+        if node.resource_type == AltimateResourceType.model and not node.patch_path:
+            status_code = 1
+        return status_code
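
The properties-file check hinges on the manifest's patch_path, which dbt sets when a model is described in a .yml properties file. A standalone sketch of the same decision, with SimpleNamespace standing in for the package's manifest node wrapper and hypothetical values throughout:

# Stand-in nodes: dbt records a patch_path such as "my_project://models/schema.yml"
# when a model has a properties file; it stays empty otherwise.
from types import SimpleNamespace

def needs_properties_file(node) -> int:
    # Mirrors _check_properties_file: flag models without a patch_path.
    return 1 if node.resource_type == "model" and not node.patch_path else 0

documented = SimpleNamespace(resource_type="model", patch_path="my_project://models/schema.yml")
undocumented = SimpleNamespace(resource_type="model", patch_path=None)
print(needs_properties_file(documented), needs_properties_file(undocumented))  # 0 1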
datapilot/core/platforms/dbt/insights/checks/check_model_has_tests_by_group.py
@@ -0,0 +1,118 @@
+from typing import Dict
+from typing import List
+
+from datapilot.core.insights.utils import get_severity
+from datapilot.core.platforms.dbt.insights.checks.base import ChecksInsight
+from datapilot.core.platforms.dbt.insights.schema import DBTInsightResult
+from datapilot.core.platforms.dbt.insights.schema import DBTModelInsightResponse
+from datapilot.core.platforms.dbt.schemas.manifest import AltimateResourceType
+
+
+class CheckModelHasTestsByGroup(ChecksInsight):
+    NAME = "Model has tests by group"
+    ALIAS = "check_model_has_tests_by_group"
+    DESCRIPTION = "Check if models have a minimum number of tests for specific test groups."
+    REASON_TO_FLAG = "Models should have tests with specific groups for proper validation."
+    TESTS_LIST_STR = "tests"
+    TEST_GROUP_STR = "test_group"
+    TEST_COUNT_STR = "min_count"
+
+    def generate(self, *args, **kwargs) -> List[DBTModelInsightResponse]:
+        self.test_list = self.get_check_config(self.TESTS_LIST_STR) or []
+        self.test_groups = {
+            tuple(test.get(self.TEST_GROUP_STR, [])): test.get(self.TEST_COUNT_STR, 0)
+            for test in self.test_list
+            if test.get(self.TEST_GROUP_STR)
+        }
+        if not self.test_groups:
+            self.logger.warning(f"No test groups found in the configuration for {self.ALIAS}. Skipping the insight.")
+            return []
+        insights = []
+        for node_id, node in self.nodes.items():
+            if self.should_skip_model(node_id):
+                self.logger.debug(f"Skipping model {node_id} as it is not enabled for selected models")
+                continue
+            if node.resource_type == AltimateResourceType.model:
+                missing_test_groups = self._model_has_tests_by_group(node_id)
+                if missing_test_groups:
+                    insights.append(
+                        DBTModelInsightResponse(
+                            unique_id=node_id,
+                            package_name=node.package_name,
+                            path=node.original_file_path,
+                            original_file_path=node.original_file_path,
+                            insight=self._build_failure_result(node_id, missing_test_groups),
+                            severity=get_severity(self.config, self.ALIAS, self.DEFAULT_SEVERITY),
+                        )
+                    )
+        return insights
+
+    def _build_failure_result(self, model_unique_id: str, missing_test_groups: List[Dict]) -> DBTInsightResult:
+        missing_test_group_str = ""
+        for test in missing_test_groups:
+            missing_test_group_str += (
+                f"Test Group: {test.get(self.TEST_GROUP_STR)}, Min Count: {test.get(self.TEST_COUNT_STR)}, "
+                f"Actual Count: {test.get('actual_count')}\n"
+            )
+
+        failure_message = f"The model `{model_unique_id}` does not have enough tests for the following groups:\n{missing_test_group_str}"
+        recommendation = (
+            "Add tests with the specified groups for each model listed above. "
+            "Having tests with specific groups ensures proper validation and data integrity."
+        )
+
+        return DBTInsightResult(
+            type=self.TYPE,
+            name=self.NAME,
+            message=failure_message,
+            recommendation=recommendation,
+            reason_to_flag=self.REASON_TO_FLAG,
+            metadata={"model_unique_id": model_unique_id, "missing_test_groups": missing_test_groups},
+        )
+
+    def _model_has_tests_by_group(self, node_id) -> List[Dict]:
+        """
+        Count the test children of the model that belong to each configured test group
+        and return the groups whose minimum count is not met.
+        """
+        test_group_count = {}
+        for child_id in self.children_map.get(node_id, []):
+            child = self.get_node(child_id)
+            if child.resource_type == AltimateResourceType.test:
+                for group in self.test_groups:
+                    if child.name in group:
+                        test_group_count[group] = test_group_count.get(group, 0) + 1
+        missing_test_groups = []
+        for group, count in self.test_groups.items():
+            if test_group_count.get(group, 0) < count:
+                missing_test_groups.append(
+                    {self.TEST_GROUP_STR: group, self.TEST_COUNT_STR: count, "actual_count": test_group_count.get(group, 0)}
+                )
+
+        return missing_test_groups
+
+    @classmethod
+    def get_config_schema(cls):
+        config_schema = super().get_config_schema()
+        config_schema["config"] = {
+            "$schema": "http://json-schema.org/draft-07/schema#",
+            "type": "object",
+            "properties": {
+                cls.TESTS_LIST_STR: {
+                    "type": "array",
+                    "items": {
+                        "type": "object",
+                        "properties": {
+                            cls.TEST_GROUP_STR: {
+                                "type": "array",
+                                "items": {"type": "string"},
+                                "description": "List of test names that make up the group. A test matching any name in the group counts toward it.",
+                            },
+                            cls.TEST_COUNT_STR: {"type": "integer", "description": "The minimum number of tests required", "default": 1},
+                        },
+                        "required": [cls.TEST_GROUP_STR, cls.TEST_COUNT_STR],
+                    },
+                    "description": "A list of test groups with the minimum number of tests required for each.",
+                    "default": [],
+                },
+            },
+            "required": [cls.TESTS_LIST_STR],
+        }
+        return config_schema
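
The group tally in _model_has_tests_by_group keys its counts by the configured group tuple: a child test counts toward every group that contains its name, and any group below its minimum is reported. A standalone sketch with a hypothetical configuration and test names:

# Hypothetical configuration (group tuple -> min_count) and test children of one model.
configured_groups = {("unique", "not_null"): 2, ("relationships",): 1}
child_test_names = ["not_null", "accepted_values"]

counts = {group: 0 for group in configured_groups}
for name in child_test_names:
    for group in configured_groups:
        if name in group:
            counts[group] += 1

missing = [
    {"test_group": group, "min_count": need, "actual_count": counts[group]}
    for group, need in configured_groups.items()
    if counts[group] < need
]
print(missing)  # both groups fall short in this example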
datapilot/core/platforms/dbt/insights/checks/check_model_has_tests_by_name.py
@@ -0,0 +1,114 @@
+from typing import Dict
+from typing import List
+from typing import Optional
+from typing import Tuple
+
+from datapilot.core.insights.utils import get_severity
+from datapilot.core.platforms.dbt.insights.checks.base import ChecksInsight
+from datapilot.core.platforms.dbt.insights.schema import DBTInsightResult
+from datapilot.core.platforms.dbt.insights.schema import DBTModelInsightResponse
+from datapilot.core.platforms.dbt.schemas.manifest import AltimateResourceType
+
+
+class CheckModelHasTestsByName(ChecksInsight):
+    NAME = "Model has tests by name"
+    ALIAS = "check_model_has_tests_by_name"
+    DESCRIPTION = "Checks that the model has tests with specific names."
+    REASON_TO_FLAG = "Models should have tests with specific names for proper validation."
+    TESTS_LIST_STR = "tests"
+    TEST_NAME_STR = "test"
+    TEST_COUNT_STR = "min_count"
+
+    def generate(self, *args, **kwargs) -> List[DBTModelInsightResponse]:
+        self.test_list = self.get_check_config(self.TESTS_LIST_STR) or []
+        self.tests = {
+            test.get(self.TEST_NAME_STR): test.get(self.TEST_COUNT_STR, 0) for test in self.test_list if test.get(self.TEST_NAME_STR)
+        }
+        if not self.tests:
+            self.logger.warning(f"No tests found in the configuration for {self.ALIAS}. Skipping the insight.")
+            return []
+        insights = []
+        for node_id, node in self.nodes.items():
+            if self.should_skip_model(node_id):
+                self.logger.debug(f"Skipping model {node_id} as it is not enabled for selected models")
+                continue
+            if node.resource_type == AltimateResourceType.model:
+                status, missing_tests = self._model_has_tests_by_name(node_id)
+                if not status:
+                    insights.append(
+                        DBTModelInsightResponse(
+                            unique_id=node_id,
+                            package_name=node.package_name,
+                            path=node.original_file_path,
+                            original_file_path=node.original_file_path,
+                            insight=self._build_failure_result(node_id, missing_tests),
+                            severity=get_severity(self.config, self.ALIAS, self.DEFAULT_SEVERITY),
+                        )
+                    )
+        return insights
+
+    def _build_failure_result(self, model_unique_id: str, missing_tests: List[Dict]) -> DBTInsightResult:
+        tests_str = ""
+        for test in missing_tests:
+            tests_str += (
+                f"Test Name: {test.get('test_name')}, Min Count: {test.get('min_count')}, "
+                f"Actual Count: {test.get('actual_count')}\n"
+            )
+
+        failure_message = f"The model `{model_unique_id}` does not have enough tests:\n{tests_str}"
+        recommendation = (
+            "Add tests with the specified names for each model listed above. "
+            "Having tests with specific names ensures proper validation and data integrity."
+        )
+
+        return DBTInsightResult(
+            type=self.TYPE,
+            name=self.NAME,
+            message=failure_message,
+            recommendation=recommendation,
+            reason_to_flag=self.REASON_TO_FLAG,
+            metadata={"model_unique_id": model_unique_id},
+        )
+
+    def _model_has_tests_by_name(self, node_id) -> Tuple[bool, Optional[List[Dict]]]:
+        """
+        Count the test children of the model by test name and compare the counts
+        against the configured minimums. Returns (True, None) if every configured
+        test name meets its minimum count, otherwise (False, missing_tests).
+        """
+        test_count = {}
+
+        for child_id in self.children_map.get(node_id, []):
+            child = self.get_node(child_id)
+            if child.resource_type == AltimateResourceType.test:
+                test_name = child.name
+                test_count[test_name] = test_count.get(test_name, 0) + 1
+
+        missing_tests = []
+        for test_name, min_count in self.tests.items():
+            if test_count.get(test_name, 0) < min_count:
+                missing_tests.append({"test_name": test_name, "min_count": min_count, "actual_count": test_count.get(test_name, 0)})
+
+        if missing_tests:
+            return False, missing_tests
+
+        return True, None
+
+    @classmethod
+    def get_config_schema(cls):
+        config_schema = super().get_config_schema()
+        config_schema["config"] = {
+            "$schema": "http://json-schema.org/draft-07/schema#",
+            "type": "object",
+            "properties": {
+                cls.TESTS_LIST_STR: {
+                    "type": "array",
+                    "items": {
+                        "type": "object",
+                        "properties": {
+                            cls.TEST_NAME_STR: {"type": "string", "description": "The name of the test"},
+                            cls.TEST_COUNT_STR: {"type": "integer", "description": "The minimum number of tests required", "default": 1},
+                        },
+                        "required": [cls.TEST_NAME_STR, cls.TEST_COUNT_STR],
+                    },
+                    "description": "A list of tests with names and minimum counts required.",
+                    "default": [],
+                },
+            },
+            "required": [cls.TESTS_LIST_STR],
+        }
+        return config_schema
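
The per-name tally in _model_has_tests_by_name can be reproduced with collections.Counter; the configured minimums and child test names below are hypothetical:

# Counter-based equivalent of the per-name tally; all values are made up.
from collections import Counter

required = {"unique": 1, "not_null": 2}                  # test name -> min_count from the config
test_count = Counter(["unique", "unique", "not_null"])   # names of a model's test children

missing_tests = [
    {"test_name": name, "min_count": need, "actual_count": test_count[name]}
    for name, need in required.items()
    if test_count[name] < need
]
status = (False, missing_tests) if missing_tests else (True, None)
print(status)  # not_null appears once but needs 2, so this model is flagged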
datapilot/core/platforms/dbt/insights/checks/check_model_has_tests_by_type.py
@@ -0,0 +1,119 @@
+from typing import Dict
+from typing import List
+
+from datapilot.core.insights.utils import get_severity
+from datapilot.core.platforms.dbt.insights.checks.base import ChecksInsight
+from datapilot.core.platforms.dbt.insights.schema import DBTInsightResult
+from datapilot.core.platforms.dbt.insights.schema import DBTModelInsightResponse
+from datapilot.core.platforms.dbt.schemas.manifest import AltimateResourceType
+
+
+class CheckModelHasTestsByType(ChecksInsight):
+    NAME = "Model has tests by type"
+    ALIAS = "check_model_has_tests_by_type"
+    DESCRIPTION = "Checks that the model has tests with specific types."
+    REASON_TO_FLAG = "Models should have tests with specific types for proper validation."
+    TESTS_LIST_STR = "tests"
+    TEST_TYPE_STR = "test"
+    TEST_COUNT_STR = "min_count"
+
+    def generate(self, *args, **kwargs) -> List[DBTModelInsightResponse]:
+        self.test_list = self.get_check_config(self.TESTS_LIST_STR) or []
+        self.tests = {
+            test.get(self.TEST_TYPE_STR): test.get(self.TEST_COUNT_STR, 0) for test in self.test_list if test.get(self.TEST_TYPE_STR)
+        }
+        if not self.tests:
+            self.logger.warning(f"No tests found in the configuration for {self.ALIAS}. Skipping the insight.")
+            return []
+
+        insights = []
+        for node_id, node in self.nodes.items():
+            if self.should_skip_model(node_id):
+                self.logger.debug(f"Skipping model {node_id} as it is not enabled for selected models")
+                continue
+            if node.resource_type == AltimateResourceType.model:
+                missing_tests = self._model_has_tests_by_type(node_id)
+                if missing_tests:
+                    insights.append(
+                        DBTModelInsightResponse(
+                            unique_id=node_id,
+                            package_name=node.package_name,
+                            path=node.original_file_path,
+                            original_file_path=node.original_file_path,
+                            insight=self._build_failure_result(node_id, missing_tests),
+                            severity=get_severity(self.config, self.ALIAS, self.DEFAULT_SEVERITY),
+                        )
+                    )
+        return insights
+
+    def _build_failure_result(self, model_unique_id: str, missing_tests: List[Dict]) -> DBTInsightResult:
+        missing_test_type_str = ""
+        for test in missing_tests:
+            missing_test_type_str += (
+                f"Test type: {test.get(self.TEST_TYPE_STR)}, Min Count: {test.get(self.TEST_COUNT_STR)}, "
+                f"Actual Count: {test.get('actual_count')}\n"
+            )
+
+        failure_message = f"The model `{model_unique_id}` does not have enough tests for the following types:\n{missing_test_type_str}"
+        recommendation = (
+            "Add tests with the specified types for each model listed above. "
+            "Having tests with specific types ensures proper validation and data integrity."
+        )
+
+        return DBTInsightResult(
+            type=self.TYPE,
+            name=self.NAME,
+            message=failure_message,
+            recommendation=recommendation,
+            reason_to_flag=self.REASON_TO_FLAG,
+            metadata={"model_unique_id": model_unique_id, "missing_tests": missing_tests},
+        )
+
+    def _model_has_tests_by_type(self, node_id) -> List[Dict]:
+        """
+        Count the test children of the model by test type and return the types
+        whose configured minimum count is not met.
+        """
+        test_count = {}
+        for child_id in self.children_map.get(node_id, []):
+            child = self.get_node(child_id)
+            if child.resource_type == AltimateResourceType.test:
+                self.logger.info(f"child_id: {child_id}, child: {type(child)}")
+                test_count[child.test_type] = test_count.get(child.test_type, 0) + 1
+        missing_tests = []
+        for test_type in self.tests.keys():
+            if test_count.get(test_type, 0) < self.tests.get(test_type, 0):
+                missing_tests.append(
+                    {
+                        self.TEST_TYPE_STR: test_type,
+                        self.TEST_COUNT_STR: self.tests.get(test_type, 0),
+                        "actual_count": test_count.get(test_type, 0),
+                    }
+                )
+        return missing_tests
+
+    @classmethod
+    def get_config_schema(cls):
+        config_schema = super().get_config_schema()
+        config_schema["config"] = {
+            "$schema": "http://json-schema.org/draft-07/schema#",
+            "type": "object",
+            "properties": {
+                cls.TESTS_LIST_STR: {
+                    "type": "array",
+                    "items": {
+                        "type": "object",
+                        "properties": {
+                            cls.TEST_TYPE_STR: {
+                                "type": "string",
+                                "enum": ["singular", "data"],
+                                "description": "The type of the test",
+                            },
+                            cls.TEST_COUNT_STR: {"type": "integer", "description": "The minimum number of tests required", "default": 1},
+                        },
+                        "required": [cls.TEST_TYPE_STR, cls.TEST_COUNT_STR],
+                    },
+                    "description": "A list of test types with the minimum number of tests required for each.",
+                    "default": [],
+                },
+            },
+            "required": [cls.TESTS_LIST_STR],
+        }
+        return config_schema