PyPI - altimate-datapilot-cli - Versions diffs - 0.0.8__py3-none-any.whl - Mend

altimate-datapilot-cli 0.0.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (139) hide show

datapilot/core/platforms/dbt/insights/structure/model_directories_structure.py ADDED Viewed

@@ -0,0 +1,92 @@
+from typing import List
+from typing import Optional
+from datapilot.config.utils import get_regex_configuration
+from datapilot.core.insights.utils import get_severity
+from datapilot.core.platforms.dbt.constants import OTHER
+from datapilot.core.platforms.dbt.insights.schema import DBTInsightResult
+from datapilot.core.platforms.dbt.insights.schema import DBTModelInsightResponse
+from datapilot.core.platforms.dbt.insights.structure.base import DBTStructureInsight
+from datapilot.core.platforms.dbt.schemas.manifest import AltimateResourceType
+from datapilot.core.platforms.dbt.utils import _check_model_folder_convention
+from datapilot.core.platforms.dbt.utils import classify_model_type
+class DBTModelDirectoryStructure(DBTStructureInsight):
+    """
+    Structure insight that reports models whose file location does not satisfy
+    the folder convention check for the model type they were classified as.
+    """
+    NAME = "Bad model directory structure"
+    ALIAS = "model_directory_structure"
+    DESCRIPTION = "This rule identifies models that are not placed in their correct directories. "
+    REASON_TO_FLAG = (
+        "Placing models in the correct directories is vital for maintaining a structured and "
+        "efficient data warehouse. Incorrectly placed models can lead to confusion, hinder "
+        "discoverability, and complicate maintenance and scaling of the dbt project."
+    )
+    FAILURE_MESSAGE = (
+        "Incorrect Directory Placement Detected: The model `{model_unique_id}` is incorrectly "
+        "placed in the current directory. As a `{model_type}` model, it should be located in "
+        "the `{convention}` directory."
+    )
+    RECOMMENDATION = (
+        "To resolve this issue, please move the model `{model_unique_id}` to the `{convention}` "
+        "directory. This change will align the model's location with the established directory "
+        "structure, improving organization and ease of access in your dbt project."
+    )
+    def _build_failure_result(self, model_unique_id: str, model_type: str, convention: Optional[str]) -> DBTInsightResult:
+        """
+        Build the insight result for a single misplaced model.
+        The three arguments are interpolated into FAILURE_MESSAGE, the id and
+        convention into RECOMMENDATION, and all three are echoed back in the
+        result metadata under the keys "model", "model_type" and "convention".
+        """
+        details = {
+            "model": model_unique_id,
+            "model_type": model_type,
+            "convention": convention,
+        }
+        message = self.FAILURE_MESSAGE.format(
+            model_unique_id=model_unique_id,
+            model_type=model_type,
+            convention=convention,
+        )
+        recommendation = self.RECOMMENDATION.format(model_unique_id=model_unique_id, convention=convention)
+        return DBTInsightResult(
+            name=self.NAME,
+            type=self.TYPE,
+            message=message,
+            recommendation=recommendation,
+            reason_to_flag=self.REASON_TO_FLAG,
+            metadata=details,
+        )
+    def generate(self, *args, **kwargs) -> List[DBTModelInsightResponse]:
+        """
+        Walk over all nodes and return one response per model that fails the
+        folder convention check.
+        Nodes rejected by should_skip_model, nodes that are not models, and
+        models classified as OTHER produce no response.
+        """
+        patterns = get_regex_configuration(self.config)
+        findings = []
+        for node in self.nodes.values():
+            if self.should_skip_model(node.unique_id):
+                self.logger.debug(f"Skipping model {node.unique_id} as it is not enabled for selected models")
+                continue
+            if node.resource_type != AltimateResourceType.model:
+                continue
+            model_type = classify_model_type(node.name, node.original_file_path, patterns)
+            if model_type == OTHER:
+                # Unclassified models are not checked by this rule.
+                continue
+            is_valid, expected = _check_model_folder_convention(
+                model_type,
+                node.original_file_path,
+                patterns,
+                node=node,
+                sources=self.sources,
+            )
+            if is_valid:
+                continue
+            failure = self._build_failure_result(
+                model_unique_id=node.unique_id,
+                model_type=model_type,
+                convention=expected,
+            )
+            findings.append(
+                DBTModelInsightResponse(
+                    unique_id=node.unique_id,
+                    package_name=node.package_name,
+                    path=node.path,
+                    original_file_path=node.original_file_path,
+                    insight=failure,
+                    severity=get_severity(self.config, self.ALIAS, self.DEFAULT_SEVERITY),
+                )
+            )
+        return findings

datapilot/core/platforms/dbt/insights/structure/model_naming_conventions.py ADDED Viewed

@@ -0,0 +1,97 @@
+from typing import List
+from typing import Optional
+from datapilot.config.utils import get_regex_configuration
+from datapilot.core.insights.utils import get_severity
+from datapilot.core.platforms.dbt.constants import MODEL
+from datapilot.core.platforms.dbt.constants import OTHER
+from datapilot.core.platforms.dbt.insights.schema import DBTInsightResult
+from datapilot.core.platforms.dbt.insights.schema import DBTModelInsightResponse
+from datapilot.core.platforms.dbt.insights.structure.base import DBTStructureInsight
+from datapilot.core.platforms.dbt.schemas.manifest import AltimateResourceType
+from datapilot.core.platforms.dbt.utils import _check_model_naming_convention
+from datapilot.core.platforms.dbt.utils import classify_model_type
+class DBTModelNamingConvention(DBTStructureInsight):
+    """
+    Structure insight that reports models whose name fails the naming
+    convention check, as well as models that could not be classified at all.
+    """
+    NAME = "Bad model naming convention"
+    ALIAS = "model_naming_convention_check"
+    DESCRIPTION = "This rule identifies models that do not follow the naming convention."
+    REASON_TO_FLAG = (
+        "Inconsistent or unclear naming conventions can lead to confusion and errors in querying the data warehouse. "
+        "A well-defined naming convention clarifies the model type and purpose, promoting better understanding "
+        "and effective data management. This rule flags models that deviate from established naming standards."
+    )
+    FAILURE_MESSAGE = (
+        "Naming Convention Violation Detected: The model `{model_unique_id}` does not comply with the "
+        "established naming convention. It is identified as a `{model_type}` model, but its name does not "
+        "reflect the required prefix or convention `{convention}`. Please update the model name to align "
+        "with the naming standards."
+    )
+    RECOMMENDATION = "Please rename the model `{model_unique_id}` to follow the appropriate naming convention. "
+    def _build_failure_result(self, model_unique_id: str, model_type: str, convention: Optional[str]) -> DBTInsightResult:
+        """
+        Build the insight result for a single model.
+        Models of type OTHER get a fixed "not classified" message; every other
+        type gets FAILURE_MESSAGE filled with the id, type and convention.
+        The metadata always carries "model", "model_type" and "convention".
+        """
+        if model_type == OTHER:
+            message = (
+                f"The model `{model_unique_id}` was not classified as any of the known model types. "
+                "The naming conventions for it may not be appropriate"
+            )
+        else:
+            message = self.FAILURE_MESSAGE.format(
+                model_unique_id=model_unique_id,
+                model_type=model_type,
+                convention=convention,
+            )
+        details = {
+            "model": model_unique_id,
+            "model_type": model_type,
+            "convention": convention,
+        }
+        return DBTInsightResult(
+            name=self.NAME,
+            type=self.TYPE,
+            message=message,
+            recommendation=self.RECOMMENDATION.format(model_unique_id=model_unique_id),
+            reason_to_flag=self.REASON_TO_FLAG,
+            metadata=details,
+        )
+    def generate(self, *args, **kwargs) -> List[DBTModelInsightResponse]:
+        """
+        Walk over all nodes and return one response per offending model.
+        Nodes rejected by should_skip_model and nodes that are not models are
+        ignored. A model classified as OTHER is always reported (with no
+        convention); any other model is reported only when the naming check
+        says its name is not valid.
+        """
+        patterns = get_regex_configuration(self.config)
+        findings = []
+        for node in self.nodes.values():
+            if self.should_skip_model(node.unique_id):
+                self.logger.debug(f"Skipping model {node.unique_id} as it is not enabled for selected models")
+                continue
+            if node.resource_type != AltimateResourceType.model:
+                continue
+            model_type = classify_model_type(node.name, node.original_file_path, patterns)
+            if model_type == OTHER:
+                # No naming check is run for unclassified models; they are flagged directly.
+                convention = None
+            else:
+                name_ok, convention = _check_model_naming_convention(node.name, model_type, patterns.get(MODEL))
+                if name_ok:
+                    continue
+            findings.append(
+                DBTModelInsightResponse(
+                    unique_id=node.unique_id,
+                    package_name=node.package_name,
+                    path=node.path,
+                    original_file_path=node.original_file_path,
+                    insight=self._build_failure_result(node.unique_id, model_type, convention),
+                    severity=get_severity(self.config, self.ALIAS, self.DEFAULT_SEVERITY),
+                )
+            )
+        return findings

datapilot/core/platforms/dbt/insights/structure/source_directories_structure.py ADDED Viewed

@@ -0,0 +1,80 @@
+from typing import List
+from typing import Optional
+from datapilot.config.utils import get_regex_configuration
+from datapilot.core.insights.utils import get_severity
+from datapilot.core.platforms.dbt.insights.schema import DBTInsightResult
+from datapilot.core.platforms.dbt.insights.schema import DBTModelInsightResponse
+from datapilot.core.platforms.dbt.insights.structure.base import DBTStructureInsight
+from datapilot.core.platforms.dbt.utils import _check_source_folder_convention
+class DBTSourceDirectoryStructure(DBTStructureInsight):
+    """
+    Structure insight that reports sources whose defining file does not
+    satisfy the source folder convention check.
+    """
+    NAME = "Bad source directory structure"
+    ALIAS = "source_directory_structure"
+    DESCRIPTION = "This rule identifies sources that are not placed in their correct directories. "
+    REASON_TO_FLAG = (
+        "Sources need to be organized in the correct directories to ensure an efficient and "
+        "maintainable data architecture. Proper directory structure facilitates easy navigation, "
+        "improves readability, and aids in managing the data sources effectively."
+    )
+    FAILURE_MESSAGE = (
+        "Inappropriate Directory Placement Detected: The source file for {source_id} is currently "
+        "placed in an incorrect directory. This can lead to organizational issues and hinder "
+        "efficient source management."
+    )
+    RECOMMENDATION = (
+        "To address this issue, please move the source file for {source_id} to the appropriate "
+        "directory. The recommended directory structure is {convention}, which aligns with best "
+        "practices for organizing source files in dbt projects."
+    )
+    def _build_failure_result(self, model_unique_id: str, convention: Optional[str]) -> DBTInsightResult:
+        """
+        Build the insight result for a single misplaced source.
+        Despite the parameter name, model_unique_id is used as the source id:
+        it fills the {source_id} placeholder of both templates and is stored
+        in the metadata under "source_id" next to "convention".
+        """
+        message = self.FAILURE_MESSAGE.format(source_id=model_unique_id)
+        recommendation = self.RECOMMENDATION.format(source_id=model_unique_id, convention=convention)
+        details = {
+            "source_id": model_unique_id,
+            "convention": convention,
+        }
+        return DBTInsightResult(
+            name=self.NAME,
+            type=self.TYPE,
+            message=message,
+            recommendation=recommendation,
+            reason_to_flag=self.REASON_TO_FLAG,
+            metadata=details,
+        )
+    def generate(self, *args, **kwargs) -> List[DBTModelInsightResponse]:
+        """
+        Walk over all sources and return one response per source that fails
+        the source folder convention check.
+        Sources rejected by should_skip_model are ignored.
+        """
+        patterns = get_regex_configuration(self.config)
+        findings = []
+        for source_id, source in self.sources.items():
+            if self.should_skip_model(source_id):
+                self.logger.debug(f"Skipping model {source_id} as it is not enabled for selected models")
+                continue
+            is_valid, expected_directory = _check_source_folder_convention(
+                source_name=source.source_name,
+                folder_path=source.original_file_path,
+                patterns=patterns,
+            )
+            if is_valid:
+                continue
+            failure = self._build_failure_result(
+                model_unique_id=source_id,
+                convention=expected_directory,
+            )
+            findings.append(
+                DBTModelInsightResponse(
+                    unique_id=source.unique_id,
+                    package_name=source.package_name,
+                    path=source.path,
+                    original_file_path=source.original_file_path,
+                    insight=failure,
+                    severity=get_severity(self.config, self.ALIAS, self.DEFAULT_SEVERITY),
+                )
+            )
+        return findings

datapilot/core/platforms/dbt/insights/structure/test_directory_structure.py ADDED Viewed

@@ -0,0 +1,74 @@
+from typing import List
+from typing import Optional
+from datapilot.core.insights.utils import get_severity
+from datapilot.core.platforms.dbt.insights.schema import DBTInsightResult
+from datapilot.core.platforms.dbt.insights.schema import DBTModelInsightResponse
+from datapilot.core.platforms.dbt.insights.structure.base import DBTStructureInsight
+from datapilot.utils.utils import get_dir_path
+class DBTTestDirectoryStructure(DBTStructureInsight):
+    """
+    Structure insight that reports tests whose directory differs from the
+    directory of a node they depend on.
+    """
+    NAME = "Bad test directory structure"
+    ALIAS = "test_directory_structure"
+    DESCRIPTION = "This rule checks if tests are correctly placed in the same directories as their corresponding models."
+    REASON_TO_FLAG = (
+        "It is important for tests to be placed in the same directory as their corresponding models to maintain "
+        "a coherent and easy-to-navigate project structure. This practice enhances the ease of understanding "
+        "and updating tests in parallel with model changes."
+    )
+    FAILURE_MESSAGE = (
+        "Incorrect Test Placement Detected: The test `{model_unique_id}` is not in the correct directory. "
+        "For consistent project structure and easy maintenance, it should be placed in the same directory as "
+        "its corresponding model."
+    )
+    RECOMMENDATION = (
+        "To rectify this, move the test `{model_unique_id}` to the directory `{convention}`, where its corresponding "
+        "model is located. This adjustment will align your test's location with best practices for"
+        " project organization."
+    )
+    def _build_failure_result(self, model_unique_id: str, convention: Optional[str]) -> DBTInsightResult:
+        """
+        Build the insight result for a single misplaced test.
+        Despite the parameter name, callers in this class pass the test id as
+        model_unique_id; it is stored in the metadata under "model" next to
+        "convention", which is the directory the test was expected in.
+        """
+        message = self.FAILURE_MESSAGE.format(model_unique_id=model_unique_id)
+        recommendation = self.RECOMMENDATION.format(model_unique_id=model_unique_id, convention=convention)
+        details = {
+            "model": model_unique_id,
+            "convention": convention,
+        }
+        return DBTInsightResult(
+            name=self.NAME,
+            type=self.TYPE,
+            message=message,
+            recommendation=recommendation,
+            reason_to_flag=self.REASON_TO_FLAG,
+            metadata=details,
+        )
+    def generate(self, *args, **kwargs) -> List[DBTModelInsightResponse]:
+        """
+        Walk over all tests and compare each test's directory with the
+        directory of every node it depends on.
+        One response is appended per mismatching dependency, so a test with
+        several mismatching dependencies is reported several times.
+        Tests rejected by should_skip_model and dependencies that get_node
+        cannot resolve are ignored.
+        """
+        findings = []
+        for test_id, test in self.tests.items():
+            if self.should_skip_model(test_id):
+                self.logger.debug(f"Skipping model {test_id} as it is not enabled for selected models")
+                continue
+            # NOTE(review): both directories are derived from ids (test_id / node_id), not from
+            # original_file_path — confirm get_dir_path is meant to be applied to ids.
+            test_dir = get_dir_path(test_id)
+            for node_id in test.depends_on.nodes:
+                if not self.get_node(node_id):
+                    continue
+                node_dir = get_dir_path(node_id)
+                if node_dir == test_dir:
+                    continue
+                findings.append(
+                    DBTModelInsightResponse(
+                        unique_id=test_id,
+                        package_name=test.package_name,
+                        path=test.path,
+                        original_file_path=test.original_file_path,
+                        insight=self._build_failure_result(test_id, node_dir),
+                        severity=get_severity(self.config, self.ALIAS, self.DEFAULT_SEVERITY),
+                    )
+                )
+        return findings

datapilot/core/platforms/dbt/insights/utils.py ADDED Viewed

@@ -0,0 +1,9 @@
+from datapilot.core.platforms.dbt.insights import INSIGHTS
+def get_insight_with_configs():
+    """Return the result of get_config_schema() for every entry of INSIGHTS, in order."""
+    schemas = [registered.get_config_schema() for registered in INSIGHTS]
+    return schemas
+def insights_require_catalog(insights):
+    """Return True as soon as one of the given insights reports requires_catalog(), else False."""
+    for candidate in insights:
+        if candidate.requires_catalog():
+            return True
+    return False

datapilot/core/platforms/dbt/schemas/__init__.py ADDED Viewed

File without changes

datapilot/core/platforms/dbt/schemas/catalog.py ADDED Viewed

@@ -0,0 +1,73 @@
+from datetime import datetime
+from typing import ClassVar
+from typing import Dict
+from typing import List
+from typing import Optional
+from typing import Union
+from pydantic.config import Extra
+from pydantic.main import BaseModel
+class AltimateCatalogMetadata(BaseModel):
+    # Top-level metadata section of a catalog document.
+    class Config:
+        # Undeclared fields are not accepted.
+        extra = Extra.forbid
+    dbt_schema_version: Optional[str] = "https://schemas.getdbt.com/dbt/catalog/v1.json"
+    dbt_version: Optional[str] = "0.19.0"
+    # NOTE(review): the default is an ISO-8601 string while the annotation is datetime —
+    # confirm whether the default is ever coerced before it is read.
+    generated_at: Optional[datetime] = "2021-02-10T04:42:33.680487Z"
+    invocation_id: Optional[str] = None
+    # Annotated as ClassVar, so this is a class-level attribute shared by all instances.
+    env: ClassVar[Optional[Dict[str, str]]] = {}
+class AltimateCatalogTableMetadata(BaseModel):
+    # Descriptive metadata of a single catalog table entry.
+    class Config:
+        # Undeclared fields are not accepted.
+        extra = Extra.forbid
+    # Required fields carry no default; optional ones default to None.
+    type: str
+    database: Optional[str] = None
+    schema_name: str
+    name: str
+    comment: Optional[str] = None
+    owner: Optional[str] = None
+class AltimateCatalogColumnMetadata(BaseModel):
+    # Metadata of a single column inside a catalog table entry.
+    class Config:
+        # Undeclared fields are not accepted.
+        extra = Extra.forbid
+    type: str
+    comment: Optional[str] = None
+    index: int
+    name: str
+class AltimateCatalogStatsItem(BaseModel):
+    # One statistics entry attached to a catalog table.
+    class Config:
+        # Undeclared fields are not accepted.
+        extra = Extra.forbid
+    id: str
+    label: str
+    # The value may be a bool, a string or a float, or be absent altogether.
+    value: Optional[Union[bool, str, float]] = None
+    description: Optional[str] = None
+    include: bool
+class AltimateCatalogTable(BaseModel):
+    # A catalog table entry: its metadata plus columns and stats keyed by string.
+    class Config:
+        # Undeclared fields are not accepted.
+        extra = Extra.forbid
+    metadata: AltimateCatalogTableMetadata
+    columns: Dict[str, AltimateCatalogColumnMetadata]
+    stats: Dict[str, AltimateCatalogStatsItem]
+    unique_id: Optional[str] = None
+class AltimateCatalogCatalogV1(BaseModel):
+    # Root of a catalog document: metadata plus node and source tables keyed by string.
+    class Config:
+        # Undeclared fields are not accepted.
+        extra = Extra.forbid
+    metadata: AltimateCatalogMetadata
+    nodes: Dict[str, AltimateCatalogTable]
+    sources: Dict[str, AltimateCatalogTable]
+    errors: Optional[List[str]] = None