PyPI - altimate-datapilot-cli - Versions diffs - 0.0.8__py3-none-any.whl - Mend

altimate-datapilot-cli 0.0.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (139) hide show

datapilot/core/platforms/dbt/insights/modelling/duplicate_sources.py ADDED Viewed

@@ -0,0 +1,85 @@
+from collections import defaultdict
+from typing import List
+from datapilot.core.insights.utils import get_severity
+from datapilot.core.platforms.dbt.insights.modelling.base import DBTModellingInsight
+from datapilot.core.platforms.dbt.insights.schema import DBTInsightResult
+from datapilot.core.platforms.dbt.insights.schema import DBTProjectInsightResponse
+from datapilot.core.platforms.dbt.utils import get_table_name_from_source
+from datapilot.utils.formatting.utils import numbered_list
+class DBTDuplicateSources(DBTModellingInsight):
+    """
+    Check if the DBT project has duplicate sources.
+    Ref: https://github.com/dbt-labs/dbt-project-evaluator/blob/main/models/marts/dag/fct_duplicate_sources.sql
+    """
+    NAME = "Duplicate sources"
+    ALIAS = "Duplicate_Sources"
+    DESCRIPTION = "Duplicate sources should be avoided."
+    REASON_TO_FLAG = (
+        "Having multiple source nodes pointing to the same database location can lead to an inaccurate "
+        "representation of data lineage and potential confusion in data management."
+    )
+    FAILURE_MESSAGE = (
+        "Duplicate source nodes detected: Multiple source nodes are referencing the same database object. "
+        "Database location {source_table} is referenced by:\n {source_nodes_list}"
+    )
+    RECOMMENDATION = (
+        "Consolidate the duplicate source nodes so that each database location has only a single source definition "
+        "in your dbt project. This will help maintain clear and accurate data lineage."
+    )
+    def _build_failure_result(self, source_table: str, source_ids: List[str]) -> DBTInsightResult:
+        """
+        Build Insight result if a source table has multiple source models defined.
+        :param source_table: Name of the source table.
+        :param source_ids: List of source IDs which are referencing the source table.
+        :return: An instance of DBTInsightResult containing failure message and recommendation and metadata.
+        """
+        self.logger.debug(f"Building failure result for source table {source_table}")
+        return DBTInsightResult(
+            name=self.NAME,
+            type=self.TYPE,
+            reason_to_flag=self.REASON_TO_FLAG,
+            message=self.FAILURE_MESSAGE.format(source_table=source_table, source_nodes_list=numbered_list(source_ids)),
+            recommendation=self.RECOMMENDATION.format(source_table=source_table),
+            metadata={
+                "source_table": source_table,
+                "source_ids": source_ids,
+            },
+        )
+    def generate(self, *args, **kwargs) -> List[DBTProjectInsightResponse]:
+        """
+        Generate a list of InsightResponse objects for each model in the DBT project,
+        containing insights about direct source dependencies.
+        :return: A list of InsightResponse objects.
+        """
+        self.logger.debug(f"Generating insights for DBTDuplicateSources for project {self.project_name}")
+        source_table_to_id_map = defaultdict(list)
+        for source_id, source in self.sources.items():
+            table_name = get_table_name_from_source(source)
+            source_table_to_id_map[table_name].append(source_id)
+        self.logger.debug(f"source_table_to_id_map: {source_table_to_id_map}")
+        insight_results = []
+        for source_table, source_ids in source_table_to_id_map.items():
+            if len(source_ids) > 1:
+                insight_results.append(self._build_failure_result(source_table, source_ids))
+        if insight_results:
+            self.logger.debug("Duplicate source models found")
+            return [
+                DBTProjectInsightResponse(
+                    package_name=self.project_name,
+                    insights=insight_results,
+                    severity=get_severity(self.config, self.ALIAS, self.DEFAULT_SEVERITY),
+                )
+            ]
+        self.logger.debug("No duplicate sources found")
+        return []

datapilot/core/platforms/dbt/insights/modelling/hard_coded_references.py ADDED Viewed

@@ -0,0 +1,80 @@
+from typing import List
+from datapilot.core.insights.utils import get_severity
+from datapilot.core.platforms.dbt.constants import SQL
+from datapilot.core.platforms.dbt.insights.modelling.base import DBTModellingInsight
+from datapilot.core.platforms.dbt.insights.schema import DBTInsightResult
+from datapilot.core.platforms.dbt.insights.schema import DBTModelInsightResponse
+from datapilot.core.platforms.dbt.schemas.manifest import AltimateResourceType
+from datapilot.core.platforms.dbt.utils import get_hard_coded_references
+from datapilot.utils.formatting.utils import numbered_list
+class DBTHardCodedReferences(DBTModellingInsight):
+    """
+    Checks if the dbt model has hard coded references to other models.
+    """
+    NAME = "Hard coded references"
+    ALIAS = "hard_coded_references"
+    DESCRIPTION = "Models should not have hard-coded references to tables"
+    REASON_TO_FLAG = (
+        "Hard-coded references in SQL prevent easy identification and tracking of data lineage, "
+        "and can lead to issues in maintainability and scalability of the data models."
+    )
+    SOURCE_FANOUT_THRESHOLD = 1  # Default threshold, can be overridden as needed
+    FAILURE_MESSAGE = (
+        "Model `{model_unique_id}` contains hard-coded references, which may obscure data lineage. "
+        "Detected hard-coded references: \n{hard_coded_references}"
+    )
+    RECOMMENDATION = (
+        "Replace hard-coded references in `{model_unique_id}` with dbt sources or model references to "
+        "improve clarity and maintainability of data lineage."
+    )
+    def _build_failure_result(self, model_unique_id: str, hard_coded_references: List[str]) -> DBTInsightResult:
+        failure_message = self.FAILURE_MESSAGE.format(
+            model_unique_id=model_unique_id,
+            hard_coded_references=numbered_list(hard_coded_references),
+        )
+        return DBTInsightResult(
+            name=self.NAME,
+            type=self.TYPE,
+            message=failure_message,
+            recommendation=self.RECOMMENDATION.format(model_unique_id=model_unique_id),
+            reason_to_flag=self.REASON_TO_FLAG,
+            metadata={
+                "model": model_unique_id,
+                "hard_coded_references": hard_coded_references,
+            },
+        )
+    def generate(self, *args, **kwargs) -> List[DBTModelInsightResponse]:
+        insights = []
+        for node in self.nodes.values():
+            if self.should_skip_model(node.unique_id):
+                self.logger.debug(f"Skipping model {node.unique_id} as it is not enabled for selected models")
+                continue
+            if node.resource_type == AltimateResourceType.model:
+                raw_code = node.raw_code
+                if (not raw_code) or node.language != SQL:
+                    continue
+                hard_coded_references = get_hard_coded_references(raw_code)
+                if hard_coded_references:
+                    insight_result = self._build_failure_result(
+                        model_unique_id=node.unique_id,
+                        hard_coded_references=hard_coded_references,
+                    )
+                    insights.append(
+                        DBTModelInsightResponse(
+                            unique_id=node.unique_id,
+                            package_name=node.package_name,
+                            path=node.path,
+                            original_file_path=node.original_file_path,
+                            insight=insight_result,
+                            severity=get_severity(self.config, self.ALIAS, self.DEFAULT_SEVERITY),
+                        )
+                    )
+        return insights

datapilot/core/platforms/dbt/insights/modelling/joining_of_upstream_concepts.py ADDED Viewed

@@ -0,0 +1,79 @@
+from typing import List
+from datapilot.core.insights.utils import get_severity
+from datapilot.core.platforms.dbt.insights.modelling.base import DBTModellingInsight
+from datapilot.core.platforms.dbt.insights.schema import DBTInsightResult
+from datapilot.core.platforms.dbt.insights.schema import DBTModelInsightResponse
+class DBTRejoiningOfUpstreamConcepts(DBTModellingInsight):
+    """
+    DBTRejoiningOfUpstreamConcepts identifies cases in the dbt project where a parent model's direct child
+    is also the direct child of another one of the parent's direct children, with the condition that the intermediate
+    model has no other downstream dependencies.
+    """
+    NAME = "Rejoining of upstream Concepts"
+    ALIAS = "rejoining_upstream_concepts"
+    DESCRIPTION = (
+        "Detects scenarios where a parent's direct child is also a direct child of another one " "of the parent's direct children."
+    )
+    REASON_TO_FLAG = (
+        "Flagged to identify cases where a parent model has a direct child that is also a direct child "
+        "of another one of the parent's direct children. Such patterns can suggest loops or redundancies in the DAG."
+    )
+    FAILURE_MESSAGE = (
+        "Model `{child}` has a rejoining upstream concept with parent model `{parent_model}` "
+        "and downstream child: `{downstream_child}`. This may indicate a loop or redundancy in the DAG."
+    )
+    RECOMMENDATION = (
+        "Review and potentially refactor the model relationships in `{child}`,"
+        " `{parent_model}`, and `{downstream_child}` to simplify the DAG and "
+        "avoid unnecessary complexity or potential loops."
+    )
+    def _build_failure_result(self, child: str, parent_model: str, children_list: List[str]) -> DBTInsightResult:
+        failure_message = self.FAILURE_MESSAGE.format(child=child, parent_model=parent_model, downstream_child=children_list[0])
+        recommendation = self.RECOMMENDATION.format(child=child, parent_model=parent_model, downstream_child=children_list[0])
+        return DBTInsightResult(
+            type=self.TYPE,
+            name=self.NAME,
+            message=failure_message,
+            recommendation=recommendation,
+            reason_to_flag=self.REASON_TO_FLAG,
+            metadata={
+                "model": parent_model,
+                "children": children_list,
+            },
+        )
+    def generate(self, *args, **kwargs) -> List[DBTModelInsightResponse]:
+        insights = []
+        for parent_model, children in self.children_map.items():
+            for child in children:
+                child_child_is_also_parent_child = any(
+                    dwn_stream_child in self.children_map[child] for dwn_stream_child in self.children_map[parent_model]
+                )
+                if child_child_is_also_parent_child and len(self.children_map[child]) == 1:
+                    insight_result = self._build_failure_result(
+                        child=child,
+                        parent_model=parent_model,
+                        children_list=list(self.children_map[child]),
+                    )
+                    child_node = self.get_node(child)
+                    if self.should_skip_model(child_node.unique_id):
+                        self.logger.debug(f"Skipping model {child_node.unique_id} as it is not enabled for selected models")
+                        continue
+                    insights.append(
+                        DBTModelInsightResponse(
+                            unique_id=child_node.unique_id,
+                            package_name=child_node.package_name,
+                            path=child_node.path,
+                            original_file_path=child_node.original_file_path,
+                            insight=insight_result,
+                            severity=get_severity(self.config, self.ALIAS, self.DEFAULT_SEVERITY),
+                        )
+                    )
+        return insights

datapilot/core/platforms/dbt/insights/modelling/model_fanout.py ADDED Viewed

@@ -0,0 +1,126 @@
+from typing import List
+from datapilot.core.insights.utils import get_severity
+from datapilot.core.platforms.dbt.insights.modelling.base import DBTModellingInsight
+from datapilot.core.platforms.dbt.insights.schema import DBTInsightResult
+from datapilot.core.platforms.dbt.insights.schema import DBTModelInsightResponse
+from datapilot.core.platforms.dbt.schemas.manifest import AltimateResourceType
+class DBTModelFanout(DBTModellingInsight):
+    """
+    DBTModelFanout identifies parent models in a dbt project with more than a specified number
+    of direct leaf children, indicating a high model fanout.
+    """
+    NAME = "Model fanout analysis"
+    ALIAS = "model_fanout"
+    DESCRIPTION = "Identifies parent models with an unusually high number of children. "
+    REASON_TO_FLAG = (
+        "Flagged to highlight parent models with an unusually high number of leaf children. This can suggest areas "
+        "in the data pipeline where complexity is increased and transformations might be optimized."
+    )
+    FANOUT_THRESHOLD = 3  # Default threshold, can be overridden as needed
+    FAILURE_MESSAGE = (
+        "Model `{parent_model_unique_id}` has `{leaf_children}` leaf children, "
+        "exceeding the fanout threshold of `{fanout_threshold}`. This level of fanout may lead to increased complexity."
+    )
+    RECOMMENDATION = (
+        "Consider reviewing and restructuring `{parent_model_unique_id}` to simplify its dependencies. "
+        "Reducing the number of leaf children can lead to a more streamlined and maintainable data pipeline."
+    )
+    FANOUT_THRESHOLD_STR = "max_fanout"
+    def _build_failure_result(
+        self,
+        parent_model_unique_id: str,
+        leaf_children: List[str],
+        fanout_threshold: int,
+    ) -> DBTInsightResult:
+        # Logic to build the failure result
+        self.logger.debug(f"Found {len(leaf_children)} leaf children for {parent_model_unique_id}")
+        failure_message = self.FAILURE_MESSAGE.format(
+            parent_model_unique_id=parent_model_unique_id,
+            leaf_children=len(leaf_children),
+            fanout_threshold=fanout_threshold,
+        )
+        recommendation = self.RECOMMENDATION.format(
+            parent_model_unique_id=parent_model_unique_id,
+        )
+        return DBTInsightResult(
+            type=self.TYPE,
+            name=self.NAME,
+            message=failure_message,
+            recommendation=recommendation,
+            reason_to_flag=self.REASON_TO_FLAG,
+            metadata={
+                "model": parent_model_unique_id,
+                "leaf_children_count": len(leaf_children),
+                "leaf_children": leaf_children,
+            },
+        )
+    def generate(self, *args, **kwargs) -> List[DBTModelInsightResponse]:
+        fanout_threshold = self.get_check_config(self.FANOUT_THRESHOLD_STR) or self.FANOUT_THRESHOLD
+        insights = []
+        self.logger.debug(f"Checking for models with fanout greater than {fanout_threshold}")
+        for parent, children_set in self.children_map.items():
+            if self.should_skip_model(parent):
+                self.logger.debug(f"Skipping model {parent} as it is not enabled for selected models")
+                continue
+            node = self.get_node(parent)
+            if node.resource_type != AltimateResourceType.model:
+                continue
+            leaf_children = [
+                child
+                for child in children_set
+                if len(self.children_map[child]) == 0
+                and self.get_node(child).resource_type
+                not in [
+                    AltimateResourceType.test,
+                    AltimateResourceType.analysis,
+                    AltimateResourceType.metric,
+                ]
+            ]
+            if len(leaf_children) > fanout_threshold:
+                insight_result = self._build_failure_result(parent, leaf_children, fanout_threshold)
+                insights.append(
+                    DBTModelInsightResponse(
+                        unique_id=parent,
+                        package_name=node.package_name,
+                        path=node.path,
+                        original_file_path=node.original_file_path,
+                        insight=insight_result,
+                        severity=get_severity(self.config, self.ALIAS, self.DEFAULT_SEVERITY),
+                    )
+                )
+        self.logger.debug(f"Found {len(insights)} models with high fanout")
+        return insights
+    @classmethod
+    def get_config_schema(cls):
+        """
+        :return: The configuration schema for the test coverage insight.
+        """
+        config_schema = super().get_config_schema()
+        config_schema["config"] = {
+            "$schema": "http://json-schema.org/draft-07/schema#",
+            "type": "object",
+            "properties": {
+                cls.FANOUT_THRESHOLD_STR: {
+                    "type": "integer",
+                    "description": "The maximum number of direct leaf children a model can have before being flagged.",
+                    "default": cls.FANOUT_THRESHOLD,
+                },
+            },
+            "required": [cls.FANOUT_THRESHOLD_STR],
+        }
+        return config_schema

datapilot/core/platforms/dbt/insights/modelling/multiple_sources_joined.py ADDED Viewed

@@ -0,0 +1,83 @@
+from typing import List
+from datapilot.core.insights.utils import get_severity
+from datapilot.core.platforms.dbt.insights.modelling.base import DBTModellingInsight
+from datapilot.core.platforms.dbt.insights.schema import DBTInsightResult
+from datapilot.core.platforms.dbt.insights.schema import DBTModelInsightResponse
+from datapilot.core.platforms.dbt.schemas.manifest import AltimateResourceType
+from datapilot.utils.formatting.utils import numbered_list
+class DBTModelsMultipleSourcesJoined(DBTModellingInsight):
+    """
+    DBTModelsMultipleSourcesJoined identifies models in a dbt project that reference more than one source.
+    """
+    NAME = "Multiple sources joined"
+    ALIAS = "multiple_sources_joined"
+    DESCRIPTION = "Models should not directly join multiple sources."
+    REASON_TO_FLAG = (
+        "Best practice is to have a single staging model per source and use this staging model as a "
+        "dependency for downstream models. Directly joining multiple sources in a single model can "
+        "lead to data management complexities and inconsistencies."
+    )
+    FAILURE_MESSAGE = (
+        "Model `{model_id}` directly uses multiple sources, which may complicate data management and lineage tracking. "
+        "Detected sources: \n{sources_list}"
+    )
+    RECOMMENDATION = (
+        "Consider refactoring `{model_id}` to reference a single source or "
+        "intermediate models that consolidate these sources. This approach simplifies data lineage"
+        " and improves maintainability."
+    )
+    def _build_failure_result(self, model_id: str, source_dependencies: List[str]) -> DBTInsightResult:
+        failure = self.FAILURE_MESSAGE.format(
+            model_id=model_id,
+            sources_list=numbered_list(source_dependencies),
+        )
+        recommendation = self.RECOMMENDATION.format(model_id=model_id)
+        return DBTInsightResult(
+            type=self.TYPE,
+            name=self.NAME,
+            message=failure,
+            recommendation=recommendation,
+            reason_to_flag=self.REASON_TO_FLAG,
+            metadata={
+                "model": model_id,
+                "source_dependencies": source_dependencies,
+            },
+        )
+    def generate(self, *args, **kwargs) -> List[DBTModelInsightResponse]:
+        self.logger.debug(f"Generating insights for DBTModelsMultipleSourcesJoined for project {self.manifest.get_package()}")
+        insights = []
+        for node_id, node in self.nodes.items():
+            if self.should_skip_model(node_id):
+                self.logger.debug(f"Skipping model {node_id} as it is not enabled for selected models")
+                continue
+            if node.resource_type == AltimateResourceType.model:
+                source_dependencies = [
+                    dependent_node_id
+                    for dependent_node_id in node.depends_on.nodes
+                    if self.get_node(dependent_node_id).resource_type == AltimateResourceType.source
+                ]
+                if len(source_dependencies) > 1:
+                    self.logger.debug(f"Model {node_id} references multiple sources")
+                    insight_result = self._build_failure_result(node_id, source_dependencies)
+                    insights.append(
+                        DBTModelInsightResponse(
+                            unique_id=node_id,
+                            package_name=node.package_name,
+                            path=node.path,
+                            original_file_path=node.original_file_path,
+                            insight=insight_result,
+                            severity=get_severity(self.config, self.ALIAS, self.DEFAULT_SEVERITY),
+                        )
+                    )
+        return insights

datapilot/core/platforms/dbt/insights/modelling/root_model.py ADDED Viewed

@@ -0,0 +1,82 @@
+from typing import List
+from datapilot.core.insights.utils import get_severity
+from datapilot.core.platforms.dbt.insights.modelling.base import DBTModellingInsight
+from datapilot.core.platforms.dbt.insights.schema import DBTInsightResult
+from datapilot.core.platforms.dbt.insights.schema import DBTModelInsightResponse
+from datapilot.core.platforms.dbt.schemas.manifest import AltimateResourceType
+class DBTRootModel(DBTModellingInsight):
+    """
+    DBTRootModels is used to identify models in a dbt project with 0 direct parents,
+    meaning these models cannot be traced back to a declared source or model.
+    """
+    NAME = "Root model traceability"
+    ALIAS = "root_model"
+    DESCRIPTION = "Identifies models in a dbt project with 0 direct parents, meaning these models cannot be traced back to a declared source or model."
+    REASON_TO_FLAG = (
+        "Best Practice is to ensure all models can be traced back to a source or another model in the project. "
+        "Root models with no direct parents can lead to challenges in tracking data lineage and understanding"
+        " the overall data model."
+    )
+    FAILURE_MESSAGE = (
+        "Model `{current_model_unique_id}` is identified as a root model with no direct parents. "
+        "This can hinder traceability and clarity in the data model."
+    )
+    RECOMMENDATION = (
+        "Ensure that model `{current_model_unique_id}` is appropriately linked to a source or another model "
+        "within the dbt project. This linkage is crucial for maintaining clear data lineage and project coherence."
+    )
+    def _build_failure_result(self, current_model_unique_id: str) -> DBTInsightResult:
+        """
+        Build failure result for the insight if a model is a root model with 0 direct parents.
+        :param current_model_unique_id: Unique ID of the current model being evaluated.
+        :return: An instance of InsightResult containing failure message and recommendation.
+        """
+        self.logger.debug(f"Building failure result for root model {current_model_unique_id}")
+        failure = self.FAILURE_MESSAGE.format(current_model_unique_id=current_model_unique_id)
+        recommendation = self.RECOMMENDATION.format(current_model_unique_id=current_model_unique_id)
+        return DBTInsightResult(
+            type=self.TYPE,
+            name=self.NAME,
+            message=failure,
+            recommendation=recommendation,
+            reason_to_flag=self.REASON_TO_FLAG,
+            metadata={"model": current_model_unique_id},
+        )
+    def generate(self, *args, **kwargs) -> List[DBTModelInsightResponse]:
+        """
+        Generate a list of InsightResponse objects for each model in the DBT project,
+        identifying root models with 0 direct parents.
+        :return: A list of InsightResponse objects.
+        """
+        self.logger.debug(f"Generating insights for DBTRootModels for project {self.project_name}")
+        insights = []
+        for node_id, node in self.nodes.items():
+            if self.should_skip_model(node_id):
+                self.logger.debug(f"Skipping model {node_id} as it is not enabled for selected models")
+                continue
+            if node.resource_type == AltimateResourceType.model and not node.depends_on.nodes:
+                self.logger.debug(f"Found root model {node_id} with no direct parents")
+                insight_result = self._build_failure_result(node.unique_id)
+                insights.append(
+                    DBTModelInsightResponse(
+                        unique_id=node_id,
+                        package_name=node.package_name,
+                        path=node.path,
+                        original_file_path=node.original_file_path,
+                        insight=insight_result,
+                        severity=get_severity(self.config, self.ALIAS, self.DEFAULT_SEVERITY),
+                    )
+                )
+        self.logger.debug(f"Found {len(insights)} root models")
+        return insights

datapilot/core/platforms/dbt/insights/modelling/source_fanout.py ADDED Viewed

@@ -0,0 +1,102 @@
+from typing import List
+from datapilot.core.insights.utils import get_severity
+from datapilot.core.platforms.dbt.insights.modelling.base import DBTModellingInsight
+from datapilot.core.platforms.dbt.insights.schema import DBTInsightResult
+from datapilot.core.platforms.dbt.insights.schema import DBTModelInsightResponse
+from datapilot.core.platforms.dbt.schemas.manifest import AltimateResourceType
+class DBTSourceFanout(DBTModellingInsight):
+    """
+    DBTSourceFanout identifies instances where a source is the direct parent of multiple resources in the DAG.
+    """
+    NAME = "Source fanout analysis"
+    ALIAS = "source_fanout"
+    DESCRIPTION = "Identifies sources with a high number of direct children."
+    REASON_TO_FLAG = (
+        "Identifying sources with high fanout can indicate areas where the data model might be overly complex "
+        "or dependent on a single source. Such dependencies can introduce risks and "
+        "complicate maintenance and scalability."
+    )
+    SOURCE_FANOUT_THRESHOLD = 1  # Default threshold, can be overridden as needed
+    FAILURE_MESSAGE = (
+        "Source `{source_unique_id}` has `{children_count}` direct children, "
+        "exceeding the fanout threshold of `{fanout_threshold}`. This level of fanout may lead to increased complexity."
+    )
+    RECOMMENDATION = (
+        "Review the source `{source_unique_id}` to identify opportunities to reduce its direct dependencies. "
+        "This can help in simplifying the data model and reducing the risk associated with high source reliance."
+    )
+    SOURCE_FANOUT_THRESHOLD_STR = "max_fanout"
+    def _build_failure_result(self, source_unique_id: str, children_count: int, fanout_threshold: int) -> DBTInsightResult:
+        failure_message = self.FAILURE_MESSAGE.format(
+            source_unique_id=source_unique_id,
+            children_count=children_count,
+            fanout_threshold=fanout_threshold,
+        )
+        recommendation = self.RECOMMENDATION.format(source_unique_id=source_unique_id)
+        return DBTInsightResult(
+            type=self.TYPE,
+            name=self.NAME,
+            message=failure_message,
+            recommendation=recommendation,
+            reason_to_flag=self.REASON_TO_FLAG,
+            metadata={
+                "source": source_unique_id,
+                "direct_children_count": children_count,
+            },
+        )
+    def generate(self, *args, **kwargs) -> List[DBTModelInsightResponse]:
+        fanout_threshold = self.get_check_config(self.SOURCE_FANOUT_THRESHOLD_STR) or self.SOURCE_FANOUT_THRESHOLD
+        insights = []
+        for node_id, children_set in self.children_map.items():
+            if self.should_skip_model(node_id):
+                self.logger.debug(f"Skipping model {node_id} as it is not enabled for selected models")
+                continue
+            node = self.get_node(node_id)
+            if node.resource_type == AltimateResourceType.source:
+                if len(children_set) > fanout_threshold:
+                    insight_result = self._build_failure_result(
+                        source_unique_id=node_id,
+                        children_count=len(children_set),
+                        fanout_threshold=fanout_threshold,
+                    )
+                    insights.append(
+                        DBTModelInsightResponse(
+                            unique_id=node_id,
+                            package_name=node.package_name,
+                            path=node.path,
+                            original_file_path=node.original_file_path,
+                            insight=insight_result,
+                            severity=get_severity(self.config, self.ALIAS, self.DEFAULT_SEVERITY),
+                        )
+                    )
+        return insights
+    @classmethod
+    def get_config_schema(cls):
+        """
+        :return: The configuration schema for the test coverage insight.
+        """
+        config_schema = super().get_config_schema()
+        config_schema["config"] = {
+            "$schema": "http://json-schema.org/draft-07/schema#",
+            "type": "object",
+            "properties": {
+                cls.SOURCE_FANOUT_THRESHOLD_STR: {
+                    "type": "integer",
+                    "description": "The maximum number of direct children a source can have before being flagged.",
+                    "default": cls.SOURCE_FANOUT_THRESHOLD,
+                },
+            },
+            "required": [cls.SOURCE_FANOUT_THRESHOLD_STR],
+        }
+        return config_schema