acryl-datahub 1.2.0.1__py3-none-any.whl → 1.2.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of acryl-datahub might be problematic.

Files changed (54)
  1. {acryl_datahub-1.2.0.1.dist-info → acryl_datahub-1.2.0.2.dist-info}/METADATA +2574 -2572
  2. {acryl_datahub-1.2.0.1.dist-info → acryl_datahub-1.2.0.2.dist-info}/RECORD +54 -46
  3. datahub/_version.py +1 -1
  4. datahub/api/entities/dataset/dataset.py +13 -1
  5. datahub/emitter/rest_emitter.py +3 -1
  6. datahub/ingestion/autogenerated/capability_summary.json +97 -6
  7. datahub/ingestion/source/abs/source.py +5 -29
  8. datahub/ingestion/source/aws/glue.py +8 -0
  9. datahub/ingestion/source/cassandra/cassandra.py +5 -7
  10. datahub/ingestion/source/common/subtypes.py +2 -0
  11. datahub/ingestion/source/data_lake_common/data_lake_utils.py +37 -0
  12. datahub/ingestion/source/datahub/datahub_source.py +3 -0
  13. datahub/ingestion/source/dbt/dbt_common.py +69 -2
  14. datahub/ingestion/source/delta_lake/source.py +1 -0
  15. datahub/ingestion/source/ge_data_profiler.py +9 -1
  16. datahub/ingestion/source/grafana/entity_mcp_builder.py +272 -0
  17. datahub/ingestion/source/grafana/field_utils.py +307 -0
  18. datahub/ingestion/source/grafana/grafana_api.py +142 -0
  19. datahub/ingestion/source/grafana/grafana_config.py +104 -0
  20. datahub/ingestion/source/grafana/grafana_source.py +522 -84
  21. datahub/ingestion/source/grafana/lineage.py +202 -0
  22. datahub/ingestion/source/grafana/models.py +120 -0
  23. datahub/ingestion/source/grafana/report.py +91 -0
  24. datahub/ingestion/source/grafana/types.py +16 -0
  25. datahub/ingestion/source/hex/hex.py +8 -0
  26. datahub/ingestion/source/looker/looker_common.py +40 -4
  27. datahub/ingestion/source/looker/looker_source.py +9 -0
  28. datahub/ingestion/source/looker/lookml_source.py +8 -0
  29. datahub/ingestion/source/mongodb.py +11 -1
  30. datahub/ingestion/source/redshift/redshift.py +8 -1
  31. datahub/ingestion/source/s3/source.py +14 -34
  32. datahub/ingestion/source/sql/athena.py +8 -2
  33. datahub/ingestion/source/sql/clickhouse.py +9 -0
  34. datahub/ingestion/source/sql/postgres.py +190 -1
  35. datahub/ingestion/source/sql_queries.py +111 -76
  36. datahub/ingestion/source/unity/proxy.py +8 -8
  37. datahub/metadata/_internal_schema_classes.py +96 -0
  38. datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py +2 -0
  39. datahub/metadata/schema.avsc +69 -0
  40. datahub/metadata/schemas/CorpUserSettings.avsc +10 -1
  41. datahub/metadata/schemas/DataHubPageModuleProperties.avsc +42 -0
  42. datahub/metadata/schemas/MetadataChangeEvent.avsc +18 -0
  43. datahub/metadata/schemas/MetadataChangeLog.avsc +62 -44
  44. datahub/metadata/schemas/MetadataChangeProposal.avsc +61 -0
  45. datahub/metadata/schemas/SystemMetadata.avsc +61 -0
  46. datahub/sdk/dataset.py +44 -0
  47. datahub/sdk/search_filters.py +84 -15
  48. datahub/sql_parsing/sql_parsing_aggregator.py +6 -0
  49. datahub/telemetry/telemetry.py +4 -1
  50. datahub/upgrade/upgrade.py +5 -3
  51. {acryl_datahub-1.2.0.1.dist-info → acryl_datahub-1.2.0.2.dist-info}/WHEEL +0 -0
  52. {acryl_datahub-1.2.0.1.dist-info → acryl_datahub-1.2.0.2.dist-info}/entry_points.txt +0 -0
  53. {acryl_datahub-1.2.0.1.dist-info → acryl_datahub-1.2.0.2.dist-info}/licenses/LICENSE +0 -0
  54. {acryl_datahub-1.2.0.1.dist-info → acryl_datahub-1.2.0.2.dist-info}/top_level.txt +0 -0
datahub/ingestion/source/grafana/lineage.py (new file)
@@ -0,0 +1,202 @@
+ import logging
+ from typing import Dict, List, Optional, Tuple
+
+ from datahub.emitter.mce_builder import (
+     make_dataset_urn_with_platform_instance,
+     make_schema_field_urn,
+ )
+ from datahub.emitter.mcp import MetadataChangeProposalWrapper
+ from datahub.ingestion.graph.client import DataHubGraph
+ from datahub.ingestion.source.grafana.grafana_config import PlatformConnectionConfig
+ from datahub.ingestion.source.grafana.models import (
+     DatasourceRef,
+     GrafanaQueryTarget,
+     Panel,
+ )
+ from datahub.ingestion.source.grafana.report import GrafanaSourceReport
+ from datahub.metadata.schema_classes import (
+     DatasetLineageTypeClass,
+     FineGrainedLineageClass,
+     FineGrainedLineageDownstreamTypeClass,
+     FineGrainedLineageUpstreamTypeClass,
+     UpstreamClass,
+     UpstreamLineageClass,
+ )
+ from datahub.sql_parsing.sqlglot_lineage import (
+     SqlParsingResult,
+     create_lineage_sql_parsed_result,
+ )
+
+ logger = logging.getLogger(__name__)
+
+
+ class LineageExtractor:
+     """Handles extraction of lineage information from Grafana panels"""
+
+     def __init__(
+         self,
+         platform: str,
+         platform_instance: Optional[str],
+         env: str,
+         connection_to_platform_map: Dict[str, PlatformConnectionConfig],
+         report: GrafanaSourceReport,
+         graph: Optional[DataHubGraph] = None,
+         include_column_lineage: bool = True,
+     ):
+         self.platform = platform
+         self.platform_instance = platform_instance
+         self.env = env
+         self.connection_map = connection_to_platform_map
+         self.graph = graph
+         self.report = report
+         self.include_column_lineage = include_column_lineage
+
+     def extract_panel_lineage(
+         self, panel: Panel
+     ) -> Optional[MetadataChangeProposalWrapper]:
+         """Extract lineage information from a panel."""
+         if not panel.datasource_ref:
+             return None
+
+         ds_type, ds_uid = self._extract_datasource_info(panel.datasource_ref)
+         raw_sql = self._extract_raw_sql(panel.query_targets)
+         ds_urn = self._build_dataset_urn(ds_type, ds_uid, panel.id)
+
+         # Handle platform-specific lineage
+         if ds_uid in self.connection_map:
+             if raw_sql:
+                 parsed_sql = self._parse_sql(raw_sql, self.connection_map[ds_uid])
+                 if parsed_sql:
+                     lineage = self._create_column_lineage(ds_urn, parsed_sql)
+                     if lineage:
+                         return lineage
+
+             # Fall back to basic lineage if SQL parsing fails or no column lineage created
+             return self._create_basic_lineage(
+                 ds_uid, self.connection_map[ds_uid], ds_urn
+             )
+
+         return None
+
+     def _extract_datasource_info(
+         self, datasource_ref: "DatasourceRef"
+     ) -> Tuple[str, str]:
+         """Extract datasource type and UID."""
+         return datasource_ref.type or "unknown", datasource_ref.uid or "unknown"
+
+     def _extract_raw_sql(
+         self, query_targets: List["GrafanaQueryTarget"]
+     ) -> Optional[str]:
+         """Extract raw SQL from panel query targets."""
+         for target in query_targets:
+             if target.get("rawSql"):
+                 return target["rawSql"]
+         return None
+
+     def _build_dataset_urn(self, ds_type: str, ds_uid: str, panel_id: str) -> str:
+         """Build dataset URN."""
+         dataset_name = f"{ds_type}.{ds_uid}.{panel_id}"
+         return make_dataset_urn_with_platform_instance(
+             platform=self.platform,
+             name=dataset_name,
+             platform_instance=self.platform_instance,
+             env=self.env,
+         )
+
+     def _create_basic_lineage(
+         self, ds_uid: str, platform_config: PlatformConnectionConfig, ds_urn: str
+     ) -> MetadataChangeProposalWrapper:
+         """Create basic upstream lineage."""
+         name = (
+             f"{platform_config.database}.{ds_uid}"
+             if platform_config.database
+             else ds_uid
+         )
+
+         upstream_urn = make_dataset_urn_with_platform_instance(
+             platform=platform_config.platform,
+             name=name,
+             platform_instance=platform_config.platform_instance,
+             env=platform_config.env,
+         )
+
+         logger.info(f"Generated upstream URN: {upstream_urn}")
+
+         return MetadataChangeProposalWrapper(
+             entityUrn=ds_urn,
+             aspect=UpstreamLineageClass(
+                 upstreams=[
+                     UpstreamClass(
+                         dataset=upstream_urn,
+                         type=DatasetLineageTypeClass.TRANSFORMED,
+                     )
+                 ]
+             ),
+         )
+
+     def _parse_sql(
+         self, sql: str, platform_config: PlatformConnectionConfig
+     ) -> Optional[SqlParsingResult]:
+         """Parse SQL query for lineage information."""
+         if not self.graph:
+             logger.warning("No DataHub graph specified for SQL parsing.")
+             return None
+
+         try:
+             return create_lineage_sql_parsed_result(
+                 query=sql,
+                 platform=platform_config.platform,
+                 platform_instance=platform_config.platform_instance,
+                 env=platform_config.env,
+                 default_db=platform_config.database,
+                 default_schema=platform_config.database_schema,
+                 graph=self.graph,
+             )
+         except ValueError as e:
+             logger.error(f"SQL parsing error for query: {sql}", exc_info=e)
+         except Exception as e:
+             logger.exception(f"Unexpected error during SQL parsing: {sql}", exc_info=e)
+
+         return None
+
+     def _create_column_lineage(
+         self,
+         dataset_urn: str,
+         parsed_sql: SqlParsingResult,
+     ) -> Optional[MetadataChangeProposalWrapper]:
+         """Create column-level lineage"""
+         if not parsed_sql.column_lineage or not self.include_column_lineage:
+             return None
+
+         upstream_lineages = []
+         for col_lineage in parsed_sql.column_lineage:
+             upstream_lineages.append(
+                 FineGrainedLineageClass(
+                     downstreamType=FineGrainedLineageDownstreamTypeClass.FIELD,
+                     downstreams=[
+                         make_schema_field_urn(
+                             dataset_urn, col_lineage.downstream.column
+                         )
+                     ],
+                     upstreamType=FineGrainedLineageUpstreamTypeClass.FIELD_SET,
+                     upstreams=[
+                         make_schema_field_urn(upstream_dataset, col.column)
+                         for col in col_lineage.upstreams
+                         for upstream_dataset in parsed_sql.in_tables
+                     ],
+                 )
+             )
+
+         return MetadataChangeProposalWrapper(
+             entityUrn=dataset_urn,
+             aspect=UpstreamLineageClass(
+                 upstreams=[
+                     UpstreamClass(
+                         dataset=table,
+                         type=DatasetLineageTypeClass.TRANSFORMED,
+                     )
+                     for table in parsed_sql.in_tables
+                 ],
+                 fineGrainedLineages=upstream_lineages,
+             ),
+         )
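For context, a minimal sketch of how the new LineageExtractor might be driven from the Grafana source. The helper names, config values, and surrounding flow below are assumptions for illustration only; the actual wiring inside grafana_source.py may differ.

    from datahub.ingestion.source.grafana.lineage import LineageExtractor
    from datahub.ingestion.source.grafana.report import GrafanaSourceReport

    def build_extractor(connection_map, graph=None):
        # Hypothetical factory -- illustrates the constructor signature only.
        return LineageExtractor(
            platform="grafana",
            platform_instance=None,
            env="PROD",
            connection_to_platform_map=connection_map,  # datasource uid -> PlatformConnectionConfig
            report=GrafanaSourceReport(),
            graph=graph,  # optional DataHubGraph; needed for SQL-based column lineage
            include_column_lineage=True,
        )

    def emit_panel_lineage(extractor, panel):
        # extract_panel_lineage returns None when the panel has no datasource
        # or its uid is not present in the connection map.
        mcp = extractor.extract_panel_lineage(panel)
        if mcp is not None:
            yield mcp.as_workunit()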
datahub/ingestion/source/grafana/models.py (new file)
@@ -0,0 +1,120 @@
+ """Grafana data models for DataHub ingestion.
+
+ References:
+ - Grafana HTTP API: https://grafana.com/docs/grafana/latest/developers/http_api/
+ - Dashboard API: https://grafana.com/docs/grafana/latest/developers/http_api/dashboard/
+ - Folder API: https://grafana.com/docs/grafana/latest/developers/http_api/folder/
+ - Search API: https://grafana.com/docs/grafana/latest/developers/http_api/other/#search-api
+ - Dashboard JSON structure: https://grafana.com/docs/grafana/latest/dashboards/build-dashboards/view-dashboard-json-model/
+ """
+
+ from typing import Any, Dict, List, Optional
+
+ from pydantic import BaseModel, Field
+
+ from datahub.emitter.mcp_builder import ContainerKey
+
+ # Grafana-specific type definitions for better type safety
+ GrafanaQueryTarget = Dict[
+     str, Any
+ ]  # Query targets: refId, expr/query, datasource, hide, etc.
+ GrafanaFieldConfig = Dict[
+     str, Any
+ ]  # Field config: defaults, overrides, display settings
+ GrafanaTransformation = Dict[str, Any]  # Transformations: id, options
+
+
+ class DatasourceRef(BaseModel):
+     """Reference to a Grafana datasource."""
+
+     type: Optional[str] = None  # Datasource type (prometheus, mysql, postgres, etc.)
+     uid: Optional[str] = None  # Datasource unique identifier
+     name: Optional[str] = None  # Datasource display name
+
+
+ class Panel(BaseModel):
+     """Represents a Grafana dashboard panel."""
+
+     id: str
+     title: str
+     description: str = ""
+     type: Optional[str]
+     # Query targets - each contains refId (A,B,C...), query/expr, datasource ref, etc.
+     query_targets: List[GrafanaQueryTarget] = Field(
+         default_factory=list, alias="targets"
+     )
+     # Datasource reference - contains type, uid, name
+     datasource_ref: Optional[DatasourceRef] = Field(default=None, alias="datasource")
+     # Field configuration - display settings, defaults, overrides
+     field_config: GrafanaFieldConfig = Field(default_factory=dict, alias="fieldConfig")
+     # Data transformations - each contains id and transformation-specific options
+     transformations: List[GrafanaTransformation] = Field(default_factory=list)
+
+
+ class Dashboard(BaseModel):
+     """Represents a Grafana dashboard."""
+
+     uid: str
+     title: str
+     description: str = ""
+     version: Optional[str]
+     panels: List[Panel]
+     tags: List[str]
+     timezone: Optional[str]
+     refresh: Optional[str] = None
+     schema_version: Optional[str] = Field(default=None, alias="schemaVersion")
+     folder_id: Optional[str] = Field(default=None, alias="meta.folderId")
+     created_by: Optional[str] = None
+
+     @staticmethod
+     def extract_panels(panels_data: List[Dict[str, Any]]) -> List[Panel]:
+         """Extract panels, including nested ones."""
+         panels: List[Panel] = []
+         for panel_data in panels_data:
+             if panel_data.get("type") == "row" and "panels" in panel_data:
+                 panels.extend(
+                     Panel.parse_obj(p)
+                     for p in panel_data["panels"]
+                     if p.get("type") != "row"
+                 )
+             elif panel_data.get("type") != "row":
+                 panels.append(Panel.parse_obj(panel_data))
+         return panels
+
+     @classmethod
+     def parse_obj(cls, data: Dict[str, Any]) -> "Dashboard":
+         """Custom parsing to handle nested panel extraction."""
+         dashboard_data = data.get("dashboard", {})
+         panels = cls.extract_panels(dashboard_data.get("panels", []))
+
+         # Extract meta.folderId from nested structure
+         meta = dashboard_data.get("meta", {})
+         folder_id = meta.get("folderId")
+
+         # Create dashboard data without meta to avoid conflicts
+         dashboard_dict = {**dashboard_data, "panels": panels, "folder_id": folder_id}
+         if "meta" in dashboard_dict:
+             del dashboard_dict["meta"]
+
+         return super().parse_obj(dashboard_dict)
+
+
+ class Folder(BaseModel):
+     """Represents a Grafana folder."""
+
+     id: str
+     title: str
+     description: Optional[str] = ""
+
+
+ class FolderKey(ContainerKey):
+     """Key for identifying a Grafana folder."""
+
+     folder_id: str
+
+
+ class DashboardContainerKey(ContainerKey):
+     """Key for identifying a Grafana dashboard."""
+
+     dashboard_id: str
+     folder_id: Optional[str] = None  # Reference to parent folder
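A hedged example of the payload shape Dashboard.parse_obj appears to expect (a top-level "dashboard" key, with "meta" stripped and row panels flattened). The payload below is illustrative and not taken from the package.

    from datahub.ingestion.source.grafana.models import Dashboard

    # Illustrative payload shaped like a Grafana "get dashboard" response.
    api_response = {
        "dashboard": {
            "uid": "abc123",
            "title": "Service Overview",
            "tags": ["prod"],
            "panels": [
                {"id": "1", "title": "CPU", "type": "timeseries", "targets": []},
                # Row panels are flattened: nested panels are lifted out, the row itself is dropped.
                {
                    "id": "2",
                    "title": "Details",
                    "type": "row",
                    "panels": [{"id": "3", "title": "Errors", "type": "table"}],
                },
            ],
            "meta": {"folderId": "42"},
        }
    }

    dashboard = Dashboard.parse_obj(api_response)
    # dashboard.panels holds the "CPU" and "Errors" panels; the "row" container is not kept.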
datahub/ingestion/source/grafana/report.py (new file)
@@ -0,0 +1,91 @@
+ from dataclasses import dataclass
+
+ from datahub.ingestion.source.state.stale_entity_removal_handler import (
+     StaleEntityRemovalSourceReport,
+ )
+ from datahub.ingestion.source_report.ingestion_stage import IngestionStageReport
+
+
+ @dataclass
+ class GrafanaSourceReport(StaleEntityRemovalSourceReport, IngestionStageReport):
+     # Entity counters
+     dashboards_scanned: int = 0
+     charts_scanned: int = 0
+     folders_scanned: int = 0
+     datasets_scanned: int = 0
+
+     # Lineage counters
+     panels_with_lineage: int = 0
+     panels_without_lineage: int = 0
+     lineage_extraction_failures: int = 0
+     sql_parsing_attempts: int = 0
+     sql_parsing_successes: int = 0
+     sql_parsing_failures: int = 0
+
+     # Schema extraction counters
+     panels_with_schema_fields: int = 0
+     panels_without_schema_fields: int = 0
+
+     # Warning counters
+     permission_warnings: int = 0
+     datasource_warnings: int = 0
+     panel_parsing_warnings: int = 0
+
+     def report_dashboard_scanned(self) -> None:
+         self.dashboards_scanned += 1
+
+     def report_chart_scanned(self) -> None:
+         self.charts_scanned += 1
+
+     def report_folder_scanned(self) -> None:
+         self.folders_scanned += 1
+
+     def report_dataset_scanned(self) -> None:
+         self.datasets_scanned += 1
+
+     # Lineage reporting methods
+     def report_lineage_extracted(self) -> None:
+         """Report successful lineage extraction for a panel"""
+         self.panels_with_lineage += 1
+
+     def report_no_lineage(self) -> None:
+         """Report that no lineage was found for a panel"""
+         self.panels_without_lineage += 1
+
+     def report_lineage_extraction_failure(self) -> None:
+         """Report failure to extract lineage for a panel"""
+         self.lineage_extraction_failures += 1
+
+     def report_sql_parsing_attempt(self) -> None:
+         """Report attempt to parse SQL"""
+         self.sql_parsing_attempts += 1
+
+     def report_sql_parsing_success(self) -> None:
+         """Report successful SQL parsing"""
+         self.sql_parsing_successes += 1
+
+     def report_sql_parsing_failure(self) -> None:
+         """Report failed SQL parsing"""
+         self.sql_parsing_failures += 1
+
+     # Schema field reporting methods
+     def report_schema_fields_extracted(self) -> None:
+         """Report that schema fields were extracted for a panel"""
+         self.panels_with_schema_fields += 1
+
+     def report_no_schema_fields(self) -> None:
+         """Report that no schema fields were found for a panel"""
+         self.panels_without_schema_fields += 1
+
+     # Warning reporting methods
+     def report_permission_warning(self) -> None:
+         """Report a permission-related warning"""
+         self.permission_warnings += 1
+
+     def report_datasource_warning(self) -> None:
+         """Report a datasource-related warning"""
+         self.datasource_warnings += 1
+
+     def report_panel_parsing_warning(self) -> None:
+         """Report a panel parsing warning"""
+         self.panel_parsing_warnings += 1
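An assumed usage pattern for these counters inside the source: the report methods are the ones added above, while the surrounding loop and the `lineage_extractor` / `panels` names are illustrative only.

    report = GrafanaSourceReport()
    for panel in panels:  # hypothetical iterable of Panel objects for one dashboard
        mcp = lineage_extractor.extract_panel_lineage(panel)
        if mcp is not None:
            report.report_lineage_extracted()
        else:
            report.report_no_lineage()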
datahub/ingestion/source/grafana/types.py (new file)
@@ -0,0 +1,16 @@
+ from datahub.metadata.schema_classes import (
+     ChartTypeClass,
+ )
+
+ CHART_TYPE_MAPPINGS = {
+     "graph": ChartTypeClass.LINE,
+     "timeseries": ChartTypeClass.LINE,
+     "table": ChartTypeClass.TABLE,
+     "stat": ChartTypeClass.TEXT,
+     "gauge": ChartTypeClass.TEXT,
+     "bargauge": ChartTypeClass.TEXT,
+     "bar": ChartTypeClass.BAR,
+     "pie": ChartTypeClass.PIE,
+     "heatmap": ChartTypeClass.TABLE,
+     "histogram": ChartTypeClass.BAR,
+ }
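A minimal sketch of how such a mapping would typically be consumed; the helper name and the fallback to None are assumptions, not taken from the package.

    from typing import Optional

    from datahub.ingestion.source.grafana.types import CHART_TYPE_MAPPINGS

    def map_chart_type(panel_type: Optional[str]) -> Optional[str]:
        # Hypothetical helper: ChartTypeClass values are string constants,
        # so unmapped or missing panel types simply fall back to None.
        if not panel_type:
            return None
        return CHART_TYPE_MAPPINGS.get(panel_type)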
datahub/ingestion/source/hex/hex.py
@@ -22,6 +22,7 @@ from datahub.ingestion.api.decorators import (
  )
  from datahub.ingestion.api.source import MetadataWorkUnitProcessor
  from datahub.ingestion.api.workunit import MetadataWorkUnit
+ from datahub.ingestion.source.common.subtypes import SourceCapabilityModifier
  from datahub.ingestion.source.hex.api import HexApi, HexApiReport
  from datahub.ingestion.source.hex.constants import (
      DATAHUB_API_PAGE_SIZE_DEFAULT,
@@ -179,6 +180,13 @@ class HexReport(
  @capability(SourceCapability.OWNERSHIP, "Supported by default")
  @capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default")
  @capability(SourceCapability.CONTAINERS, "Enabled by default")
+ @capability(
+     SourceCapability.USAGE_STATS,
+     "Supported by default",
+     subtype_modifier=[
+         SourceCapabilityModifier.HEX_PROJECT,
+     ],
+ )
  class HexSource(StatefulIngestionSourceBase):
      def __init__(self, config: HexSourceConfig, ctx: PipelineContext):
          super().__init__(config, ctx)
datahub/ingestion/source/looker/looker_common.py
@@ -242,13 +242,19 @@ class LookerViewId:

          dataset_name = config.view_naming_pattern.replace_variables(n_mapping)

-         return builder.make_dataset_urn_with_platform_instance(
+         generated_urn = builder.make_dataset_urn_with_platform_instance(
              platform=config.platform_name,
              name=dataset_name,
              platform_instance=config.platform_instance,
              env=config.env,
          )

+         logger.debug(
+             f"LookerViewId.get_urn for view '{self.view_name}': project='{self.project_name}', model='{self.model_name}', file_path='{self.file_path}', dataset_name='{dataset_name}', generated_urn='{generated_urn}'"
+         )
+
+         return generated_urn
+
      def get_browse_path(self, config: LookerCommonConfig) -> str:
          browse_path = config.view_browse_pattern.replace_variables(
              self.get_mapping(config)
@@ -452,15 +458,36 @@ class ExploreUpstreamViewField:
      )


- def create_view_project_map(view_fields: List[ViewField]) -> Dict[str, str]:
+ def create_view_project_map(
+     view_fields: List[ViewField],
+     explore_primary_view: Optional[str] = None,
+     explore_project_name: Optional[str] = None,
+ ) -> Dict[str, str]:
      """
      Each view in a model has unique name.
      Use this function in scope of a model.
+
+     Args:
+         view_fields: List of ViewField objects
+         explore_primary_view: The primary view name of the explore (explore.view_name)
+         explore_project_name: The project name of the explore (explore.project_name)
      """
      view_project_map: Dict[str, str] = {}
      for view_field in view_fields:
          if view_field.view_name is not None and view_field.project_name is not None:
-             view_project_map[view_field.view_name] = view_field.project_name
+             # Override field-level project assignment for the primary view when different
+             if (
+                 view_field.view_name == explore_primary_view
+                 and explore_project_name is not None
+                 and explore_project_name != view_field.project_name
+             ):
+                 logger.debug(
+                     f"Overriding project assignment for primary view '{view_field.view_name}': "
+                     f"field-level project '{view_field.project_name}' → explore-level project '{explore_project_name}'"
+                 )
+                 view_project_map[view_field.view_name] = explore_project_name
+             else:
+                 view_project_map[view_field.view_name] = view_field.project_name

      return view_project_map

@@ -953,6 +980,9 @@ class LookerExplore:
                      f"Could not resolve view {view_name} for explore {dict['name']} in model {model_name}"
                  )
              else:
+                 logger.debug(
+                     f"LookerExplore.from_dict adding upstream view for explore '{dict['name']}' (model='{model_name}'): view_name='{view_name}', info[0].project='{info[0].project}'"
+                 )
                  upstream_views.append(
                      ProjectInclude(project=info[0].project, include=view_name)
                  )
@@ -981,6 +1011,7 @@
      ) -> Optional["LookerExplore"]:
          try:
              explore = client.lookml_model_explore(model, explore_name)
+
              views: Set[str] = set()
              lkml_fields: List[LookmlModelExploreField] = (
                  explore_field_set_to_lkml_fields(explore)
@@ -1117,7 +1148,11 @@
                  )
              )

-             view_project_map: Dict[str, str] = create_view_project_map(view_fields)
+             view_project_map: Dict[str, str] = create_view_project_map(
+                 view_fields,
+                 explore_primary_view=explore.view_name,
+                 explore_project_name=explore.project_name,
+             )
              if view_project_map:
                  logger.debug(f"views and their projects: {view_project_map}")

@@ -1289,6 +1324,7 @@
                  if self.upstream_views_file_path[view_ref.include] is not None
                  else ViewFieldValue.NOT_AVAILABLE.value
              )
+
              view_urn = LookerViewId(
                  project_name=(
                      view_ref.project
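A hedged illustration of the new override behavior in create_view_project_map; the view and project names below are made up.

    # Suppose the explore's primary view is "orders", declared in project "core",
    # while a field-level reference attributes "orders" to project "marketplace".
    view_project_map = create_view_project_map(
        view_fields,  # assumed List[ViewField] whose entries carry view_name/project_name
        explore_primary_view="orders",
        explore_project_name="core",
    )
    # With the override in place, view_project_map["orders"] == "core"
    # rather than the field-level "marketplace".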
datahub/ingestion/source/looker/looker_source.py
@@ -51,6 +51,7 @@ from datahub.ingestion.api.workunit import MetadataWorkUnit
  from datahub.ingestion.source.common.subtypes import (
      BIAssetSubTypes,
      BIContainerSubTypes,
+     SourceCapabilityModifier,
  )
  from datahub.ingestion.source.looker import looker_usage
  from datahub.ingestion.source.looker.looker_common import (
@@ -127,6 +128,14 @@ logger = logging.getLogger(__name__)
      "Enabled by default, configured using `extract_usage_history`",
  )
  @capability(SourceCapability.TEST_CONNECTION, "Enabled by default")
+ @capability(
+     SourceCapability.CONTAINERS,
+     "Enabled by default",
+     subtype_modifier=[
+         SourceCapabilityModifier.LOOKML_MODEL,
+         SourceCapabilityModifier.LOOKER_FOLDER,
+     ],
+ )
  class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
      """
      This plugin extracts the following:
datahub/ingestion/source/looker/lookml_source.py
@@ -27,6 +27,7 @@ from datahub.ingestion.api.workunit import MetadataWorkUnit
  from datahub.ingestion.source.common.subtypes import (
      BIContainerSubTypes,
      DatasetSubTypes,
+     SourceCapabilityModifier,
  )
  from datahub.ingestion.source.git.git_import import GitClone
  from datahub.ingestion.source.looker.looker_common import (
@@ -273,6 +274,13 @@ class LookerManifest:
      SourceCapability.LINEAGE_FINE,
      "Enabled by default, configured using `extract_column_level_lineage`",
  )
+ @capability(
+     SourceCapability.CONTAINERS,
+     "Enabled by default",
+     subtype_modifier=[
+         SourceCapabilityModifier.LOOKML_PROJECT,
+     ],
+ )
  class LookMLSource(StatefulIngestionSourceBase):
      """
      This plugin extracts the following:
datahub/ingestion/source/mongodb.py
@@ -36,7 +36,10 @@ from datahub.ingestion.api.decorators import (
  )
  from datahub.ingestion.api.source import MetadataWorkUnitProcessor
  from datahub.ingestion.api.workunit import MetadataWorkUnit
- from datahub.ingestion.source.common.subtypes import DatasetContainerSubTypes
+ from datahub.ingestion.source.common.subtypes import (
+     DatasetContainerSubTypes,
+     SourceCapabilityModifier,
+ )
  from datahub.ingestion.source.schema_inference.object import (
      SchemaDescription,
      construct_schema,
@@ -249,6 +252,13 @@ def construct_schema_pymongo(
  @support_status(SupportStatus.CERTIFIED)
  @capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default")
  @capability(SourceCapability.SCHEMA_METADATA, "Enabled by default")
+ @capability(
+     SourceCapability.CONTAINERS,
+     "Enabled by default",
+     subtype_modifier=[
+         SourceCapabilityModifier.DATABASE,
+     ],
+ )
  @dataclass
  class MongoDBSource(StatefulIngestionSourceBase):
      """
datahub/ingestion/source/redshift/redshift.py
@@ -47,6 +47,7 @@ from datahub.ingestion.source.common.data_reader import DataReader
  from datahub.ingestion.source.common.subtypes import (
      DatasetContainerSubTypes,
      DatasetSubTypes,
+     SourceCapabilityModifier,
  )
  from datahub.ingestion.source.redshift.config import RedshiftConfig
  from datahub.ingestion.source.redshift.datashares import RedshiftDatasharesHelper
@@ -126,7 +127,13 @@ logger: logging.Logger = logging.getLogger(__name__)
  @platform_name("Redshift")
  @config_class(RedshiftConfig)
  @support_status(SupportStatus.CERTIFIED)
- @capability(SourceCapability.CONTAINERS, "Enabled by default")
+ @capability(
+     SourceCapability.CONTAINERS,
+     "Enabled by default",
+     subtype_modifier=[
+         SourceCapabilityModifier.DATABASE,
+     ],
+ )
  @capability(SourceCapability.DOMAINS, "Supported via the `domain` config field")
  @capability(SourceCapability.DATA_PROFILING, "Optionally enabled via configuration")
  @capability(SourceCapability.DESCRIPTIONS, "Enabled by default")