acryl-datahub 1.2.0.1rc1__py3-none-any.whl → 1.2.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

Files changed (54)
  1. {acryl_datahub-1.2.0.1rc1.dist-info → acryl_datahub-1.2.0.2.dist-info}/METADATA +2525 -2523
  2. {acryl_datahub-1.2.0.1rc1.dist-info → acryl_datahub-1.2.0.2.dist-info}/RECORD +54 -46
  3. datahub/_version.py +1 -1
  4. datahub/api/entities/dataset/dataset.py +13 -1
  5. datahub/emitter/rest_emitter.py +3 -1
  6. datahub/ingestion/autogenerated/capability_summary.json +97 -6
  7. datahub/ingestion/source/abs/source.py +5 -29
  8. datahub/ingestion/source/aws/glue.py +8 -0
  9. datahub/ingestion/source/cassandra/cassandra.py +5 -7
  10. datahub/ingestion/source/common/subtypes.py +2 -0
  11. datahub/ingestion/source/data_lake_common/data_lake_utils.py +37 -0
  12. datahub/ingestion/source/datahub/datahub_source.py +3 -0
  13. datahub/ingestion/source/dbt/dbt_common.py +69 -2
  14. datahub/ingestion/source/delta_lake/source.py +1 -0
  15. datahub/ingestion/source/ge_data_profiler.py +9 -1
  16. datahub/ingestion/source/grafana/entity_mcp_builder.py +272 -0
  17. datahub/ingestion/source/grafana/field_utils.py +307 -0
  18. datahub/ingestion/source/grafana/grafana_api.py +142 -0
  19. datahub/ingestion/source/grafana/grafana_config.py +104 -0
  20. datahub/ingestion/source/grafana/grafana_source.py +522 -84
  21. datahub/ingestion/source/grafana/lineage.py +202 -0
  22. datahub/ingestion/source/grafana/models.py +120 -0
  23. datahub/ingestion/source/grafana/report.py +91 -0
  24. datahub/ingestion/source/grafana/types.py +16 -0
  25. datahub/ingestion/source/hex/hex.py +8 -0
  26. datahub/ingestion/source/looker/looker_common.py +40 -4
  27. datahub/ingestion/source/looker/looker_source.py +9 -0
  28. datahub/ingestion/source/looker/lookml_source.py +8 -0
  29. datahub/ingestion/source/mongodb.py +11 -1
  30. datahub/ingestion/source/redshift/redshift.py +8 -1
  31. datahub/ingestion/source/s3/source.py +14 -34
  32. datahub/ingestion/source/sql/athena.py +8 -2
  33. datahub/ingestion/source/sql/clickhouse.py +9 -0
  34. datahub/ingestion/source/sql/postgres.py +190 -1
  35. datahub/ingestion/source/sql_queries.py +111 -76
  36. datahub/ingestion/source/unity/proxy.py +8 -8
  37. datahub/metadata/_internal_schema_classes.py +96 -0
  38. datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py +2 -0
  39. datahub/metadata/schema.avsc +69 -0
  40. datahub/metadata/schemas/CorpUserSettings.avsc +10 -1
  41. datahub/metadata/schemas/DataHubPageModuleProperties.avsc +42 -0
  42. datahub/metadata/schemas/MetadataChangeEvent.avsc +18 -0
  43. datahub/metadata/schemas/MetadataChangeLog.avsc +62 -44
  44. datahub/metadata/schemas/MetadataChangeProposal.avsc +61 -0
  45. datahub/metadata/schemas/SystemMetadata.avsc +61 -0
  46. datahub/sdk/dataset.py +44 -0
  47. datahub/sdk/search_filters.py +84 -15
  48. datahub/sql_parsing/sql_parsing_aggregator.py +6 -0
  49. datahub/telemetry/telemetry.py +4 -1
  50. datahub/upgrade/upgrade.py +5 -3
  51. {acryl_datahub-1.2.0.1rc1.dist-info → acryl_datahub-1.2.0.2.dist-info}/WHEEL +0 -0
  52. {acryl_datahub-1.2.0.1rc1.dist-info → acryl_datahub-1.2.0.2.dist-info}/entry_points.txt +0 -0
  53. {acryl_datahub-1.2.0.1rc1.dist-info → acryl_datahub-1.2.0.2.dist-info}/licenses/LICENSE +0 -0
  54. {acryl_datahub-1.2.0.1rc1.dist-info → acryl_datahub-1.2.0.2.dist-info}/top_level.txt +0 -0

datahub/ingestion/source/abs/source.py

@@ -44,7 +44,10 @@ from datahub.ingestion.source.azure.abs_utils import (
     get_key_prefix,
     strip_abs_prefix,
 )
-from datahub.ingestion.source.data_lake_common.data_lake_utils import ContainerWUCreator
+from datahub.ingestion.source.data_lake_common.data_lake_utils import (
+    ContainerWUCreator,
+    add_partition_columns_to_schema,
+)
 from datahub.ingestion.source.schema_inference import avro, csv_tsv, json, parquet
 from datahub.ingestion.source.state.stale_entity_removal_handler import (
     StaleEntityRemovalHandler,
@@ -53,10 +53,7 @@ from datahub.ingestion.source.state.stateful_ingestion_base import (
     StatefulIngestionSourceBase,
 )
 from datahub.metadata.com.linkedin.pegasus2avro.schema import (
-    SchemaField,
-    SchemaFieldDataType,
     SchemaMetadata,
-    StringTypeClass,
 )
 from datahub.metadata.schema_classes import (
     DataPlatformInstanceClass,
@@ -223,36 +223,12 @@ class ABSSource(StatefulIngestionSourceBase):
         fields = sorted(fields, key=lambda f: f.fieldPath)
 
         if self.source_config.add_partition_columns_to_schema:
-            self.add_partition_columns_to_schema(
+            add_partition_columns_to_schema(
                 fields=fields, path_spec=path_spec, full_path=table_data.full_path
             )
 
         return fields
 
-    def add_partition_columns_to_schema(
-        self, path_spec: PathSpec, full_path: str, fields: List[SchemaField]
-    ) -> None:
-        vars = path_spec.get_named_vars(full_path)
-        if vars is not None and "partition" in vars:
-            for partition in vars["partition"].values():
-                partition_arr = partition.split("=")
-                if len(partition_arr) != 2:
-                    logger.debug(
-                        f"Could not derive partition key from partition field {partition}"
-                    )
-                    continue
-                partition_key = partition_arr[0]
-                fields.append(
-                    SchemaField(
-                        fieldPath=f"{partition_key}",
-                        nativeDataType="string",
-                        type=SchemaFieldDataType(StringTypeClass()),
-                        isPartitioningKey=True,
-                        nullable=True,
-                        recursive=False,
-                    )
-                )
-
     def _create_table_operation_aspect(self, table_data: TableData) -> OperationClass:
         reported_time = int(time.time() * 1000)
 

datahub/ingestion/source/aws/glue.py

@@ -75,6 +75,7 @@ from datahub.ingestion.source.aws.tag_entities import (
 from datahub.ingestion.source.common.subtypes import (
     DatasetContainerSubTypes,
     DatasetSubTypes,
+    SourceCapabilityModifier,
 )
 from datahub.ingestion.source.glue_profiling_config import GlueProfilingConfig
 from datahub.ingestion.source.state.stale_entity_removal_handler import (
@@ -275,6 +276,13 @@ class GlueSourceReport(StaleEntityRemovalSourceReport):
 @capability(
     SourceCapability.LINEAGE_FINE, "Support via the `emit_s3_lineage` config field"
 )
+@capability(
+    SourceCapability.CONTAINERS,
+    "Enabled by default",
+    subtype_modifier=[
+        SourceCapabilityModifier.DATABASE,
+    ],
+)
 class GlueSource(StatefulIngestionSourceBase):
     """
     Note: if you also have files in S3 that you'd like to ingest, we recommend you use Glue's built-in data catalog. See [here](../../../../docs/generated/ingestion/sources/s3.md) for a quick guide on how to set up a crawler on Glue and ingest the outputs with DataHub.

datahub/ingestion/source/cassandra/cassandra.py

@@ -296,13 +296,11 @@ class CassandraSource(StatefulIngestionSourceBase):
                 qualified_name=dataset_name,
                 description=view.comment,
                 custom_properties=self._get_dataset_custom_props(view),
-                extra_aspects=[
-                    ViewPropertiesClass(
-                        materialized=True,
-                        viewLogic=view.where_clause,  # Use the WHERE clause as view logic
-                        viewLanguage="CQL",  # Use "CQL" as the language
-                    ),
-                ],
+                view_definition=ViewPropertiesClass(
+                    materialized=True,
+                    viewLogic=view.where_clause,  # Use the WHERE clause as view logic
+                    viewLanguage="CQL",  # Use "CQL" as the language
+                ),
             )
 
             # Construct and emit lineage off of 'base_table_name'

datahub/ingestion/source/common/subtypes.py

@@ -69,6 +69,8 @@ class BIContainerSubTypes(StrEnum):
     SIGMA_WORKSPACE = "Sigma Workspace"
     SIGMA_WORKBOOK = "Sigma Workbook"
     MODE_COLLECTION = "Collection"
+    GRAFANA_FOLDER = "Folder"
+    GRAFANA_DASHBOARD = "Dashboard"
 
 
 class FlowContainerSubTypes(StrEnum):

datahub/ingestion/source/data_lake_common/data_lake_utils.py

@@ -25,10 +25,16 @@ from datahub.ingestion.source.data_lake_common.object_store import (
     get_object_store_bucket_name,
     get_object_store_for_uri,
 )
+from datahub.ingestion.source.data_lake_common.path_spec import PathSpec
 from datahub.ingestion.source.gcs.gcs_utils import (
     get_gcs_prefix,
     is_gcs_uri,
 )
+from datahub.metadata.schema_classes import (
+    SchemaFieldClass,
+    SchemaFieldDataTypeClass,
+    StringTypeClass,
+)
 
 # hide annoying debug errors from py4j
 logging.getLogger("py4j").setLevel(logging.ERROR)
@@ -39,6 +45,37 @@ PLATFORM_GCS = "gcs"
 PLATFORM_ABS = "abs"
 
 
+def add_partition_columns_to_schema(
+    path_spec: PathSpec, full_path: str, fields: List[SchemaFieldClass]
+) -> None:
+    # Check if using fieldPath v2 format
+    is_fieldpath_v2 = any(
+        field.fieldPath.startswith("[version=2.0]") for field in fields
+    )
+
+    # Extract partition information from path
+    partition_keys = path_spec.get_partition_from_path(full_path)
+    if not partition_keys:
+        return
+
+    # Add partition fields to schema
+    for partition_key in partition_keys:
+        fields.append(
+            SchemaFieldClass(
+                fieldPath=(
+                    f"{partition_key[0]}"
+                    if not is_fieldpath_v2
+                    else f"[version=2.0].[type=string].{partition_key[0]}"
+                ),
+                nativeDataType="string",
+                type=SchemaFieldDataTypeClass(StringTypeClass()),
+                isPartitioningKey=True,
+                nullable=False,
+                recursive=False,
+            )
+        )
+
+
 class ContainerWUCreator:
     processed_containers: List[str]
 
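
The shared helper above centralizes the partition-column handling that the ABS source previously implemented locally (its copy is removed earlier in this diff). A minimal usage sketch follows, assuming a PathSpec already configured for the source and an inferred field list; the wrapper function name is illustrative, not part of the package.

from typing import List

from datahub.ingestion.source.data_lake_common.data_lake_utils import (
    add_partition_columns_to_schema,
)
from datahub.ingestion.source.data_lake_common.path_spec import PathSpec
from datahub.metadata.schema_classes import SchemaFieldClass


def fields_with_partitions(
    path_spec: PathSpec, full_path: str, fields: List[SchemaFieldClass]
) -> List[SchemaFieldClass]:
    # Appends one string field per partition key recognized in `full_path`;
    # the helper is a no-op when the path carries no partition segments.
    add_partition_columns_to_schema(
        path_spec=path_spec, full_path=full_path, fields=fields
    )
    return fields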

datahub/ingestion/source/datahub/datahub_source.py

@@ -6,7 +6,9 @@ from typing import Dict, Iterable, List, Optional
 from datahub.emitter.mcp import MetadataChangeProposalWrapper
 from datahub.ingestion.api.common import PipelineContext
 from datahub.ingestion.api.decorators import (
+    SourceCapability,
     SupportStatus,
+    capability,
     config_class,
     platform_name,
     support_status,
@@ -37,6 +39,7 @@ logger = logging.getLogger(__name__)
 @platform_name("DataHub")
 @config_class(DataHubSourceConfig)
 @support_status(SupportStatus.TESTING)
+@capability(SourceCapability.CONTAINERS, "Enabled by default")
 class DataHubSource(StatefulIngestionSourceBase):
     platform: str = "datahub"
 

datahub/ingestion/source/dbt/dbt_common.py

@@ -145,6 +145,9 @@ class DBTSourceReport(StaleEntityRemovalSourceReport):
 
     nodes_filtered: LossyList[str] = field(default_factory=LossyList)
 
+    duplicate_sources_dropped: Optional[int] = None
+    duplicate_sources_references_updated: Optional[int] = None
+
 
 class EmitDirective(ConfigEnum):
     """A holder for directives for emission for specific types of entities"""
@@ -370,6 +373,12 @@ class DBTCommonConfig(
         "Set to False to skip it for engines like AWS Athena where it's not required.",
     )
 
+    drop_duplicate_sources: bool = Field(
+        default=True,
+        description="When enabled, drops sources that have the same name in the target platform as a model. "
+        "This ensures that lineage is generated reliably, but will lose any documentation associated only with the source.",
+    )
+
     @validator("target_platform")
     def validate_target_platform_value(cls, target_platform: str) -> str:
         if target_platform.lower() == DBT_PLATFORM:
@@ -509,7 +518,7 @@ class DBTNode:
     raw_code: Optional[str]
 
 
-    dbt_name: str
+    dbt_name: str  # dbt unique identifier
     dbt_file_path: Optional[str]
     dbt_package_name: Optional[str]  # this is pretty much always present
 
@@ -975,6 +984,8 @@ class DBTSourceBase(StatefulIngestionSourceBase):
         self._infer_schemas_and_update_cll(all_nodes_map)
 
         nodes = self._filter_nodes(all_nodes)
+        nodes = self._drop_duplicate_sources(nodes)
+
         non_test_nodes = [
             dataset_node for dataset_node in nodes if dataset_node.node_type != "test"
         ]
@@ -1000,7 +1011,7 @@ class DBTSourceBase(StatefulIngestionSourceBase):
         return self.config.node_name_pattern.allowed(key)
 
     def _filter_nodes(self, all_nodes: List[DBTNode]) -> List[DBTNode]:
-        nodes = []
+        nodes: List[DBTNode] = []
         for node in all_nodes:
             key = node.dbt_name
 
@@ -1012,6 +1023,62 @@ class DBTSourceBase(StatefulIngestionSourceBase):
 
         return nodes
 
+    def _drop_duplicate_sources(self, original_nodes: List[DBTNode]) -> List[DBTNode]:
+        """Detect and correct cases where a model and source have the same name.
+
+        In these cases, we don't want to generate both because they'll have the same
+        urn and hence overwrite each other. Instead, we drop the source and update
+        references to it to point at the model.
+
+        The risk here is that the source might have documentation that'd be lost,
+        which is why we maintain optionality with a config flag.
+        """
+        if not self.config.drop_duplicate_sources:
+            return original_nodes
+
+        self.report.duplicate_sources_dropped = 0
+        self.report.duplicate_sources_references_updated = 0
+
+        # Pass 1 - find all model names in the warehouse.
+        warehouse_model_names: Dict[str, str] = {}  # warehouse name -> model unique id
+        for node in original_nodes:
+            if node.node_type == "model" and node.exists_in_target_platform:
+                warehouse_model_names[node.get_db_fqn()] = node.dbt_name
+
+        # Pass 2 - identify + drop duplicate sources.
+        source_references_to_update: Dict[
+            str, str
+        ] = {}  # source unique id -> model unique id
+        nodes: List[DBTNode] = []
+        for node in original_nodes:
+            if (
+                node.node_type == "source"
+                and node.exists_in_target_platform
+                and (model_name := warehouse_model_names.get(node.get_db_fqn()))
+            ):
+                self.report.warning(
+                    title="Duplicate model and source names detected",
+                    message="We found a dbt model and dbt source with the same name. To ensure reliable lineage generation, the source node was ignored. "
+                    "If you associated documentation/tags/other metadata with the source, it will be lost. "
+                    "To avoid this, you should remove the source node from your dbt project and replace any `source(<source_name>)` calls with `ref(<model_name>)`.",
+                    context=f"{node.dbt_name} (called {node.get_db_fqn()} in {self.config.target_platform}) duplicates {model_name}",
+                )
+                self.report.duplicate_sources_dropped += 1
+                source_references_to_update[node.dbt_name] = model_name
+            else:
+                nodes.append(node)
+
+        # Pass 3 - update references to the dropped sources.
+        for node in nodes:
+            for i, current_upstream in enumerate(node.upstream_nodes):
+                if current_upstream in source_references_to_update:
+                    node.upstream_nodes[i] = source_references_to_update[
+                        current_upstream
+                    ]
+                    self.report.duplicate_sources_references_updated += 1
+
+        return nodes
+
     @staticmethod
     def _to_schema_info(schema_fields: List[SchemaField]) -> SchemaInfo:
         return {column.fieldPath: column.nativeDataType for column in schema_fields}
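
The new `drop_duplicate_sources` option shown above defaults to True, so a recipe only needs to set it when the previous behaviour (emitting both the source node and the model node) is preferred. A minimal programmatic sketch, assuming local dbt artifacts and a local DataHub instance; every value other than `drop_duplicate_sources` is a placeholder.

from datahub.ingestion.run.pipeline import Pipeline

# Placeholder paths/server; `drop_duplicate_sources` is the option added in this release.
pipeline = Pipeline.create(
    {
        "source": {
            "type": "dbt",
            "config": {
                "manifest_path": "./target/manifest.json",
                "catalog_path": "./target/catalog.json",
                "target_platform": "snowflake",
                # Keep duplicate sources instead of dropping them (default: True).
                "drop_duplicate_sources": False,
            },
        },
        "sink": {
            "type": "datahub-rest",
            "config": {"server": "http://localhost:8080"},
        },
    }
)
pipeline.run()
pipeline.raise_from_status()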

datahub/ingestion/source/delta_lake/source.py

@@ -85,6 +85,7 @@ OPERATION_STATEMENT_TYPES = {
 @config_class(DeltaLakeSourceConfig)
 @support_status(SupportStatus.INCUBATING)
 @capability(SourceCapability.TAGS, "Can extract S3 object/bucket tags if enabled")
+@capability(SourceCapability.CONTAINERS, "Enabled by default")
 class DeltaLakeSource(StatefulIngestionSourceBase):
     """
     This plugin extracts:

datahub/ingestion/source/ge_data_profiler.py

@@ -1497,9 +1497,17 @@ class DatahubGEProfiler:
                 logger.error(
                     f"Unexpected {pretty_name} while profiling. Should have 3 parts but has {len(name_parts)} parts."
                 )
+            if platform == DATABRICKS:
+                # TODO: Review logic for BigQuery as well, probably project.dataset.table should be quoted there as well
+                quoted_name = ".".join(
+                    batch.engine.dialect.identifier_preparer.quote(part)
+                    for part in name_parts
+                )
+                batch._table = sa.text(quoted_name)
+                logger.debug(f"Setting quoted table name to be {batch._table}")
             # If we only have two parts that means the project_id is missing from the table name and we add it
             # Temp tables has 3 parts while normal tables only has 2 parts
-            if len(str(batch._table).split(".")) == 2:
+            elif len(str(batch._table).split(".")) == 2:
                 batch._table = sa.text(f"{name_parts[0]}.{str(batch._table)}")
                 logger.debug(f"Setting table name to be {batch._table}")
 
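
The Databricks branch above leans on SQLAlchemy's dialect-aware identifier quoting so that three-part names containing special characters survive profiling queries intact. A standalone sketch of that quoting behaviour, using an in-memory SQLite engine purely for illustration (the profiler uses the batch's own engine, so quoting follows the target dialect):

import sqlalchemy as sa

engine = sa.create_engine("sqlite://")
preparer = engine.dialect.identifier_preparer

# Hypothetical catalog/schema/table parts; the latter two need quoting.
name_parts = ["main", "sales-2024", "daily totals"]
quoted_name = ".".join(preparer.quote(part) for part in name_parts)
print(quoted_name)  # parts that require it come back wrapped in the dialect's quote character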

datahub/ingestion/source/grafana/entity_mcp_builder.py (new file)

@@ -0,0 +1,272 @@
+from typing import Dict, List, Optional, Tuple
+
+from datahub.emitter.mce_builder import (
+    make_chart_urn,
+    make_dashboard_urn,
+    make_data_platform_urn,
+    make_dataplatform_instance_urn,
+    make_dataset_urn_with_platform_instance,
+    make_tag_urn,
+    make_user_urn,
+)
+from datahub.emitter.mcp import MetadataChangeProposalWrapper
+from datahub.ingestion.source.grafana.models import Dashboard, Panel
+from datahub.ingestion.source.grafana.types import CHART_TYPE_MAPPINGS
+from datahub.metadata.schema_classes import (
+    ChangeAuditStampsClass,
+    ChartInfoClass,
+    DashboardInfoClass,
+    DataPlatformInstanceClass,
+    GlobalTagsClass,
+    OwnerClass,
+    OwnershipClass,
+    OwnershipTypeClass,
+    StatusClass,
+    TagAssociationClass,
+)
+
+
+def build_chart_mcps(
+    panel: Panel,
+    dashboard: Dashboard,
+    platform: str,
+    platform_instance: Optional[str],
+    env: str,
+    base_url: str,
+    ingest_tags: bool,
+) -> Tuple[Optional[str], str, List[MetadataChangeProposalWrapper]]:
+    """Build chart metadata change proposals"""
+    ds_urn = None
+    mcps = []
+
+    chart_urn = make_chart_urn(
+        platform,
+        f"{dashboard.uid}.{panel.id}",
+        platform_instance,
+    )
+
+    # Platform instance aspect
+    mcps.append(
+        MetadataChangeProposalWrapper(
+            entityUrn=chart_urn,
+            aspect=DataPlatformInstanceClass(
+                platform=make_data_platform_urn(platform),
+                instance=make_dataplatform_instance_urn(
+                    platform=platform,
+                    instance=platform_instance,
+                )
+                if platform_instance
+                else None,
+            ),
+        )
+    )
+
+    # Status aspect
+    mcps.append(
+        MetadataChangeProposalWrapper(
+            entityUrn=chart_urn,
+            aspect=StatusClass(removed=False),
+        )
+    )
+
+    # Get input datasets
+    input_datasets = []
+    if panel.datasource_ref:
+        ds_type = panel.datasource_ref.type or "unknown"
+        ds_uid = panel.datasource_ref.uid or "unknown"
+
+        # Add Grafana dataset
+        dataset_name = f"{ds_type}.{ds_uid}.{panel.id}"
+        ds_urn = make_dataset_urn_with_platform_instance(
+            platform=platform,
+            name=dataset_name,
+            platform_instance=platform_instance,
+            env=env,
+        )
+        input_datasets.append(ds_urn)
+
+    # Chart info aspect
+    title = panel.title or f"Panel {panel.id}"
+    mcps.append(
+        MetadataChangeProposalWrapper(
+            entityUrn=chart_urn,
+            aspect=ChartInfoClass(
+                type=CHART_TYPE_MAPPINGS.get(panel.type) if panel.type else None,
+                description=panel.description,
+                title=title,
+                lastModified=ChangeAuditStampsClass(),
+                chartUrl=f"{base_url}/d/{dashboard.uid}?viewPanel={panel.id}",
+                customProperties=_build_custom_properties(panel),
+                inputs=input_datasets,
+            ),
+        )
+    )
+
+    # Tags aspect
+    if dashboard.tags and ingest_tags:
+        tags = []
+        for tag in dashboard.tags:
+            if ":" in tag:
+                key, value = tag.split(":", 1)
+                tag_urn = make_tag_urn(f"{key}.{value}")
+            else:
+                tag_urn = make_tag_urn(tag)
+            tags.append(TagAssociationClass(tag=tag_urn))
+
+        if tags:
+            mcps.append(
+                MetadataChangeProposalWrapper(
+                    entityUrn=chart_urn,
+                    aspect=GlobalTagsClass(tags=tags),
+                )
+            )
+
+    return ds_urn, chart_urn, mcps
+
+
+def build_dashboard_mcps(
+    dashboard: Dashboard,
+    platform: str,
+    platform_instance: Optional[str],
+    chart_urns: List[str],
+    base_url: str,
+    ingest_owners: bool,
+    ingest_tags: bool,
+) -> Tuple[str, List[MetadataChangeProposalWrapper]]:
+    """Build dashboard metadata change proposals"""
+    mcps = []
+    dashboard_urn = make_dashboard_urn(platform, dashboard.uid, platform_instance)
+
+    # Platform instance aspect
+    mcps.append(
+        MetadataChangeProposalWrapper(
+            entityUrn=dashboard_urn,
+            aspect=DataPlatformInstanceClass(
+                platform=make_data_platform_urn(platform),
+                instance=make_dataplatform_instance_urn(
+                    platform=platform,
+                    instance=platform_instance,
+                )
+                if platform_instance
+                else None,
+            ),
+        )
+    )
+
+    # Dashboard info aspect
+    mcps.append(
+        MetadataChangeProposalWrapper(
+            entityUrn=dashboard_urn,
+            aspect=DashboardInfoClass(
+                description=dashboard.description,
+                title=dashboard.title,
+                charts=chart_urns,
+                lastModified=ChangeAuditStampsClass(),
+                dashboardUrl=f"{base_url}/d/{dashboard.uid}",
+                customProperties=_build_dashboard_properties(dashboard),
+            ),
+        )
+    )
+
+    # Ownership aspect
+    if dashboard.uid and ingest_owners:
+        owner = _build_ownership(dashboard)
+        if owner:
+            mcps.append(
+                MetadataChangeProposalWrapper(
+                    entityUrn=dashboard_urn,
+                    aspect=owner,
+                )
+            )
+
+    # Tags aspect
+    if dashboard.tags and ingest_tags:
+        tags = [TagAssociationClass(tag=make_tag_urn(tag)) for tag in dashboard.tags]
+        if tags:
+            mcps.append(
+                MetadataChangeProposalWrapper(
+                    entityUrn=dashboard_urn,
+                    aspect=GlobalTagsClass(tags=tags),
+                )
+            )
+
+    # Status aspect
+    mcps.append(
+        MetadataChangeProposalWrapper(
+            entityUrn=dashboard_urn,
+            aspect=StatusClass(removed=False),
+        )
+    )
+
+    return dashboard_urn, mcps
+
+
+def _build_custom_properties(panel: Panel) -> Dict[str, str]:
+    """Build custom properties for chart"""
+    props = {}
+
+    if panel.type:
+        props["type"] = panel.type
+
+    if panel.datasource_ref:
+        props["datasourceType"] = panel.datasource_ref.type or ""
+        props["datasourceUid"] = panel.datasource_ref.uid or ""
+
+    for key in [
+        "description",
+        "format",
+        "pluginVersion",
+        "repeatDirection",
+        "maxDataPoints",
+    ]:
+        value = getattr(panel, key, None)
+        if value:
+            props[key] = str(value)
+
+    if panel.query_targets:
+        props["targetsCount"] = str(len(panel.query_targets))
+
+    return props
+
+
+def _build_dashboard_properties(dashboard: Dashboard) -> Dict[str, str]:
+    """Build custom properties for dashboard"""
+    props = {}
+
+    if dashboard.timezone:
+        props["timezone"] = dashboard.timezone
+
+    if dashboard.schema_version:
+        props["schema_version"] = dashboard.schema_version
+
+    if dashboard.version:
+        props["version"] = dashboard.version
+
+    if dashboard.refresh:
+        props["refresh"] = dashboard.refresh
+
+    return props
+
+
+def _build_ownership(dashboard: Dashboard) -> Optional[OwnershipClass]:
+    """Build ownership information"""
+    owners = []
+
+    if dashboard.uid:
+        owners.append(
+            OwnerClass(
+                owner=make_user_urn(dashboard.uid),
+                type=OwnershipTypeClass.TECHNICAL_OWNER,
+            )
+        )
+
+    if dashboard.created_by:
+        owner_id = dashboard.created_by.split("@")[0]
+        owners.append(
+            OwnerClass(
+                owner=make_user_urn(owner_id),
+                type=OwnershipTypeClass.DATAOWNER,
+            )
+        )
+
+    return OwnershipClass(owners=owners) if owners else None
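
The builders above return plain MetadataChangeProposalWrapper lists, so they can also be emitted outside the source itself. A minimal sketch, assuming already-parsed Panel and Dashboard model instances and a locally running DataHub instance; the function name, Grafana URL, and server address are placeholders.

from typing import Optional

from datahub.emitter.rest_emitter import DatahubRestEmitter
from datahub.ingestion.source.grafana.entity_mcp_builder import (
    build_chart_mcps,
    build_dashboard_mcps,
)
from datahub.ingestion.source.grafana.models import Dashboard, Panel


def emit_grafana_entities(
    panel: Panel,
    dashboard: Dashboard,
    base_url: str = "https://grafana.example.com",  # placeholder Grafana URL
    platform_instance: Optional[str] = None,
) -> None:
    # Build the chart MCPs first so the dashboard aspect can reference the chart urn.
    _ds_urn, chart_urn, chart_mcps = build_chart_mcps(
        panel=panel,
        dashboard=dashboard,
        platform="grafana",
        platform_instance=platform_instance,
        env="PROD",
        base_url=base_url,
        ingest_tags=True,
    )
    _dashboard_urn, dashboard_mcps = build_dashboard_mcps(
        dashboard=dashboard,
        platform="grafana",
        platform_instance=platform_instance,
        chart_urns=[chart_urn],
        base_url=base_url,
        ingest_owners=True,
        ingest_tags=True,
    )

    emitter = DatahubRestEmitter(gms_server="http://localhost:8080")  # placeholder server
    for mcp in chart_mcps + dashboard_mcps:
        emitter.emit_mcp(mcp)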