acryl-datahub 1.2.0.1__py3-none-any.whl → 1.2.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {acryl_datahub-1.2.0.1.dist-info → acryl_datahub-1.2.0.2.dist-info}/METADATA +2574 -2572
- {acryl_datahub-1.2.0.1.dist-info → acryl_datahub-1.2.0.2.dist-info}/RECORD +54 -46
- datahub/_version.py +1 -1
- datahub/api/entities/dataset/dataset.py +13 -1
- datahub/emitter/rest_emitter.py +3 -1
- datahub/ingestion/autogenerated/capability_summary.json +97 -6
- datahub/ingestion/source/abs/source.py +5 -29
- datahub/ingestion/source/aws/glue.py +8 -0
- datahub/ingestion/source/cassandra/cassandra.py +5 -7
- datahub/ingestion/source/common/subtypes.py +2 -0
- datahub/ingestion/source/data_lake_common/data_lake_utils.py +37 -0
- datahub/ingestion/source/datahub/datahub_source.py +3 -0
- datahub/ingestion/source/dbt/dbt_common.py +69 -2
- datahub/ingestion/source/delta_lake/source.py +1 -0
- datahub/ingestion/source/ge_data_profiler.py +9 -1
- datahub/ingestion/source/grafana/entity_mcp_builder.py +272 -0
- datahub/ingestion/source/grafana/field_utils.py +307 -0
- datahub/ingestion/source/grafana/grafana_api.py +142 -0
- datahub/ingestion/source/grafana/grafana_config.py +104 -0
- datahub/ingestion/source/grafana/grafana_source.py +522 -84
- datahub/ingestion/source/grafana/lineage.py +202 -0
- datahub/ingestion/source/grafana/models.py +120 -0
- datahub/ingestion/source/grafana/report.py +91 -0
- datahub/ingestion/source/grafana/types.py +16 -0
- datahub/ingestion/source/hex/hex.py +8 -0
- datahub/ingestion/source/looker/looker_common.py +40 -4
- datahub/ingestion/source/looker/looker_source.py +9 -0
- datahub/ingestion/source/looker/lookml_source.py +8 -0
- datahub/ingestion/source/mongodb.py +11 -1
- datahub/ingestion/source/redshift/redshift.py +8 -1
- datahub/ingestion/source/s3/source.py +14 -34
- datahub/ingestion/source/sql/athena.py +8 -2
- datahub/ingestion/source/sql/clickhouse.py +9 -0
- datahub/ingestion/source/sql/postgres.py +190 -1
- datahub/ingestion/source/sql_queries.py +111 -76
- datahub/ingestion/source/unity/proxy.py +8 -8
- datahub/metadata/_internal_schema_classes.py +96 -0
- datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py +2 -0
- datahub/metadata/schema.avsc +69 -0
- datahub/metadata/schemas/CorpUserSettings.avsc +10 -1
- datahub/metadata/schemas/DataHubPageModuleProperties.avsc +42 -0
- datahub/metadata/schemas/MetadataChangeEvent.avsc +18 -0
- datahub/metadata/schemas/MetadataChangeLog.avsc +62 -44
- datahub/metadata/schemas/MetadataChangeProposal.avsc +61 -0
- datahub/metadata/schemas/SystemMetadata.avsc +61 -0
- datahub/sdk/dataset.py +44 -0
- datahub/sdk/search_filters.py +84 -15
- datahub/sql_parsing/sql_parsing_aggregator.py +6 -0
- datahub/telemetry/telemetry.py +4 -1
- datahub/upgrade/upgrade.py +5 -3
- {acryl_datahub-1.2.0.1.dist-info → acryl_datahub-1.2.0.2.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.2.0.1.dist-info → acryl_datahub-1.2.0.2.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-1.2.0.1.dist-info → acryl_datahub-1.2.0.2.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.2.0.1.dist-info → acryl_datahub-1.2.0.2.dist-info}/top_level.txt +0 -0
datahub/ingestion/source/abs/source.py

@@ -44,7 +44,10 @@ from datahub.ingestion.source.azure.abs_utils import (
     get_key_prefix,
     strip_abs_prefix,
 )
-from datahub.ingestion.source.data_lake_common.data_lake_utils import ContainerWUCreator
+from datahub.ingestion.source.data_lake_common.data_lake_utils import (
+    ContainerWUCreator,
+    add_partition_columns_to_schema,
+)
 from datahub.ingestion.source.schema_inference import avro, csv_tsv, json, parquet
 from datahub.ingestion.source.state.stale_entity_removal_handler import (
     StaleEntityRemovalHandler,
@@ -53,10 +56,7 @@ from datahub.ingestion.source.state.stateful_ingestion_base import (
     StatefulIngestionSourceBase,
 )
 from datahub.metadata.com.linkedin.pegasus2avro.schema import (
-    SchemaField,
-    SchemaFieldDataType,
     SchemaMetadata,
-    StringTypeClass,
 )
 from datahub.metadata.schema_classes import (
     DataPlatformInstanceClass,
@@ -223,36 +223,12 @@ class ABSSource(StatefulIngestionSourceBase):
         fields = sorted(fields, key=lambda f: f.fieldPath)

         if self.source_config.add_partition_columns_to_schema:
-            self.add_partition_columns_to_schema(
+            add_partition_columns_to_schema(
                 fields=fields, path_spec=path_spec, full_path=table_data.full_path
             )

         return fields

-    def add_partition_columns_to_schema(
-        self, path_spec: PathSpec, full_path: str, fields: List[SchemaField]
-    ) -> None:
-        vars = path_spec.get_named_vars(full_path)
-        if vars is not None and "partition" in vars:
-            for partition in vars["partition"].values():
-                partition_arr = partition.split("=")
-                if len(partition_arr) != 2:
-                    logger.debug(
-                        f"Could not derive partition key from partition field {partition}"
-                    )
-                    continue
-                partition_key = partition_arr[0]
-                fields.append(
-                    SchemaField(
-                        fieldPath=f"{partition_key}",
-                        nativeDataType="string",
-                        type=SchemaFieldDataType(StringTypeClass()),
-                        isPartitioningKey=True,
-                        nullable=True,
-                        recursive=False,
-                    )
-                )
-
     def _create_table_operation_aspect(self, table_data: TableData) -> OperationClass:
         reported_time = int(time.time() * 1000)

datahub/ingestion/source/aws/glue.py

@@ -75,6 +75,7 @@ from datahub.ingestion.source.aws.tag_entities import (
 from datahub.ingestion.source.common.subtypes import (
     DatasetContainerSubTypes,
     DatasetSubTypes,
+    SourceCapabilityModifier,
 )
 from datahub.ingestion.source.glue_profiling_config import GlueProfilingConfig
 from datahub.ingestion.source.state.stale_entity_removal_handler import (
@@ -275,6 +276,13 @@ class GlueSourceReport(StaleEntityRemovalSourceReport):
 @capability(
     SourceCapability.LINEAGE_FINE, "Support via the `emit_s3_lineage` config field"
 )
+@capability(
+    SourceCapability.CONTAINERS,
+    "Enabled by default",
+    subtype_modifier=[
+        SourceCapabilityModifier.DATABASE,
+    ],
+)
 class GlueSource(StatefulIngestionSourceBase):
     """
     Note: if you also have files in S3 that you'd like to ingest, we recommend you use Glue's built-in data catalog. See [here](../../../../docs/generated/ingestion/sources/s3.md) for a quick guide on how to set up a crawler on Glue and ingest the outputs with DataHub.
datahub/ingestion/source/cassandra/cassandra.py

@@ -296,13 +296,11 @@ class CassandraSource(StatefulIngestionSourceBase):
                 qualified_name=dataset_name,
                 description=view.comment,
                 custom_properties=self._get_dataset_custom_props(view),
-
-
-
-
-
-                ),
-            ],
+                view_definition=ViewPropertiesClass(
+                    materialized=True,
+                    viewLogic=view.where_clause,  # Use the WHERE clause as view logic
+                    viewLanguage="CQL",  # Use "CQL" as the language
+                ),
             )

             # Construct and emit lineage off of 'base_table_name'
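For context, a minimal sketch of the aspect the new `view_definition` argument ultimately produces; the view URN and WHERE clause below are invented placeholders, not taken from this release.

```python
from datahub.emitter.mcp import MetadataChangeProposalWrapper
from datahub.metadata.schema_classes import ViewPropertiesClass

# Hypothetical materialized-view dataset URN and WHERE clause, for illustration only.
view_urn = "urn:li:dataset:(urn:li:dataPlatform:cassandra,ks.user_by_email,PROD)"
where_clause = "email IS NOT NULL"

mcp = MetadataChangeProposalWrapper(
    entityUrn=view_urn,
    aspect=ViewPropertiesClass(
        materialized=True,       # Cassandra materialized views are always materialized
        viewLogic=where_clause,  # the WHERE clause stands in for the view logic
        viewLanguage="CQL",
    ),
)
```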
datahub/ingestion/source/data_lake_common/data_lake_utils.py

@@ -25,10 +25,16 @@ from datahub.ingestion.source.data_lake_common.object_store import (
     get_object_store_bucket_name,
     get_object_store_for_uri,
 )
+from datahub.ingestion.source.data_lake_common.path_spec import PathSpec
 from datahub.ingestion.source.gcs.gcs_utils import (
     get_gcs_prefix,
     is_gcs_uri,
 )
+from datahub.metadata.schema_classes import (
+    SchemaFieldClass,
+    SchemaFieldDataTypeClass,
+    StringTypeClass,
+)

 # hide annoying debug errors from py4j
 logging.getLogger("py4j").setLevel(logging.ERROR)
@@ -39,6 +45,37 @@ PLATFORM_GCS = "gcs"
 PLATFORM_ABS = "abs"


+def add_partition_columns_to_schema(
+    path_spec: PathSpec, full_path: str, fields: List[SchemaFieldClass]
+) -> None:
+    # Check if using fieldPath v2 format
+    is_fieldpath_v2 = any(
+        field.fieldPath.startswith("[version=2.0]") for field in fields
+    )
+
+    # Extract partition information from path
+    partition_keys = path_spec.get_partition_from_path(full_path)
+    if not partition_keys:
+        return
+
+    # Add partition fields to schema
+    for partition_key in partition_keys:
+        fields.append(
+            SchemaFieldClass(
+                fieldPath=(
+                    f"{partition_key[0]}"
+                    if not is_fieldpath_v2
+                    else f"[version=2.0].[type=string].{partition_key[0]}"
+                ),
+                nativeDataType="string",
+                type=SchemaFieldDataTypeClass(StringTypeClass()),
+                isPartitioningKey=True,
+                nullable=False,
+                recursive=False,
+            )
+        )
+
+
 class ContainerWUCreator:
     processed_containers: List[str]

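As a rough illustration of the shared helper's fieldPath v1/v2 handling, the sketch below reproduces that branch with plain schema classes; the `partition_keys` list is invented and merely stands in for whatever `path_spec.get_partition_from_path(...)` would return.

```python
from typing import List, Tuple

from datahub.metadata.schema_classes import (
    SchemaFieldClass,
    SchemaFieldDataTypeClass,
    StringTypeClass,
)

# Invented sample data: (key, value) pairs, as the helper expects from the path spec.
partition_keys: List[Tuple[str, str]] = [("year", "2024"), ("month", "07")]
fields: List[SchemaFieldClass] = []  # pretend this came from schema inference

is_fieldpath_v2 = any(f.fieldPath.startswith("[version=2.0]") for f in fields)
for key, _value in partition_keys:
    fields.append(
        SchemaFieldClass(
            fieldPath=key if not is_fieldpath_v2 else f"[version=2.0].[type=string].{key}",
            nativeDataType="string",
            type=SchemaFieldDataTypeClass(StringTypeClass()),
            isPartitioningKey=True,
            nullable=False,
            recursive=False,
        )
    )
```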
datahub/ingestion/source/datahub/datahub_source.py

@@ -6,7 +6,9 @@ from typing import Dict, Iterable, List, Optional
 from datahub.emitter.mcp import MetadataChangeProposalWrapper
 from datahub.ingestion.api.common import PipelineContext
 from datahub.ingestion.api.decorators import (
+    SourceCapability,
     SupportStatus,
+    capability,
     config_class,
     platform_name,
     support_status,
@@ -37,6 +39,7 @@ logger = logging.getLogger(__name__)
 @platform_name("DataHub")
 @config_class(DataHubSourceConfig)
 @support_status(SupportStatus.TESTING)
+@capability(SourceCapability.CONTAINERS, "Enabled by default")
 class DataHubSource(StatefulIngestionSourceBase):
     platform: str = "datahub"

datahub/ingestion/source/dbt/dbt_common.py

@@ -145,6 +145,9 @@ class DBTSourceReport(StaleEntityRemovalSourceReport):

     nodes_filtered: LossyList[str] = field(default_factory=LossyList)

+    duplicate_sources_dropped: Optional[int] = None
+    duplicate_sources_references_updated: Optional[int] = None
+

 class EmitDirective(ConfigEnum):
     """A holder for directives for emission for specific types of entities"""
@@ -370,6 +373,12 @@ class DBTCommonConfig(
         "Set to False to skip it for engines like AWS Athena where it's not required.",
     )

+    drop_duplicate_sources: bool = Field(
+        default=True,
+        description="When enabled, drops sources that have the same name in the target platform as a model. "
+        "This ensures that lineage is generated reliably, but will lose any documentation associated only with the source.",
+    )
+
     @validator("target_platform")
     def validate_target_platform_value(cls, target_platform: str) -> str:
         if target_platform.lower() == DBT_PLATFORM:
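The new `drop_duplicate_sources` flag defaults to on. A hedged sketch of how it might be disabled in a dbt-core source config, expressed as a plain dict; the paths and target platform are invented placeholders, and only `drop_duplicate_sources` comes from this diff.

```python
# Placeholder dbt-core source config as a plain dict; paths/platform are invented.
dbt_source_config = {
    "manifest_path": "./target/manifest.json",
    "catalog_path": "./target/catalog.json",
    "target_platform": "athena",
    # New in this release: set to False to keep duplicate sources (and their docs)
    # instead of dropping them in favour of the same-named model.
    "drop_duplicate_sources": False,
}
```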
datahub/ingestion/source/dbt/dbt_common.py (continued)

@@ -509,7 +518,7 @@ class DBTNode:
     raw_code: Optional[str]

     dbt_adapter: str
-    dbt_name: str
+    dbt_name: str  # dbt unique identifier
     dbt_file_path: Optional[str]
     dbt_package_name: Optional[str]  # this is pretty much always present

@@ -975,6 +984,8 @@ class DBTSourceBase(StatefulIngestionSourceBase):
         self._infer_schemas_and_update_cll(all_nodes_map)

         nodes = self._filter_nodes(all_nodes)
+        nodes = self._drop_duplicate_sources(nodes)
+
         non_test_nodes = [
             dataset_node for dataset_node in nodes if dataset_node.node_type != "test"
         ]
@@ -1000,7 +1011,7 @@ class DBTSourceBase(StatefulIngestionSourceBase):
         return self.config.node_name_pattern.allowed(key)

     def _filter_nodes(self, all_nodes: List[DBTNode]) -> List[DBTNode]:
-        nodes = []
+        nodes: List[DBTNode] = []
         for node in all_nodes:
             key = node.dbt_name

@@ -1012,6 +1023,62 @@ class DBTSourceBase(StatefulIngestionSourceBase):

         return nodes

+    def _drop_duplicate_sources(self, original_nodes: List[DBTNode]) -> List[DBTNode]:
+        """Detect and correct cases where a model and source have the same name.
+
+        In these cases, we don't want to generate both because they'll have the same
+        urn and hence overwrite each other. Instead, we drop the source and update
+        references to it to point at the model.
+
+        The risk here is that the source might have documentation that'd be lost,
+        which is why we maintain optionality with a config flag.
+        """
+        if not self.config.drop_duplicate_sources:
+            return original_nodes
+
+        self.report.duplicate_sources_dropped = 0
+        self.report.duplicate_sources_references_updated = 0
+
+        # Pass 1 - find all model names in the warehouse.
+        warehouse_model_names: Dict[str, str] = {}  # warehouse name -> model unique id
+        for node in original_nodes:
+            if node.node_type == "model" and node.exists_in_target_platform:
+                warehouse_model_names[node.get_db_fqn()] = node.dbt_name
+
+        # Pass 2 - identify + drop duplicate sources.
+        source_references_to_update: Dict[
+            str, str
+        ] = {}  # source unique id -> model unique id
+        nodes: List[DBTNode] = []
+        for node in original_nodes:
+            if (
+                node.node_type == "source"
+                and node.exists_in_target_platform
+                and (model_name := warehouse_model_names.get(node.get_db_fqn()))
+            ):
+                self.report.warning(
+                    title="Duplicate model and source names detected",
+                    message="We found a dbt model and dbt source with the same name. To ensure reliable lineage generation, the source node was ignored. "
+                    "If you associated documentation/tags/other metadata with the source, it will be lost. "
+                    "To avoid this, you should remove the source node from your dbt project and replace any `source(<source_name>)` calls with `ref(<model_name>)`.",
+                    context=f"{node.dbt_name} (called {node.get_db_fqn()} in {self.config.target_platform}) duplicates {model_name}",
+                )
+                self.report.duplicate_sources_dropped += 1
+                source_references_to_update[node.dbt_name] = model_name
+            else:
+                nodes.append(node)
+
+        # Pass 3 - update references to the dropped sources.
+        for node in nodes:
+            for i, current_upstream in enumerate(node.upstream_nodes):
+                if current_upstream in source_references_to_update:
+                    node.upstream_nodes[i] = source_references_to_update[
+                        current_upstream
+                    ]
+                    self.report.duplicate_sources_references_updated += 1
+
+        return nodes
+
     @staticmethod
     def _to_schema_info(schema_fields: List[SchemaField]) -> SchemaInfo:
         return {column.fieldPath: column.nativeDataType for column in schema_fields}
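To see the three passes of `_drop_duplicate_sources` in isolation, here is a stripped-down sketch over plain tuples instead of `DBTNode` objects; all node data is invented.

```python
# Each node: (unique_id, node_type, warehouse_fqn, upstream_ids) - invented sample data.
nodes = [
    ("model.jaffle.orders", "model", "db.analytics.orders", ["source.jaffle.raw_orders"]),
    ("source.jaffle.orders", "source", "db.analytics.orders", []),  # duplicates the model
    ("model.jaffle.revenue", "model", "db.analytics.revenue", ["source.jaffle.orders"]),
]

# Pass 1: warehouse name -> model unique id.
model_by_fqn = {fqn: uid for uid, kind, fqn, _ in nodes if kind == "model"}

# Pass 2: drop sources whose warehouse name collides with a model.
replacements, kept = {}, []
for uid, kind, fqn, upstreams in nodes:
    if kind == "source" and fqn in model_by_fqn:
        replacements[uid] = model_by_fqn[fqn]
    else:
        kept.append((uid, kind, fqn, upstreams))

# Pass 3: repoint upstream references at the surviving model.
for _, _, _, upstreams in kept:
    for i, up in enumerate(upstreams):
        upstreams[i] = replacements.get(up, up)

print(kept)
# model.jaffle.revenue now depends on model.jaffle.orders instead of the dropped source.
```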
datahub/ingestion/source/delta_lake/source.py

@@ -85,6 +85,7 @@ OPERATION_STATEMENT_TYPES = {
 @config_class(DeltaLakeSourceConfig)
 @support_status(SupportStatus.INCUBATING)
 @capability(SourceCapability.TAGS, "Can extract S3 object/bucket tags if enabled")
+@capability(SourceCapability.CONTAINERS, "Enabled by default")
 class DeltaLakeSource(StatefulIngestionSourceBase):
     """
     This plugin extracts:
datahub/ingestion/source/ge_data_profiler.py

@@ -1497,9 +1497,17 @@ class DatahubGEProfiler:
            logger.error(
                f"Unexpected {pretty_name} while profiling. Should have 3 parts but has {len(name_parts)} parts."
            )
+        if platform == DATABRICKS:
+            # TODO: Review logic for BigQuery as well, probably project.dataset.table should be quoted there as well
+            quoted_name = ".".join(
+                batch.engine.dialect.identifier_preparer.quote(part)
+                for part in name_parts
+            )
+            batch._table = sa.text(quoted_name)
+            logger.debug(f"Setting quoted table name to be {batch._table}")
        # If we only have two parts that means the project_id is missing from the table name and we add it
        # Temp tables has 3 parts while normal tables only has 2 parts
-        if len(str(batch._table).split(".")) == 2:
+        elif len(str(batch._table).split(".")) == 2:
            batch._table = sa.text(f"{name_parts[0]}.{str(batch._table)}")
            logger.debug(f"Setting table name to be {batch._table}")

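The new Databricks branch relies on SQLAlchemy's identifier preparer to quote each part of the table name. A standalone sketch of that quoting step, using a generic dialect purely for illustration (the profiler itself uses `batch.engine.dialect`):

```python
import sqlalchemy as sa
from sqlalchemy.engine.default import DefaultDialect

# Any dialect exposes an identifier_preparer; this generic one is just for the demo.
preparer = DefaultDialect().identifier_preparer

name_parts = ["hive_metastore", "sales db", "orders-2024"]  # invented three-part name
quoted_name = ".".join(preparer.quote(part) for part in name_parts)
print(sa.text(quoted_name))  # hive_metastore."sales db"."orders-2024"
```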
datahub/ingestion/source/grafana/entity_mcp_builder.py (new file)

@@ -0,0 +1,272 @@
+from typing import Dict, List, Optional, Tuple
+
+from datahub.emitter.mce_builder import (
+    make_chart_urn,
+    make_dashboard_urn,
+    make_data_platform_urn,
+    make_dataplatform_instance_urn,
+    make_dataset_urn_with_platform_instance,
+    make_tag_urn,
+    make_user_urn,
+)
+from datahub.emitter.mcp import MetadataChangeProposalWrapper
+from datahub.ingestion.source.grafana.models import Dashboard, Panel
+from datahub.ingestion.source.grafana.types import CHART_TYPE_MAPPINGS
+from datahub.metadata.schema_classes import (
+    ChangeAuditStampsClass,
+    ChartInfoClass,
+    DashboardInfoClass,
+    DataPlatformInstanceClass,
+    GlobalTagsClass,
+    OwnerClass,
+    OwnershipClass,
+    OwnershipTypeClass,
+    StatusClass,
+    TagAssociationClass,
+)
+
+
+def build_chart_mcps(
+    panel: Panel,
+    dashboard: Dashboard,
+    platform: str,
+    platform_instance: Optional[str],
+    env: str,
+    base_url: str,
+    ingest_tags: bool,
+) -> Tuple[Optional[str], str, List[MetadataChangeProposalWrapper]]:
+    """Build chart metadata change proposals"""
+    ds_urn = None
+    mcps = []
+
+    chart_urn = make_chart_urn(
+        platform,
+        f"{dashboard.uid}.{panel.id}",
+        platform_instance,
+    )
+
+    # Platform instance aspect
+    mcps.append(
+        MetadataChangeProposalWrapper(
+            entityUrn=chart_urn,
+            aspect=DataPlatformInstanceClass(
+                platform=make_data_platform_urn(platform),
+                instance=make_dataplatform_instance_urn(
+                    platform=platform,
+                    instance=platform_instance,
+                )
+                if platform_instance
+                else None,
+            ),
+        )
+    )
+
+    # Status aspect
+    mcps.append(
+        MetadataChangeProposalWrapper(
+            entityUrn=chart_urn,
+            aspect=StatusClass(removed=False),
+        )
+    )
+
+    # Get input datasets
+    input_datasets = []
+    if panel.datasource_ref:
+        ds_type = panel.datasource_ref.type or "unknown"
+        ds_uid = panel.datasource_ref.uid or "unknown"
+
+        # Add Grafana dataset
+        dataset_name = f"{ds_type}.{ds_uid}.{panel.id}"
+        ds_urn = make_dataset_urn_with_platform_instance(
+            platform=platform,
+            name=dataset_name,
+            platform_instance=platform_instance,
+            env=env,
+        )
+        input_datasets.append(ds_urn)
+
+    # Chart info aspect
+    title = panel.title or f"Panel {panel.id}"
+    mcps.append(
+        MetadataChangeProposalWrapper(
+            entityUrn=chart_urn,
+            aspect=ChartInfoClass(
+                type=CHART_TYPE_MAPPINGS.get(panel.type) if panel.type else None,
+                description=panel.description,
+                title=title,
+                lastModified=ChangeAuditStampsClass(),
+                chartUrl=f"{base_url}/d/{dashboard.uid}?viewPanel={panel.id}",
+                customProperties=_build_custom_properties(panel),
+                inputs=input_datasets,
+            ),
+        )
+    )
+
+    # Tags aspect
+    if dashboard.tags and ingest_tags:
+        tags = []
+        for tag in dashboard.tags:
+            if ":" in tag:
+                key, value = tag.split(":", 1)
+                tag_urn = make_tag_urn(f"{key}.{value}")
+            else:
+                tag_urn = make_tag_urn(tag)
+            tags.append(TagAssociationClass(tag=tag_urn))
+
+        if tags:
+            mcps.append(
+                MetadataChangeProposalWrapper(
+                    entityUrn=chart_urn,
+                    aspect=GlobalTagsClass(tags=tags),
+                )
+            )
+
+    return ds_urn, chart_urn, mcps
+
+
+def build_dashboard_mcps(
+    dashboard: Dashboard,
+    platform: str,
+    platform_instance: Optional[str],
+    chart_urns: List[str],
+    base_url: str,
+    ingest_owners: bool,
+    ingest_tags: bool,
+) -> Tuple[str, List[MetadataChangeProposalWrapper]]:
+    """Build dashboard metadata change proposals"""
+    mcps = []
+    dashboard_urn = make_dashboard_urn(platform, dashboard.uid, platform_instance)
+
+    # Platform instance aspect
+    mcps.append(
+        MetadataChangeProposalWrapper(
+            entityUrn=dashboard_urn,
+            aspect=DataPlatformInstanceClass(
+                platform=make_data_platform_urn(platform),
+                instance=make_dataplatform_instance_urn(
+                    platform=platform,
+                    instance=platform_instance,
+                )
+                if platform_instance
+                else None,
+            ),
+        )
+    )
+
+    # Dashboard info aspect
+    mcps.append(
+        MetadataChangeProposalWrapper(
+            entityUrn=dashboard_urn,
+            aspect=DashboardInfoClass(
+                description=dashboard.description,
+                title=dashboard.title,
+                charts=chart_urns,
+                lastModified=ChangeAuditStampsClass(),
+                dashboardUrl=f"{base_url}/d/{dashboard.uid}",
+                customProperties=_build_dashboard_properties(dashboard),
+            ),
+        )
+    )
+
+    # Ownership aspect
+    if dashboard.uid and ingest_owners:
+        owner = _build_ownership(dashboard)
+        if owner:
+            mcps.append(
+                MetadataChangeProposalWrapper(
+                    entityUrn=dashboard_urn,
+                    aspect=owner,
+                )
+            )
+
+    # Tags aspect
+    if dashboard.tags and ingest_tags:
+        tags = [TagAssociationClass(tag=make_tag_urn(tag)) for tag in dashboard.tags]
+        if tags:
+            mcps.append(
+                MetadataChangeProposalWrapper(
+                    entityUrn=dashboard_urn,
+                    aspect=GlobalTagsClass(tags=tags),
+                )
+            )
+
+    # Status aspect
+    mcps.append(
+        MetadataChangeProposalWrapper(
+            entityUrn=dashboard_urn,
+            aspect=StatusClass(removed=False),
+        )
+    )
+
+    return dashboard_urn, mcps
+
+
+def _build_custom_properties(panel: Panel) -> Dict[str, str]:
+    """Build custom properties for chart"""
+    props = {}
+
+    if panel.type:
+        props["type"] = panel.type
+
+    if panel.datasource_ref:
+        props["datasourceType"] = panel.datasource_ref.type or ""
+        props["datasourceUid"] = panel.datasource_ref.uid or ""
+
+    for key in [
+        "description",
+        "format",
+        "pluginVersion",
+        "repeatDirection",
+        "maxDataPoints",
+    ]:
+        value = getattr(panel, key, None)
+        if value:
+            props[key] = str(value)
+
+    if panel.query_targets:
+        props["targetsCount"] = str(len(panel.query_targets))
+
+    return props
+
+
+def _build_dashboard_properties(dashboard: Dashboard) -> Dict[str, str]:
+    """Build custom properties for dashboard"""
+    props = {}
+
+    if dashboard.timezone:
+        props["timezone"] = dashboard.timezone
+
+    if dashboard.schema_version:
+        props["schema_version"] = dashboard.schema_version
+
+    if dashboard.version:
+        props["version"] = dashboard.version
+
+    if dashboard.refresh:
+        props["refresh"] = dashboard.refresh
+
+    return props
+
+
+def _build_ownership(dashboard: Dashboard) -> Optional[OwnershipClass]:
+    """Build ownership information"""
+    owners = []
+
+    if dashboard.uid:
+        owners.append(
+            OwnerClass(
+                owner=make_user_urn(dashboard.uid),
+                type=OwnershipTypeClass.TECHNICAL_OWNER,
+            )
+        )
+
+    if dashboard.created_by:
+        owner_id = dashboard.created_by.split("@")[0]
+        owners.append(
+            OwnerClass(
+                owner=make_user_urn(owner_id),
+                type=OwnershipTypeClass.DATAOWNER,
+            )
+        )
+
+    return OwnershipClass(owners=owners) if owners else None
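A small, self-contained illustration of the tag handling in `build_chart_mcps` above: Grafana `key:value` tags become dotted DataHub tag URNs, and plain tags map directly. The sample tags are invented.

```python
from datahub.emitter.mce_builder import make_tag_urn
from datahub.metadata.schema_classes import GlobalTagsClass, TagAssociationClass

grafana_tags = ["env:prod", "team:observability", "critical"]  # invented sample tags

associations = []
for tag in grafana_tags:
    if ":" in tag:
        key, value = tag.split(":", 1)
        associations.append(TagAssociationClass(tag=make_tag_urn(f"{key}.{value}")))
    else:
        associations.append(TagAssociationClass(tag=make_tag_urn(tag)))

aspect = GlobalTagsClass(tags=associations)
# -> urn:li:tag:env.prod, urn:li:tag:team.observability, urn:li:tag:critical
```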