acryl-datahub 1.1.1rc4__py3-none-any.whl → 1.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.2.0.dist-info}/METADATA +2558 -2531
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.2.0.dist-info}/RECORD +221 -187
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.2.0.dist-info}/entry_points.txt +2 -0
- datahub/_version.py +1 -1
- datahub/api/entities/dataset/dataset.py +1 -1
- datahub/api/entities/external/__init__.py +0 -0
- datahub/api/entities/external/external_entities.py +239 -0
- datahub/api/entities/external/external_tag.py +145 -0
- datahub/api/entities/external/lake_formation_external_entites.py +161 -0
- datahub/api/entities/external/restricted_text.py +247 -0
- datahub/api/entities/external/unity_catalog_external_entites.py +173 -0
- datahub/cli/check_cli.py +88 -7
- datahub/cli/cli_utils.py +63 -0
- datahub/cli/container_cli.py +5 -0
- datahub/cli/delete_cli.py +124 -27
- datahub/cli/docker_check.py +107 -12
- datahub/cli/docker_cli.py +149 -227
- datahub/cli/exists_cli.py +0 -2
- datahub/cli/get_cli.py +0 -2
- datahub/cli/iceberg_cli.py +5 -0
- datahub/cli/ingest_cli.py +3 -15
- datahub/cli/migrate.py +2 -0
- datahub/cli/put_cli.py +1 -4
- datahub/cli/quickstart_versioning.py +50 -7
- datahub/cli/specific/assertions_cli.py +0 -4
- datahub/cli/specific/datacontract_cli.py +0 -3
- datahub/cli/specific/dataproduct_cli.py +0 -11
- datahub/cli/specific/dataset_cli.py +1 -8
- datahub/cli/specific/forms_cli.py +0 -4
- datahub/cli/specific/group_cli.py +0 -2
- datahub/cli/specific/structuredproperties_cli.py +1 -4
- datahub/cli/specific/user_cli.py +0 -2
- datahub/cli/state_cli.py +0 -2
- datahub/cli/timeline_cli.py +0 -2
- datahub/emitter/rest_emitter.py +70 -12
- datahub/entrypoints.py +4 -3
- datahub/ingestion/api/decorators.py +15 -3
- datahub/ingestion/api/report.py +332 -3
- datahub/ingestion/api/sink.py +3 -0
- datahub/ingestion/api/source.py +48 -44
- datahub/ingestion/autogenerated/__init__.py +0 -0
- datahub/ingestion/autogenerated/capability_summary.json +3449 -0
- datahub/ingestion/autogenerated/lineage.json +401 -0
- datahub/ingestion/autogenerated/lineage_helper.py +177 -0
- datahub/ingestion/extractor/schema_util.py +13 -4
- datahub/ingestion/glossary/classification_mixin.py +5 -0
- datahub/ingestion/graph/client.py +100 -15
- datahub/ingestion/graph/config.py +1 -0
- datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +20 -10
- datahub/ingestion/run/pipeline.py +54 -2
- datahub/ingestion/sink/datahub_rest.py +13 -0
- datahub/ingestion/source/abs/source.py +1 -1
- datahub/ingestion/source/aws/aws_common.py +4 -0
- datahub/ingestion/source/aws/glue.py +489 -244
- datahub/ingestion/source/aws/tag_entities.py +292 -0
- datahub/ingestion/source/azure/azure_common.py +2 -2
- datahub/ingestion/source/bigquery_v2/bigquery.py +50 -23
- datahub/ingestion/source/bigquery_v2/bigquery_config.py +1 -1
- datahub/ingestion/source/bigquery_v2/bigquery_queries.py +1 -0
- datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +2 -0
- datahub/ingestion/source/bigquery_v2/common.py +1 -1
- datahub/ingestion/source/bigquery_v2/profiler.py +4 -2
- datahub/ingestion/source/bigquery_v2/queries.py +3 -3
- datahub/ingestion/source/cassandra/cassandra.py +1 -1
- datahub/ingestion/source/cassandra/cassandra_profiling.py +6 -5
- datahub/ingestion/source/common/subtypes.py +45 -0
- datahub/ingestion/source/data_lake_common/object_store.py +115 -27
- datahub/ingestion/source/data_lake_common/path_spec.py +10 -21
- datahub/ingestion/source/datahub/datahub_database_reader.py +1 -2
- datahub/ingestion/source/dbt/dbt_cloud.py +10 -2
- datahub/ingestion/source/dbt/dbt_common.py +6 -2
- datahub/ingestion/source/dbt/dbt_core.py +3 -0
- datahub/ingestion/source/debug/__init__.py +0 -0
- datahub/ingestion/source/debug/datahub_debug.py +300 -0
- datahub/ingestion/source/dremio/dremio_api.py +114 -73
- datahub/ingestion/source/dremio/dremio_config.py +2 -0
- datahub/ingestion/source/dremio/dremio_reporting.py +23 -2
- datahub/ingestion/source/dremio/dremio_source.py +94 -81
- datahub/ingestion/source/dremio/dremio_sql_queries.py +82 -21
- datahub/ingestion/source/file.py +3 -0
- datahub/ingestion/source/fivetran/fivetran.py +34 -26
- datahub/ingestion/source/gcs/gcs_source.py +13 -2
- datahub/ingestion/source/ge_data_profiler.py +76 -28
- datahub/ingestion/source/ge_profiling_config.py +11 -0
- datahub/ingestion/source/hex/api.py +26 -1
- datahub/ingestion/source/iceberg/iceberg.py +3 -1
- datahub/ingestion/source/identity/azure_ad.py +1 -1
- datahub/ingestion/source/identity/okta.py +1 -14
- datahub/ingestion/source/kafka/kafka.py +16 -0
- datahub/ingestion/source/kafka_connect/sink_connectors.py +156 -47
- datahub/ingestion/source/kafka_connect/source_connectors.py +59 -4
- datahub/ingestion/source/looker/looker_source.py +1 -0
- datahub/ingestion/source/mlflow.py +11 -1
- datahub/ingestion/source/mock_data/__init__.py +0 -0
- datahub/ingestion/source/mock_data/datahub_mock_data.py +472 -0
- datahub/ingestion/source/mock_data/datahub_mock_data_report.py +12 -0
- datahub/ingestion/source/mock_data/table_naming_helper.py +91 -0
- datahub/ingestion/source/nifi.py +1 -1
- datahub/ingestion/source/powerbi/powerbi.py +1 -5
- datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +0 -1
- datahub/ingestion/source/powerbi_report_server/report_server.py +0 -23
- datahub/ingestion/source/preset.py +2 -2
- datahub/ingestion/source/qlik_sense/qlik_sense.py +1 -0
- datahub/ingestion/source/redshift/redshift.py +21 -1
- datahub/ingestion/source/redshift/usage.py +4 -3
- datahub/ingestion/source/s3/report.py +4 -2
- datahub/ingestion/source/s3/source.py +367 -115
- datahub/ingestion/source/sac/sac.py +3 -1
- datahub/ingestion/source/salesforce.py +6 -3
- datahub/ingestion/source/sigma/sigma.py +7 -1
- datahub/ingestion/source/slack/slack.py +2 -1
- datahub/ingestion/source/snowflake/snowflake_config.py +30 -7
- datahub/ingestion/source/snowflake/snowflake_queries.py +348 -82
- datahub/ingestion/source/snowflake/snowflake_summary.py +5 -0
- datahub/ingestion/source/snowflake/snowflake_usage_v2.py +8 -2
- datahub/ingestion/source/snowflake/snowflake_utils.py +2 -7
- datahub/ingestion/source/snowflake/snowflake_v2.py +16 -2
- datahub/ingestion/source/snowflake/stored_proc_lineage.py +143 -0
- datahub/ingestion/source/sql/athena.py +119 -11
- datahub/ingestion/source/sql/athena_properties_extractor.py +777 -0
- datahub/ingestion/source/sql/clickhouse.py +3 -1
- datahub/ingestion/source/sql/cockroachdb.py +0 -1
- datahub/ingestion/source/sql/hana.py +3 -1
- datahub/ingestion/source/sql/hive_metastore.py +3 -11
- datahub/ingestion/source/sql/mariadb.py +0 -1
- datahub/ingestion/source/sql/mssql/source.py +239 -34
- datahub/ingestion/source/sql/mysql.py +0 -1
- datahub/ingestion/source/sql/oracle.py +1 -1
- datahub/ingestion/source/sql/postgres.py +0 -1
- datahub/ingestion/source/sql/sql_common.py +121 -34
- datahub/ingestion/source/sql/sql_generic_profiler.py +2 -1
- datahub/ingestion/source/sql/teradata.py +997 -235
- datahub/ingestion/source/sql/vertica.py +10 -6
- datahub/ingestion/source/sql_queries.py +2 -2
- datahub/ingestion/source/state/stateful_ingestion_base.py +1 -1
- datahub/ingestion/source/superset.py +58 -3
- datahub/ingestion/source/tableau/tableau.py +58 -37
- datahub/ingestion/source/tableau/tableau_common.py +4 -2
- datahub/ingestion/source/tableau/tableau_constant.py +0 -4
- datahub/ingestion/source/unity/config.py +5 -0
- datahub/ingestion/source/unity/proxy.py +118 -0
- datahub/ingestion/source/unity/source.py +195 -17
- datahub/ingestion/source/unity/tag_entities.py +295 -0
- datahub/ingestion/source/usage/clickhouse_usage.py +4 -1
- datahub/ingestion/source/usage/starburst_trino_usage.py +3 -0
- datahub/ingestion/transformer/add_dataset_ownership.py +18 -2
- datahub/integrations/assertion/snowflake/compiler.py +4 -3
- datahub/metadata/_internal_schema_classes.py +1433 -546
- datahub/metadata/_urns/urn_defs.py +1826 -1658
- datahub/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/identity/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/logical/__init__.py +15 -0
- datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +4 -0
- datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py +27 -0
- datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +4 -0
- datahub/metadata/com/linkedin/pegasus2avro/template/__init__.py +25 -0
- datahub/metadata/schema.avsc +17736 -17112
- datahub/metadata/schemas/ApplicationKey.avsc +31 -0
- datahub/metadata/schemas/ApplicationProperties.avsc +72 -0
- datahub/metadata/schemas/Applications.avsc +38 -0
- datahub/metadata/schemas/ChartKey.avsc +1 -0
- datahub/metadata/schemas/ContainerKey.avsc +1 -0
- datahub/metadata/schemas/ContainerProperties.avsc +8 -0
- datahub/metadata/schemas/CorpUserSettings.avsc +41 -0
- datahub/metadata/schemas/DashboardKey.avsc +1 -0
- datahub/metadata/schemas/DataFlowInfo.avsc +8 -0
- datahub/metadata/schemas/DataFlowKey.avsc +1 -0
- datahub/metadata/schemas/DataHubPageModuleKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageModuleProperties.avsc +200 -0
- datahub/metadata/schemas/DataHubPageTemplateKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageTemplateProperties.avsc +175 -0
- datahub/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
- datahub/metadata/schemas/DataJobInfo.avsc +8 -0
- datahub/metadata/schemas/DataJobKey.avsc +1 -0
- datahub/metadata/schemas/DataProcessKey.avsc +8 -0
- datahub/metadata/schemas/DataProductKey.avsc +1 -0
- datahub/metadata/schemas/DataProductProperties.avsc +1 -1
- datahub/metadata/schemas/DatasetKey.avsc +11 -1
- datahub/metadata/schemas/GlobalSettingsInfo.avsc +62 -0
- datahub/metadata/schemas/GlossaryTermKey.avsc +1 -0
- datahub/metadata/schemas/IcebergWarehouseInfo.avsc +8 -0
- datahub/metadata/schemas/LogicalParent.avsc +140 -0
- datahub/metadata/schemas/MLFeatureKey.avsc +1 -0
- datahub/metadata/schemas/MLFeatureTableKey.avsc +1 -0
- datahub/metadata/schemas/MLModelDeploymentKey.avsc +8 -0
- datahub/metadata/schemas/MLModelGroupKey.avsc +9 -0
- datahub/metadata/schemas/MLModelKey.avsc +9 -0
- datahub/metadata/schemas/MLPrimaryKeyKey.avsc +1 -0
- datahub/metadata/schemas/MetadataChangeEvent.avsc +20 -1
- datahub/metadata/schemas/NotebookKey.avsc +1 -0
- datahub/metadata/schemas/QuerySubjects.avsc +1 -12
- datahub/metadata/schemas/SchemaFieldKey.avsc +2 -1
- datahub/metadata/schemas/__init__.py +3 -3
- datahub/sdk/__init__.py +2 -0
- datahub/sdk/_all_entities.py +7 -0
- datahub/sdk/_shared.py +116 -0
- datahub/sdk/chart.py +315 -0
- datahub/sdk/container.py +7 -0
- datahub/sdk/dashboard.py +432 -0
- datahub/sdk/dataflow.py +7 -0
- datahub/sdk/datajob.py +45 -13
- datahub/sdk/dataset.py +8 -2
- datahub/sdk/entity_client.py +82 -2
- datahub/sdk/lineage_client.py +683 -82
- datahub/sdk/main_client.py +46 -16
- datahub/sdk/mlmodel.py +101 -38
- datahub/sdk/mlmodelgroup.py +7 -0
- datahub/sdk/search_client.py +4 -3
- datahub/specific/chart.py +1 -1
- datahub/specific/dataproduct.py +4 -0
- datahub/sql_parsing/sql_parsing_aggregator.py +29 -17
- datahub/sql_parsing/sqlglot_lineage.py +62 -13
- datahub/telemetry/telemetry.py +17 -11
- datahub/testing/sdk_v2_helpers.py +7 -1
- datahub/upgrade/upgrade.py +46 -13
- datahub/utilities/server_config_util.py +8 -0
- datahub/utilities/sqlalchemy_query_combiner.py +5 -2
- datahub/utilities/stats_collections.py +4 -0
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.2.0.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.2.0.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.2.0.dist-info}/top_level.txt +0 -0
|
@@ -1,9 +1,12 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
import re
|
|
3
|
+
import time
|
|
3
4
|
from concurrent.futures import ThreadPoolExecutor
|
|
4
5
|
from typing import Dict, Iterable, List, Optional, Set, Tuple, Union
|
|
5
6
|
from urllib.parse import urljoin
|
|
6
7
|
|
|
8
|
+
from datahub.api.entities.external.external_entities import PlatformResourceRepository
|
|
9
|
+
from datahub.api.entities.external.unity_catalog_external_entites import UnityCatalogTag
|
|
7
10
|
from datahub.emitter.mce_builder import (
|
|
8
11
|
make_data_platform_urn,
|
|
9
12
|
make_dataplatform_instance_urn,
|
|
@@ -53,6 +56,7 @@ from datahub.ingestion.source.aws.s3_util import (
|
|
|
53
56
|
from datahub.ingestion.source.common.subtypes import (
|
|
54
57
|
DatasetContainerSubTypes,
|
|
55
58
|
DatasetSubTypes,
|
|
59
|
+
SourceCapabilityModifier,
|
|
56
60
|
)
|
|
57
61
|
from datahub.ingestion.source.state.stale_entity_removal_handler import (
|
|
58
62
|
StaleEntityRemovalHandler,
|
|
@@ -78,6 +82,7 @@ from datahub.ingestion.source.unity.proxy_types import (
|
|
|
78
82
|
Catalog,
|
|
79
83
|
Column,
|
|
80
84
|
CustomCatalogType,
|
|
85
|
+
HiveTableType,
|
|
81
86
|
Metastore,
|
|
82
87
|
Notebook,
|
|
83
88
|
NotebookId,
|
|
@@ -87,8 +92,17 @@ from datahub.ingestion.source.unity.proxy_types import (
|
|
|
87
92
|
TableReference,
|
|
88
93
|
)
|
|
89
94
|
from datahub.ingestion.source.unity.report import UnityCatalogReport
|
|
95
|
+
from datahub.ingestion.source.unity.tag_entities import (
|
|
96
|
+
UnityCatalogTagPlatformResource,
|
|
97
|
+
UnityCatalogTagPlatformResourceId,
|
|
98
|
+
)
|
|
90
99
|
from datahub.ingestion.source.unity.usage import UnityCatalogUsageExtractor
|
|
91
|
-
from datahub.metadata.com.linkedin.pegasus2avro.common import
|
|
100
|
+
from datahub.metadata.com.linkedin.pegasus2avro.common import (
|
|
101
|
+
GlobalTags,
|
|
102
|
+
MetadataAttribution,
|
|
103
|
+
Siblings,
|
|
104
|
+
TagAssociation,
|
|
105
|
+
)
|
|
92
106
|
from datahub.metadata.com.linkedin.pegasus2avro.dataset import (
|
|
93
107
|
DatasetLineageType,
|
|
94
108
|
FineGrainedLineage,
|
|
@@ -116,6 +130,7 @@ from datahub.metadata.schema_classes import (
|
|
|
116
130
|
UpstreamClass,
|
|
117
131
|
UpstreamLineageClass,
|
|
118
132
|
)
|
|
133
|
+
from datahub.metadata.urns import TagUrn
|
|
119
134
|
from datahub.sql_parsing.schema_resolver import SchemaResolver
|
|
120
135
|
from datahub.sql_parsing.sqlglot_lineage import (
|
|
121
136
|
SqlParsingResult,
|
|
@@ -138,16 +153,24 @@ logger: logging.Logger = logging.getLogger(__name__)
|
|
|
138
153
|
@capability(SourceCapability.USAGE_STATS, "Enabled by default")
|
|
139
154
|
@capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default")
|
|
140
155
|
@capability(SourceCapability.DOMAINS, "Supported via the `domain` config field")
|
|
141
|
-
@capability(
|
|
156
|
+
@capability(
|
|
157
|
+
SourceCapability.CONTAINERS,
|
|
158
|
+
"Enabled by default",
|
|
159
|
+
subtype_modifier=[
|
|
160
|
+
SourceCapabilityModifier.CATALOG,
|
|
161
|
+
SourceCapabilityModifier.SCHEMA,
|
|
162
|
+
],
|
|
163
|
+
)
|
|
142
164
|
@capability(SourceCapability.OWNERSHIP, "Supported via the `include_ownership` config")
|
|
143
165
|
@capability(
|
|
144
166
|
SourceCapability.DATA_PROFILING, "Supported via the `profiling.enabled` config"
|
|
145
167
|
)
|
|
146
168
|
@capability(
|
|
147
169
|
SourceCapability.DELETION_DETECTION,
|
|
148
|
-
"
|
|
170
|
+
"Enabled by default via stateful ingestion",
|
|
149
171
|
supported=True,
|
|
150
172
|
)
|
|
173
|
+
@capability(SourceCapability.TEST_CONNECTION, "Enabled by default")
|
|
151
174
|
@support_status(SupportStatus.INCUBATING)
|
|
152
175
|
class UnityCatalogSource(StatefulIngestionSourceBase, TestableSource):
|
|
153
176
|
"""
|
|
@@ -162,6 +185,7 @@ class UnityCatalogSource(StatefulIngestionSourceBase, TestableSource):
|
|
|
162
185
|
platform: str = "databricks"
|
|
163
186
|
platform_instance_name: Optional[str]
|
|
164
187
|
sql_parser_schema_resolver: Optional[SchemaResolver] = None
|
|
188
|
+
platform_resource_repository: Optional[PlatformResourceRepository] = None
|
|
165
189
|
|
|
166
190
|
def get_report(self) -> UnityCatalogReport:
|
|
167
191
|
return self.report
|
|
@@ -211,6 +235,10 @@ class UnityCatalogSource(StatefulIngestionSourceBase, TestableSource):
|
|
|
211
235
|
|
|
212
236
|
# Global map of tables, for profiling
|
|
213
237
|
self.tables: FileBackedDict[Table] = FileBackedDict()
|
|
238
|
+
if self.ctx.graph:
|
|
239
|
+
self.platform_resource_repository = PlatformResourceRepository(
|
|
240
|
+
self.ctx.graph
|
|
241
|
+
)
|
|
214
242
|
|
|
215
243
|
def init_hive_metastore_proxy(self):
|
|
216
244
|
self.hive_metastore_proxy: Optional[HiveMetastoreProxy] = None
|
|
@@ -506,13 +534,42 @@ class UnityCatalogSource(StatefulIngestionSourceBase, TestableSource):
|
|
|
506
534
|
yield from self.add_table_to_dataset_container(dataset_urn, schema)
|
|
507
535
|
|
|
508
536
|
table_props = self._create_table_property_aspect(table)
|
|
537
|
+
tags = None
|
|
538
|
+
if not isinstance(table.table_type, HiveTableType) and self.config.include_tags:
|
|
539
|
+
try:
|
|
540
|
+
table_tags = self._get_table_tags(
|
|
541
|
+
table.ref.catalog, table.ref.schema, table.ref.table
|
|
542
|
+
)
|
|
543
|
+
if table_tags:
|
|
544
|
+
logger.debug(f"Table tags for {table.ref}: {table_tags}")
|
|
545
|
+
attribution = MetadataAttribution(
|
|
546
|
+
# source="unity-catalog",
|
|
547
|
+
actor="urn:li:corpuser:datahub",
|
|
548
|
+
time=int(time.time() * 1000),
|
|
549
|
+
)
|
|
550
|
+
tags = GlobalTags(
|
|
551
|
+
tags=[
|
|
552
|
+
TagAssociation(
|
|
553
|
+
tag=tag.to_datahub_tag_urn().urn(),
|
|
554
|
+
attribution=attribution,
|
|
555
|
+
)
|
|
556
|
+
for tag in table_tags
|
|
557
|
+
]
|
|
558
|
+
)
|
|
559
|
+
|
|
560
|
+
yield from self.gen_platform_resources(table_tags)
|
|
561
|
+
|
|
562
|
+
except Exception as e:
|
|
563
|
+
logger.exception(f"Error fetching table {table.ref} tags", exc_info=e)
|
|
509
564
|
|
|
510
565
|
view_props = None
|
|
511
566
|
if table.view_definition:
|
|
512
567
|
view_props = self._create_view_property_aspect(table)
|
|
513
568
|
|
|
514
569
|
sub_type = self._create_table_sub_type_aspect(table)
|
|
515
|
-
schema_metadata = self._create_schema_metadata_aspect(table)
|
|
570
|
+
schema_metadata, platform_resources = self._create_schema_metadata_aspect(table)
|
|
571
|
+
yield from platform_resources
|
|
572
|
+
|
|
516
573
|
domain = self._get_domain_aspect(dataset_name=table.ref.qualified_table_name)
|
|
517
574
|
ownership = self._create_table_ownership_aspect(table)
|
|
518
575
|
data_platform_instance = self._create_data_platform_instance_aspect()
|
|
@@ -585,6 +642,7 @@ class UnityCatalogSource(StatefulIngestionSourceBase, TestableSource):
|
|
|
585
642
|
domain,
|
|
586
643
|
data_platform_instance,
|
|
587
644
|
lineage,
|
|
645
|
+
tags,
|
|
588
646
|
],
|
|
589
647
|
)
|
|
590
648
|
]
|
|
@@ -718,6 +776,14 @@ class UnityCatalogSource(StatefulIngestionSourceBase, TestableSource):
|
|
|
718
776
|
|
|
719
777
|
def gen_schema_containers(self, schema: Schema) -> Iterable[MetadataWorkUnit]:
|
|
720
778
|
domain_urn = self._gen_domain_urn(f"{schema.catalog.name}.{schema.name}")
|
|
779
|
+
schema_tags = []
|
|
780
|
+
if self.config.include_tags:
|
|
781
|
+
schema_tags = self.unity_catalog_api_proxy.get_schema_tags(
|
|
782
|
+
schema.catalog.name
|
|
783
|
+
).get(f"{schema.catalog.name}.{schema.name}", [])
|
|
784
|
+
logger.debug(f"Schema tags for {schema.name}: {schema_tags}")
|
|
785
|
+
# Generate platform resources for schema tags
|
|
786
|
+
yield from self.gen_platform_resources(schema_tags)
|
|
721
787
|
|
|
722
788
|
schema_container_key = self.gen_schema_key(schema)
|
|
723
789
|
yield from gen_containers(
|
|
@@ -729,6 +795,9 @@ class UnityCatalogSource(StatefulIngestionSourceBase, TestableSource):
|
|
|
729
795
|
description=schema.comment,
|
|
730
796
|
owner_urn=self.get_owner_urn(schema.owner),
|
|
731
797
|
external_url=f"{self.external_url_base}/{schema.catalog.name}/{schema.name}",
|
|
798
|
+
tags=[tag.to_datahub_tag_urn().name for tag in schema_tags]
|
|
799
|
+
if schema_tags
|
|
800
|
+
else None,
|
|
732
801
|
)
|
|
733
802
|
|
|
734
803
|
def gen_metastore_containers(
|
|
@@ -749,6 +818,14 @@ class UnityCatalogSource(StatefulIngestionSourceBase, TestableSource):
|
|
|
749
818
|
|
|
750
819
|
def gen_catalog_containers(self, catalog: Catalog) -> Iterable[MetadataWorkUnit]:
|
|
751
820
|
domain_urn = self._gen_domain_urn(catalog.name)
|
|
821
|
+
catalog_tags = []
|
|
822
|
+
if self.config.include_tags:
|
|
823
|
+
catalog_tags = self.unity_catalog_api_proxy.get_catalog_tags(
|
|
824
|
+
catalog.name
|
|
825
|
+
).get(catalog.name, [])
|
|
826
|
+
logger.debug(f"Schema tags for {catalog.name}: {catalog_tags}")
|
|
827
|
+
# Generate platform resources for schema tags
|
|
828
|
+
yield from self.gen_platform_resources(catalog_tags)
|
|
752
829
|
|
|
753
830
|
catalog_container_key = self.gen_catalog_key(catalog)
|
|
754
831
|
yield from gen_containers(
|
|
@@ -764,6 +841,9 @@ class UnityCatalogSource(StatefulIngestionSourceBase, TestableSource):
|
|
|
764
841
|
description=catalog.comment,
|
|
765
842
|
owner_urn=self.get_owner_urn(catalog.owner),
|
|
766
843
|
external_url=f"{self.external_url_base}/{catalog.name}",
|
|
844
|
+
tags=[tag.to_datahub_tag_urn().name for tag in catalog_tags]
|
|
845
|
+
if catalog_tags
|
|
846
|
+
else None,
|
|
767
847
|
)
|
|
768
848
|
|
|
769
849
|
def gen_schema_key(self, schema: Schema) -> ContainerKey:
|
|
@@ -832,6 +912,30 @@ class UnityCatalogSource(StatefulIngestionSourceBase, TestableSource):
|
|
|
832
912
|
dataset_urn=dataset_urn,
|
|
833
913
|
)
|
|
834
914
|
|
|
915
|
+
def _get_catalog_tags(
|
|
916
|
+
self, catalog: str, schema: str, table: str
|
|
917
|
+
) -> List[UnityCatalogTag]:
|
|
918
|
+
all_tags = self.unity_catalog_api_proxy.get_catalog_tags(catalog)
|
|
919
|
+
return all_tags.get(f"{catalog}", [])
|
|
920
|
+
|
|
921
|
+
def _get_schema_tags(
|
|
922
|
+
self, catalog: str, schema: str, table: str
|
|
923
|
+
) -> List[UnityCatalogTag]:
|
|
924
|
+
all_tags = self.unity_catalog_api_proxy.get_schema_tags(catalog)
|
|
925
|
+
return all_tags.get(f"{catalog}.{schema}", [])
|
|
926
|
+
|
|
927
|
+
def _get_table_tags(
|
|
928
|
+
self, catalog: str, schema: str, table: str
|
|
929
|
+
) -> List[UnityCatalogTag]:
|
|
930
|
+
all_tags = self.unity_catalog_api_proxy.get_table_tags(catalog)
|
|
931
|
+
return all_tags.get(f"{catalog}.{schema}.{table}", [])
|
|
932
|
+
|
|
933
|
+
def _get_column_tags(
|
|
934
|
+
self, catalog: str, schema: str, table: str, column: str
|
|
935
|
+
) -> List[UnityCatalogTag]:
|
|
936
|
+
all_tags = self.unity_catalog_api_proxy.get_column_tags(catalog)
|
|
937
|
+
return all_tags.get(f"{catalog}.{schema}.{table}.{column}", [])
|
|
938
|
+
|
|
835
939
|
def _create_table_property_aspect(self, table: Table) -> DatasetPropertiesClass:
|
|
836
940
|
custom_properties: dict = {}
|
|
837
941
|
if table.storage_location is not None:
|
|
@@ -921,30 +1025,103 @@ class UnityCatalogSource(StatefulIngestionSourceBase, TestableSource):
|
|
|
921
1025
|
materialized=False, viewLanguage="SQL", viewLogic=table.view_definition
|
|
922
1026
|
)
|
|
923
1027
|
|
|
924
|
-
def
|
|
925
|
-
|
|
1028
|
+
def gen_platform_resources(
|
|
1029
|
+
self, tags: List[UnityCatalogTag]
|
|
1030
|
+
) -> Iterable[MetadataWorkUnit]:
|
|
1031
|
+
if self.ctx.graph and self.platform_resource_repository:
|
|
1032
|
+
for tag in tags:
|
|
1033
|
+
try:
|
|
1034
|
+
platform_resource_id = UnityCatalogTagPlatformResourceId.from_tag(
|
|
1035
|
+
platform_instance=self.platform_instance_name,
|
|
1036
|
+
platform_resource_repository=self.platform_resource_repository,
|
|
1037
|
+
tag=tag,
|
|
1038
|
+
)
|
|
1039
|
+
logger.debug(f"Created platform resource {platform_resource_id}")
|
|
926
1040
|
|
|
1041
|
+
unity_catalog_tag = (
|
|
1042
|
+
UnityCatalogTagPlatformResource.get_from_datahub(
|
|
1043
|
+
platform_resource_id,
|
|
1044
|
+
self.platform_resource_repository,
|
|
1045
|
+
False,
|
|
1046
|
+
)
|
|
1047
|
+
)
|
|
1048
|
+
if (
|
|
1049
|
+
tag.to_datahub_tag_urn().urn()
|
|
1050
|
+
not in unity_catalog_tag.datahub_linked_resources().urns
|
|
1051
|
+
):
|
|
1052
|
+
unity_catalog_tag.datahub_linked_resources().add(
|
|
1053
|
+
tag.to_datahub_tag_urn().urn()
|
|
1054
|
+
)
|
|
1055
|
+
platform_resource = unity_catalog_tag.as_platform_resource()
|
|
1056
|
+
for mcp in platform_resource.to_mcps():
|
|
1057
|
+
yield MetadataWorkUnit(
|
|
1058
|
+
id=f"platform_resource-{platform_resource.id}",
|
|
1059
|
+
mcp=mcp,
|
|
1060
|
+
)
|
|
1061
|
+
except Exception as e:
|
|
1062
|
+
logger.exception(
|
|
1063
|
+
f"Error processing platform resource for tag {tag}"
|
|
1064
|
+
)
|
|
1065
|
+
self.report.report_warning(
|
|
1066
|
+
message="Error processing platform resource for tag",
|
|
1067
|
+
context=str(tag),
|
|
1068
|
+
title="Error processing platform resource for tag",
|
|
1069
|
+
exc=e,
|
|
1070
|
+
)
|
|
1071
|
+
continue
|
|
1072
|
+
|
|
1073
|
+
def _create_schema_metadata_aspect(
|
|
1074
|
+
self, table: Table
|
|
1075
|
+
) -> Tuple[SchemaMetadataClass, Iterable[MetadataWorkUnit]]:
|
|
1076
|
+
schema_fields: List[SchemaFieldClass] = []
|
|
1077
|
+
unique_tags: Set[UnityCatalogTag] = set()
|
|
927
1078
|
for column in table.columns:
|
|
928
|
-
|
|
929
|
-
|
|
930
|
-
|
|
931
|
-
|
|
932
|
-
|
|
933
|
-
|
|
934
|
-
|
|
935
|
-
|
|
936
|
-
|
|
1079
|
+
tag_urns: Optional[List[TagUrn]] = None
|
|
1080
|
+
if self.config.include_tags:
|
|
1081
|
+
column_tags = self._get_column_tags(
|
|
1082
|
+
table.ref.catalog, table.ref.schema, table.ref.table, column.name
|
|
1083
|
+
)
|
|
1084
|
+
unique_tags.update(column_tags)
|
|
1085
|
+
tag_urns = [tag.to_datahub_tag_urn() for tag in column_tags]
|
|
1086
|
+
schema_fields.extend(self._create_schema_field(column, tag_urns))
|
|
1087
|
+
|
|
1088
|
+
platform_resources = self.gen_platform_resources(list(unique_tags))
|
|
1089
|
+
return (
|
|
1090
|
+
SchemaMetadataClass(
|
|
1091
|
+
schemaName=table.id,
|
|
1092
|
+
platform=make_data_platform_urn(self.platform),
|
|
1093
|
+
fields=schema_fields,
|
|
1094
|
+
hash="",
|
|
1095
|
+
version=0,
|
|
1096
|
+
platformSchema=MySqlDDLClass(tableSchema=""),
|
|
1097
|
+
),
|
|
1098
|
+
platform_resources,
|
|
937
1099
|
)
|
|
938
1100
|
|
|
939
1101
|
@staticmethod
|
|
940
|
-
def _create_schema_field(
|
|
1102
|
+
def _create_schema_field(
|
|
1103
|
+
column: Column, tags: Optional[List[TagUrn]]
|
|
1104
|
+
) -> List[SchemaFieldClass]:
|
|
941
1105
|
_COMPLEX_TYPE = re.compile("^(struct|array)")
|
|
942
|
-
|
|
1106
|
+
global_tags: Optional[GlobalTags] = None
|
|
943
1107
|
if _COMPLEX_TYPE.match(column.type_text.lower()):
|
|
944
1108
|
return get_schema_fields_for_hive_column(
|
|
945
1109
|
column.name, column.type_text.lower(), description=column.comment
|
|
946
1110
|
)
|
|
947
1111
|
else:
|
|
1112
|
+
if tags is not None:
|
|
1113
|
+
attribution = MetadataAttribution(
|
|
1114
|
+
source="urn:li:dataPlatform:unity-catalog",
|
|
1115
|
+
actor="urn:li:corpuser:datahub",
|
|
1116
|
+
time=int(time.time() * 1000),
|
|
1117
|
+
)
|
|
1118
|
+
global_tags = GlobalTags(
|
|
1119
|
+
tags=[
|
|
1120
|
+
TagAssociation(tag=tag.urn(), attribution=attribution)
|
|
1121
|
+
for tag in tags
|
|
1122
|
+
]
|
|
1123
|
+
)
|
|
1124
|
+
|
|
948
1125
|
return [
|
|
949
1126
|
SchemaFieldClass(
|
|
950
1127
|
fieldPath=column.name,
|
|
@@ -954,6 +1131,7 @@ class UnityCatalogSource(StatefulIngestionSourceBase, TestableSource):
|
|
|
954
1131
|
nativeDataType=column.type_text,
|
|
955
1132
|
nullable=column.nullable,
|
|
956
1133
|
description=column.comment,
|
|
1134
|
+
globalTags=global_tags if tags else None,
|
|
957
1135
|
)
|
|
958
1136
|
]
|
|
959
1137
|
|
|
@@ -0,0 +1,295 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from typing import List, Optional
|
|
3
|
+
|
|
4
|
+
from pydantic import BaseModel
|
|
5
|
+
|
|
6
|
+
from datahub.api.entities.external.external_entities import (
|
|
7
|
+
ExternalEntity,
|
|
8
|
+
ExternalEntityId,
|
|
9
|
+
LinkedResourceSet,
|
|
10
|
+
PlatformResourceRepository,
|
|
11
|
+
)
|
|
12
|
+
from datahub.api.entities.external.unity_catalog_external_entites import UnityCatalogTag
|
|
13
|
+
from datahub.api.entities.platformresource.platform_resource import (
|
|
14
|
+
PlatformResource,
|
|
15
|
+
PlatformResourceKey,
|
|
16
|
+
PlatformResourceSearchFields,
|
|
17
|
+
)
|
|
18
|
+
from datahub.ingestion.graph.client import DataHubGraph
|
|
19
|
+
from datahub.metadata.urns import TagUrn
|
|
20
|
+
from datahub.utilities.search_utils import ElasticDocumentQuery
|
|
21
|
+
from datahub.utilities.urns.urn import Urn
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class UnityCatalogTagSyncContext(BaseModel):
|
|
25
|
+
# it is intentionally empty
|
|
26
|
+
platform_instance: Optional[str] = None
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
logger = logging.getLogger(__name__)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class UnityCatalogTagPlatformResourceId(BaseModel, ExternalEntityId):
|
|
33
|
+
"""
|
|
34
|
+
A SnowflakeTagId is a unique identifier for a Snowflake tag.
|
|
35
|
+
"""
|
|
36
|
+
|
|
37
|
+
tag_key: str
|
|
38
|
+
tag_value: Optional[str] = None
|
|
39
|
+
platform_instance: Optional[str]
|
|
40
|
+
exists_in_unity_catalog: bool = False
|
|
41
|
+
persisted: bool = False
|
|
42
|
+
|
|
43
|
+
def __hash__(self) -> int:
|
|
44
|
+
return hash(self.to_platform_resource_key().id)
|
|
45
|
+
|
|
46
|
+
# this is a hack to make sure the property is a string and not private pydantic field
|
|
47
|
+
@staticmethod
|
|
48
|
+
def _RESOURCE_TYPE() -> str:
|
|
49
|
+
return "UnityCatalogTagPlatformResource"
|
|
50
|
+
|
|
51
|
+
def to_platform_resource_key(self) -> PlatformResourceKey:
|
|
52
|
+
return PlatformResourceKey(
|
|
53
|
+
platform="databricks",
|
|
54
|
+
resource_type=str(UnityCatalogTagPlatformResourceId._RESOURCE_TYPE()),
|
|
55
|
+
primary_key=f"{self.tag_key}:{self.tag_value}",
|
|
56
|
+
platform_instance=self.platform_instance,
|
|
57
|
+
)
|
|
58
|
+
|
|
59
|
+
@classmethod
|
|
60
|
+
def from_tag(
|
|
61
|
+
cls,
|
|
62
|
+
tag: UnityCatalogTag,
|
|
63
|
+
platform_instance: Optional[str],
|
|
64
|
+
platform_resource_repository: PlatformResourceRepository,
|
|
65
|
+
exists_in_unity_catalog: bool = False,
|
|
66
|
+
) -> "UnityCatalogTagPlatformResourceId":
|
|
67
|
+
"""
|
|
68
|
+
Creates a UnityCatalogTagPlatformResourceId from a UnityCatalogTag.
|
|
69
|
+
"""
|
|
70
|
+
|
|
71
|
+
existing_platform_resource = cls.search_by_urn(
|
|
72
|
+
tag.to_datahub_tag_urn().urn(),
|
|
73
|
+
platform_resource_repository=platform_resource_repository,
|
|
74
|
+
tag_sync_context=UnityCatalogTagSyncContext(
|
|
75
|
+
platform_instance=platform_instance
|
|
76
|
+
),
|
|
77
|
+
)
|
|
78
|
+
if existing_platform_resource:
|
|
79
|
+
logger.info(
|
|
80
|
+
f"Found existing UnityCatalogTagPlatformResourceId for tag {tag.key.original}: {existing_platform_resource}"
|
|
81
|
+
)
|
|
82
|
+
return existing_platform_resource
|
|
83
|
+
|
|
84
|
+
return UnityCatalogTagPlatformResourceId(
|
|
85
|
+
tag_key=tag.key.original,
|
|
86
|
+
tag_value=tag.value.original if tag.value is not None else None,
|
|
87
|
+
platform_instance=platform_instance,
|
|
88
|
+
exists_in_unity_catalog=exists_in_unity_catalog,
|
|
89
|
+
persisted=False,
|
|
90
|
+
)
|
|
91
|
+
|
|
92
|
+
@classmethod
def search_by_urn(
    cls,
    urn: str,
    platform_resource_repository: PlatformResourceRepository,
    tag_sync_context: UnityCatalogTagSyncContext,
) -> Optional["UnityCatalogTagPlatformResourceId"]:
    """Look up the stored id whose platform resource lists ``urn`` as a secondary key.

    Args:
        urn: DataHub URN to search for.
        platform_resource_repository: Repository queried by resource type and
            secondary key.
        tag_sync_context: Supplies the platform instance a match must have.

    Returns:
        The id of the first matching resource, flagged as persisted and as
        existing in Unity Catalog, or None when nothing matches.
    """
    query = ElasticDocumentQuery.create_from(
        (
            PlatformResourceSearchFields.RESOURCE_TYPE,
            str(UnityCatalogTagPlatformResourceId._RESOURCE_TYPE()),
        ),
        (PlatformResourceSearchFields.SECONDARY_KEYS, urn),
    )
    candidates = list(platform_resource_repository.search_by_filter(query))
    logger.info(f"Found {len(candidates)} mapped tags for URN {urn}. {candidates}")

    for candidate in candidates:
        if not (candidate.resource_info and candidate.resource_info.value):
            logger.warning(
                f"Platform resource {candidate} does not have a resource_info value"
            )
            continue

        stored_tag = UnityCatalogTagPlatformResource(
            **candidate.resource_info.value.as_pydantic_object(
                UnityCatalogTagPlatformResource
            ).dict()
        )
        # Only accept a resource recorded for the same platform instance.
        if stored_tag.id.platform_instance == tag_sync_context.platform_instance:
            matched_id = stored_tag.id
            matched_id.exists_in_unity_catalog = True
            matched_id.persisted = True
            return matched_id

    # If we reach here, it means we did not find a mapped tag for the URN
    logger.info(
        f"No mapped tag found for URN {urn} with platform instance {tag_sync_context.platform_instance}. Creating a new UnityCatalogTagPlatformResourceId."
    )
    return None
|
|
144
|
+
|
|
145
|
+
@classmethod
def from_datahub_urn(
    cls,
    urn: str,
    platform_resource_repository: PlatformResourceRepository,
    tag_sync_context: UnityCatalogTagSyncContext,
    graph: DataHubGraph,
) -> "UnityCatalogTagPlatformResourceId":
    """Create a UnityCatalogTagPlatformResourceId from a DataHub URN.

    An id already mapped to the URN (via ``search_by_urn``) is reused.
    Otherwise a new id is generated from the URN, and it is marked as
    existing in Unity Catalog when the repository already holds a platform
    resource under its key.

    Args:
        urn: DataHub URN to resolve.
        platform_resource_repository: Repository used for both lookups.
        tag_sync_context: Supplies the platform instance for the id.
        graph: Passed through to ``generate_tag_id``.

    Raises:
        ValueError: If no id could be generated for the URN.
    """
    # First check whether a platform resource of this type is already mapped
    # to the urn; if so its id is authoritative.
    existing_platform_resource_id = cls.search_by_urn(
        urn, platform_resource_repository, tag_sync_context
    )
    if existing_platform_resource_id:
        logger.info(
            f"Found existing UnityCatalogTagPlatformResourceId for URN {urn}: {existing_platform_resource_id}"
        )
        return existing_platform_resource_id

    # Otherwise, we need to create a new UnityCatalogTagPlatformResourceId
    new_unity_catalog_tag_id = cls.generate_tag_id(graph, tag_sync_context, urn)
    if new_unity_catalog_tag_id:
        # Check whether this tag has already been ingested as a platform
        # resource in the platform resource repository.
        existing_resource = platform_resource_repository.get(
            new_unity_catalog_tag_id.to_platform_resource_key()
        )
        if existing_resource:
            logger.info(
                f"Tag {new_unity_catalog_tag_id} already exists in platform resource repository with {existing_resource}"
            )
            # TODO: Check if this is a safe assumption
            new_unity_catalog_tag_id.exists_in_unity_catalog = True
        return new_unity_catalog_tag_id

    # The message previously named "SnowflakeTagId", which is not what this
    # method builds.
    raise ValueError(
        f"Unable to create UnityCatalogTagPlatformResourceId from DataHub URN: {urn}"
    )
|
|
186
|
+
|
|
187
|
+
@classmethod
def generate_tag_id(
    cls, graph: DataHubGraph, tag_sync_context: UnityCatalogTagSyncContext, urn: str
) -> "UnityCatalogTagPlatformResourceId":
    """Derive a new id from a DataHub URN; only "tag" URNs are supported.

    ``graph`` is accepted but not read by this method.

    Raises:
        ValueError: If the URN's entity type is anything other than "tag".
    """
    entity_type = Urn.from_string(urn).entity_type
    if entity_type != "tag":
        raise ValueError(f"Unsupported entity type {entity_type} for URN {urn}")

    return UnityCatalogTagPlatformResourceId.from_datahub_tag(
        TagUrn.from_string(urn), tag_sync_context
    )
|
|
202
|
+
|
|
203
|
+
@classmethod
def from_datahub_tag(
    cls, tag_urn: TagUrn, tag_sync_context: UnityCatalogTagSyncContext
) -> "UnityCatalogTagPlatformResourceId":
    """Build an id from a DataHub tag URN.

    The URN is converted with ``UnityCatalogTag.from_urn``; the resulting key
    and (optional) value are stored in their string form, and the id is
    marked as not yet existing in Unity Catalog.
    """
    parsed_tag = UnityCatalogTag.from_urn(tag_urn)

    key_text = str(parsed_tag.key)
    value_text = None if parsed_tag.value is None else str(parsed_tag.value)
    return UnityCatalogTagPlatformResourceId(
        tag_key=key_text,
        tag_value=value_text,
        platform_instance=tag_sync_context.platform_instance,
        exists_in_unity_catalog=False,
    )
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
class UnityCatalogTagPlatformResource(BaseModel, ExternalEntity):
    """Platform-resource record describing one Unity Catalog tag.

    The record ties a tag id to the DataHub URNs linked to it and can be
    converted to a PlatformResource for storage.
    """

    # DataHub URNs linked to this tag; stored as the resource's secondary keys.
    datahub_urns: LinkedResourceSet
    # Value returned by is_managed_by_datahub().
    managed_by_datahub: bool
    # Identity of the tag; also the source of the platform resource key.
    id: UnityCatalogTagPlatformResourceId
    # NOTE(review): presumably the permitted values for this tag key; nothing
    # in this class reads it, so confirm against callers.
    allowed_values: Optional[List[str]]

    def get_id(self) -> ExternalEntityId:
        """Return the tag's id."""
        return self.id

    def is_managed_by_datahub(self) -> bool:
        """Return the stored ``managed_by_datahub`` flag."""
        return self.managed_by_datahub

    def datahub_linked_resources(self) -> LinkedResourceSet:
        """Return the set of DataHub URNs linked to this tag."""
        return self.datahub_urns

    def as_platform_resource(self) -> PlatformResource:
        """Convert this record into a PlatformResource keyed by the tag id."""
        return PlatformResource.create(
            key=self.id.to_platform_resource_key(),
            secondary_keys=list(self.datahub_urns.urns),
            value=self,
        )

    @classmethod
    def get_from_datahub(
        cls,
        unity_catalog_tag_id: UnityCatalogTagPlatformResourceId,
        platform_resource_repository: PlatformResourceRepository,
        managed_by_datahub: bool = False,
    ) -> "UnityCatalogTagPlatformResource":
        """Load the stored record for a tag id, or build an empty one.

        Args:
            unity_catalog_tag_id: Id of the tag to look up.
            platform_resource_repository: Repository searched by resource type
                and primary key.
            managed_by_datahub: Flag used only when a new record is created.

        Returns:
            The stored record when one is found; otherwise a new record with
            no linked URNs and no allowed values.
        """
        # Search with the same primary key that to_platform_resource_key()
        # stores ("<key>:<value>"). The previous "<key>/<value>" form did not
        # match the stored key.
        primary_key = unity_catalog_tag_id.to_platform_resource_key().primary_key
        platform_resources = list(
            platform_resource_repository.search_by_filter(
                ElasticDocumentQuery.create_from(
                    (
                        PlatformResourceSearchFields.RESOURCE_TYPE,
                        str(UnityCatalogTagPlatformResourceId._RESOURCE_TYPE()),
                    ),
                    (PlatformResourceSearchFields.PRIMARY_KEY, primary_key),
                )
            )
        )

        # A lone hit is accepted as-is; with several hits the platform
        # instance decides which one belongs to this id.
        single_hit = len(platform_resources) == 1
        for platform_resource in platform_resources:
            if not (
                platform_resource.resource_info
                and platform_resource.resource_info.value
            ):
                continue
            unity_catalog_tag = UnityCatalogTagPlatformResource(
                **platform_resource.resource_info.value.as_pydantic_object(
                    UnityCatalogTagPlatformResource
                ).dict()
            )
            if (
                single_hit
                or unity_catalog_tag.id.platform_instance
                == unity_catalog_tag_id.platform_instance
            ):
                return unity_catalog_tag

        # Nothing usable is stored: start a fresh record for this id.
        return cls(
            id=unity_catalog_tag_id,
            datahub_urns=LinkedResourceSet(urns=[]),
            managed_by_datahub=managed_by_datahub,
            allowed_values=None,
        )
|
|
@@ -85,8 +85,11 @@ class ClickHouseUsageConfig(ClickHouseConfig, BaseUsageConfig, EnvConfigMixin):
|
|
|
85
85
|
@platform_name("ClickHouse")
|
|
86
86
|
@config_class(ClickHouseUsageConfig)
|
|
87
87
|
@support_status(SupportStatus.CERTIFIED)
|
|
88
|
-
@capability(
|
|
88
|
+
@capability(
|
|
89
|
+
SourceCapability.DELETION_DETECTION, "Enabled by default via stateful ingestion"
|
|
90
|
+
)
|
|
89
91
|
@capability(SourceCapability.DATA_PROFILING, "Optionally enabled via configuration")
|
|
92
|
+
@capability(SourceCapability.USAGE_STATS, "Enabled by default to get usage stats")
|
|
90
93
|
@dataclasses.dataclass
|
|
91
94
|
class ClickHouseUsageSource(Source):
|
|
92
95
|
"""
|
|
@@ -15,7 +15,9 @@ from sqlalchemy.engine import Engine
|
|
|
15
15
|
import datahub.emitter.mce_builder as builder
|
|
16
16
|
from datahub.configuration.time_window_config import get_time_bucket
|
|
17
17
|
from datahub.ingestion.api.decorators import (
|
|
18
|
+
SourceCapability,
|
|
18
19
|
SupportStatus,
|
|
20
|
+
capability,
|
|
19
21
|
config_class,
|
|
20
22
|
platform_name,
|
|
21
23
|
support_status,
|
|
@@ -112,6 +114,7 @@ class TrinoUsageReport(SourceReport):
|
|
|
112
114
|
@platform_name("Trino")
|
|
113
115
|
@config_class(TrinoUsageConfig)
|
|
114
116
|
@support_status(SupportStatus.CERTIFIED)
|
|
117
|
+
@capability(SourceCapability.USAGE_STATS, "Enabled by default to get usage stats")
|
|
115
118
|
@dataclasses.dataclass
|
|
116
119
|
class TrinoUsageSource(Source):
|
|
117
120
|
"""
|