acryl-datahub 1.2.0.4rc4__py3-none-any.whl → 1.2.0.5rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-1.2.0.4rc4.dist-info → acryl_datahub-1.2.0.5rc1.dist-info}/METADATA +2410 -2410
- {acryl_datahub-1.2.0.4rc4.dist-info → acryl_datahub-1.2.0.5rc1.dist-info}/RECORD +38 -36
- datahub/_version.py +1 -1
- datahub/api/entities/common/serialized_value.py +1 -1
- datahub/api/entities/external/external_entities.py +500 -15
- datahub/ingestion/source/aws/glue.py +18 -14
- datahub/ingestion/source/aws/platform_resource_repository.py +30 -0
- datahub/ingestion/source/aws/tag_entities.py +82 -104
- datahub/ingestion/source/common/subtypes.py +1 -0
- datahub/ingestion/source/hex/api.py +2 -0
- datahub/ingestion/source/hex/mapper.py +16 -2
- datahub/ingestion/source/hex/model.py +2 -0
- datahub/ingestion/source/looker/looker_common.py +26 -0
- datahub/ingestion/source/snowflake/constants.py +1 -0
- datahub/ingestion/source/snowflake/snowflake_query.py +50 -5
- datahub/ingestion/source/snowflake/snowflake_schema.py +173 -9
- datahub/ingestion/source/snowflake/snowflake_schema_gen.py +25 -2
- datahub/ingestion/source/snowflake/snowflake_utils.py +16 -3
- datahub/ingestion/source/snowflake/snowflake_v2.py +3 -1
- datahub/ingestion/source/sql/mssql/source.py +2 -25
- datahub/ingestion/source/sql/mysql.py +54 -0
- datahub/ingestion/source/sql/postgres.py +5 -134
- datahub/ingestion/source/sql/sql_common.py +137 -0
- datahub/ingestion/source/superset.py +140 -56
- datahub/ingestion/source/unity/config.py +11 -0
- datahub/ingestion/source/unity/connection_test.py +1 -0
- datahub/ingestion/source/unity/platform_resource_repository.py +19 -0
- datahub/ingestion/source/unity/proxy.py +20 -6
- datahub/ingestion/source/unity/report.py +9 -1
- datahub/ingestion/source/unity/source.py +51 -16
- datahub/ingestion/source/unity/tag_entities.py +49 -147
- datahub/metadata/_internal_schema_classes.py +1 -1
- datahub/metadata/schema.avsc +4 -2
- datahub/metadata/schemas/Operation.avsc +4 -2
- {acryl_datahub-1.2.0.4rc4.dist-info → acryl_datahub-1.2.0.5rc1.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.2.0.4rc4.dist-info → acryl_datahub-1.2.0.5rc1.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-1.2.0.4rc4.dist-info → acryl_datahub-1.2.0.5rc1.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.2.0.4rc4.dist-info → acryl_datahub-1.2.0.5rc1.dist-info}/top_level.txt +0 -0
|
@@ -4,7 +4,6 @@ import time
|
|
|
4
4
|
from typing import Dict, Iterable, List, Optional, Set, Tuple, Union
|
|
5
5
|
from urllib.parse import urljoin
|
|
6
6
|
|
|
7
|
-
from datahub.api.entities.external.external_entities import PlatformResourceRepository
|
|
8
7
|
from datahub.api.entities.external.unity_catalog_external_entites import UnityCatalogTag
|
|
9
8
|
from datahub.emitter.mce_builder import (
|
|
10
9
|
UNKNOWN_USER,
|
|
@@ -77,6 +76,9 @@ from datahub.ingestion.source.unity.hive_metastore_proxy import (
|
|
|
77
76
|
HIVE_METASTORE,
|
|
78
77
|
HiveMetastoreProxy,
|
|
79
78
|
)
|
|
79
|
+
from datahub.ingestion.source.unity.platform_resource_repository import (
|
|
80
|
+
UnityCatalogPlatformResourceRepository,
|
|
81
|
+
)
|
|
80
82
|
from datahub.ingestion.source.unity.proxy import UnityCatalogApiProxy
|
|
81
83
|
from datahub.ingestion.source.unity.proxy_types import (
|
|
82
84
|
DATA_TYPE_REGISTRY,
|
|
@@ -187,7 +189,9 @@ class UnityCatalogSource(StatefulIngestionSourceBase, TestableSource):
|
|
|
187
189
|
platform: str = "databricks"
|
|
188
190
|
platform_instance_name: Optional[str]
|
|
189
191
|
sql_parser_schema_resolver: Optional[SchemaResolver] = None
|
|
190
|
-
platform_resource_repository: Optional[
|
|
192
|
+
platform_resource_repository: Optional[UnityCatalogPlatformResourceRepository] = (
|
|
193
|
+
None
|
|
194
|
+
)
|
|
191
195
|
|
|
192
196
|
def get_report(self) -> UnityCatalogReport:
|
|
193
197
|
return self.report
|
|
@@ -207,6 +211,7 @@ class UnityCatalogSource(StatefulIngestionSourceBase, TestableSource):
|
|
|
207
211
|
report=self.report,
|
|
208
212
|
hive_metastore_proxy=self.hive_metastore_proxy,
|
|
209
213
|
lineage_data_source=config.lineage_data_source,
|
|
214
|
+
databricks_api_page_size=config.databricks_api_page_size,
|
|
210
215
|
)
|
|
211
216
|
|
|
212
217
|
self.external_url_base = urljoin(self.config.workspace_url, "/explore/data")
|
|
@@ -239,9 +244,15 @@ class UnityCatalogSource(StatefulIngestionSourceBase, TestableSource):
|
|
|
239
244
|
# Global map of tables, for profiling
|
|
240
245
|
self.tables: FileBackedDict[Table] = FileBackedDict()
|
|
241
246
|
if self.ctx.graph:
|
|
242
|
-
self.platform_resource_repository =
|
|
243
|
-
self.ctx.graph
|
|
247
|
+
self.platform_resource_repository = UnityCatalogPlatformResourceRepository(
|
|
248
|
+
self.ctx.graph, platform_instance=self.platform_instance_name
|
|
244
249
|
)
|
|
250
|
+
else:
|
|
251
|
+
self.platform_resource_repository = None
|
|
252
|
+
|
|
253
|
+
# Include platform resource repository in report for automatic cache statistics
|
|
254
|
+
if self.config.include_tags and self.platform_resource_repository:
|
|
255
|
+
self.report.tag_urn_resolver_cache = self.platform_resource_repository
|
|
245
256
|
|
|
246
257
|
def init_hive_metastore_proxy(self):
|
|
247
258
|
self.hive_metastore_proxy: Optional[HiveMetastoreProxy] = None
|
|
@@ -1079,25 +1090,49 @@ class UnityCatalogSource(StatefulIngestionSourceBase, TestableSource):
|
|
|
1079
1090
|
materialized=False, viewLanguage="SQL", viewLogic=table.view_definition
|
|
1080
1091
|
)
|
|
1081
1092
|
|
|
1093
|
+
def get_or_create_from_unity_tag(
|
|
1094
|
+
self,
|
|
1095
|
+
unity_tag: UnityCatalogTag,
|
|
1096
|
+
platform_instance: Optional[str],
|
|
1097
|
+
managed_by_datahub: bool = False,
|
|
1098
|
+
) -> UnityCatalogTagPlatformResource:
|
|
1099
|
+
"""
|
|
1100
|
+
Optimized helper to get or create a Unity Catalog tag platform resource.
|
|
1101
|
+
This eliminates the duplicate search by skipping the from_tag method which was
|
|
1102
|
+
doing a redundant search before get_from_datahub.
|
|
1103
|
+
"""
|
|
1104
|
+
# Create the platform resource ID directly without the from_tag search
|
|
1105
|
+
platform_resource_id = UnityCatalogTagPlatformResourceId(
|
|
1106
|
+
tag_key=unity_tag.key.raw_text,
|
|
1107
|
+
tag_value=unity_tag.value.raw_text if unity_tag.value is not None else None,
|
|
1108
|
+
platform_instance=platform_instance,
|
|
1109
|
+
exists_in_unity_catalog=True, # We got it from Unity Catalog
|
|
1110
|
+
persisted=False,
|
|
1111
|
+
)
|
|
1112
|
+
|
|
1113
|
+
# Use the repository's get_entity_from_datahub method which handles
|
|
1114
|
+
# searching and caching internally - this is the ONLY search we need
|
|
1115
|
+
if self.platform_resource_repository is None:
|
|
1116
|
+
raise ValueError("Platform resource repository not initialized")
|
|
1117
|
+
return self.platform_resource_repository.get_entity_from_datahub(
|
|
1118
|
+
platform_resource_id,
|
|
1119
|
+
managed_by_datahub,
|
|
1120
|
+
)
|
|
1121
|
+
|
|
1082
1122
|
def gen_platform_resources(
|
|
1083
1123
|
self, tags: List[UnityCatalogTag]
|
|
1084
1124
|
) -> Iterable[MetadataWorkUnit]:
|
|
1085
1125
|
if self.ctx.graph and self.platform_resource_repository:
|
|
1086
1126
|
for tag in tags:
|
|
1087
1127
|
try:
|
|
1088
|
-
|
|
1089
|
-
|
|
1090
|
-
|
|
1091
|
-
|
|
1128
|
+
# Use optimized helper method that combines ID creation and entity retrieval
|
|
1129
|
+
unity_catalog_tag = self.get_or_create_from_unity_tag(
|
|
1130
|
+
tag,
|
|
1131
|
+
self.platform_instance_name,
|
|
1132
|
+
managed_by_datahub=False,
|
|
1092
1133
|
)
|
|
1093
|
-
logger.debug(
|
|
1094
|
-
|
|
1095
|
-
unity_catalog_tag = (
|
|
1096
|
-
UnityCatalogTagPlatformResource.get_from_datahub(
|
|
1097
|
-
platform_resource_id,
|
|
1098
|
-
self.platform_resource_repository,
|
|
1099
|
-
False,
|
|
1100
|
-
)
|
|
1134
|
+
logger.debug(
|
|
1135
|
+
f"Retrieved/created platform resource for tag {tag.key.raw_text}"
|
|
1101
1136
|
)
|
|
1102
1137
|
if (
|
|
1103
1138
|
tag.to_datahub_tag_urn().urn()
|
|
@@ -1,5 +1,10 @@
|
|
|
1
1
|
import logging
|
|
2
|
-
from typing import List, Optional
|
|
2
|
+
from typing import TYPE_CHECKING, List, Optional
|
|
3
|
+
|
|
4
|
+
if TYPE_CHECKING:
|
|
5
|
+
from datahub.ingestion.source.unity.platform_resource_repository import (
|
|
6
|
+
UnityCatalogPlatformResourceRepository,
|
|
7
|
+
)
|
|
3
8
|
|
|
4
9
|
from pydantic import BaseModel
|
|
5
10
|
|
|
@@ -7,17 +12,14 @@ from datahub.api.entities.external.external_entities import (
|
|
|
7
12
|
ExternalEntity,
|
|
8
13
|
ExternalEntityId,
|
|
9
14
|
LinkedResourceSet,
|
|
10
|
-
PlatformResourceRepository,
|
|
11
15
|
)
|
|
12
16
|
from datahub.api.entities.external.unity_catalog_external_entites import UnityCatalogTag
|
|
13
17
|
from datahub.api.entities.platformresource.platform_resource import (
|
|
14
18
|
PlatformResource,
|
|
15
19
|
PlatformResourceKey,
|
|
16
|
-
PlatformResourceSearchFields,
|
|
17
20
|
)
|
|
18
21
|
from datahub.ingestion.graph.client import DataHubGraph
|
|
19
22
|
from datahub.metadata.urns import TagUrn
|
|
20
|
-
from datahub.utilities.search_utils import ElasticDocumentQuery
|
|
21
23
|
from datahub.utilities.urns.urn import Urn
|
|
22
24
|
|
|
23
25
|
|
|
@@ -29,9 +31,9 @@ class UnityCatalogTagSyncContext(BaseModel):
|
|
|
29
31
|
logger = logging.getLogger(__name__)
|
|
30
32
|
|
|
31
33
|
|
|
32
|
-
class UnityCatalogTagPlatformResourceId(BaseModel, ExternalEntityId):
|
|
34
|
+
class UnityCatalogTagPlatformResourceId(ExternalEntityId):
|
|
33
35
|
"""
|
|
34
|
-
A
|
|
36
|
+
A Unity Catalog tag platform resource ID.
|
|
35
37
|
"""
|
|
36
38
|
|
|
37
39
|
tag_key: str
|
|
@@ -40,9 +42,6 @@ class UnityCatalogTagPlatformResourceId(BaseModel, ExternalEntityId):
|
|
|
40
42
|
exists_in_unity_catalog: bool = False
|
|
41
43
|
persisted: bool = False
|
|
42
44
|
|
|
43
|
-
def __hash__(self) -> int:
|
|
44
|
-
return hash(self.to_platform_resource_key().id)
|
|
45
|
-
|
|
46
45
|
# this is a hack to make sure the property is a string and not private pydantic field
|
|
47
46
|
@staticmethod
|
|
48
47
|
def _RESOURCE_TYPE() -> str:
|
|
@@ -57,26 +56,21 @@ class UnityCatalogTagPlatformResourceId(BaseModel, ExternalEntityId):
|
|
|
57
56
|
)
|
|
58
57
|
|
|
59
58
|
@classmethod
|
|
60
|
-
def
|
|
59
|
+
def get_or_create_from_tag(
|
|
61
60
|
cls,
|
|
62
61
|
tag: UnityCatalogTag,
|
|
63
|
-
|
|
64
|
-
platform_resource_repository: PlatformResourceRepository,
|
|
62
|
+
platform_resource_repository: "UnityCatalogPlatformResourceRepository",
|
|
65
63
|
exists_in_unity_catalog: bool = False,
|
|
66
64
|
) -> "UnityCatalogTagPlatformResourceId":
|
|
67
65
|
"""
|
|
68
66
|
Creates a UnityCatalogTagPlatformResourceId from a UnityCatalogTag.
|
|
69
67
|
"""
|
|
70
68
|
|
|
71
|
-
existing_platform_resource =
|
|
72
|
-
tag.to_datahub_tag_urn().urn()
|
|
73
|
-
platform_resource_repository=platform_resource_repository,
|
|
74
|
-
tag_sync_context=UnityCatalogTagSyncContext(
|
|
75
|
-
platform_instance=platform_instance
|
|
76
|
-
),
|
|
69
|
+
existing_platform_resource = platform_resource_repository.search_entity_by_urn(
|
|
70
|
+
tag.to_datahub_tag_urn().urn()
|
|
77
71
|
)
|
|
78
72
|
if existing_platform_resource:
|
|
79
|
-
logger.
|
|
73
|
+
logger.debug(
|
|
80
74
|
f"Found existing UnityCatalogTagPlatformResourceId for tag {tag.key.raw_text}: {existing_platform_resource}"
|
|
81
75
|
)
|
|
82
76
|
return existing_platform_resource
|
|
@@ -84,105 +78,46 @@ class UnityCatalogTagPlatformResourceId(BaseModel, ExternalEntityId):
|
|
|
84
78
|
return UnityCatalogTagPlatformResourceId(
|
|
85
79
|
tag_key=tag.key.raw_text,
|
|
86
80
|
tag_value=tag.value.raw_text if tag.value is not None else None,
|
|
87
|
-
platform_instance=platform_instance,
|
|
81
|
+
platform_instance=platform_resource_repository.platform_instance,
|
|
88
82
|
exists_in_unity_catalog=exists_in_unity_catalog,
|
|
89
83
|
persisted=False,
|
|
90
84
|
)
|
|
91
85
|
|
|
92
|
-
@classmethod
|
|
93
|
-
def search_by_urn(
|
|
94
|
-
cls,
|
|
95
|
-
urn: str,
|
|
96
|
-
platform_resource_repository: PlatformResourceRepository,
|
|
97
|
-
tag_sync_context: UnityCatalogTagSyncContext,
|
|
98
|
-
) -> Optional["UnityCatalogTagPlatformResourceId"]:
|
|
99
|
-
mapped_tags = [
|
|
100
|
-
t
|
|
101
|
-
for t in platform_resource_repository.search_by_filter(
|
|
102
|
-
ElasticDocumentQuery.create_from(
|
|
103
|
-
(
|
|
104
|
-
PlatformResourceSearchFields.RESOURCE_TYPE,
|
|
105
|
-
str(UnityCatalogTagPlatformResourceId._RESOURCE_TYPE()),
|
|
106
|
-
),
|
|
107
|
-
(PlatformResourceSearchFields.SECONDARY_KEYS, urn),
|
|
108
|
-
)
|
|
109
|
-
)
|
|
110
|
-
]
|
|
111
|
-
logger.info(
|
|
112
|
-
f"Found {len(mapped_tags)} mapped tags for URN {urn}. {mapped_tags}"
|
|
113
|
-
)
|
|
114
|
-
if len(mapped_tags) > 0:
|
|
115
|
-
for platform_resource in mapped_tags:
|
|
116
|
-
if (
|
|
117
|
-
platform_resource.resource_info
|
|
118
|
-
and platform_resource.resource_info.value
|
|
119
|
-
):
|
|
120
|
-
unity_catalog_tag = UnityCatalogTagPlatformResource(
|
|
121
|
-
**platform_resource.resource_info.value.as_pydantic_object(
|
|
122
|
-
UnityCatalogTagPlatformResource
|
|
123
|
-
).dict()
|
|
124
|
-
)
|
|
125
|
-
if (
|
|
126
|
-
unity_catalog_tag.id.platform_instance
|
|
127
|
-
== tag_sync_context.platform_instance
|
|
128
|
-
):
|
|
129
|
-
unity_catalog_tag_id = unity_catalog_tag.id
|
|
130
|
-
unity_catalog_tag_id.exists_in_unity_catalog = True
|
|
131
|
-
unity_catalog_tag_id.persisted = True
|
|
132
|
-
return unity_catalog_tag_id
|
|
133
|
-
else:
|
|
134
|
-
logger.warning(
|
|
135
|
-
f"Platform resource {platform_resource} does not have a resource_info value"
|
|
136
|
-
)
|
|
137
|
-
continue
|
|
138
|
-
|
|
139
|
-
# If we reach here, it means we did not find a mapped tag for the URN
|
|
140
|
-
logger.info(
|
|
141
|
-
f"No mapped tag found for URN {urn} with platform instance {tag_sync_context.platform_instance}. Creating a new UnityCatalogTagPlatformResourceId."
|
|
142
|
-
)
|
|
143
|
-
return None
|
|
144
|
-
|
|
145
86
|
@classmethod
|
|
146
87
|
def from_datahub_urn(
|
|
147
88
|
cls,
|
|
148
89
|
urn: str,
|
|
149
|
-
platform_resource_repository: PlatformResourceRepository,
|
|
150
90
|
tag_sync_context: UnityCatalogTagSyncContext,
|
|
91
|
+
platform_resource_repository: "UnityCatalogPlatformResourceRepository",
|
|
151
92
|
graph: DataHubGraph,
|
|
152
93
|
) -> "UnityCatalogTagPlatformResourceId":
|
|
153
94
|
"""
|
|
154
95
|
Creates a UnityCatalogTagPlatformResourceId from a DataHub URN.
|
|
155
96
|
"""
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
# If we do, we can use it to create the UnityCatalogTagPlatformResourceId
|
|
159
|
-
# Else, we need to generate a new UnityCatalogTagPlatformResourceId
|
|
160
|
-
existing_platform_resource_id = cls.search_by_urn(
|
|
161
|
-
urn, platform_resource_repository, tag_sync_context
|
|
97
|
+
existing_platform_resource_id = (
|
|
98
|
+
platform_resource_repository.search_entity_by_urn(urn)
|
|
162
99
|
)
|
|
163
100
|
if existing_platform_resource_id:
|
|
164
|
-
logger.info(
|
|
165
|
-
f"Found existing UnityCatalogTagPlatformResourceId for URN {urn}: {existing_platform_resource_id}"
|
|
166
|
-
)
|
|
167
101
|
return existing_platform_resource_id
|
|
168
102
|
|
|
169
|
-
# Otherwise, we need to create a new UnityCatalogTagPlatformResourceId
|
|
170
103
|
new_unity_catalog_tag_id = cls.generate_tag_id(graph, tag_sync_context, urn)
|
|
171
104
|
if new_unity_catalog_tag_id:
|
|
172
|
-
# we then check if this tag has already been ingested as a platform
|
|
173
|
-
# resource in the platform resource repository
|
|
174
105
|
resource_key = platform_resource_repository.get(
|
|
175
106
|
new_unity_catalog_tag_id.to_platform_resource_key()
|
|
176
107
|
)
|
|
177
108
|
if resource_key:
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
109
|
+
# Create a new ID with the correct state instead of mutating
|
|
110
|
+
return UnityCatalogTagPlatformResourceId(
|
|
111
|
+
tag_key=new_unity_catalog_tag_id.tag_key,
|
|
112
|
+
tag_value=new_unity_catalog_tag_id.tag_value,
|
|
113
|
+
platform_instance=new_unity_catalog_tag_id.platform_instance,
|
|
114
|
+
exists_in_unity_catalog=True, # This tag exists in Unity Catalog
|
|
115
|
+
persisted=new_unity_catalog_tag_id.persisted,
|
|
183
116
|
)
|
|
184
117
|
return new_unity_catalog_tag_id
|
|
185
|
-
raise ValueError(
|
|
118
|
+
raise ValueError(
|
|
119
|
+
f"Unable to create Unity Catalog tag ID from DataHub URN: {urn}"
|
|
120
|
+
)
|
|
186
121
|
|
|
187
122
|
@classmethod
|
|
188
123
|
def generate_tag_id(
|
|
@@ -191,14 +126,11 @@ class UnityCatalogTagPlatformResourceId(BaseModel, ExternalEntityId):
|
|
|
191
126
|
parsed_urn = Urn.from_string(urn)
|
|
192
127
|
entity_type = parsed_urn.entity_type
|
|
193
128
|
if entity_type == "tag":
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
TagUrn.from_string(urn), tag_sync_context
|
|
197
|
-
)
|
|
129
|
+
return UnityCatalogTagPlatformResourceId.from_datahub_tag(
|
|
130
|
+
TagUrn.from_string(urn), tag_sync_context
|
|
198
131
|
)
|
|
199
132
|
else:
|
|
200
133
|
raise ValueError(f"Unsupported entity type {entity_type} for URN {urn}")
|
|
201
|
-
return new_unity_catalog_tag_id
|
|
202
134
|
|
|
203
135
|
@classmethod
|
|
204
136
|
def from_datahub_tag(
|
|
@@ -214,7 +146,7 @@ class UnityCatalogTagPlatformResourceId(BaseModel, ExternalEntityId):
|
|
|
214
146
|
)
|
|
215
147
|
|
|
216
148
|
|
|
217
|
-
class UnityCatalogTagPlatformResource(BaseModel, ExternalEntity):
|
|
149
|
+
class UnityCatalogTagPlatformResource(ExternalEntity):
|
|
218
150
|
datahub_urns: LinkedResourceSet
|
|
219
151
|
managed_by_datahub: bool
|
|
220
152
|
id: UnityCatalogTagPlatformResourceId
|
|
@@ -237,58 +169,28 @@ class UnityCatalogTagPlatformResource(BaseModel, ExternalEntity):
|
|
|
237
169
|
)
|
|
238
170
|
|
|
239
171
|
@classmethod
|
|
240
|
-
def
|
|
172
|
+
def create_default(
|
|
241
173
|
cls,
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
managed_by_datahub: bool = False,
|
|
174
|
+
entity_id: ExternalEntityId,
|
|
175
|
+
managed_by_datahub: bool,
|
|
245
176
|
) -> "UnityCatalogTagPlatformResource":
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
]
|
|
262
|
-
if len(platform_resources) == 1:
|
|
263
|
-
platform_resource: PlatformResource = platform_resources[0]
|
|
264
|
-
if (
|
|
265
|
-
platform_resource.resource_info
|
|
266
|
-
and platform_resource.resource_info.value
|
|
267
|
-
):
|
|
268
|
-
unity_catalog_tag = UnityCatalogTagPlatformResource(
|
|
269
|
-
**platform_resource.resource_info.value.as_pydantic_object(
|
|
270
|
-
UnityCatalogTagPlatformResource
|
|
271
|
-
).dict()
|
|
272
|
-
)
|
|
273
|
-
return unity_catalog_tag
|
|
274
|
-
else:
|
|
275
|
-
for platform_resource in platform_resources:
|
|
276
|
-
if (
|
|
277
|
-
platform_resource.resource_info
|
|
278
|
-
and platform_resource.resource_info.value
|
|
279
|
-
):
|
|
280
|
-
unity_catalog_tag = UnityCatalogTagPlatformResource(
|
|
281
|
-
**platform_resource.resource_info.value.as_pydantic_object(
|
|
282
|
-
UnityCatalogTagPlatformResource
|
|
283
|
-
).dict()
|
|
284
|
-
)
|
|
285
|
-
if (
|
|
286
|
-
unity_catalog_tag.id.platform_instance
|
|
287
|
-
== unity_catalog_tag_id.platform_instance
|
|
288
|
-
):
|
|
289
|
-
return unity_catalog_tag
|
|
177
|
+
"""Create a default Unity Catalog tag entity when none found in DataHub."""
|
|
178
|
+
# Type narrowing: we know this will be a UnityCatalogTagPlatformResourceId
|
|
179
|
+
assert isinstance(entity_id, UnityCatalogTagPlatformResourceId), (
|
|
180
|
+
f"Expected UnityCatalogTagPlatformResourceId, got {type(entity_id)}"
|
|
181
|
+
)
|
|
182
|
+
|
|
183
|
+
# Create a new entity ID with correct default state instead of mutating
|
|
184
|
+
default_entity_id = UnityCatalogTagPlatformResourceId(
|
|
185
|
+
tag_key=entity_id.tag_key,
|
|
186
|
+
tag_value=entity_id.tag_value,
|
|
187
|
+
platform_instance=entity_id.platform_instance,
|
|
188
|
+
exists_in_unity_catalog=False, # New entities don't exist in Unity Catalog yet
|
|
189
|
+
persisted=False, # New entities are not persisted yet
|
|
190
|
+
)
|
|
191
|
+
|
|
290
192
|
return cls(
|
|
291
|
-
id=
|
|
193
|
+
id=default_entity_id,
|
|
292
194
|
datahub_urns=LinkedResourceSet(urns=[]),
|
|
293
195
|
managed_by_datahub=managed_by_datahub,
|
|
294
196
|
allowed_values=None,
|
|
@@ -5800,7 +5800,7 @@ class OperationTypeClass(object):
|
|
|
5800
5800
|
"""Asset was dropped"""
|
|
5801
5801
|
|
|
5802
5802
|
CUSTOM = "CUSTOM"
|
|
5803
|
-
"""Custom asset operation"""
|
|
5803
|
+
"""Custom asset operation. If this is set, ensure customOperationType is filled out."""
|
|
5804
5804
|
|
|
5805
5805
|
UNKNOWN = "UNKNOWN"
|
|
5806
5806
|
|
datahub/metadata/schema.avsc
CHANGED
|
@@ -9841,7 +9841,7 @@
|
|
|
9841
9841
|
"symbolDocs": {
|
|
9842
9842
|
"ALTER": "Asset was altered",
|
|
9843
9843
|
"CREATE": "Asset was created",
|
|
9844
|
-
"CUSTOM": "Custom asset operation",
|
|
9844
|
+
"CUSTOM": "Custom asset operation. If this is set, ensure customOperationType is filled out.",
|
|
9845
9845
|
"DELETE": "Rows were deleted",
|
|
9846
9846
|
"DROP": "Asset was dropped",
|
|
9847
9847
|
"INSERT": "Rows were inserted",
|
|
@@ -9941,7 +9941,9 @@
|
|
|
9941
9941
|
"fieldName": "lastOperationTime",
|
|
9942
9942
|
"fieldType": "DATETIME"
|
|
9943
9943
|
},
|
|
9944
|
-
"TimeseriesField": {
|
|
9944
|
+
"TimeseriesField": {
|
|
9945
|
+
"fieldType": "DATETIME"
|
|
9946
|
+
},
|
|
9945
9947
|
"type": "long",
|
|
9946
9948
|
"name": "lastUpdatedTimestamp",
|
|
9947
9949
|
"doc": "The time at which the operation occurred. Would be better named 'operationTime'"
|
|
@@ -150,7 +150,7 @@
|
|
|
150
150
|
"symbolDocs": {
|
|
151
151
|
"ALTER": "Asset was altered",
|
|
152
152
|
"CREATE": "Asset was created",
|
|
153
|
-
"CUSTOM": "Custom asset operation",
|
|
153
|
+
"CUSTOM": "Custom asset operation. If this is set, ensure customOperationType is filled out.",
|
|
154
154
|
"DELETE": "Rows were deleted",
|
|
155
155
|
"DROP": "Asset was dropped",
|
|
156
156
|
"INSERT": "Rows were inserted",
|
|
@@ -250,7 +250,9 @@
|
|
|
250
250
|
"fieldName": "lastOperationTime",
|
|
251
251
|
"fieldType": "DATETIME"
|
|
252
252
|
},
|
|
253
|
-
"TimeseriesField": {
|
|
253
|
+
"TimeseriesField": {
|
|
254
|
+
"fieldType": "DATETIME"
|
|
255
|
+
},
|
|
254
256
|
"type": "long",
|
|
255
257
|
"name": "lastUpdatedTimestamp",
|
|
256
258
|
"doc": "The time at which the operation occurred. Would be better named 'operationTime'"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|